diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
73 files changed, 3889 insertions, 1084 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h deleted file mode 100644 index f69257eaa1cc..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_engines.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef DEBUGFS_ENGINES_H -#define DEBUGFS_ENGINES_H - -struct intel_gt; -struct dentry; - -void debugfs_engines_register(struct intel_gt *gt, struct dentry *root); - -#endif /* DEBUGFS_ENGINES_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c deleted file mode 100644 index 591eb60785db..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.c +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include <linux/debugfs.h> - -#include "debugfs_engines.h" -#include "debugfs_gt.h" -#include "debugfs_gt_pm.h" -#include "intel_sseu_debugfs.h" -#include "uc/intel_uc_debugfs.h" -#include "i915_drv.h" - -void debugfs_gt_register(struct intel_gt *gt) -{ - struct dentry *root; - - if (!gt->i915->drm.primary->debugfs_root) - return; - - root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); - if (IS_ERR(root)) - return; - - debugfs_engines_register(gt, root); - debugfs_gt_pm_register(gt, root); - intel_sseu_debugfs_register(gt, root); - - intel_uc_debugfs_register(>->uc, root); -} - -void intel_gt_debugfs_register_files(struct dentry *root, - const struct debugfs_gt_file *files, - unsigned long count, void *data) -{ - while (count--) { - umode_t mode = files->fops->write ? 0644 : 0444; - - if (!files->eval || files->eval(data)) - debugfs_create_file(files->name, - mode, root, data, - files->fops); - - files++; - } -} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h deleted file mode 100644 index 4cf5f5c9da7d..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef DEBUGFS_GT_PM_H -#define DEBUGFS_GT_PM_H - -struct intel_gt; -struct dentry; - -void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root); - -#endif /* DEBUGFS_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1aee5e6b1b23..890191f286e3 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -429,7 +429,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) mutex_init(&ppgtt->flush); mutex_init(&ppgtt->pin_mutex); - ppgtt_init(&ppgtt->base, gt); + ppgtt_init(&ppgtt->base, gt, 0); ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); ppgtt->base.vm.top = 1; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 6e0e52eeb87a..037a9a6e4889 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -548,6 +548,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + clflush_cache_range(vaddr, PAGE_SIZE); page_size = I915_GTT_PAGE_SIZE_64K; /* @@ -568,6 +569,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); + clflush_cache_range(vaddr, PAGE_SIZE); } } @@ -751,7 +753,8 @@ err_pd: * space. * */ -struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags) { struct i915_ppgtt *ppgtt; int err; @@ -760,7 +763,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) if (!ppgtt) return ERR_PTR(-ENOMEM); - ppgtt_init(ppgtt, gt); + ppgtt_init(ppgtt, gt, lmem_pt_obj_flags); ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h index b9028c2ad3c7..f541d19264b4 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -12,7 +12,9 @@ struct i915_address_space; struct intel_gt; enum i915_cache_level; -struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt); +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags); + u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 17ca4dc4d0cb..5634d14052bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce, if (err) goto err_post_unpin; + intel_engine_pm_might_get(ce->engine); + if (unlikely(intel_context_is_closed(ce))) { err = -ENOENT; goto err_unlock; @@ -395,19 +397,22 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_state.lock); INIT_LIST_HEAD(&ce->guc_state.fences); + INIT_LIST_HEAD(&ce->guc_state.requests); + + ce->guc_id.id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id.link); - spin_lock_init(&ce->guc_active.lock); - INIT_LIST_HEAD(&ce->guc_active.requests); + INIT_LIST_HEAD(&ce->destroyed_link); - ce->guc_id = GUC_INVALID_LRC_ID; - INIT_LIST_HEAD(&ce->guc_id_link); + INIT_LIST_HEAD(&ce->parallel.child_list); /* * Initialize fence to be complete as this is expected to be complete * unless there is a pending schedule disable outstanding. */ - i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_init(&ce->guc_state.blocked, + sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_state.blocked); i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); @@ -415,13 +420,20 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) void intel_context_fini(struct intel_context *ce) { + struct intel_context *child, *next; + if (ce->timeline) intel_timeline_put(ce->timeline); i915_vm_put(ce->vm); + /* Need to put the creation ref for the children */ + if (intel_context_is_parent(ce)) + for_each_child_safe(ce, child, next) + intel_context_put(child); + mutex_destroy(&ce->pin_mutex); i915_active_fini(&ce->active); - i915_sw_fence_fini(&ce->guc_blocked); + i915_sw_fence_fini(&ce->guc_state.blocked); } void i915_context_module_exit(void) @@ -517,24 +529,53 @@ retry: struct i915_request *intel_context_find_active_request(struct intel_context *ce) { + struct intel_context *parent = intel_context_to_parent(ce); struct i915_request *rq, *active = NULL; unsigned long flags; GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); - spin_lock_irqsave(&ce->guc_active.lock, flags); - list_for_each_entry_reverse(rq, &ce->guc_active.requests, + /* + * We search the parent list to find an active request on the submitted + * context. The parent list contains the requests for all the contexts + * in the relationship so we have to do a compare of each request's + * context. + */ + spin_lock_irqsave(&parent->guc_state.lock, flags); + list_for_each_entry_reverse(rq, &parent->guc_state.requests, sched.link) { + if (rq->context != ce) + continue; if (i915_request_completed(rq)) break; active = rq; } - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock_irqrestore(&parent->guc_state.lock, flags); return active; } +void intel_context_bind_parent_child(struct intel_context *parent, + struct intel_context *child) +{ + /* + * Callers responsibility to validate that this function is used + * correctly but we use GEM_BUG_ON here ensure that they do. + */ + GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); + GEM_BUG_ON(intel_context_is_pinned(parent)); + GEM_BUG_ON(intel_context_is_child(parent)); + GEM_BUG_ON(intel_context_is_pinned(child)); + GEM_BUG_ON(intel_context_is_child(child)); + GEM_BUG_ON(intel_context_is_parent(child)); + + parent->parallel.child_index = parent->parallel.number_children++; + list_add_tail(&child->parallel.child_link, + &parent->parallel.child_list); + child->parallel.parent = parent; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index c41098950746..246c37d72cd7 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce); int intel_context_reconfigure_sseu(struct intel_context *ce, const struct intel_sseu sseu); +#define PARENT_SCRATCH_SIZE PAGE_SIZE + +static inline bool intel_context_is_child(struct intel_context *ce) +{ + return !!ce->parallel.parent; +} + +static inline bool intel_context_is_parent(struct intel_context *ce) +{ + return !!ce->parallel.number_children; +} + +static inline bool intel_context_is_pinned(struct intel_context *ce); + +static inline struct intel_context * +intel_context_to_parent(struct intel_context *ce) +{ + if (intel_context_is_child(ce)) { + /* + * The parent holds ref count to the child so it is always safe + * for the parent to access the child, but the child has a + * pointer to the parent without a ref. To ensure this is safe + * the child should only access the parent pointer while the + * parent is pinned. + */ + GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); + + return ce->parallel.parent; + } else { + return ce; + } +} + +static inline bool intel_context_is_parallel(struct intel_context *ce) +{ + return intel_context_is_child(ce) || intel_context_is_parent(ce); +} + +void intel_context_bind_parent_child(struct intel_context *parent, + struct intel_context *child); + +#define for_each_child(parent, ce)\ + list_for_each_entry(ce, &(parent)->parallel.child_list,\ + parallel.child_link) +#define for_each_child_safe(parent, ce, cn)\ + list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\ + parallel.child_link) + /** * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context * @ce - the context @@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce) struct intel_timeline *tl = ce->timeline; int err; - err = mutex_lock_interruptible(&tl->mutex); + if (intel_context_is_parent(ce)) + err = mutex_lock_interruptible_nested(&tl->mutex, 0); + else if (intel_context_is_child(ce)) + err = mutex_lock_interruptible_nested(&tl->mutex, + ce->parallel.child_index + 1); + else + err = mutex_lock_interruptible(&tl->mutex); if (err) return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..9e0177dc5484 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -55,9 +55,13 @@ struct intel_context_ops { void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); - /* virtual engine/context interface */ + /* virtual/parallel engine/context interface */ struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, - unsigned int count); + unsigned int count, + unsigned long flags); + struct intel_context *(*create_parallel)(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width); struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, unsigned int sibling); }; @@ -112,6 +116,8 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 +#define CONTEXT_GUC_INIT 10 +#define CONTEXT_PERMA_PIN 11 struct { u64 timeout_us; @@ -152,52 +158,141 @@ struct intel_context { /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; + /** + * pinned_contexts_link: List link for the engine's pinned contexts. + * This is only used if this is a perma-pinned kernel context and + * the list is assumed to only be manipulated during driver load + * or unload time so no mutex protection currently. + */ + struct list_head pinned_contexts_link; + u8 wa_bb_page; /* if set, page num reserved for context workarounds */ struct { - /** lock: protects everything in guc_state */ + /** @lock: protects everything in guc_state */ spinlock_t lock; /** - * sched_state: scheduling state of this context using GuC + * @sched_state: scheduling state of this context using GuC * submission */ - u16 sched_state; + u32 sched_state; /* - * fences: maintains of list of requests that have a submit - * fence related to GuC submission + * @fences: maintains a list of requests that are currently + * being fenced until a GuC operation completes */ struct list_head fences; + /** + * @blocked: fence used to signal when the blocking of a + * context's submissions is complete. + */ + struct i915_sw_fence blocked; + /** @number_committed_requests: number of committed requests */ + int number_committed_requests; + /** @requests: list of active requests on this context */ + struct list_head requests; + /** @prio: the context's current guc priority */ + u8 prio; + /** + * @prio_count: a counter of the number requests in flight in + * each priority bucket + */ + u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_state; struct { - /** lock: protects everything in guc_active */ - spinlock_t lock; - /** requests: active requests on this context */ - struct list_head requests; - } guc_active; - - /* GuC scheduling state flags that do not require a lock. */ - atomic_t guc_sched_state_no_lock; - - /* GuC LRC descriptor ID */ - u16 guc_id; + /** + * @id: handle which is used to uniquely identify this context + * with the GuC, protected by guc->submission_state.lock + */ + u16 id; + /** + * @ref: the number of references to the guc_id, when + * transitioning in and out of zero protected by + * guc->submission_state.lock + */ + atomic_t ref; + /** + * @link: in guc->guc_id_list when the guc_id has no refs but is + * still valid, protected by guc->submission_state.lock + */ + struct list_head link; + } guc_id; - /* GuC LRC descriptor reference count */ - atomic_t guc_id_ref; + /** + * @destroyed_link: link in guc->submission_state.destroyed_contexts, in + * list when context is pending to be destroyed (deregistered with the + * GuC), protected by guc->submission_state.lock + */ + struct list_head destroyed_link; - /* - * GuC ID link - in list when unpinned but guc_id still valid in GuC + /** @parallel: sub-structure for parallel submission members */ + struct { + union { + /** + * @child_list: parent's list of children + * contexts, no protection as immutable after context + * creation + */ + struct list_head child_list; + /** + * @child_link: child's link into parent's list of + * children + */ + struct list_head child_link; + }; + /** @parent: pointer to parent if child */ + struct intel_context *parent; + /** + * @last_rq: last request submitted on a parallel context, used + * to insert submit fences between requests in the parallel + * context + */ + struct i915_request *last_rq; + /** + * @fence_context: fence context composite fence when doing + * parallel submission + */ + u64 fence_context; + /** + * @seqno: seqno for composite fence when doing parallel + * submission + */ + u32 seqno; + /** @number_children: number of children if parent */ + u8 number_children; + /** @child_index: index into child_list if child */ + u8 child_index; + /** @guc: GuC specific members for parallel submission */ + struct { + /** @wqi_head: head pointer in work queue */ + u16 wqi_head; + /** @wqi_tail: tail pointer in work queue */ + u16 wqi_tail; + /** + * @parent_page: page in context state (ce->state) used + * by parent for work queue, process descriptor + */ + u8 parent_page; + } guc; + } parallel; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** + * @drop_schedule_enable: Force drop of schedule enable G2H for selftest */ - struct list_head guc_id_link; + bool drop_schedule_enable; - /* GuC context blocked fence */ - struct i915_sw_fence guc_blocked; + /** + * @drop_schedule_disable: Force drop of schedule disable G2H for + * selftest + */ + bool drop_schedule_disable; - /* - * GuC priority management + /** + * @drop_deregister: Force drop of deregister G2H for selftest */ - u8 guc_prio; - u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 87579affb952..08559ace0ada 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -2,6 +2,7 @@ #ifndef _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_ +#include <asm/cacheflush.h> #include <drm/drm_util.h> #include <linux/hashtable.h> @@ -175,6 +176,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) +#define I915_GEM_HWS_PXP 0x60 +#define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 #define I915_HWS_CSB_BUF0_INDEX 0x10 @@ -273,15 +276,25 @@ static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine) static inline bool intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return false; return intel_engine_has_preemption(engine); } +#define FORCE_VIRTUAL BIT(0) struct intel_context * intel_engine_create_virtual(struct intel_engine_cs **siblings, - unsigned int count); + unsigned int count, unsigned long flags); + +static inline struct intel_context * +intel_engine_create_parallel(struct intel_engine_cs **engines, + unsigned int num_engines, + unsigned int width) +{ + GEM_BUG_ON(!engines[0]->cops->create_parallel); + return engines[0]->cops->create_parallel(engines, num_engines, width); +} static inline bool intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) @@ -300,7 +313,7 @@ intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) static inline bool intel_engine_has_heartbeat(const struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return false; if (intel_engine_is_virtual(engine)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0d9105a31d84..ff6753ccb129 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) GEM_DEBUG_WARN_ON(iir); } -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, + u8 logical_instance) { const struct engine_info *info = &intel_engines[id]; struct drm_i915_private *i915 = gt->i915; @@ -320,6 +321,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); + INIT_LIST_HEAD(&engine->pinned_contexts_list); engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); @@ -334,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->class = info->class; engine->instance = info->instance; + engine->logical_mask = BIT(logical_instance); __sprint_engine_name(engine); engine->props.heartbeat_interval_ms = @@ -398,7 +401,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) { - if (GRAPHICS_VER(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9 && + engine->gt->info.sfc_mask & BIT(engine->instance)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; } @@ -474,18 +478,25 @@ void intel_engines_free(struct intel_gt *gt) } static -bool gen11_vdbox_has_sfc(struct drm_i915_private *i915, +bool gen11_vdbox_has_sfc(struct intel_gt *gt, unsigned int physical_vdbox, unsigned int logical_vdbox, u16 vdbox_mask) { + struct drm_i915_private *i915 = gt->i915; + /* * In Gen11, only even numbered logical VDBOXes are hooked * up to an SFC (Scaler & Format Converter) unit. * In Gen12, Even numbered physical instance always are connected * to an SFC. Odd numbered physical instances have SFC only if * previous even instance is fused off. + * + * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field + * in the fuse register that tells us whether a specific SFC is present. */ - if (GRAPHICS_VER(i915) == 12) + if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0) + return false; + else if (GRAPHICS_VER(i915) == 12) return (physical_vdbox % 2 == 0) || !(BIT(physical_vdbox - 1) & vdbox_mask); else if (GRAPHICS_VER(i915) == 11) @@ -512,7 +523,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; unsigned int logical_vdbox = 0; unsigned int i; - u32 media_fuse; + u32 media_fuse, fuse1; u16 vdbox_mask; u16 vebox_mask; @@ -534,6 +545,13 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> GEN11_GT_VEBOX_DISABLE_SHIFT; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1); + gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); + } else { + gt->info.sfc_mask = ~0; + } + for (i = 0; i < I915_MAX_VCS; i++) { if (!HAS_ENGINE(gt, _VCS(i))) { vdbox_mask &= ~BIT(i); @@ -546,7 +564,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) continue; } - if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask)) + if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask)) gt->info.vdbox_sfc_access |= BIT(i); logical_vdbox++; } @@ -572,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) return info->engine_mask; } +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids, + u8 class, const u8 *map, u8 num_instances) +{ + int i, j; + u8 current_logical_id = 0; + + for (j = 0; j < num_instances; ++j) { + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { + if (!HAS_ENGINE(gt, i) || + intel_engines[i].class != class) + continue; + + if (intel_engines[i].instance == map[j]) { + logical_ids[intel_engines[i].instance] = + current_logical_id++; + break; + } + } + } +} + +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class) +{ + int i; + u8 map[MAX_ENGINE_INSTANCE + 1]; + + for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) + map[i] = i; + populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map)); +} + /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @gt: pointer to struct intel_gt @@ -583,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; const unsigned int engine_mask = init_engine_mask(gt); unsigned int mask = 0; - unsigned int i; + unsigned int i, class; + u8 logical_ids[MAX_ENGINE_INSTANCE + 1]; int err; drm_WARN_ON(&i915->drm, engine_mask == 0); @@ -593,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt) if (i915_inject_probe_failure(i915)) return -ENODEV; - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - if (!HAS_ENGINE(gt, i)) - continue; + for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { + setup_logical_ids(gt, logical_ids, class); - err = intel_engine_setup(gt, i); - if (err) - goto cleanup; + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { + u8 instance = intel_engines[i].instance; - mask |= BIT(i); + if (intel_engines[i].class != class || + !HAS_ENGINE(gt, i)) + continue; + + err = intel_engine_setup(gt, i, + logical_ids[instance]); + if (err) + goto cleanup; + + mask |= BIT(i); + } } /* @@ -875,6 +933,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, return ERR_PTR(err); } + list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list); + /* * Give our perma-pinned kernel timelines a separate lockdep class, * so that we can use them from within the normal user timelines @@ -897,6 +957,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce) list_del(&ce->timeline->engine_link); mutex_unlock(&hwsp->vm->mutex); + list_del(&ce->pinned_contexts_link); intel_context_unpin(ce); intel_context_put(ce); } @@ -1163,16 +1224,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, u32 mmio_base = engine->mmio_base; int slice; int subslice; + int iter; memset(instdone, 0, sizeof(*instdone)); - switch (GRAPHICS_VER(i915)) { - default: + if (GRAPHICS_VER(i915) >= 8) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1182,21 +1243,39 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common_extra[1] = intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); - instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } + } else { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } } - break; - case 7: + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + instdone->geom_svg[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + XEHPG_INSTDONE_GEOM_SVG); + } + } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1204,22 +1283,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); instdone->row[0][0] = intel_uncore_read(uncore, GEN7_ROW_INSTDONE); - - break; - case 6: - case 5: - case 4: + } else if (GRAPHICS_VER(i915) >= 4) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = intel_uncore_read(uncore, GEN4_INSTDONE1); - break; - case 3: - case 2: + } else { instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); - break; } } @@ -1881,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) struct intel_context * intel_engine_create_virtual(struct intel_engine_cs **siblings, - unsigned int count) + unsigned int count, unsigned long flags) { if (count == 0) return ERR_PTR(-EINVAL); - if (count == 1) + if (count == 1 && !(flags & FORCE_VIRTUAL)) return intel_context_create(siblings[0]); GEM_BUG_ON(!siblings[0]->cops->create_virtual); - return siblings[0]->cops->create_virtual(siblings, count); + return siblings[0]->cops->create_virtual(siblings, count, flags); } struct i915_request * diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 74775ae961b2..a3698f611f45 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -207,7 +207,7 @@ out: void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return; next_heartbeat(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 1f07ac4e0672..a1334b48dde7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) unsigned long flags; bool result = true; + /* + * This is execlist specific behaviour intended to ensure the GPU is + * idle by switching to a known 'safe' context. With GuC submission, the + * same idle guarantee is achieved by other means (disabling + * scheduling). Further, switching to a 'safe' context has no effect + * with GuC submission as the scheduler can just switch back again. + * + * FIXME: Move this backend scheduler specific behaviour into the + * scheduler backend. + */ + if (intel_engine_uses_guc(engine)) + return true; + /* GPU is pointing to the void, as good as in the kernel context. */ if (intel_gt_is_wedged(engine->gt)) return true; @@ -298,6 +311,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_engine_init_heartbeat(engine); } +/** + * intel_engine_reset_pinned_contexts - Reset the pinned contexts of + * an engine. + * @engine: The engine whose pinned contexts we want to reset. + * + * Typically the pinned context LMEM images lose or get their content + * corrupted on suspend. This function resets their images. + */ +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + list_for_each_entry(ce, &engine->pinned_contexts_list, + pinned_contexts_link) { + /* kernel context gets reset at __engine_unpark() */ + if (ce == engine->kernel_context) + continue; + + dbg_poison_ce(ce); + ce->ops->reset(ce); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_engine_pm.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 70ea46d6cfb0..d68675925b79 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -6,9 +6,11 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "i915_drv.h" #include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" +#include "intel_gt_pm.h" static inline bool intel_engine_pm_is_awake(const struct intel_engine_cs *engine) @@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine) return intel_wakeref_is_active(&engine->wakeref); } +static inline void __intel_engine_pm_get(struct intel_engine_cs *engine) +{ + __intel_wakeref_get(&engine->wakeref); +} + static inline void intel_engine_pm_get(struct intel_engine_cs *engine) { intel_wakeref_get(&engine->wakeref); @@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) return intel_wakeref_get_if_active(&engine->wakeref); } +static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine) +{ + if (!intel_engine_is_virtual(engine)) { + intel_wakeref_might_get(&engine->wakeref); + } else { + struct intel_gt *gt = engine->gt; + struct intel_engine_cs *tengine; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(tengine, gt, mask, tmp) + intel_wakeref_might_get(&tengine->wakeref); + } + intel_gt_pm_might_get(engine->gt); +} + static inline void intel_engine_pm_put(struct intel_engine_cs *engine) { intel_wakeref_put(&engine->wakeref); @@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) intel_wakeref_unlock_wait(&engine->wakeref); } +static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine) +{ + if (!intel_engine_is_virtual(engine)) { + intel_wakeref_might_put(&engine->wakeref); + } else { + struct intel_gt *gt = engine->gt; + struct intel_engine_cs *tengine; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(tengine, gt, mask, tmp) + intel_wakeref_might_put(&tengine->wakeref); + } + intel_gt_pm_might_put(engine->gt); +} + static inline struct i915_request * intel_engine_create_kernel_request(struct intel_engine_cs *engine) { @@ -69,4 +106,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine) void intel_engine_init__pm(struct intel_engine_cs *engine); +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine); + #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ed91bcff20eb..e0f773585c29 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -67,8 +67,11 @@ struct intel_instdone { /* The following exist only in the RCS engine */ u32 slice_common; u32 slice_common_extra[2]; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + + /* Added in XeHPG */ + u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* @@ -266,6 +269,13 @@ struct intel_engine_cs { unsigned int guc_id; intel_engine_mask_t mask; + /** + * @logical_mask: logical mask of engine, reported to user space via + * query IOCTL and used to communicate with the GuC in logical space. + * The logical instance of a physical engine can change based on product + * and fusing. + */ + intel_engine_mask_t logical_mask; u8 class; u8 instance; @@ -304,6 +314,13 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ + /** + * pinned_contexts_list: List of pinned contexts. This list is only + * assumed to be manipulated during driver load- or unload time and + * does therefore not have any additional protection. + */ + struct list_head pinned_contexts_list; + intel_engine_mask_t saturated; /* submitting semaphores too late? */ struct { @@ -546,7 +563,7 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) static inline bool intel_engine_has_timeslices(const struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return false; return engine->flags & I915_ENGINE_HAS_TIMESLICES; @@ -578,4 +595,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ (instdone_has_subslice(dev_priv_, sseu_, slice_, \ subslice_))) + +#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ + for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ + (iter_) < GEN_MAX_SUBSLICES; \ + (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ + (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ + for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) + #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de5f9c86b9a4..bedb80057046 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) } static struct intel_context * -execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags); static struct i915_request * __active_request(const struct intel_timeline * const tl, @@ -2140,10 +2141,6 @@ static void __execlists_unhold(struct i915_request *rq) if (p->flags & I915_DEPENDENCY_WEAK) continue; - /* Propagate any change in error status */ - if (rq->fence.error) - i915_request_set_error_once(w, rq->fence.error); - if (w->engine != rq->engine) continue; @@ -2565,7 +2562,7 @@ __execlists_context_pre_pin(struct intel_context *ce, if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { lrc_init_state(ce, engine, *vaddr); - __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); + __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); } return 0; @@ -2791,6 +2788,8 @@ static void execlists_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void enable_error_interrupt(struct intel_engine_cs *engine) @@ -3341,7 +3340,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_SEMAPHORES; if (can_preempt(engine)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; - if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (CONFIG_DRM_I915_TIMESLICE_DURATION) engine->flags |= I915_ENGINE_HAS_TIMESLICES; } } @@ -3786,7 +3785,8 @@ unlock: } static struct intel_context * -execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags) { struct virtual_engine *ve; unsigned int n; @@ -3879,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) ve->siblings[ve->num_siblings++] = sibling; ve->base.mask |= sibling->mask; + ve->base.logical_mask |= sibling->logical_mask; /* * All physical engines must be compatible for their emission diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index de3ac58fceec..57c97554393b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -644,7 +644,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) struct i915_ppgtt *ppgtt; int err; - ppgtt = i915_ppgtt_create(ggtt->vm.gt); + ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, 0); - rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ flush_workqueue(ggtt->vm.i915->wq); mutex_lock(&ggtt->vm.mutex); @@ -814,6 +813,21 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) return 0; } +static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) +{ + /* + * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset + * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset + */ + GEM_BUG_ON(GRAPHICS_VER(i915) < 6); + return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; +} + +static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) +{ + return gen6_gttmmadr_size(i915) / 2; +} + static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { struct drm_i915_private *i915 = ggtt->vm.i915; @@ -822,8 +836,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) u32 pte_flags; int ret; - /* For Modern GENs the PTEs and register space are split in the BAR */ - phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; + GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); /* * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range @@ -910,6 +924,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) size = gen8_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.cleanup = gen6_gmch_remove; @@ -1373,13 +1388,31 @@ err_st_alloc: } static struct scatterlist * -remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, +remap_pages(struct drm_i915_gem_object *obj, + unsigned int offset, unsigned int alignment_pad, unsigned int width, unsigned int height, unsigned int src_stride, unsigned int dst_stride, struct sg_table *st, struct scatterlist *sg) { unsigned int row; + if (!width || !height) + return sg; + + if (alignment_pad) { + st->nents++; + + /* + * The DE ignores the PTEs for the padding tiles, the sg entry + * here is just a convenience to indicate how many padding PTEs + * to insert at this spot. + */ + sg_set_page(sg, NULL, alignment_pad * 4096, 0); + sg_dma_address(sg) = 0; + sg_dma_len(sg) = alignment_pad * 4096; + sg = sg_next(sg); + } + for (row = 0; row < height; row++) { unsigned int left = width * I915_GTT_PAGE_SIZE; @@ -1439,6 +1472,7 @@ intel_remap_pages(struct intel_remapped_info *rem_info, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; + unsigned int gtt_offset = 0; int ret = -ENOMEM; int i; @@ -1455,10 +1489,19 @@ intel_remap_pages(struct intel_remapped_info *rem_info, sg = st->sgl; for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { - sg = remap_pages(obj, rem_info->plane[i].offset, + unsigned int alignment_pad = 0; + + if (rem_info->plane_alignment) + alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset; + + sg = remap_pages(obj, + rem_info->plane[i].offset, alignment_pad, rem_info->plane[i].width, rem_info->plane[i].height, rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride, st, sg); + + gtt_offset += alignment_pad + + rem_info->plane[i].dst_stride * rem_info->plane[i].height; } i915_sg_trim(st); diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 1c3af0fc0456..f8253012d166 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -28,10 +28,13 @@ #define INSTR_26_TO_24_MASK 0x7000000 #define INSTR_26_TO_24_SHIFT 24 +#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) + /* * Memory interface instructions used by the kernel */ -#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) +#define MI_INSTR(opcode, flags) \ + (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags)) /* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ #define MI_GLOBAL_GTT (1<<22) @@ -57,6 +60,7 @@ #define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) #define MI_SUSPEND_FLUSH_EN (1<<0) #define MI_SET_APPID MI_INSTR(0x0e, 0) +#define MI_SET_APPID_SESSION_ID(x) ((x) << 0) #define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) #define MI_OVERLAY_CONTINUE (0x0<<21) #define MI_OVERLAY_ON (0x1<<21) @@ -146,6 +150,7 @@ #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ +#define MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22) #define MI_FLUSH_DW_STORE_INDEX (1<<21) #define MI_INVALIDATE_TLB (1<<18) #define MI_FLUSH_DW_OP_STOREDW (1<<14) @@ -273,6 +278,19 @@ #define MI_MATH_REG_CF 0x33 /* + * Media instructions used by the kernel + */ +#define MEDIA_INSTR(pipe, op, sub_op, flags) \ + (__INSTR(INSTR_RC_CLIENT) | (pipe) << INSTR_SUBCLIENT_SHIFT | \ + (op) << INSTR_26_TO_24_SHIFT | (sub_op) << 16 | (flags)) + +#define MFX_WAIT MEDIA_INSTR(1, 0, 0, 0) +#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8) +#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9) + +#define CRYPTO_KEY_EXCHANGE MEDIA_INSTR(2, 6, 9, 0) + +/* * Commands used only by the command parser */ #define MI_SET_PREDICATE MI_INSTR(0x01, 0) @@ -328,8 +346,6 @@ #define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) -#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) - #define COLOR_BLT ((0x2<<29)|(0x40<<22)) #define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c986642..1cb1948ac959 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,7 +3,7 @@ * Copyright © 2019 Intel Corporation */ -#include "debugfs_gt.h" +#include "intel_gt_debugfs.h" #include "gem/i915_gem_lmem.h" #include "i915_drv.h" @@ -15,12 +15,13 @@ #include "intel_gt_requests.h" #include "intel_migrate.h" #include "intel_mocs.h" +#include "intel_pm.h" #include "intel_rc6.h" #include "intel_renderstate.h" #include "intel_rps.h" #include "intel_uncore.h" -#include "intel_pm.h" #include "shmem_utils.h" +#include "pxp/intel_pxp.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -434,7 +435,7 @@ void intel_gt_driver_register(struct intel_gt *gt) { intel_rps_driver_register(>->rps); - debugfs_gt_register(gt); + intel_gt_debugfs_register(gt); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -481,7 +482,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt) static struct i915_address_space *kernel_vm(struct intel_gt *gt) { if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) - return &i915_ppgtt_create(gt)->vm; + return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm; else return i915_vm_get(>->ggtt->vm); } @@ -660,6 +661,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) return err; + intel_gt_init_workarounds(gt); + /* * This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs @@ -682,6 +685,8 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } + intel_set_mocs_index(gt); + err = intel_engines_init(gt); if (err) goto err_engines; @@ -710,6 +715,8 @@ int intel_gt_init(struct intel_gt *gt) intel_migrate_init(>->migrate, gt); + intel_pxp_init(>->pxp); + goto out_fw; err_gt: __intel_gt_disable(gt); @@ -737,6 +744,8 @@ void intel_gt_driver_remove(struct intel_gt *gt) intel_uc_driver_remove(>->uc); intel_engines_release(gt); + + intel_gt_flush_buffer_pool(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) @@ -745,12 +754,14 @@ void intel_gt_driver_unregister(struct intel_gt *gt) intel_rps_driver_unregister(>->rps); + intel_pxp_fini(>->pxp); + /* * Upon unregistering the device to prevent any new users, cancel * all in-flight requests so that we can quickly unbind the active * resources. */ - intel_gt_set_wedged(gt); + intel_gt_set_wedged_on_fini(gt); /* Scrub all HW state upon release */ with_intel_runtime_pm(gt->uncore->rpm, wakeref) @@ -765,6 +776,7 @@ void intel_gt_driver_release(struct intel_gt *gt) if (vm) /* FIXME being called twice on error paths :( */ i915_vm_put(vm); + intel_wa_list_free(>->wa_list); intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); intel_gt_fini_buffer_pool(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index aa0a59c5b614..acc49c56a9f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -245,8 +245,6 @@ void intel_gt_fini_buffer_pool(struct intel_gt *gt) struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; - intel_gt_flush_buffer_pool(gt); - for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) GEM_BUG_ON(!list_empty(&pool->cache_list[n])); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c new file mode 100644 index 000000000000..f103664b71d4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "i915_drv.h" +#include "intel_gt_debugfs.h" +#include "intel_gt_engines_debugfs.h" +#include "intel_gt_pm_debugfs.h" +#include "intel_sseu_debugfs.h" +#include "pxp/intel_pxp_debugfs.h" +#include "uc/intel_uc_debugfs.h" + +int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val) +{ + int ret = intel_gt_terminally_wedged(gt); + + switch (ret) { + case -EIO: + *val = 1; + return 0; + case 0: + *val = 0; + return 0; + default: + return ret; + } +} + +int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val) +{ + /* Flush any previous reset before applying for a new one */ + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, >->reset.flags)); + + intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE, + "Manually reset engine mask to %llx", val); + return 0; +} + +/* + * keep the interface clean where the first parameter + * is a 'struct intel_gt *' instead of 'void *' + */ +static int __intel_gt_debugfs_reset_show(void *data, u64 *val) +{ + return intel_gt_debugfs_reset_show(data, val); +} + +static int __intel_gt_debugfs_reset_store(void *data, u64 val) +{ + return intel_gt_debugfs_reset_store(data, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show, + __intel_gt_debugfs_reset_store, "%llu\n"); + +static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct intel_gt_debugfs_file files[] = { + { "reset", &reset_fops, NULL }, + }; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); +} + +void intel_gt_debugfs_register(struct intel_gt *gt) +{ + struct dentry *root; + + if (!gt->i915->drm.primary->debugfs_root) + return; + + root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); + if (IS_ERR(root)) + return; + + gt_debugfs_register(gt, root); + + intel_gt_engines_debugfs_register(gt, root); + intel_gt_pm_debugfs_register(gt, root); + intel_sseu_debugfs_register(gt, root); + + intel_uc_debugfs_register(>->uc, root); + intel_pxp_debugfs_register(>->pxp, root); +} + +void intel_gt_debugfs_register_files(struct dentry *root, + const struct intel_gt_debugfs_file *files, + unsigned long count, void *data) +{ + while (count--) { + umode_t mode = files->fops->write ? 0644 : 0444; + + if (!files->eval || files->eval(data)) + debugfs_create_file(files->name, + mode, root, data, + files->fops); + + files++; + } +} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h index f77540f727e9..e307ceb99031 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h @@ -3,14 +3,14 @@ * Copyright © 2019 Intel Corporation */ -#ifndef DEBUGFS_GT_H -#define DEBUGFS_GT_H +#ifndef INTEL_GT_DEBUGFS_H +#define INTEL_GT_DEBUGFS_H #include <linux/file.h> struct intel_gt; -#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \ +#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, inode->i_private); \ @@ -23,16 +23,20 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } -void debugfs_gt_register(struct intel_gt *gt); +void intel_gt_debugfs_register(struct intel_gt *gt); -struct debugfs_gt_file { +struct intel_gt_debugfs_file { const char *name; const struct file_operations *fops; bool (*eval)(void *data); }; void intel_gt_debugfs_register_files(struct dentry *root, - const struct debugfs_gt_file *files, + const struct intel_gt_debugfs_file *files, unsigned long count, void *data); -#endif /* DEBUGFS_GT_H */ +/* functions that need to be accessed by the upper level non-gt interfaces */ +int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val); +int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val); + +#endif /* INTEL_GT_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c index 5e3725e62241..8f9b874fdc9c 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_engines.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c @@ -6,10 +6,10 @@ #include <drm/drm_print.h> -#include "debugfs_engines.h" -#include "debugfs_gt.h" #include "i915_drv.h" /* for_each_engine! */ #include "intel_engine.h" +#include "intel_gt_debugfs.h" +#include "intel_gt_engines_debugfs.h" static int engines_show(struct seq_file *m, void *data) { @@ -24,11 +24,11 @@ static int engines_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(engines); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines); -void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) +void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "engines", &engines_fops }, }; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h new file mode 100644 index 000000000000..dda113452da9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_ENGINES_DEBUGFS_H +#define INTEL_GT_ENGINES_DEBUGFS_H + +struct intel_gt; +struct dentry; + +void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root); + +#endif /* INTEL_GT_ENGINES_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index b2de83be4d97..699a74582d32 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -13,6 +13,7 @@ #include "intel_lrc_reg.h" #include "intel_uncore.h" #include "intel_rps.h" +#include "pxp/intel_pxp_irq.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -64,6 +65,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, if (instance == OTHER_GTPM_INSTANCE) return gen11_rps_irq_handler(>->rps, iir); + if (instance == OTHER_KCR_INSTANCE) + return intel_pxp_irq_handler(>->pxp, iir); + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); } @@ -196,6 +200,9 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); + + intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_MASK, ~0); } void gen11_gt_irq_postinstall(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index dea8e2479897..524eaf678790 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -18,6 +18,9 @@ #include "intel_rc6.h" #include "intel_rps.h" #include "intel_wakeref.h" +#include "pxp/intel_pxp_pm.h" + +#define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) static void user_forcewake(struct intel_gt *gt, bool suspend) { @@ -262,6 +265,8 @@ int intel_gt_resume(struct intel_gt *gt) intel_uc_resume(>->uc); + intel_pxp_resume(>->pxp); + user_forcewake(gt, false); out_fw: @@ -279,7 +284,7 @@ static void wait_for_suspend(struct intel_gt *gt) if (!intel_gt_pm_is_awake(gt)) return; - if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (intel_gt_wait_for_idle(gt, I915_GT_SUSPEND_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave * the gpu quiet. @@ -296,7 +301,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) user_forcewake(gt, true); wait_for_suspend(gt); - intel_uc_suspend(>->uc); + intel_pxp_suspend(>->pxp, false); } static suspend_state_t pm_suspend_target(void) @@ -320,6 +325,8 @@ void intel_gt_suspend_late(struct intel_gt *gt) GEM_BUG_ON(gt->awake); + intel_uc_suspend(>->uc); + /* * On disabling the device, we want to turn off HW access to memory * that we no longer own. @@ -346,6 +353,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) void intel_gt_runtime_suspend(struct intel_gt *gt) { + intel_pxp_suspend(>->pxp, true); intel_uc_runtime_suspend(>->uc); GT_TRACE(gt, "\n"); @@ -353,11 +361,19 @@ void intel_gt_runtime_suspend(struct intel_gt *gt) int intel_gt_runtime_resume(struct intel_gt *gt) { + int ret; + GT_TRACE(gt, "\n"); intel_gt_init_swizzling(gt); intel_ggtt_restore_fences(gt->ggtt); - return intel_uc_runtime_resume(>->uc); + ret = intel_uc_runtime_resume(>->uc); + if (ret) + return ret; + + intel_pxp_resume(>->pxp); + + return 0; } static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index d0588d8aaa44..bc898df7a48c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) return intel_wakeref_get_if_active(>->wakeref); } +static inline void intel_gt_pm_might_get(struct intel_gt *gt) +{ + intel_wakeref_might_get(>->wakeref); +} + static inline void intel_gt_pm_put(struct intel_gt *gt) { intel_wakeref_put(>->wakeref); @@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt) intel_wakeref_put_async(>->wakeref); } +static inline void intel_gt_pm_might_put(struct intel_gt *gt) +{ + intel_wakeref_might_put(>->wakeref); +} + +#define with_intel_gt_pm(gt, tmp) \ + for (tmp = 1, intel_gt_pm_get(gt); tmp; \ + intel_gt_pm_put(gt), tmp = 0) + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index d6f5836396f8..404dfa7673c6 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -6,18 +6,59 @@ #include <linux/seq_file.h> -#include "debugfs_gt.h" -#include "debugfs_gt_pm.h" #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" +#include "intel_gt_debugfs.h" #include "intel_gt_pm.h" +#include "intel_gt_pm_debugfs.h" #include "intel_llc.h" +#include "intel_pcode.h" #include "intel_rc6.h" #include "intel_rps.h" #include "intel_runtime_pm.h" -#include "intel_sideband.h" #include "intel_uncore.h" +#include "vlv_sideband.h" + +int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) +{ + atomic_inc(>->user_wakeref); + intel_gt_pm_get(gt); + if (GRAPHICS_VER(gt->i915) >= 6) + intel_uncore_forcewake_user_get(gt->uncore); + + return 0; +} + +int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) >= 6) + intel_uncore_forcewake_user_put(gt->uncore); + intel_gt_pm_put(gt); + atomic_dec(>->user_wakeref); + + return 0; +} + +static int forcewake_user_open(struct inode *inode, struct file *file) +{ + struct intel_gt *gt = inode->i_private; + + return intel_gt_pm_debugfs_forcewake_user_open(gt); +} + +static int forcewake_user_release(struct inode *inode, struct file *file) +{ + struct intel_gt *gt = inode->i_private; + + return intel_gt_pm_debugfs_forcewake_user_release(gt); +} + +static const struct file_operations forcewake_user_fops = { + .owner = THIS_MODULE, + .open = forcewake_user_open, + .release = forcewake_user_release, +}; static int fw_domains_show(struct seq_file *m, void *data) { @@ -36,7 +77,7 @@ static int fw_domains_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); static void print_rc6_res(struct seq_file *m, const char *title, @@ -238,11 +279,10 @@ static int drpc_show(struct seq_file *m, void *unused) return err; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc); -static int frequency_show(struct seq_file *m, void *unused) +void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) { - struct intel_gt *gt = m->private; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_rps *rps = >->rps; @@ -254,21 +294,21 @@ static int frequency_show(struct seq_file *m, void *unused) u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); - seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); - seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); - seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> + drm_printf(p, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); + drm_printf(p, "Requested VID: %d\n", rgvswctl & 0x3f); + drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> MEMSTAT_VID_SHIFT); - seq_printf(m, "Current P-state: %d\n", + drm_printf(p, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { u32 rpmodectl, freq_sts; rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", + drm_printf(p, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", + drm_printf(p, "HW control enabled: %s\n", yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", + drm_printf(p, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); @@ -276,25 +316,25 @@ static int frequency_show(struct seq_file *m, void *unused) freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); - seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); - seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); + drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); + drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq); - seq_printf(m, "actual GPU freq: %d MHz\n", + drm_printf(p, "actual GPU freq: %d MHz\n", intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); - seq_printf(m, "current GPU freq: %d MHz\n", + drm_printf(p, "current GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->cur_freq)); - seq_printf(m, "max GPU freq: %d MHz\n", + drm_printf(p, "max GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, "min GPU freq: %d MHz\n", + drm_printf(p, "min GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->min_freq)); - seq_printf(m, "idle GPU freq: %d MHz\n", + drm_printf(p, "idle GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->idle_freq)); - seq_printf(m, "efficient (RPe) frequency: %d MHz\n", + drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else if (GRAPHICS_VER(i915) >= 6) { u32 rp_state_limits; @@ -309,13 +349,11 @@ static int frequency_show(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(i915)) { - rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(i915)) gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); @@ -376,113 +414,121 @@ static int frequency_show(struct seq_file *m, void *unused) } pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); - seq_printf(m, "Video Turbo Mode: %s\n", + drm_printf(p, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", + drm_printf(p, "HW control enabled: %s\n", yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", + drm_printf(p, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); - seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_mask); if (GRAPHICS_VER(i915) <= 10) - seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", + drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", pm_isr, pm_iir); - seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); - seq_printf(m, "Render p-state ratio: %d\n", + drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + drm_printf(p, "Render p-state ratio: %d\n", (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); - seq_printf(m, "Render p-state VID: %d\n", + drm_printf(p, "Render p-state VID: %d\n", gt_perf_status & 0xff); - seq_printf(m, "Render p-state limit: %d\n", + drm_printf(p, "Render p-state limit: %d\n", rp_state_limits & 0xff); - seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); - seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); - seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); - seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); - seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); - seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", + drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); + drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); + drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); + drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); + drm_printf(p, "CAGF: %dMHz\n", cagf); + drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", rpcurupei, intel_gt_pm_interval_to_ns(gt, rpcurupei)); - seq_printf(m, "RP CUR UP: %d (%lldns)\n", + drm_printf(p, "RP CUR UP: %d (%lldns)\n", rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%lldns)\n", + drm_printf(p, "RP PREV UP: %d (%lldns)\n", rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", + drm_printf(p, "Up threshold: %d%%\n", rps->power.up_threshold); - seq_printf(m, "RP UP EI: %d (%lldns)\n", + drm_printf(p, "RP UP EI: %d (%lldns)\n", rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); - seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n", + drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); - seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", + drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", rpcurdownei, intel_gt_pm_interval_to_ns(gt, rpcurdownei)); - seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", + drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", rpcurdown, intel_gt_pm_interval_to_ns(gt, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", + drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", rpprevdown, intel_gt_pm_interval_to_ns(gt, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", + drm_printf(p, "Down threshold: %d%%\n", rps->power.down_threshold); - seq_printf(m, "RP DOWN EI: %d (%lldns)\n", + drm_printf(p, "RP DOWN EI: %d (%lldns)\n", rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); - seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n", + drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", + drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", + drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", + drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); - seq_printf(m, "Max overclocked frequency: %dMHz\n", + drm_printf(p, "Max overclocked frequency: %dMHz\n", intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, "Current freq: %d MHz\n", + drm_printf(p, "Current freq: %d MHz\n", intel_gpu_freq(rps, rps->cur_freq)); - seq_printf(m, "Actual freq: %d MHz\n", cagf); - seq_printf(m, "Idle freq: %d MHz\n", + drm_printf(p, "Actual freq: %d MHz\n", cagf); + drm_printf(p, "Idle freq: %d MHz\n", intel_gpu_freq(rps, rps->idle_freq)); - seq_printf(m, "Min freq: %d MHz\n", + drm_printf(p, "Min freq: %d MHz\n", intel_gpu_freq(rps, rps->min_freq)); - seq_printf(m, "Boost freq: %d MHz\n", + drm_printf(p, "Boost freq: %d MHz\n", intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Max freq: %d MHz\n", + drm_printf(p, "Max freq: %d MHz\n", intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, + drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else { - seq_puts(m, "no P-state info available\n"); + drm_puts(p, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); - seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); - seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); + drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); + drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); intel_runtime_pm_put(uncore->rpm, wakeref); +} + +static int frequency_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + intel_gt_pm_frequency_dump(gt, &p); return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency); static int llc_show(struct seq_file *m, void *data) { @@ -535,7 +581,7 @@ static bool llc_eval(void *data) return HAS_LLC(gt->i915); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(llc); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc); static const char *rps_power_to_str(unsigned int power) { @@ -614,14 +660,15 @@ static bool rps_eval(void *data) return HAS_RPS(gt->i915); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); -void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) +void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "drpc", &drpc_fops, NULL }, { "frequency", &frequency_fops, NULL }, { "forcewake", &fw_domains_fops, NULL }, + { "forcewake_user", &forcewake_user_fops, NULL}, { "llc", &llc_fops, llc_eval }, { "rps_boost", &rps_boost_fops, rps_eval }, }; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h new file mode 100644 index 000000000000..a8457887ec65 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_DEBUGFS_H +#define INTEL_GT_PM_DEBUGFS_H + +struct intel_gt; +struct dentry; +struct drm_printer; + +void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root); +void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m); + +/* functions that need to be accessed by the upper level non-gt interfaces */ +int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt); +int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt); + +#endif /* INTEL_GT_PM_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1..14216cc471b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -26,6 +26,7 @@ #include "intel_rps_types.h" #include "intel_migrate_types.h" #include "intel_wakeref.h" +#include "pxp/intel_pxp_types.h" struct drm_i915_private; struct i915_ggtt; @@ -72,6 +73,8 @@ struct intel_gt { struct intel_uc uc; + struct i915_wa_list wa_list; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; @@ -184,6 +187,9 @@ struct intel_gt { u8 num_engines; + /* General presence of SFC units */ + u8 sfc_mask; + /* Media engine access to SFC per instance */ u8 vdbox_sfc_access; @@ -192,6 +198,12 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct { + u8 uc_index; + } mocs; + + struct intel_pxp pxp; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e137dd32b5b8..67d14afa6623 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -28,7 +28,8 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) * used the passed in size for the page size, which should ensure it * also has the same alignment. */ - obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0); + obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, + vm->lmem_pt_obj_flags); /* * Ensure all paging structures for this vm share the same dma-resv * object underneath, with the idea that one object_lock() will lock @@ -155,7 +156,7 @@ void i915_vm_resv_release(struct kref *kref) static void __i915_vm_release(struct work_struct *work) { struct i915_address_space *vm = - container_of(work, struct i915_address_space, rcu.work); + container_of(work, struct i915_address_space, release_work); vm->cleanup(vm); i915_address_space_fini(vm); @@ -171,7 +172,7 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - queue_rcu_work(vm->i915->wq, &vm->rcu); + queue_work(vm->i915->wq, &vm->release_work); } void i915_address_space_init(struct i915_address_space *vm, int subclass) @@ -185,7 +186,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); - INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); /* diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index bc7153018ebd..bc6750263359 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -213,7 +213,7 @@ struct i915_vma_ops { struct i915_address_space { struct kref ref; - struct rcu_work rcu; + struct work_struct release_work; struct drm_mm mm; struct intel_gt *gt; @@ -260,6 +260,9 @@ struct i915_address_space { u8 pd_shift; u8 scratch_order; + /* Flags used when creating page-table objects for this vm */ + unsigned long lmem_pt_obj_flags; + struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); @@ -519,7 +522,8 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]); } -void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, + unsigned long lmem_pt_obj_flags); int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); @@ -537,7 +541,8 @@ static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) int i915_ppgtt_init_hw(struct intel_gt *gt); -struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index eb1a15deed22..08d7d5ae263a 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -3,12 +3,13 @@ * Copyright © 2019 Intel Corporation */ +#include <asm/tsc.h> #include <linux/cpufreq.h> #include "i915_drv.h" #include "intel_gt.h" #include "intel_llc.h" -#include "intel_sideband.h" +#include "intel_pcode.h" struct ia_constants { unsigned int min_gpu_freq; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bb4af4977920..56156cf18c41 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -226,6 +226,40 @@ static const u8 gen12_xcs_offsets[] = { END }; +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -525,6 +559,49 @@ static const u8 xehp_rcs_offsets[] = { END }; +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -543,7 +620,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; @@ -554,7 +633,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_xcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 9) return gen9_xcs_offsets; @@ -861,7 +942,13 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) context_size += PAGE_SIZE; } - obj = i915_gem_object_create_lmem(engine->i915, context_size, 0); + if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { + ce->parallel.guc.parent_page = context_size / PAGE_SIZE; + context_size += PARENT_SCRATCH_SIZE; + } + + obj = i915_gem_object_create_lmem(engine->i915, context_size, + I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 1dac21aa7e5c..afb1cce9a352 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -78,7 +78,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) * TODO: Add support for huge LMEM PTEs */ - vm = i915_ppgtt_create(gt); + vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY); if (IS_ERR(vm)) return ERR_CAST(vm); diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d..15f9ada28a7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,8 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; + u8 unused_entries_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -40,6 +42,8 @@ struct drm_i915_mocs_table { #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) #define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ @@ -88,18 +92,25 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -282,17 +293,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -314,6 +317,83 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry gen12_mocs_table[] = { + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)), + + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)), + /* UC - Non-Coherent; GO:L3 */ + MOCS_ENTRY(4, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)), + + /* HW Reserved - SW program but never use. */ + MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)), + MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)), + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + +static const struct drm_i915_mocs_entry dg2_mocs_table[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -340,14 +420,45 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; - if (IS_DG1(i915)) { + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + + table->unused_entries_index = I915_MOCS_PTE; + if (IS_DG2(i915)) { + if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); + table->table = dg2_mocs_table_g10_ax; + } else { + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; + } + table->uc_index = 1; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 3; + } else if (IS_XEHPSDV(i915)) { + table->size = ARRAY_SIZE(xehpsdv_mocs_table); + table->table = xehpsdv_mocs_table; + table->uc_index = 2; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 5; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; + table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (GRAPHICS_VER(i915) >= 12) { + table->uc_index = 1; + table->unused_entries_index = 5; + } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + /* For TGL/RKL, Can't be changed now for ABI reasons */ table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + } else if (GRAPHICS_VER(i915) >= 12) { + table->size = ARRAY_SIZE(gen12_mocs_table); + table->table = gen12_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -393,16 +504,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + return table->table[table->unused_entries_index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -417,6 +528,8 @@ static void __init_mocs_table(struct intel_uncore *uncore, unsigned int i; u32 mocs; + drm_WARN_ONCE(&uncore->i915->drm, !table->unused_entries_index, + "Unused entries index should have been defined\n"); for_each_mocs(mocs, table, i) intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); } @@ -443,16 +556,16 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; - - return table->table[I915_MOCS_PTE].l3cc_value; + return table->table[table->unused_entries_index].l3cc_value; } static u32 l3cc_combine(u16 low, u16 high) @@ -468,10 +581,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -496,7 +608,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, &table); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, &table); + init_l3cc_table(engine->uncore, &table); } static u32 global_mocs_offset(void) @@ -504,6 +616,14 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +void intel_set_mocs_index(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; + + get_mocs_settings(gt->i915, &table); + gt->mocs.uc_index = table.uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; @@ -515,6 +635,14 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, &table); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, &table, global_mocs_offset()); + + /* + * Initialize the L3CC table as part of mocs initalization to make + * sure the LNCFCMOCSx registers are programmed for the subsequent + * memory transactions including guc transactions + */ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, &table); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index d83274f5163b..76db827210c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,5 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +void intel_set_mocs_index(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 886060f7e6fc..4396bfd630d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -155,19 +155,20 @@ int i915_ppgtt_init_hw(struct intel_gt *gt) } static struct i915_ppgtt * -__ppgtt_create(struct intel_gt *gt) +__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags) { if (GRAPHICS_VER(gt->i915) < 8) return gen6_ppgtt_create(gt); else - return gen8_ppgtt_create(gt); + return gen8_ppgtt_create(gt, lmem_pt_obj_flags); } -struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags) { struct i915_ppgtt *ppgtt; - ppgtt = __ppgtt_create(gt); + ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags); if (IS_ERR(ppgtt)) return ppgtt; @@ -298,7 +299,8 @@ int ppgtt_set_pages(struct i915_vma *vma) return 0; } -void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, + unsigned long lmem_pt_obj_flags) { struct drm_i915_private *i915 = gt->i915; @@ -306,6 +308,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) ppgtt->vm.i915 = i915; ppgtt->vm.dma = i915->drm.dev; ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags; dma_resv_init(&ppgtt->vm._resv); i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 799d382eea79..43093dd2d0c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -9,8 +9,8 @@ #include "i915_vgpu.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_pcode.h" #include "intel_rc6.h" -#include "intel_sideband.h" /** * DOC: RC6 diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index a74b72f50cc9..afb35d2e5c73 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -32,7 +32,7 @@ static int init_fake_lmem_bar(struct intel_memory_region *mem) mem->remap_addr = dma_map_resource(i915->drm.dev, mem->region.start, mem->fake_mappable.size, - PCI_DMA_BIDIRECTIONAL, + DMA_BIDIRECTIONAL, DMA_ATTR_FORCE_CONTIGUOUS); if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) { drm_mm_remove_node(&mem->fake_mappable); @@ -62,7 +62,7 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem) dma_unmap_resource(mem->i915->drm.dev, mem->remap_addr, mem->fake_mappable.size, - PCI_DMA_BIDIRECTIONAL, + DMA_BIDIRECTIONAL, DMA_ATTR_FORCE_CONTIGUOUS); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 7c4d5158e03b..2fdd52b62092 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -112,7 +112,8 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE); + obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE | + I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt)) obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2958e2fae380..586dca1731ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -17,6 +17,7 @@ #include "intel_ring.h" #include "shmem_utils.h" #include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -291,7 +292,9 @@ static void xcs_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine); /* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void reset_prepare(struct intel_engine_cs *engine) @@ -1265,7 +1268,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) int size, err; if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS) - return 0; + return NULL; err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); if (err < 0) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0a03fbed9f9b..5e275f8dda8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -11,8 +11,9 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" +#include "intel_pcode.h" #include "intel_rps.h" -#include "intel_sideband.h" +#include "vlv_sideband.h" #include "../../../platform/x86/intel_ips.h" #define BUSY_MAX_EI 20u /* ms */ @@ -994,20 +995,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val) static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = intel_rps_read_state_cap(rps); /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(i915)) { - u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { - u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 16) & 0xff; @@ -2144,6 +2141,19 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } +u32 intel_rps_read_state_cap(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + if (IS_XEHPSDV(i915)) + return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); + else if (IS_GEN9_LP(i915)) + return intel_uncore_read(uncore, BXT_RP_STATE_CAP); + else + return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4213bcce1667..11960d64ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); +u32 intel_rps_read_state_cap(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index bbd272943c3f..bdf09051b8a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask) + u8 *subslice_mask, u32 ss_mask) { int offset = slice * sseu->ss_stride; - memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); + memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); } unsigned int @@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, - u8 s_en, u32 ss_en, u16 eu_en) +static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) +{ + u32 ss_mask; + + ss_mask = ss_en >> (s * sseu->max_subslices); + ss_mask &= GENMASK(sseu->max_subslices - 1, 0); + + return ss_mask; +} + +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, + u32 g_ss_en, u32 c_ss_en, u16 eu_en) { int s, ss; - /* ss_en represents entire subslice mask across all slices */ + /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > - sizeof(ss_en) * BITS_PER_BYTE); + sizeof(g_ss_en) * BITS_PER_BYTE); for (s = 0; s < sseu->max_slices; s++) { if ((s_en & BIT(s)) == 0) @@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, sseu->slice_mask |= BIT(s); - intel_sseu_set_subslices(sseu, s, ss_en); + /* + * XeHP introduces the concept of compute vs geometry DSS. To + * reduce variation between GENs around subslice usage, store a + * mask for both the geometry and compute enabled masks since + * userspace will need to be able to query these masks + * independently. Also compute a total enabled subslice count + * for the purposes of selecting subslices to use in a + * particular GEM context. + */ + intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, + get_ss_stride_mask(sseu, s, c_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, + get_ss_stride_mask(sseu, s, g_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + get_ss_stride_mask(sseu, s, + g_ss_en | c_ss_en)); for (ss = 0; ss < sseu->max_subslices; ss++) if (intel_sseu_has_subslice(sseu, s, ss)) @@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; - u32 dss_en; + u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; u8 s_en; @@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt) s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; - dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) @@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) sseu_set_eus(sseu, 0, 1, ~disabled_mask); } - intel_sseu_set_subslices(sseu, 0, subslice_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); sseu->eu_total = compute_eu_total(sseu); @@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -485,10 +514,9 @@ static void hsw_sseu_info_init(struct intel_gt *gt) } fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1); - switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { + switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) { default: - MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >> - HSW_F1_EU_DIS_SHIFT); + MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)); fallthrough; case HSW_F1_EU_DIS_10EUS: sseu->eu_per_subslice = 10; @@ -506,7 +534,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 22fef98887c0..60882a74741e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -26,9 +26,14 @@ struct drm_printer; #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 +#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE) + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; @@ -78,6 +83,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, u8 mask; int ss_idx = subslice / BITS_PER_BYTE; + if (slice >= sseu->max_slices || + subslice >= sseu->max_subslices) + return false; + GEM_BUG_ON(ss_idx >= sseu->ss_stride); mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; @@ -97,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask); + u8 *subslice_mask, u32 ss_mask); void intel_sseu_info_init(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 1ba8b7da9d37..8bb3a91dad82 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -4,9 +4,9 @@ * Copyright © 2020 Intel Corporation */ -#include "debugfs_gt.h" -#include "intel_sseu_debugfs.h" #include "i915_drv.h" +#include "intel_gt_debugfs.h" +#include "intel_sseu_debugfs.h" static void sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice, u8 *to_mask) @@ -282,7 +282,7 @@ static int sseu_status_show(struct seq_file *m, void *unused) return intel_sseu_status(m, gt); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(sseu_status); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status); static int rcs_topology_show(struct seq_file *m, void *unused) { @@ -293,11 +293,11 @@ static int rcs_topology_show(struct seq_file *m, void *unused) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(rcs_topology); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology); void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "sseu_status", &sseu_status_fops, NULL }, { "rcs_topology", &rcs_topology_fops, NULL }, }; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 1257f4f11e66..438bbc7b8147 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline) timeline->hwsp_map = vaddr; timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES); - clflush(vaddr + ofs); + drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES); return 0; } @@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl) memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno)); WRITE_ONCE(*hwsp_seqno, tl->seqno); - clflush(hwsp_seqno); + drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES); } void intel_timeline_enter(struct intel_timeline *tl) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aae609d7d85d..e1f362530889 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -644,6 +644,72 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); } +static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + /* + * This is a "fake" workaround defined by software to ensure we + * maintain reliable, backward-compatible behavior for userspace with + * regards to how nested MI_BATCH_BUFFER_START commands are handled. + * + * The per-context setting of MI_MODE[12] determines whether the bits + * of a nested MI_BATCH_BUFFER_START instruction should be interpreted + * in the traditional manner or whether they should instead use a new + * tgl+ meaning that breaks backward compatibility, but allows nesting + * into 3rd-level batchbuffers. When this new capability was first + * added in TGL, it remained off by default unless a context + * intentionally opted in to the new behavior. However Xe_HPG now + * flips this on by default and requires that we explicitly opt out if + * we don't want the new behavior. + * + * From a SW perspective, we want to maintain the backward-compatible + * behavior for userspace, so we'll apply a fake workaround to set it + * back to the legacy behavior on platforms where the hardware default + * is to break compatibility. At the moment there is no Linux + * userspace that utilizes third-level batchbuffers, so this will avoid + * userspace from needing to make any changes. using the legacy + * meaning is the correct thing to do. If/when we have userspace + * consumers that want to utilize third-level batch nesting, we can + * provide a context parameter to allow them to opt-in. + */ + wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); +} + +static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. + */ + if (engine->class == COPY_ENGINE_CLASS) { + mocs = engine->gt->mocs.uc_index; + wa_write_clr_set(wal, + BLIT_CCTL(engine->mmio_base), + BLIT_CCTL_MASK, + BLIT_CCTL_MOCS(mocs, mocs)); + } +} + +/* + * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround + * defined by the hardware team, but it programming general context registers. + * Adding those context register programming in context workaround + * allow us to use the wa framework for proper application and validation. + */ +static void +gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + + gen12_ctx_gt_mocs_init(engine, wal); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -651,11 +717,19 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, { struct drm_i915_private *i915 = engine->i915; - if (engine->class != RENDER_CLASS) - return; - wa_init_start(wal, name, engine->name); + /* Applies to all engines */ + /* + * Fake workarounds are not the actual workaround but + * programming of context registers using workaround framework. + */ + if (GRAPHICS_VER(i915) >= 12) + gen12_ctx_gt_fake_wa_init(engine, wal); + + if (engine->class != RENDER_CLASS) + goto done; + if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) @@ -685,6 +759,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, else MISSING_CASE(GRAPHICS_VER(i915)); +done: wa_init_finish(wal); } @@ -729,7 +804,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } static void -gen4_gt_workarounds_init(struct drm_i915_private *i915, +gen4_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ @@ -737,29 +812,29 @@ gen4_gt_workarounds_init(struct drm_i915_private *i915, } static void -g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen4_gt_workarounds_init(i915, wal); + gen4_gt_workarounds_init(gt, wal); /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); } static void -ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - g4x_gt_workarounds_init(i915, wal); + g4x_gt_workarounds_init(gt, wal); wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); } static void -snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { } static void -ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ wa_masked_dis(wal, @@ -775,7 +850,7 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* WaForceL3Serialization:vlv */ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); @@ -788,7 +863,7 @@ vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* L3 caching of data atomics doesn't work -- disable it. */ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); @@ -803,8 +878,10 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = gt->i915; + /* WaDisableKillLogic:bxt,skl,kbl */ if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) wa_write_or(wal, @@ -829,9 +906,9 @@ gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal } static void -skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); /* WaDisableGafsUnitClkGating:skl */ wa_write_or(wal, @@ -839,19 +916,19 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0)) + if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } static void -kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(i915, 0, STEP_C0)) + if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -868,15 +945,15 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); } static void -cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); /* WaDisableGafsUnitClkGating:cfl */ wa_write_or(wal, @@ -901,21 +978,21 @@ static void __set_mcr_steering(struct i915_wa_list *wal, wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); } -static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, +static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, unsigned int slice, unsigned int subslice) { - drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); + drm_dbg(>->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); } static void -icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { - const struct sseu_dev_info *sseu = &i915->gt.info.sseu; + const struct sseu_dev_info *sseu = >->info.sseu; unsigned int slice, subslice; - GEM_BUG_ON(GRAPHICS_VER(i915) < 11); + GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11); GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); slice = 0; @@ -935,16 +1012,15 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * then we can just rely on the default steering and won't need to * worry about explicitly re-steering L3BANK reads later. */ - if (i915->gt.info.l3bank_mask & BIT(subslice)) - i915->gt.steering_table[L3BANK] = NULL; + if (gt->info.l3bank_mask & BIT(subslice)) + gt->steering_table[L3BANK] = NULL; - __add_mcr_wa(i915, wal, slice, subslice); + __add_mcr_wa(gt, wal, slice, subslice); } static void xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = gt->i915; const struct sseu_dev_info *sseu = >->info.sseu; unsigned long slice, subslice = 0, slice_mask = 0; u64 dss_mask = 0; @@ -1008,7 +1084,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) WARN_ON(subslice > GEN_DSS_PER_GSLICE); WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); - __add_mcr_wa(i915, wal, slice, subslice); + __add_mcr_wa(gt, wal, slice, subslice); /* * SQIDI ranges are special because they use different steering @@ -1024,9 +1100,11 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } static void -icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - icl_wa_init_mcr(i915, wal); + struct drm_i915_private *i915 = gt->i915; + + icl_wa_init_mcr(gt, wal); /* WaModifyGamTlbPartitioning:icl */ wa_write_clr_set(wal, @@ -1077,10 +1155,9 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) * the engine-specific workaround list. */ static void -wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal) +wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal) { struct intel_engine_cs *engine; - struct intel_gt *gt = &i915->gt; int id; for_each_engine(engine, gt, id) { @@ -1094,22 +1171,23 @@ wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -gen12_gt_workarounds_init(struct drm_i915_private *i915, - struct i915_wa_list *wal) +gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - icl_wa_init_mcr(i915, wal); + icl_wa_init_mcr(gt, wal); /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */ - wa_14011060649(i915, wal); + wa_14011060649(gt, wal); /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void -tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen12_gt_workarounds_init(i915, wal); + struct drm_i915_private *i915 = gt->i915; + + gen12_gt_workarounds_init(gt, wal); /* Wa_1409420604:tgl */ if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) @@ -1130,9 +1208,11 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen12_gt_workarounds_init(i915, wal); + struct drm_i915_private *i915 = gt->i915; + + gen12_gt_workarounds_init(gt, wal); /* Wa_1607087056:dg1 */ if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) @@ -1154,60 +1234,62 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - xehp_init_mcr(&i915->gt, wal); + xehp_init_mcr(gt, wal); } static void -gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) +gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = gt->i915; + if (IS_XEHPSDV(i915)) - xehpsdv_gt_workarounds_init(i915, wal); + xehpsdv_gt_workarounds_init(gt, wal); else if (IS_DG1(i915)) - dg1_gt_workarounds_init(i915, wal); + dg1_gt_workarounds_init(gt, wal); else if (IS_TIGERLAKE(i915)) - tgl_gt_workarounds_init(i915, wal); + tgl_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 12) - gen12_gt_workarounds_init(i915, wal); + gen12_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 11) - icl_gt_workarounds_init(i915, wal); + icl_gt_workarounds_init(gt, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - cfl_gt_workarounds_init(i915, wal); + cfl_gt_workarounds_init(gt, wal); else if (IS_GEMINILAKE(i915)) - glk_gt_workarounds_init(i915, wal); + glk_gt_workarounds_init(gt, wal); else if (IS_KABYLAKE(i915)) - kbl_gt_workarounds_init(i915, wal); + kbl_gt_workarounds_init(gt, wal); else if (IS_BROXTON(i915)) - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); else if (IS_SKYLAKE(i915)) - skl_gt_workarounds_init(i915, wal); + skl_gt_workarounds_init(gt, wal); else if (IS_HASWELL(i915)) - hsw_gt_workarounds_init(i915, wal); + hsw_gt_workarounds_init(gt, wal); else if (IS_VALLEYVIEW(i915)) - vlv_gt_workarounds_init(i915, wal); + vlv_gt_workarounds_init(gt, wal); else if (IS_IVYBRIDGE(i915)) - ivb_gt_workarounds_init(i915, wal); + ivb_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 6) - snb_gt_workarounds_init(i915, wal); + snb_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 5) - ilk_gt_workarounds_init(i915, wal); + ilk_gt_workarounds_init(gt, wal); else if (IS_G4X(i915)) - g4x_gt_workarounds_init(i915, wal); + g4x_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 4) - gen4_gt_workarounds_init(i915, wal); + gen4_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) <= 8) ; else MISSING_CASE(GRAPHICS_VER(i915)); } -void intel_gt_init_workarounds(struct drm_i915_private *i915) +void intel_gt_init_workarounds(struct intel_gt *gt) { - struct i915_wa_list *wal = &i915->gt_wa_list; + struct i915_wa_list *wal = >->wa_list; wa_init_start(wal, "GT", "global"); - gt_init_workarounds(i915, wal); + gt_init_workarounds(gt, wal); wa_init_finish(wal); } @@ -1278,7 +1360,7 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(gt, >->i915->gt_wa_list); + wa_list_apply(gt, >->wa_list); } static bool wa_list_verify(struct intel_gt *gt, @@ -1310,7 +1392,7 @@ static bool wa_list_verify(struct intel_gt *gt, bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(gt, >->i915->gt_wa_list, from); + return wa_list_verify(gt, >->wa_list, from); } __maybe_unused @@ -1604,6 +1686,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_NOPID(base))); } +/* + * engine_fake_wa_init(), a place holder to program the registers + * which are not part of an official workaround defined by the + * hardware team. + * Adding programming of those register inside workaround will + * allow utilizing wa framework to proper application and verification. + */ +static void +engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * RING_CMD_CCTL are need to be programed to un-cached + * for memory writes and reads outputted by Command + * Streamers on Gen12 onward platforms. + */ + if (GRAPHICS_VER(engine->i915) >= 12) { + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + RING_CMD_CCTL(engine->mmio_base), + CMD_CCTL_MOCS_MASK, + CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + } +} static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2044,6 +2151,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; + engine_fake_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else @@ -2067,12 +2176,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->gt, &engine->wa_list); } -struct mcr_range { - u32 start; - u32 end; -}; - -static const struct mcr_range mcr_ranges_gen8[] = { +static const struct i915_range mcr_ranges_gen8[] = { { .start = 0x5500, .end = 0x55ff }, { .start = 0x7000, .end = 0x7fff }, { .start = 0x9400, .end = 0x97ff }, @@ -2081,7 +2185,7 @@ static const struct mcr_range mcr_ranges_gen8[] = { {}, }; -static const struct mcr_range mcr_ranges_gen12[] = { +static const struct i915_range mcr_ranges_gen12[] = { { .start = 0x8150, .end = 0x815f }, { .start = 0x9520, .end = 0x955f }, { .start = 0xb100, .end = 0xb3ff }, @@ -2090,7 +2194,7 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; -static const struct mcr_range mcr_ranges_xehp[] = { +static const struct i915_range mcr_ranges_xehp[] = { { .start = 0x4000, .end = 0x4aff }, { .start = 0x5200, .end = 0x52ff }, { .start = 0x5400, .end = 0x7fff }, @@ -2109,7 +2213,7 @@ static const struct mcr_range mcr_ranges_xehp[] = { static bool mcr_range(struct drm_i915_private *i915, u32 offset) { - const struct mcr_range *mcr_ranges; + const struct i915_range *mcr_ranges; int i; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h index 15abb68b6c00..9beaab77c7f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -24,7 +24,7 @@ static inline void intel_wa_list_free(struct i915_wa_list *wal) void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); int intel_engine_emit_ctx_wa(struct i915_request *rq); -void intel_gt_init_workarounds(struct drm_i915_private *i915); +void intel_gt_init_workarounds(struct intel_gt *gt); void intel_gt_apply_workarounds(struct intel_gt *gt); bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2c1af030310c..8b89215afe46 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -376,6 +376,8 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; + INIT_LIST_HEAD(&engine->pinned_contexts_list); + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); if (!engine->sched_engine) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 317eebf086c3..6e6e4d747cca 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -290,7 +290,7 @@ static int live_heartbeat_fast(void *arg) int err = 0; /* Check that the heartbeat ticks at the desired rate. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return 0; for_each_engine(engine, gt, id) { @@ -352,7 +352,7 @@ static int live_heartbeat_off(void *arg) int err = 0; /* Check that we can turn off heartbeat and not interrupt VIP */ - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return 0; for_each_engine(engine, gt, id) { diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..b367ecfa42de 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -992,7 +992,7 @@ static int live_timeslice_preempt(void *arg) * need to preempt the current task and replace it with another * ready task. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); @@ -1122,7 +1122,7 @@ static int live_timeslice_rewind(void *arg) * but only a few of those requests, forcing us to rewind the * RING_TAIL of the original request. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; for_each_engine(engine, gt, id) { @@ -1299,7 +1299,7 @@ static int live_timeslice_queue(void *arg) * ELSP[1] is already occupied, so must rely on timeslicing to * eject ELSP[0] in favour of the queue.) */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); @@ -1420,7 +1420,7 @@ static int live_timeslice_nopreempt(void *arg) * We should not timeslice into a request that is marked with * I915_REQUEST_NOPREEMPT. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; if (igt_spinner_init(&spin, gt)) @@ -2260,7 +2260,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) int err; /* Preempt cancel non-preemptible spinner in ELSP0 */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return 0; if (!intel_has_reset_engine(arg->engine->gt)) @@ -2316,7 +2316,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) struct i915_request *rq; int err; - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return 0; if (!intel_has_reset_engine(engine->gt)) @@ -3375,7 +3375,7 @@ static int live_preempt_timeout(void *arg) * Check that we force preemption to occur by cancelling the previous * context if it refuses to yield the GPU. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return 0; if (!intel_has_reset_engine(gt)) @@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke, if (batch) { struct i915_address_space *vm; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(batch, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) @@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ve[n] = intel_engine_create_virtual(siblings, nsibling); + ve[n] = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve[n])) { err = PTR_ERR(ve[n]); nctx = n; @@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. */ - ve = intel_engine_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt, i915_request_add(rq); } - ce = intel_engine_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt, /* XXX We do not handle oversubscription and fairness with normal rq */ for (n = 0; n < nsibling; n++) { - ce = intel_engine_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt, if (err) goto out_scratch; - ve = intel_engine_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt, if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ve = intel_engine_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_spin; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2c1ed32ca5ac..7e2d99dd012d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -117,7 +117,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; @@ -789,7 +789,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } skip: @@ -1098,7 +1098,7 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } count++; @@ -1108,7 +1108,7 @@ static int __igt_reset_engines(struct intel_gt *gt, pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id); + rq->fence.seqno, rq->context->guc_id.id); i915_request_put(rq); GEM_TRACE_DUMP(); @@ -1596,7 +1596,7 @@ static int igt_reset_evict_ppgtt(void *arg) if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) return 0; - ppgtt = i915_ppgtt_create(gt); + ppgtt = i915_ppgtt_create(gt, 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index e623ac45f4aa..962e91ba3be4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -66,7 +66,7 @@ reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); - gt_init_workarounds(gt->i915, &lists->gt_wa_list); + gt_init_workarounds(gt, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); for_each_engine(engine, gt, id) { diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 8ff582222aff..ba10bd374cee 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index fbfcae727d7f..6e228343e8cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -3,6 +3,7 @@ * Copyright © 2014-2019 Intel Corporation */ +#include "gem/i915_gem_lmem.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" @@ -647,7 +648,14 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) u64 flags; int ret; - obj = i915_gem_object_create_shmem(gt->i915, size); + if (HAS_LMEM(gt->i915)) + obj = i915_gem_object_create_lmem(gt->i915, size, + I915_BO_ALLOC_CPU_CLEAR | + I915_BO_ALLOC_CONTIGUOUS | + I915_BO_ALLOC_PM_EARLY); + else + obj = i915_gem_object_create_shmem(gt->i915, size); + if (IS_ERR(obj)) return ERR_CAST(obj); @@ -748,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* + * Ensure intel_uncore_write_fw can be used rather than + * intel_uncore_write. + */ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* + * This register is used by the i915 and GuC for MMIO based + * communication. Once we are in this code CTBs are the only + * method the i915 uses to communicate with the GuC so it is + * safe to write to this register (a value of 0 is NOP for MMIO + * communication). If we ever start mixing CTBs and MMIOs a new + * register will have to be chosen. This function is also used + * to enforce ordering of a work queue item write and an update + * to the process descriptor. When a work queue is being used, + * CTBs are also the only mechanism of communication. + */ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2e27fe59786b..31cf9fb48c7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -22,74 +22,155 @@ struct __guc_ads_blob; -/* - * Top level structure of GuC. It handles firmware loading and manages client - * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList - * submission. +/** + * struct intel_guc - Top level structure of GuC. + * + * It handles firmware loading and manages client pool. intel_guc owns an + * i915_sched_engine for submission. */ struct intel_guc { + /** @fw: the GuC firmware */ struct intel_uc_fw fw; + /** @log: sub-structure containing GuC log related data and objects */ struct intel_guc_log log; + /** @ct: the command transport communication channel */ struct intel_guc_ct ct; + /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; - /* Global engine used to submit requests to GuC */ + /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; + /** + * @stalled_request: if GuC can't process a request for any reason, we + * save it until GuC restarts processing. No other request can be + * submitted until the stalled request is processed. + */ struct i915_request *stalled_request; + /** + * @submission_stall_reason: reason why submission is stalled + */ + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ + /** @irq_lock: protects GuC irq state */ spinlock_t irq_lock; + /** + * @msg_enabled_mask: mask of events that are processed when receiving + * an INTEL_GUC_ACTION_DEFAULT G2H message. + */ unsigned int msg_enabled_mask; + /** + * @outstanding_submission_g2h: number of outstanding GuC to Host + * responses related to GuC submission, used to determine if the GT is + * idle + */ atomic_t outstanding_submission_g2h; + /** @interrupts: pointers to GuC interrupt-managing functions. */ struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); } interrupts; - /* - * contexts_lock protects the pool of free guc ids and a linked list of - * guc ids available to be stolen + /** + * @submission_state: sub-structure for submission state protected by + * single lock + */ + struct { + /** + * @lock: protects everything in submission_state, + * ce->guc_id.id, and ce->guc_id.ref when transitioning in and + * out of zero + */ + spinlock_t lock; + /** + * @guc_ids: used to allocate new guc_ids, single-lrc + */ + struct ida guc_ids; + /** + * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc + */ + unsigned long *guc_ids_bitmap; + /** + * @guc_id_list: list of intel_context with valid guc_ids but no + * refs + */ + struct list_head guc_id_list; + /** + * @destroyed_contexts: list of contexts waiting to be destroyed + * (deregistered with the GuC) + */ + struct list_head destroyed_contexts; + /** + * @destroyed_worker: worker to deregister contexts, need as we + * need to take a GT PM reference and can't from destroy + * function as it might be in an atomic context (no sleeping) + */ + struct work_struct destroyed_worker; + } submission_state; + + /** + * @submission_supported: tracks whether we support GuC submission on + * the current platform */ - spinlock_t contexts_lock; - struct ida guc_ids; - struct list_head guc_id_list; - bool submission_supported; + /** @submission_selected: tracks whether the user enabled GuC submission */ bool submission_selected; + /** + * @rc_supported: tracks whether we support GuC rc on the current platform + */ bool rc_supported; + /** @rc_selected: tracks whether the user enabled GuC rc */ bool rc_selected; + /** @ads_vma: object allocated to hold the GuC ADS */ struct i915_vma *ads_vma; + /** @ads_blob: contents of the GuC ADS */ struct __guc_ads_blob *ads_blob; + /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; + /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ void *lrc_desc_pool_vaddr; - /* guc_id to intel_context lookup */ + /** + * @context_lookup: used to resolve intel_context from guc_id, if a + * context is present in this structure it is registered with the GuC + */ struct xarray context_lookup; - /* Control params for fw initialization */ + /** @params: Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; - /* GuC's FW specific registers used in MMIO send */ + /** @send_regs: GuC's FW specific registers used for sending MMIO H2G */ struct { u32 base; unsigned int count; enum forcewake_domains fw_domains; } send_regs; - /* register used to send interrupts to the GuC FW */ + /** @notify_reg: register used to send interrupts to the GuC FW */ i915_reg_t notify_reg; - /* Store msg (e.g. log flush) that we see while CTBs are disabled */ + /** + * @mmio_msg: notification bitmask that the GuC writes in one of its + * registers when the CT channel is disabled, to be processed when the + * channel is back up. + */ u32 mmio_msg; - /* To serialize the intel_guc_send actions */ + /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; }; @@ -295,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 6926919bcac6..621c893a009f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt, for_each_engine(engine, gt, id) { u8 guc_class = engine_class_to_guc_class(engine->class); - system_info->mapping_table[guc_class][engine->instance] = + system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] = engine->instance; } } @@ -349,6 +349,8 @@ static void fill_engine_enable_masks(struct intel_gt *gt, info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); } +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) static int guc_prep_golden_context(struct intel_guc *guc, struct __guc_ads_blob *blob) { @@ -396,7 +398,18 @@ static int guc_prep_golden_context(struct intel_guc *guc, if (!blob) continue; - blob->ads.eng_state_size[guc_class] = real_size; + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists context. This is required to + * allow the GuC to restore just the engine state when a + * watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). + */ + blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE; blob->ads.golden_context_lrca[guc_class] = addr_ggtt; addr_ggtt += alloc_size; } @@ -436,11 +449,6 @@ static void guc_init_golden_context(struct intel_guc *guc) u8 engine_class, guc_class; u8 *ptr; - /* Skip execlist and PPGTT registers + HWSP */ - const u32 lr_hw_context_size = 80 * sizeof(u32); - const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + - lr_hw_context_size; - if (!intel_uc_uses_guc_submission(>->uc)) return; @@ -476,12 +484,12 @@ static void guc_init_golden_context(struct intel_guc *guc) continue; } - GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != + real_size - LRC_SKIP_SIZE); GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); addr_ggtt += alloc_size; - shmem_read(engine->default_state, skip_size, ptr + skip_size, - real_size - skip_size); + shmem_read(engine->default_state, 0, ptr, real_size); ptr += alloc_size; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 22b4733b55e2..a0cc34be7b56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -168,12 +168,15 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), }; + int ret; GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); GEM_BUG_ON(size % SZ_4K); /* CT registration must go over MMIO */ - return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + + return ret > 0 ? -EPROTO : ret; } static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, @@ -188,8 +191,8 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, err = guc_action_register_ct_buffer(ct_to_guc(ct), type, desc_addr, buff_addr, size); if (unlikely(err)) - CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n", - guc_ct_buffer_type_to_str(type), err); + CT_ERROR(ct, "Failed to register %s buffer (%pe)\n", + guc_ct_buffer_type_to_str(type), ERR_PTR(err)); return err; } @@ -201,11 +204,14 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), }; + int ret; GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); /* CT deregistration must go over MMIO */ - return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + + return ret > 0 ? -EPROTO : ret; } static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) @@ -213,8 +219,8 @@ static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type); if (unlikely(err)) - CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n", - guc_ct_buffer_type_to_str(type), err); + CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n", + guc_ct_buffer_type_to_str(type), ERR_PTR(err)); return err; } @@ -377,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* - * This register is used by the i915 and GuC for MMIO based - * communication. Once we are in this code CTBs are the only - * method the i915 uses to communicate with the GuC so it is - * safe to write to this register (a value of 0 is NOP for MMIO - * communication). If we ever start mixing CTBs and MMIOs a new - * register will have to be chosen. - */ - intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); - } else { - /* wmb() sufficient for a barrier if in smem */ - wmb(); - } -} - static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, @@ -468,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct, * make sure H2G buffer update and LRC tail update (if this triggering a * submission) are visible before updating the descriptor tail */ - write_barrier(ct); + intel_guc_write_barrier(ct_to_guc(ct)); /* update local copies */ ctb->tail = tail; @@ -522,9 +506,6 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done - if (unlikely(err)) - DRM_ERROR("CT: fence %u err %d\n", req->fence, err); - *status = req->status; return err; } @@ -722,8 +703,11 @@ retry: err = wait_for_ct_request_update(&request, status); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); - if (unlikely(err)) + if (unlikely(err)) { + CT_ERROR(ct, "No response for request %#x (fence %u)\n", + action[0], request.fence); goto unlink; + } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; @@ -775,8 +759,8 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { - CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", - action[0], ret, status); + CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n", + action[0], ERR_PTR(ret), status); } else if (unlikely(ret)) { CT_DEBUG(ct, "send action %#x returned %d (%#x)\n", action[0], ret, ret); @@ -1042,9 +1026,9 @@ static void ct_incoming_request_worker_func(struct work_struct *w) container_of(w, struct intel_guc_ct, requests.worker); bool done; - done = ct_process_incoming_requests(ct); - if (!done) - queue_work(system_unbound_wq, &ct->requests.worker); + do { + done = ct_process_incoming_requests(ct); + } while (!done); } static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 887c8c8f35db..25f09a420561 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -5,14 +5,14 @@ #include <drm/drm_print.h> -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" +#include "gt/uc/intel_guc_ads.h" +#include "gt/uc/intel_guc_ct.h" +#include "gt/uc/intel_guc_slpc.h" +#include "gt/uc/intel_guc_submission.h" #include "intel_guc.h" #include "intel_guc_debugfs.h" #include "intel_guc_log_debugfs.h" -#include "gt/uc/intel_guc_ct.h" -#include "gt/uc/intel_guc_ads.h" -#include "gt/uc/intel_guc_submission.h" -#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -35,7 +35,7 @@ static int guc_info_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_info); static int guc_registered_contexts_show(struct seq_file *m, void *data) { @@ -49,7 +49,7 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); static int guc_slpc_info_show(struct seq_file *m, void *unused) { @@ -62,7 +62,7 @@ static int guc_slpc_info_show(struct seq_file *m, void *unused) return intel_guc_slpc_print_info(slpc, &p); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); static bool intel_eval_slpc_support(void *data) { @@ -73,7 +73,7 @@ static bool intel_eval_slpc_support(void *data) void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 76fe766ad1bc..196424be0998 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -41,18 +41,21 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) } /* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, - struct intel_uncore *uncore) +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) { u32 rsa[UOS_RSA_SCRATCH_COUNT]; size_t copied; int i; copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa)); - GEM_BUG_ON(copied < sizeof(rsa)); + if (copied < sizeof(rsa)) + return -ENOMEM; for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]); + + return 0; } /* @@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */ - guc_xfer_rsa(&guc->fw, uncore); + ret = guc_xfer_rsa(&guc->fw, uncore); + if (ret) + goto out; /* * Current uCode expects the code to be loaded at 8k; locations below diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index fa4be13c8854..722933e26347 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -52,27 +52,27 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_WQ_SIZE (PAGE_SIZE * 2) - -/* Work queue item header definitions */ +/* + * Work queue item header definitions + * + * Work queue is circular buffer used to submit complex (multi-lrc) submissions + * to the GuC. A work queue item is an entry in the circular buffer. + */ #define WQ_STATUS_ACTIVE 1 #define WQ_STATUS_SUSPENDED 2 #define WQ_STATUS_CMD_ERROR 3 #define WQ_STATUS_ENGINE_ID_NOT_USED 4 #define WQ_STATUS_SUSPENDED_FROM_RESET 5 -#define WQ_TYPE_SHIFT 0 -#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT) -#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT) -#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT) -#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT) -#define WQ_TARGET_SHIFT 10 -#define WQ_LEN_SHIFT 16 -#define WQ_NO_WCFLUSH_WAIT (1 << 27) -#define WQ_PRESENT_WORKLOAD (1 << 28) - -#define WQ_RING_TAIL_SHIFT 20 -#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */ -#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT) +#define WQ_TYPE_BATCH_BUF 0x1 +#define WQ_TYPE_PSEUDO 0x2 +#define WQ_TYPE_INORDER 0x3 +#define WQ_TYPE_NOOP 0x4 +#define WQ_TYPE_MULTI_LRC 0x5 +#define WQ_TYPE_MASK GENMASK(7, 0) +#define WQ_LEN_MASK GENMASK(26, 16) + +#define WQ_GUC_ID_MASK GENMASK(15, 0) +#define WQ_RING_TAIL_MASK GENMASK(28, 18) #define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0) #define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1) @@ -186,7 +186,7 @@ struct guc_process_desc { u32 wq_status; u32 engine_presence; u32 priority; - u32 reserved[30]; + u32 reserved[36]; } __packed; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c index 64e0b86bf258..46026c2c1722 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c @@ -6,7 +6,7 @@ #include <linux/fs.h> #include <drm/drm_print.h> -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" #include "intel_guc.h" #include "intel_guc_log.h" #include "intel_guc_log_debugfs.h" @@ -17,7 +17,7 @@ static int guc_log_dump_show(struct seq_file *m, void *data) return intel_guc_log_dump(m->private, &p, false); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); static int guc_load_err_log_dump_show(struct seq_file *m, void *data) { @@ -25,7 +25,7 @@ static int guc_load_err_log_dump_show(struct seq_file *m, void *data) return intel_guc_log_dump(m->private, &p, true); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); static int guc_log_level_get(void *data, u64 *val) { @@ -109,7 +109,7 @@ static const struct file_operations guc_log_relay_fops = { void intel_guc_log_debugfs_register(struct intel_guc_log *log, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "guc_log_dump", &guc_log_dump_fops, NULL }, { "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL }, { "guc_log_level", &guc_log_level_fops, NULL }, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 87d8dc8f51b9..38b47e73e35d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -11,6 +11,7 @@ #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_heartbeat.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" @@ -28,21 +29,6 @@ /** * DOC: GuC-based command submission * - * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC - * firmware is moving to an updated submission interface and we plan to - * turn submission back on when that lands. The below documentation (and related - * code) matches the old submission model and will be updated as part of the - * upgrade to the new flow. - * - * GuC stage descriptor: - * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. Currently, we only use one - * descriptor. This stage descriptor lets the GuC know about the workqueue and - * process descriptor. Theoretically, it also lets the GuC know about our HW - * contexts (context ID, etc...), but we actually employ a kind of submission - * where the GuC uses the LRCA sent via the work item instead. This is called - * a "proxy" submission. - * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes * a value to the action register (SOFT_SCRATCH_0) along with any data. It then @@ -51,14 +37,85 @@ * processes the request. The kernel driver polls waiting for this update and * then proceeds. * - * Work Items: - * There are several types of work items that the host may place into a - * workqueue, each with its own requirements and limitations. Currently only - * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which - * represents in-order queue. The kernel driver packs ring tail pointer and an - * ELSP context descriptor dword into Work Item. - * See guc_add_request() + * Command Transport buffers (CTBs): + * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host + * - G2H) are a message interface between the i915 and GuC. + * + * Context registration: + * Before a context can be submitted it must be registered with the GuC via a + * H2G. A unique guc_id is associated with each context. The context is either + * registered at request creation time (normal operation) or at submission time + * (abnormal operation, e.g. after a reset). + * + * Context submission: + * The i915 updates the LRC tail value in memory. The i915 must enable the + * scheduling of the context within the GuC for the GuC to actually consider it. + * Therefore, the first time a disabled context is submitted we use a schedule + * enable H2G, while follow up submissions are done via the context submit H2G, + * which informs the GuC that a previously enabled context has new work + * available. + * + * Context unpin: + * To unpin a context a H2G is used to disable scheduling. When the + * corresponding G2H returns indicating the scheduling disable operation has + * completed it is safe to unpin the context. While a disable is in flight it + * isn't safe to resubmit the context so a fence is used to stall all future + * requests of that context until the G2H is returned. + * + * Context deregistration: + * Before a context can be destroyed or if we steal its guc_id we must + * deregister the context with the GuC via H2G. If stealing the guc_id it isn't + * safe to submit anything to this guc_id until the deregister completes so a + * fence is used to stall all requests associated with this guc_id until the + * corresponding G2H returns indicating the guc_id has been deregistered. + * + * submission_state.guc_ids: + * Unique number associated with private GuC context data passed in during + * context registration / submission / deregistration. 64k available. Simple ida + * is used for allocation. + * + * Stealing guc_ids: + * If no guc_ids are available they can be stolen from another context at + * request creation time if that context is unpinned. If a guc_id can't be found + * we punt this problem to the user as we believe this is near impossible to hit + * during normal use cases. + * + * Locking: + * In the GuC submission code we have 3 basic spin locks which protect + * everything. Details about each below. + * + * sched_engine->lock + * This is the submission lock for all contexts that share an i915 schedule + * engine (sched_engine), thus only one of the contexts which share a + * sched_engine can be submitting at a time. Currently only one sched_engine is + * used for all of GuC submission but that could change in the future. + * + * guc->submission_state.lock + * Global lock for GuC submission state. Protects guc_ids and destroyed contexts + * list. + * + * ce->guc_state.lock + * Protects everything under ce->guc_state. Ensures that a context is in the + * correct state before issuing a H2G. e.g. We don't issue a schedule disable + * on a disabled context (bad idea), we don't issue a schedule enable when a + * schedule disable is in flight, etc... Also protects list of inflight requests + * on the context and the priority management state. Lock is individual to each + * context. + * + * Lock ordering rules: + * sched_engine->lock -> ce->guc_state.lock + * guc->submission_state.lock -> ce->guc_state.lock * + * Reset races: + * When a full GT reset is triggered it is assumed that some G2H responses to + * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be + * fatal as we do certain operations upon receiving a G2H (e.g. destroy + * contexts, release guc_ids, etc...). When this occurs we can scrub the + * context state and cleanup appropriately, however this is quite racey. + * To avoid races, the reset code must disable submission before scrubbing for + * the missing G2H, while the submission code must check for submission being + * disabled and skip sending H2Gs and updating context states when it is. Both + * sides must also make sure to hold the relevant locks. */ /* GuC Virtual Engine */ @@ -68,91 +125,56 @@ struct guc_virtual_engine { }; static struct intel_context * -guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags); + +static struct intel_context * +guc_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width); #define GUC_REQUEST_SIZE 64 /* bytes */ /* - * Below is a set of functions which control the GuC scheduling state which do - * not require a lock as all state transitions are mutually exclusive. i.e. It - * is not possible for the context pinning code and submission, for the same - * context, to be executing simultaneously. We still need an atomic as it is - * possible for some of the bits to changing at the same time though. + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous + * per the GuC submission interface. A different allocation algorithm is used + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to + * partition the guc_id space. We believe the number of multi-lrc contexts in + * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for + * multi-lrc. */ -#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) -#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) -#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) -static inline bool context_enabled(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_ENABLED); -} - -static inline void set_context_enabled(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_enabled(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_pending_enable(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_PENDING_ENABLE); -} - -static inline void set_context_pending_enable(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_pending_enable(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_registered(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_REGISTERED); -} - -static inline void set_context_registered(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_registered(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} +#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16) /* * Below is a set of functions which control the GuC scheduling state which - * require a lock, aside from the special case where the functions are called - * from guc_lrc_desc_pin(). In that case it isn't possible for any other code - * path to be executing on the context. + * require a lock. */ #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) #define SCHED_STATE_DESTROYED BIT(1) #define SCHED_STATE_PENDING_DISABLE BIT(2) #define SCHED_STATE_BANNED BIT(3) -#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_ENABLED BIT(4) +#define SCHED_STATE_PENDING_ENABLE BIT(5) +#define SCHED_STATE_REGISTERED BIT(6) +#define SCHED_STATE_BLOCKED_SHIFT 7 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) + static inline void init_sched_state(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() */ - atomic_set(&ce->guc_sched_state_no_lock, 0); - ce->guc_state.sched_state = 0; + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; +} + +__maybe_unused +static bool sched_state_is_init(struct intel_context *ce) +{ + /* + * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after + * suspend. + */ + return !(ce->guc_state.sched_state &= + ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); } static inline bool @@ -165,7 +187,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce) static inline void set_context_wait_for_deregister_to_register(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() without lock */ + lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state |= SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; } @@ -225,6 +247,57 @@ static inline void clr_context_banned(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; } +static inline bool context_enabled(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_ENABLED; +} + +static inline void set_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_ENABLED; +} + +static inline void clr_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; +} + +static inline bool context_pending_enable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; +} + +static inline void set_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; +} + +static inline bool context_registered(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; +} + +static inline void set_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -233,7 +306,6 @@ static inline u32 context_blocked(struct intel_context *ce) static inline void incr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state += SCHED_STATE_BLOCKED; @@ -243,7 +315,6 @@ static inline void incr_context_blocked(struct intel_context *ce) static inline void decr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ @@ -251,14 +322,39 @@ static inline void decr_context_blocked(struct intel_context *ce) ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } +static inline bool context_has_committed_requests(struct intel_context *ce) +{ + return !!ce->guc_state.number_committed_requests; +} + +static inline void incr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ++ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + +static inline void decr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + --ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + +static struct intel_context * +request_to_scheduling_context(struct i915_request *rq) +{ + return intel_context_to_parent(rq->context); +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { - return ce->guc_id == GUC_INVALID_LRC_ID; + return ce->guc_id.id == GUC_INVALID_LRC_ID; } static inline void set_context_guc_id_invalid(struct intel_context *ce) { - ce->guc_id = GUC_INVALID_LRC_ID; + ce->guc_id.id = GUC_INVALID_LRC_ID; } static inline struct intel_guc *ce_to_guc(struct intel_context *ce) @@ -271,6 +367,104 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } +/* + * When using multi-lrc submission a scratch memory area is reserved in the + * parent's context state for the process descriptor, work queue, and handshake + * between the parent + children contexts to insert safe preemption points + * between each of the BBs. Currently the scratch area is sized to a page. + * + * The layout of this scratch area is below: + * 0 guc_process_desc + * + sizeof(struct guc_process_desc) child go + * + CACHELINE_BYTES child join[0] + * ... + * + CACHELINE_BYTES child join[n - 1] + * ... unused + * PARENT_SCRATCH_SIZE / 2 work queue start + * ... work queue + * PARENT_SCRATCH_SIZE - 1 work queue end + */ +#define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) +#define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) + +struct sync_semaphore { + u32 semaphore; + u8 unused[CACHELINE_BYTES - sizeof(u32)]; +}; + +struct parent_scratch { + struct guc_process_desc pdesc; + + struct sync_semaphore go; + struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; + + u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - + sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; + + u32 wq[WQ_SIZE / sizeof(u32)]; +}; + +static u32 __get_parent_scratch_offset(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->parallel.guc.parent_page); + + return ce->parallel.guc.parent_page * PAGE_SIZE; +} + +static u32 __get_wq_offset(struct intel_context *ce) +{ + BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); + + return __get_parent_scratch_offset(ce) + WQ_OFFSET; +} + +static struct parent_scratch * +__get_parent_scratch(struct intel_context *ce) +{ + BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); + BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); + + /* + * Need to subtract LRC_STATE_OFFSET here as the + * parallel.guc.parent_page is the offset into ce->state while + * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. + */ + return (struct parent_scratch *) + (ce->lrc_reg_state + + ((__get_parent_scratch_offset(ce) - + LRC_STATE_OFFSET) / sizeof(u32))); +} + +static struct guc_process_desc * +__get_process_desc(struct intel_context *ce) +{ + struct parent_scratch *ps = __get_parent_scratch(ce); + + return &ps->pdesc; +} + +static u32 *get_wq_pointer(struct guc_process_desc *desc, + struct intel_context *ce, + u32 wqi_size) +{ + /* + * Check for space in work queue. Caching a value of head pointer in + * intel_context structure in order reduce the number accesses to shared + * GPU memory which may be across a PCIe bus. + */ +#define AVAILABLE_SPACE \ + CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) + if (wqi_size > AVAILABLE_SPACE) { + ce->parallel.guc.wqi_head = READ_ONCE(desc->head); + + if (wqi_size > AVAILABLE_SPACE) + return NULL; + } +#undef AVAILABLE_SPACE + + return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; +} + static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) { struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; @@ -352,20 +546,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_unlock_irqrestore(&guc->context_lookup, flags); } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + static int guc_submission_send_busy_loop(struct intel_guc *guc, const u32 *action, u32 len, u32 g2h_len_dw, bool loop) { - int err; - - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + /* + * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), + * so we don't handle the case where we don't get a reply because we + * aborted the send due to the channel being busy. + */ + GEM_BUG_ON(g2h_len_dw && !loop); - if (!err && g2h_len_dw) + if (g2h_len_dw) atomic_inc(&guc->outstanding_submission_g2h); - return err; + return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); } int intel_guc_wait_for_pending_msg(struct intel_guc *guc, @@ -421,15 +624,17 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); -static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) +static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) { int err = 0; - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); u32 action[3]; int len = 0; u32 g2h_len_dw = 0; bool enabled; + lockdep_assert_held(&rq->engine->sched_engine->lock); + /* * Corner case where requests were sitting in the priority list or a * request resubmitted after the context was banned. @@ -437,41 +642,34 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(intel_context_is_banned(ce))) { i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(ce->engine); - goto out; + return 0; } - GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); - /* - * Corner case where the GuC firmware was blown away and reloaded while - * this context was pinned. - */ - if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { - err = guc_lrc_desc_pin(ce, false); - if (unlikely(err)) - goto out; - } + spin_lock(&ce->guc_state.lock); /* * The request / context will be run on the hardware when scheduling - * gets enabled in the unblock. + * gets enabled in the unblock. For multi-lrc we still submit the + * context to move the LRC tails. */ - if (unlikely(context_blocked(ce))) + if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) goto out; - enabled = context_enabled(ce); + enabled = context_enabled(ce) || context_blocked(ce); if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; action[len++] = GUC_CONTEXT_ENABLE; set_context_pending_enable(ce); intel_context_get(ce); g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; } else { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; } err = intel_guc_send_nb(guc, action, len, g2h_len_dw); @@ -479,6 +677,18 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) trace_intel_context_sched_enable(ce); atomic_inc(&guc->outstanding_submission_g2h); set_context_enabled(ce); + + /* + * Without multi-lrc KMD does the submission step (moving the + * lrc tail) so enabling scheduling is sufficient to submit the + * context. This isn't the case in multi-lrc submission as the + * GuC needs to move the tails, hence the need for another H2G + * to submit a multi-lrc context after enabling scheduling. + */ + if (intel_context_is_parent(ce)) { + action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; + err = intel_guc_send_nb(guc, action, len - 1, 0); + } } else if (!enabled) { clr_context_pending_enable(ce); intel_context_put(ce); @@ -487,9 +697,22 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) trace_i915_request_guc_submit(rq); out: + spin_unlock(&ce->guc_state.lock); return err; } +static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) +{ + int ret = __guc_add_request(guc, rq); + + if (unlikely(ret == -EBUSY)) { + guc->stalled_request = rq; + guc->submission_stall_reason = STALL_ADD_REQUEST; + } + + return ret; +} + static inline void guc_set_lrc_tail(struct i915_request *rq) { rq->context->lrc_reg_state[CTX_RING_TAIL] = @@ -501,6 +724,135 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } +static bool is_multi_lrc_rq(struct i915_request *rq) +{ + return intel_context_is_parallel(rq->context); +} + +static bool can_merge_rq(struct i915_request *rq, + struct i915_request *last) +{ + return request_to_scheduling_context(rq) == + request_to_scheduling_context(last); +} + +static u32 wq_space_until_wrap(struct intel_context *ce) +{ + return (WQ_SIZE - ce->parallel.guc.wqi_tail); +} + +static void write_wqi(struct guc_process_desc *desc, + struct intel_context *ce, + u32 wqi_size) +{ + BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); + + /* + * Ensure WQI are visible before updating tail + */ + intel_guc_write_barrier(ce_to_guc(ce)); + + ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & + (WQ_SIZE - 1); + WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); +} + +static int guc_wq_noop_append(struct intel_context *ce) +{ + struct guc_process_desc *desc = __get_process_desc(ce); + u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); + u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; + + if (!wqi) + return -EBUSY; + + GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + + *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + ce->parallel.guc.wqi_tail = 0; + + return 0; +} + +static int __guc_wq_item_append(struct i915_request *rq) +{ + struct intel_context *ce = request_to_scheduling_context(rq); + struct intel_context *child; + struct guc_process_desc *desc = __get_process_desc(ce); + unsigned int wqi_size = (ce->parallel.number_children + 4) * + sizeof(u32); + u32 *wqi; + u32 len_dw = (wqi_size / sizeof(u32)) - 1; + int ret; + + /* Ensure context is in correct state updating work queue */ + GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); + GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); + GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)); + + /* Insert NOOP if this work queue item will wrap the tail pointer. */ + if (wqi_size > wq_space_until_wrap(ce)) { + ret = guc_wq_noop_append(ce); + if (ret) + return ret; + } + + wqi = get_wq_pointer(desc, ce, wqi_size); + if (!wqi) + return -EBUSY; + + GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + + *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + *wqi++ = ce->lrc.lrca; + *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | + FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); + *wqi++ = 0; /* fence_id */ + for_each_child(ce, child) + *wqi++ = child->ring->tail / sizeof(u64); + + write_wqi(desc, ce, wqi_size); + + return 0; +} + +static int guc_wq_item_append(struct intel_guc *guc, + struct i915_request *rq) +{ + struct intel_context *ce = request_to_scheduling_context(rq); + int ret = 0; + + if (likely(!intel_context_is_banned(ce))) { + ret = __guc_wq_item_append(rq); + + if (unlikely(ret == -EBUSY)) { + guc->stalled_request = rq; + guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; + } + } + + return ret; +} + +static bool multi_lrc_submit(struct i915_request *rq) +{ + struct intel_context *ce = request_to_scheduling_context(rq); + + intel_ring_set_tail(rq->ring, rq->tail); + + /* + * We expect the front end (execbuf IOCTL) to set this flag on the last + * request generated from a multi-BB submission. This indicates to the + * backend (GuC interface) that we should submit this context thus + * submitting all the requests generated in parallel. + */ + return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || + intel_context_is_banned(ce); +} + static int guc_dequeue_one_context(struct intel_guc *guc) { struct i915_sched_engine * const sched_engine = guc->sched_engine; @@ -514,7 +866,17 @@ static int guc_dequeue_one_context(struct intel_guc *guc) if (guc->stalled_request) { submit = true; last = guc->stalled_request; - goto resubmit; + + switch (guc->submission_stall_reason) { + case STALL_REGISTER_CONTEXT: + goto register_context; + case STALL_MOVE_LRC_TAIL: + goto move_lrc_tail; + case STALL_ADD_REQUEST: + goto add_request; + default: + MISSING_CASE(guc->submission_stall_reason); + } } while ((rb = rb_first_cached(&sched_engine->queue))) { @@ -522,8 +884,8 @@ static int guc_dequeue_one_context(struct intel_guc *guc) struct i915_request *rq, *rn; priolist_for_each_request_consume(rq, rn, p) { - if (last && rq->context != last->context) - goto done; + if (last && !can_merge_rq(rq, last)) + goto register_context; list_del_init(&rq->sched.link); @@ -531,33 +893,84 @@ static int guc_dequeue_one_context(struct intel_guc *guc) trace_i915_request_in(rq, 0); last = rq; - submit = true; + + if (is_multi_lrc_rq(rq)) { + /* + * We need to coalesce all multi-lrc requests in + * a relationship into a single H2G. We are + * guaranteed that all of these requests will be + * submitted sequentially. + */ + if (multi_lrc_submit(rq)) { + submit = true; + goto register_context; + } + } else { + submit = true; + } } rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } -done: + +register_context: if (submit) { - guc_set_lrc_tail(last); -resubmit: + struct intel_context *ce = request_to_scheduling_context(last); + + if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) && + !intel_context_is_banned(ce))) { + ret = guc_lrc_desc_pin(ce, false); + if (unlikely(ret == -EPIPE)) { + goto deadlk; + } else if (ret == -EBUSY) { + guc->stalled_request = last; + guc->submission_stall_reason = + STALL_REGISTER_CONTEXT; + goto schedule_tasklet; + } else if (ret != 0) { + GEM_WARN_ON(ret); /* Unexpected */ + goto deadlk; + } + } + +move_lrc_tail: + if (is_multi_lrc_rq(last)) { + ret = guc_wq_item_append(guc, last); + if (ret == -EBUSY) { + goto schedule_tasklet; + } else if (ret != 0) { + GEM_WARN_ON(ret); /* Unexpected */ + goto deadlk; + } + } else { + guc_set_lrc_tail(last); + } + +add_request: ret = guc_add_request(guc, last); - if (unlikely(ret == -EPIPE)) + if (unlikely(ret == -EPIPE)) { + goto deadlk; + } else if (ret == -EBUSY) { + goto schedule_tasklet; + } else if (ret != 0) { + GEM_WARN_ON(ret); /* Unexpected */ goto deadlk; - else if (ret == -EBUSY) { - tasklet_schedule(&sched_engine->tasklet); - guc->stalled_request = last; - return false; } } guc->stalled_request = NULL; + guc->submission_stall_reason = STALL_NONE; return submit; deadlk: sched_engine->tasklet.callback = NULL; tasklet_disable_nosync(&sched_engine->tasklet); return false; + +schedule_tasklet: + tasklet_schedule(&sched_engine->tasklet); + return false; } static void guc_submission_tasklet(struct tasklet_struct *t) @@ -596,10 +1009,18 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) unsigned long index, flags; bool pending_disable, pending_enable, deregister, destroyed, banned; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - /* Flush context */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + /* + * Corner case where the ref count on the object is zero but and + * deregister G2H was lost. In this case we don't touch the ref + * count and finish the destroy of the context. + */ + bool do_put = kref_get_unless_zero(&ce->ref); + + xa_unlock(&guc->context_lookup); + + spin_lock(&ce->guc_state.lock); /* * Once we are at this point submission_disabled() is guaranteed @@ -615,11 +1036,16 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) banned = context_banned(ce); init_sched_state(ce); + spin_unlock(&ce->guc_state.lock); + + GEM_BUG_ON(!do_put && !destroyed); + if (pending_enable || destroyed || deregister) { - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); if (deregister) guc_signal_context_fence(ce); if (destroyed) { + intel_gt_pm_put_async(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -635,14 +1061,20 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) intel_engine_signal_breadcrumbs(ce->engine); } intel_context_sched_disable_unpin(ce); - atomic_dec(&guc->outstanding_submission_g2h); - spin_lock_irqsave(&ce->guc_state.lock, flags); + decr_outstanding_submission_g2h(guc); + + spin_lock(&ce->guc_state.lock); guc_blocked_fence_complete(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock(&ce->guc_state.lock); intel_context_put(ce); } + + if (do_put) + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static inline bool @@ -692,6 +1124,8 @@ static void guc_flush_submissions(struct intel_guc *guc) spin_unlock_irqrestore(&sched_engine->lock, flags); } +static void guc_flush_destroyed_contexts(struct intel_guc *guc); + void intel_guc_submission_reset_prepare(struct intel_guc *guc) { int i; @@ -710,6 +1144,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) spin_unlock_irq(&guc_to_gt(guc)->irq_lock); guc_flush_submissions(guc); + guc_flush_destroyed_contexts(guc); /* * Handle any outstanding G2Hs before reset. Call IRQ handler directly @@ -725,6 +1160,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) wait_for_reset(guc, &guc->outstanding_submission_g2h); } while (!list_empty(&guc->ct.requests.incoming)); } + scrub_guc_desc_for_outstanding_g2h(guc); } @@ -796,16 +1232,14 @@ __unwind_incomplete_requests(struct intel_context *ce) unsigned long flags; spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); - list_for_each_entry_safe(rq, rn, - &ce->guc_active.requests, - sched.link) { + spin_lock(&ce->guc_state.lock); + list_for_each_entry_safe_reverse(rq, rn, + &ce->guc_state.requests, + sched.link) { if (i915_request_completed(rq)) continue; list_del_init(&rq->sched.link); - spin_unlock(&ce->guc_active.lock); - __i915_request_unsubmit(rq); /* Push the request back into the queue for later resubmission. */ @@ -816,64 +1250,111 @@ __unwind_incomplete_requests(struct intel_context *ce) } GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); - list_add_tail(&rq->sched.link, pl); + list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - spin_lock(&ce->guc_active.lock); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } static void __guc_reset_context(struct intel_context *ce, bool stalled) { + bool local_stalled; struct i915_request *rq; + unsigned long flags; u32 head; + int i, number_children = ce->parallel.number_children; + bool skip = false; + struct intel_context *parent = ce; + + GEM_BUG_ON(intel_context_is_child(ce)); intel_context_get(ce); /* - * GuC will implicitly mark the context as non-schedulable - * when it sends the reset notification. Make sure our state - * reflects this change. The context will be marked enabled - * on resubmission. + * GuC will implicitly mark the context as non-schedulable when it sends + * the reset notification. Make sure our state reflects this change. The + * context will be marked enabled on resubmission. + * + * XXX: If the context is reset as a result of the request cancellation + * this G2H is received after the schedule disable complete G2H which is + * wrong as this creates a race between the request cancellation code + * re-submitting the context and this G2H handler. This is a bug in the + * GuC but can be worked around in the meantime but converting this to a + * NOP if a pending enable is in flight as this indicates that a request + * cancellation has occurred. */ - clr_context_enabled(ce); + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (likely(!context_pending_enable(ce))) + clr_context_enabled(ce); + else + skip = true; + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(skip)) + goto out_put; - rq = intel_context_find_active_request(ce); - if (!rq) { - head = ce->ring->tail; - stalled = false; - goto out_replay; - } + /* + * For each context in the relationship find the hanging request + * resetting each context / request as needed + */ + for (i = 0; i < number_children + 1; ++i) { + if (!intel_context_is_pinned(ce)) + goto next_context; - if (!i915_request_started(rq)) - stalled = false; + local_stalled = false; + rq = intel_context_find_active_request(ce); + if (!rq) { + head = ce->ring->tail; + goto out_replay; + } - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - head = intel_ring_wrap(ce->ring, rq->head); - __i915_request_reset(rq, stalled); + if (i915_request_started(rq)) + local_stalled = true; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + head = intel_ring_wrap(ce->ring, rq->head); + + __i915_request_reset(rq, local_stalled && stalled); out_replay: - guc_reset_state(ce, head, stalled); - __unwind_incomplete_requests(ce); - intel_context_put(ce); + guc_reset_state(ce, head, local_stalled && stalled); +next_context: + if (i != number_children) + ce = list_next_entry(ce, parallel.child_link); + } + + __unwind_incomplete_requests(parent); +out_put: + intel_context_put(parent); } void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) { struct intel_context *ce; unsigned long index; + unsigned long flags; if (unlikely(!guc_submission_initialized(guc))) { /* Reset called during driver load? GuC not yet initialised! */ return; } - xa_for_each(&guc->context_lookup, index, ce) - if (intel_context_is_pinned(ce)) + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); + + if (intel_context_is_pinned(ce) && + !intel_context_is_child(ce)) __guc_reset_context(ce, stalled); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + /* GuC is blown away, drop all references to contexts */ xa_destroy(&guc->context_lookup); } @@ -886,10 +1367,10 @@ static void guc_cancel_context_requests(struct intel_context *ce) /* Mark all executing requests as skipped. */ spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + spin_lock(&ce->guc_state.lock); + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } @@ -948,11 +1429,25 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) { struct intel_context *ce; unsigned long index; + unsigned long flags; - xa_for_each(&guc->context_lookup, index, ce) - if (intel_context_is_pinned(ce)) + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); + + if (intel_context_is_pinned(ce) && + !intel_context_is_child(ce)) guc_cancel_context_requests(ce); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + guc_cancel_sched_engine_requests(guc->sched_engine); /* GuC is blown away, drop all references to contexts */ @@ -981,6 +1476,8 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) intel_gt_unpark_heartbeats(guc_to_gt(guc)); } +static void destroyed_worker_func(struct work_struct *w); + /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -1003,9 +1500,17 @@ int intel_guc_submission_init(struct intel_guc *guc) xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); - spin_lock_init(&guc->contexts_lock); - INIT_LIST_HEAD(&guc->guc_id_list); - ida_init(&guc->guc_ids); + spin_lock_init(&guc->submission_state.lock); + INIT_LIST_HEAD(&guc->submission_state.guc_id_list); + ida_init(&guc->submission_state.guc_ids); + INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); + INIT_WORK(&guc->submission_state.destroyed_worker, + destroyed_worker_func); + + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; return 0; } @@ -1015,8 +1520,10 @@ void intel_guc_submission_fini(struct intel_guc *guc) if (!guc->lrc_desc_pool) return; + guc_flush_destroyed_contexts(guc); guc_lrc_desc_pool_destroy(guc); i915_sched_engine_put(guc->sched_engine); + bitmap_free(guc->submission_state.guc_ids_bitmap); } static inline void queue_request(struct i915_sched_engine *sched_engine, @@ -1027,21 +1534,28 @@ static inline void queue_request(struct i915_sched_engine *sched_engine, list_add_tail(&rq->sched.link, i915_sched_lookup_priolist(sched_engine, prio)); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + tasklet_hi_schedule(&sched_engine->tasklet); } static int guc_bypass_tasklet_submit(struct intel_guc *guc, struct i915_request *rq) { - int ret; + int ret = 0; __i915_request_submit(rq); trace_i915_request_in(rq, 0); - guc_set_lrc_tail(rq); - ret = guc_add_request(guc, rq); - if (ret == -EBUSY) - guc->stalled_request = rq; + if (is_multi_lrc_rq(rq)) { + if (multi_lrc_submit(rq)) { + ret = guc_wq_item_append(guc, rq); + if (!ret) + ret = guc_add_request(guc, rq); + } + } else { + guc_set_lrc_tail(rq); + ret = guc_add_request(guc, rq); + } if (unlikely(ret == -EPIPE)) disable_submission(guc); @@ -1049,6 +1563,16 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc, return ret; } +static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) +{ + struct i915_sched_engine *sched_engine = rq->engine->sched_engine; + struct intel_context *ce = request_to_scheduling_context(rq); + + return submission_disabled(guc) || guc->stalled_request || + !i915_sched_engine_is_empty(sched_engine) || + !lrc_desc_registered(guc, ce->guc_id.id); +} + static void guc_submit_request(struct i915_request *rq) { struct i915_sched_engine *sched_engine = rq->engine->sched_engine; @@ -1058,8 +1582,7 @@ static void guc_submit_request(struct i915_request *rq) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&sched_engine->lock, flags); - if (submission_disabled(guc) || guc->stalled_request || - !i915_sched_engine_is_empty(sched_engine)) + if (need_tasklet(guc, rq)) queue_request(sched_engine, rq, rq_prio(rq)); else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) tasklet_hi_schedule(&sched_engine->tasklet); @@ -1067,72 +1590,117 @@ static void guc_submit_request(struct i915_request *rq) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static int new_guc_id(struct intel_guc *guc) +static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) { - return ida_simple_get(&guc->guc_ids, 0, - GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL | - __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + int ret; + + GEM_BUG_ON(intel_context_is_child(ce)); + + if (intel_context_is_parent(ce)) + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, + NUMBER_MULTI_LRC_GUC_ID, + order_base_2(ce->parallel.number_children + + 1)); + else + ret = ida_simple_get(&guc->submission_state.guc_ids, + NUMBER_MULTI_LRC_GUC_ID, + GUC_MAX_LRC_DESCRIPTORS, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN); + if (unlikely(ret < 0)) + return ret; + + ce->guc_id.id = ret; + return 0; } static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) { + GEM_BUG_ON(intel_context_is_child(ce)); + if (!context_guc_id_invalid(ce)) { - ida_simple_remove(&guc->guc_ids, ce->guc_id); - reset_lrc_desc(guc, ce->guc_id); + if (intel_context_is_parent(ce)) + bitmap_release_region(guc->submission_state.guc_ids_bitmap, + ce->guc_id.id, + order_base_2(ce->parallel.number_children + + 1)); + else + ida_simple_remove(&guc->submission_state.guc_ids, + ce->guc_id.id); + reset_lrc_desc(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); } static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) { unsigned long flags; - spin_lock_irqsave(&guc->contexts_lock, flags); + spin_lock_irqsave(&guc->submission_state.lock, flags); __release_guc_id(guc, ce); - spin_unlock_irqrestore(&guc->contexts_lock, flags); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); } -static int steal_guc_id(struct intel_guc *guc) +static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) { - struct intel_context *ce; - int guc_id; + struct intel_context *cn; - lockdep_assert_held(&guc->contexts_lock); + lockdep_assert_held(&guc->submission_state.lock); + GEM_BUG_ON(intel_context_is_child(ce)); + GEM_BUG_ON(intel_context_is_parent(ce)); - if (!list_empty(&guc->guc_id_list)) { - ce = list_first_entry(&guc->guc_id_list, + if (!list_empty(&guc->submission_state.guc_id_list)) { + cn = list_first_entry(&guc->submission_state.guc_id_list, struct intel_context, - guc_id_link); + guc_id.link); - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); - GEM_BUG_ON(context_guc_id_invalid(ce)); + GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); + GEM_BUG_ON(context_guc_id_invalid(cn)); + GEM_BUG_ON(intel_context_is_child(cn)); + GEM_BUG_ON(intel_context_is_parent(cn)); - list_del_init(&ce->guc_id_link); - guc_id = ce->guc_id; - clr_context_registered(ce); - set_context_guc_id_invalid(ce); - return guc_id; + list_del_init(&cn->guc_id.link); + ce->guc_id = cn->guc_id; + + spin_lock(&ce->guc_state.lock); + clr_context_registered(cn); + spin_unlock(&ce->guc_state.lock); + + set_context_guc_id_invalid(cn); + + return 0; } else { return -EAGAIN; } } -static int assign_guc_id(struct intel_guc *guc, u16 *out) +static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) { int ret; - lockdep_assert_held(&guc->contexts_lock); + lockdep_assert_held(&guc->submission_state.lock); + GEM_BUG_ON(intel_context_is_child(ce)); - ret = new_guc_id(guc); + ret = new_guc_id(guc, ce); if (unlikely(ret < 0)) { - ret = steal_guc_id(guc); + if (intel_context_is_parent(ce)) + return -ENOSPC; + + ret = steal_guc_id(guc, ce); if (ret < 0) return ret; } - *out = ret; + if (intel_context_is_parent(ce)) { + struct intel_context *child; + int i = 1; + + for_each_child(ce, child) + child->guc_id.id = ce->guc_id.id + i++; + } + return 0; } @@ -1142,26 +1710,28 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) int ret = 0; unsigned long flags, tries = PIN_GUC_ID_TRIES; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); try_again: - spin_lock_irqsave(&guc->contexts_lock, flags); + spin_lock_irqsave(&guc->submission_state.lock, flags); + + might_lock(&ce->guc_state.lock); if (context_guc_id_invalid(ce)) { - ret = assign_guc_id(guc, &ce->guc_id); + ret = assign_guc_id(guc, ce); if (ret) goto out_unlock; ret = 1; /* Indidcates newly assigned guc_id */ } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); - atomic_inc(&ce->guc_id_ref); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); + atomic_inc(&ce->guc_id.ref); out_unlock: - spin_unlock_irqrestore(&guc->contexts_lock, flags); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); /* - * -EAGAIN indicates no guc_ids are available, let's retire any + * -EAGAIN indicates no guc_id are available, let's retire any * outstanding requests to see if that frees up a guc_id. If the first * retire didn't help, insert a sleep with the timeslice duration before * attempting to retire more requests. Double the sleep period each @@ -1189,16 +1759,43 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) { unsigned long flags; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); + GEM_BUG_ON(intel_context_is_child(ce)); - if (unlikely(context_guc_id_invalid(ce))) + if (unlikely(context_guc_id_invalid(ce) || + intel_context_is_parent(ce))) return; - spin_lock_irqsave(&guc->contexts_lock, flags); - if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && - !atomic_read(&ce->guc_id_ref)) - list_add_tail(&ce->guc_id_link, &guc->guc_id_list); - spin_unlock_irqrestore(&guc->contexts_lock, flags); + spin_lock_irqsave(&guc->submission_state.lock, flags); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && + !atomic_read(&ce->guc_id.ref)) + list_add_tail(&ce->guc_id.link, + &guc->submission_state.guc_id_list); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); +} + +static int __guc_action_register_multi_lrc(struct intel_guc *guc, + struct intel_context *ce, + u32 guc_id, + u32 offset, + bool loop) +{ + struct intel_context *child; + u32 action[4 + MAX_ENGINE_INSTANCE]; + int len = 0; + + GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); + + action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = guc_id; + action[len++] = ce->parallel.number_children + 1; + action[len++] = offset; + for_each_child(ce, child) { + offset += sizeof(struct guc_lrc_desc); + action[len++] = offset; + } + + return guc_submission_send_busy_loop(guc, action, len, 0, loop); } static int __guc_action_register_context(struct intel_guc *guc, @@ -1220,21 +1817,31 @@ static int register_context(struct intel_context *ce, bool loop) { struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + - ce->guc_id * sizeof(struct guc_lrc_desc); + ce->guc_id.id * sizeof(struct guc_lrc_desc); int ret; + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); - if (likely(!ret)) + if (intel_context_is_parent(ce)) + ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id, + offset, loop); + else + ret = __guc_action_register_context(guc, ce->guc_id.id, offset, + loop); + if (likely(!ret)) { + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } return ret; } static int __guc_action_deregister_context(struct intel_guc *guc, - u32 guc_id, - bool loop) + u32 guc_id) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_CONTEXT, @@ -1243,33 +1850,38 @@ static int __guc_action_deregister_context(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_DEREGISTER_CONTEXT, - loop); + true); } -static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) +static int deregister_context(struct intel_context *ce, u32 guc_id) { struct intel_guc *guc = ce_to_guc(ce); + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_deregister(ce); - return __guc_action_deregister_context(guc, guc_id, loop); + return __guc_action_deregister_context(guc, guc_id); } -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) +static inline void clear_children_join_go_memory(struct intel_context *ce) { - switch (class) { - case RENDER_CLASS: - return mask >> RCS0; - case VIDEO_ENHANCEMENT_CLASS: - return mask >> VECS0; - case VIDEO_DECODE_CLASS: - return mask >> VCS0; - case COPY_ENGINE_CLASS: - return mask >> BCS0; - default: - MISSING_CASE(class); - return 0; - } + struct parent_scratch *ps = __get_parent_scratch(ce); + int i; + + ps->go.semaphore = 0; + for (i = 0; i < ce->parallel.number_children + 1; ++i) + ps->join[i].semaphore = 0; +} + +static inline u32 get_children_go_value(struct intel_context *ce) +{ + return __get_parent_scratch(ce)->go.semaphore; +} + +static inline u32 get_children_join_value(struct intel_context *ce, + u8 child_index) +{ + return __get_parent_scratch(ce)->join[child_index].semaphore; } static void guc_context_policy_init(struct intel_engine_cs *engine, @@ -1285,22 +1897,20 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } -static inline u8 map_i915_prio_to_guc_prio(int prio); - static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; struct intel_guc *guc = &engine->gt->uc.guc; - u32 desc_idx = ce->guc_id; + u32 desc_idx = ce->guc_id.id; struct guc_lrc_desc *desc; - const struct i915_gem_context *ctx; - int prio = I915_CONTEXT_DEFAULT_PRIORITY; bool context_registered; intel_wakeref_t wakeref; + struct intel_context *child; int ret = 0; GEM_BUG_ON(!engine->mask); + GEM_BUG_ON(!sched_state_is_init(ce)); /* * Ensure LRC + CT vmas are is same region as write barrier is done @@ -1311,25 +1921,53 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) context_registered = lrc_desc_registered(guc, desc_idx); - rcu_read_lock(); - ctx = rcu_dereference(ce->gem_context); - if (ctx) - prio = ctx->sched.priority; - rcu_read_unlock(); - reset_lrc_desc(guc, desc_idx); set_lrc_desc_registered(guc, desc_idx, ce); desc = __get_lrc_desc(guc, desc_idx); desc->engine_class = engine_class_to_guc_class(engine->class); - desc->engine_submit_mask = adjust_engine_mask(engine->class, - engine->mask); + desc->engine_submit_mask = engine->logical_mask; desc->hw_context_desc = ce->lrc.lrca; - ce->guc_prio = map_i915_prio_to_guc_prio(prio); - desc->priority = ce->guc_prio; + desc->priority = ce->guc_state.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); - init_sched_state(ce); + + /* + * If context is a parent, we need to register a process descriptor + * describing a work queue and register all child contexts. + */ + if (intel_context_is_parent(ce)) { + struct guc_process_desc *pdesc; + + ce->parallel.guc.wqi_tail = 0; + ce->parallel.guc.wqi_head = 0; + + desc->process_desc = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + desc->wq_addr = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + desc->wq_size = WQ_SIZE; + + pdesc = __get_process_desc(ce); + memset(pdesc, 0, sizeof(*(pdesc))); + pdesc->stage_id = ce->guc_id.id; + pdesc->wq_base_addr = desc->wq_addr; + pdesc->wq_size_bytes = desc->wq_size; + pdesc->wq_status = WQ_STATUS_ACTIVE; + + for_each_child(ce, child) { + desc = __get_lrc_desc(guc, child->guc_id.id); + + desc->engine_class = + engine_class_to_guc_class(engine->class); + desc->hw_context_desc = child->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init(engine, desc); + } + + clear_children_join_go_memory(ce); + } /* * The context_lookup xarray is used to determine if the hardware @@ -1340,26 +1978,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * registering this context. */ if (context_registered) { + bool disabled; + unsigned long flags; + trace_intel_context_steal_guc_id(ce); - if (!loop) { + GEM_BUG_ON(!loop); + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { set_context_wait_for_deregister_to_register(ce); intel_context_get(ce); - } else { - bool disabled; - unsigned long flags; - - /* Seal race with Reset */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - disabled = submission_disabled(guc); - if (likely(!disabled)) { - set_context_wait_for_deregister_to_register(ce); - intel_context_get(ce); - } - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(disabled)) { - reset_lrc_desc(guc, desc_idx); - return 0; /* Will get registered later */ - } + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ } /* @@ -1367,20 +2002,18 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * context whose guc_id was stolen. */ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id, loop); - if (unlikely(ret == -EBUSY)) { - clr_context_wait_for_deregister_to_register(ce); - intel_context_put(ce); - } else if (unlikely(ret == -ENODEV)) { + ret = deregister_context(ce, ce->guc_id.id); + if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ - } } else { with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); - if (unlikely(ret == -EBUSY)) + if (unlikely(ret == -EBUSY)) { + reset_lrc_desc(guc, desc_idx); + } else if (unlikely(ret == -ENODEV)) { reset_lrc_desc(guc, desc_idx); - else if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ + } } return ret; @@ -1419,7 +2052,12 @@ static int guc_context_pre_pin(struct intel_context *ce, static int guc_context_pin(struct intel_context *ce, void *vaddr) { - return __guc_context_pin(ce, ce->engine, vaddr); + int ret = __guc_context_pin(ce, ce->engine, vaddr); + + if (likely(!ret && !intel_context_is_barrier(ce))) + intel_engine_pm_get(ce->engine); + + return ret; } static void guc_context_unpin(struct intel_context *ce) @@ -1428,6 +2066,9 @@ static void guc_context_unpin(struct intel_context *ce) unpin_guc_id(guc, ce); lrc_unpin(ce); + + if (likely(!intel_context_is_barrier(ce))) + intel_engine_pm_put_async(ce->engine); } static void guc_context_post_unpin(struct intel_context *ce) @@ -1440,7 +2081,7 @@ static void __guc_context_sched_enable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - ce->guc_id, + ce->guc_id.id, GUC_CONTEXT_ENABLE }; @@ -1456,12 +2097,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - guc_id, /* ce->guc_id not stable */ + guc_id, /* ce->guc_id.id not stable */ GUC_CONTEXT_DISABLE }; GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_sched_disable(ce); guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), @@ -1472,24 +2114,24 @@ static void guc_blocked_fence_complete(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - if (!i915_sw_fence_done(&ce->guc_blocked)) - i915_sw_fence_complete(&ce->guc_blocked); + if (!i915_sw_fence_done(&ce->guc_state.blocked)) + i915_sw_fence_complete(&ce->guc_state.blocked); } static void guc_blocked_fence_reinit(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); /* * This fence is always complete unless a pending schedule disable is * outstanding. We arm the fence here and complete it when we receive * the pending schedule disable complete message. */ - i915_sw_fence_fini(&ce->guc_blocked); - i915_sw_fence_reinit(&ce->guc_blocked); - i915_sw_fence_await(&ce->guc_blocked); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_fini(&ce->guc_state.blocked); + i915_sw_fence_reinit(&ce->guc_state.blocked); + i915_sw_fence_await(&ce->guc_state.blocked); + i915_sw_fence_commit(&ce->guc_state.blocked); } static u16 prep_context_pending_disable(struct intel_context *ce) @@ -1501,35 +2143,30 @@ static u16 prep_context_pending_disable(struct intel_context *ce) guc_blocked_fence_reinit(ce); intel_context_get(ce); - return ce->guc_id; + return ce->guc_id.id; } static struct i915_sw_fence *guc_context_block(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; u16 guc_id; bool enabled; + GEM_BUG_ON(intel_context_is_child(ce)); + spin_lock_irqsave(&ce->guc_state.lock, flags); - /* - * Sync with submission path, increment before below changes to context - * state. - */ - spin_lock(&sched_engine->lock); incr_context_blocked(ce); - spin_unlock(&sched_engine->lock); enabled = context_enabled(ce); if (unlikely(!enabled || submission_disabled(guc))) { if (enabled) clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return &ce->guc_blocked; + return &ce->guc_state.blocked; } /* @@ -1545,26 +2182,41 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_sched_disable(guc, ce, guc_id); - return &ce->guc_blocked; + return &ce->guc_state.blocked; +} + +#define SCHED_STATE_MULTI_BLOCKED_MASK \ + (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) +#define SCHED_STATE_NO_UNBLOCK \ + (SCHED_STATE_MULTI_BLOCKED_MASK | \ + SCHED_STATE_PENDING_DISABLE | \ + SCHED_STATE_BANNED) + +static bool context_cant_unblock(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || + context_guc_id_invalid(ce) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || + !intel_context_is_pinned(ce); } static void guc_context_unblock(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; bool enable; GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_child(ce)); spin_lock_irqsave(&ce->guc_state.lock, flags); if (unlikely(submission_disabled(guc) || - !intel_context_is_pinned(ce) || - context_pending_disable(ce) || - context_blocked(ce) > 1)) { + context_cant_unblock(ce))) { enable = false; } else { enable = true; @@ -1573,13 +2225,7 @@ static void guc_context_unblock(struct intel_context *ce) intel_context_get(ce); } - /* - * Sync with submission path, decrement after above changes to context - * state. - */ - spin_lock(&sched_engine->lock); decr_context_blocked(ce); - spin_unlock(&sched_engine->lock); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1592,16 +2238,29 @@ static void guc_context_unblock(struct intel_context *ce) static void guc_context_cancel_request(struct intel_context *ce, struct i915_request *rq) { + struct intel_context *block_context = + request_to_scheduling_context(rq); + if (i915_sw_fence_signaled(&rq->submit)) { - struct i915_sw_fence *fence = guc_context_block(ce); + struct i915_sw_fence *fence; + intel_context_get(ce); + fence = guc_context_block(block_context); i915_sw_fence_wait(fence); if (!i915_request_completed(rq)) { __i915_request_skip(rq); guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), true); } - guc_context_unblock(ce); + + /* + * XXX: Racey if context is reset, see comment in + * __guc_reset_context(). + */ + flush_work(&ce_to_guc(ce)->ct.requests.worker); + + guc_context_unblock(block_context); + intel_context_put(ce); } } @@ -1626,6 +2285,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) intel_wakeref_t wakeref; unsigned long flags; + GEM_BUG_ON(intel_context_is_child(ce)); + guc_flush_submissions(guc); spin_lock_irqsave(&ce->guc_state.lock, flags); @@ -1662,7 +2323,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) if (!context_guc_id_invalid(ce)) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_set_preemption_timeout(guc, - ce->guc_id, + ce->guc_id.id, 1); spin_unlock_irqrestore(&ce->guc_state.lock, flags); } @@ -1675,40 +2336,24 @@ static void guc_context_sched_disable(struct intel_context *ce) struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; u16 guc_id; - bool enabled; - if (submission_disabled(guc) || context_guc_id_invalid(ce) || - !lrc_desc_registered(guc, ce->guc_id)) { - clr_context_enabled(ce); - goto unpin; - } - - if (!context_enabled(ce)) - goto unpin; + GEM_BUG_ON(intel_context_is_child(ce)); spin_lock_irqsave(&ce->guc_state.lock, flags); /* - * We have to check if the context has been disabled by another thread. - * We also have to check if the context has been pinned again as another - * pin operation is allowed to pass this function. Checking the pin - * count, within ce->guc_state.lock, synchronizes this function with - * guc_request_alloc ensuring a request doesn't slip through the - * 'context_pending_disable' fence. Checking within the spin lock (can't - * sleep) ensures another process doesn't pin this context and generate - * a request before we set the 'context_pending_disable' flag here. + * We have to check if the context has been disabled by another thread, + * check if submssion has been disabled to seal a race with reset and + * finally check if any more requests have been committed to the + * context ensursing that a request doesn't slip through the + * 'context_pending_disable' fence. */ - enabled = context_enabled(ce); - if (unlikely(!enabled || submission_disabled(guc))) { - if (enabled) - clr_context_enabled(ce); + if (unlikely(!context_enabled(ce) || submission_disabled(guc) || + context_has_committed_requests(ce))) { + clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); goto unpin; } - if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return; - } guc_id = prep_context_pending_disable(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1724,21 +2369,41 @@ unpin: static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); + struct intel_gt *gt = guc_to_gt(guc); + unsigned long flags; + bool disabled; - GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); - GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + lockdep_assert_held(&guc->submission_state.lock); + GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); GEM_BUG_ON(context_enabled(ce)); - clr_context_registered(ce); - deregister_context(ce, ce->guc_id, true); + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { + __intel_gt_pm_get(gt); + set_context_destroyed(ce); + clr_context_registered(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + __release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + + deregister_context(ce, ce->guc_id.id); } static void __guc_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); intel_context_fini(ce); @@ -1756,76 +2421,86 @@ static void __guc_context_destroy(struct intel_context *ce) } } +static void guc_flush_destroyed_contexts(struct intel_guc *guc) +{ + struct intel_context *ce, *cn; + unsigned long flags; + + GEM_BUG_ON(!submission_disabled(guc) && + guc_submission_initialized(guc)); + + spin_lock_irqsave(&guc->submission_state.lock, flags); + list_for_each_entry_safe(ce, cn, + &guc->submission_state.destroyed_contexts, + destroyed_link) { + list_del_init(&ce->destroyed_link); + __release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + spin_unlock_irqrestore(&guc->submission_state.lock, flags); +} + +static void deregister_destroyed_contexts(struct intel_guc *guc) +{ + struct intel_context *ce, *cn; + unsigned long flags; + + spin_lock_irqsave(&guc->submission_state.lock, flags); + list_for_each_entry_safe(ce, cn, + &guc->submission_state.destroyed_contexts, + destroyed_link) { + list_del_init(&ce->destroyed_link); + guc_lrc_desc_unpin(ce); + } + spin_unlock_irqrestore(&guc->submission_state.lock, flags); +} + +static void destroyed_worker_func(struct work_struct *w) +{ + struct intel_guc *guc = container_of(w, struct intel_guc, + submission_state.destroyed_worker); + struct intel_gt *gt = guc_to_gt(guc); + int tmp; + + with_intel_gt_pm(gt, tmp) + deregister_destroyed_contexts(guc); +} + static void guc_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); - struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; struct intel_guc *guc = ce_to_guc(ce); - intel_wakeref_t wakeref; unsigned long flags; - bool disabled; + bool destroy; /* * If the guc_id is invalid this context has been stolen and we can free * it immediately. Also can be freed immediately if the context is not * registered with the GuC or the GuC is in the middle of a reset. */ - if (context_guc_id_invalid(ce)) { - __guc_context_destroy(ce); - return; - } else if (submission_disabled(guc) || - !lrc_desc_registered(guc, ce->guc_id)) { - release_guc_id(guc, ce); - __guc_context_destroy(ce); - return; - } - - /* - * We have to acquire the context spinlock and check guc_id again, if it - * is valid it hasn't been stolen and needs to be deregistered. We - * delete this context from the list of unpinned guc_ids available to - * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB - * returns indicating this context has been deregistered the guc_id is - * returned to the pool of available guc_ids. - */ - spin_lock_irqsave(&guc->contexts_lock, flags); - if (context_guc_id_invalid(ce)) { - spin_unlock_irqrestore(&guc->contexts_lock, flags); - __guc_context_destroy(ce); - return; + spin_lock_irqsave(&guc->submission_state.lock, flags); + destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || + !lrc_desc_registered(guc, ce->guc_id.id); + if (likely(!destroy)) { + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); + list_add_tail(&ce->destroyed_link, + &guc->submission_state.destroyed_contexts); + } else { + __release_guc_id(guc, ce); } - - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); - spin_unlock_irqrestore(&guc->contexts_lock, flags); - - /* Seal race with Reset */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - disabled = submission_disabled(guc); - if (likely(!disabled)) - set_context_destroyed(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(disabled)) { - release_guc_id(guc, ce); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + if (unlikely(destroy)) { __guc_context_destroy(ce); return; } /* - * We defer GuC context deregistration until the context is destroyed - * in order to save on CTBs. With this optimization ideally we only need - * 1 CTB to register the context during the first pin and 1 CTB to - * deregister the context when the context is destroyed. Without this - * optimization, a CTB would be needed every pin & unpin. - * - * XXX: Need to acqiure the runtime wakeref as this can be triggered - * from context_free_worker when runtime wakeref is not held. - * guc_lrc_desc_unpin requires the runtime as a GuC register is written - * in H2G CTB to deregister the context. A future patch may defer this - * H2G CTB if the runtime wakeref is zero. + * We use a worker to issue the H2G to deregister the context as we can + * take the GT PM for the first time which isn't allowed from an atomic + * context. */ - with_intel_runtime_pm(runtime_pm, wakeref) - guc_lrc_desc_unpin(ce); + queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); } static int guc_context_alloc(struct intel_context *ce) @@ -1839,20 +2514,23 @@ static void guc_context_set_prio(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, - ce->guc_id, + ce->guc_id.id, prio, }; GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); + lockdep_assert_held(&ce->guc_state.lock); - if (ce->guc_prio == prio || submission_disabled(guc) || - !context_registered(ce)) + if (ce->guc_state.prio == prio || submission_disabled(guc) || + !context_registered(ce)) { + ce->guc_state.prio = prio; return; + } guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_prio = prio; + ce->guc_state.prio = prio; trace_intel_context_set_prio(ce); } @@ -1871,25 +2549,25 @@ static inline u8 map_i915_prio_to_guc_prio(int prio) static inline void add_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); - ++ce->guc_prio_count[guc_prio]; + ++ce->guc_state.prio_count[guc_prio]; /* Overflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); } static inline void sub_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); /* Underflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); - --ce->guc_prio_count[guc_prio]; + --ce->guc_state.prio_count[guc_prio]; } static inline void update_context_prio(struct intel_context *ce) @@ -1900,10 +2578,10 @@ static inline void update_context_prio(struct intel_context *ce) BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); - for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { - if (ce->guc_prio_count[i]) { + for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { + if (ce->guc_state.prio_count[i]) { guc_context_set_prio(guc, ce, i); break; } @@ -1918,13 +2596,14 @@ static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) static void add_to_context(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); + GEM_BUG_ON(intel_context_is_child(ce)); GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); - spin_lock(&ce->guc_active.lock); - list_move_tail(&rq->sched.link, &ce->guc_active.requests); + spin_lock(&ce->guc_state.lock); + list_move_tail(&rq->sched.link, &ce->guc_state.requests); if (rq->guc_prio == GUC_PRIO_INIT) { rq->guc_prio = new_guc_prio; @@ -1936,12 +2615,12 @@ static void add_to_context(struct i915_request *rq) } update_context_prio(ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) { - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI) { @@ -1953,9 +2632,11 @@ static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) static void remove_from_context(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); - spin_lock_irq(&ce->guc_active.lock); + GEM_BUG_ON(intel_context_is_child(ce)); + + spin_lock_irq(&ce->guc_state.lock); list_del_init(&rq->sched.link); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -1965,9 +2646,11 @@ static void remove_from_context(struct i915_request *rq) guc_prio_fini(rq, ce); - spin_unlock_irq(&ce->guc_active.lock); + decr_context_committed_requests(ce); + + spin_unlock_irq(&ce->guc_state.lock); - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); i915_request_notify_execute_cb_imm(rq); } @@ -1992,19 +2675,35 @@ static const struct intel_context_ops guc_context_ops = { .destroy = guc_context_destroy, .create_virtual = guc_create_virtual, + .create_parallel = guc_create_parallel, }; +static void submit_work_cb(struct irq_work *wrk) +{ + struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); + + might_lock(&rq->engine->sched_engine->lock); + i915_sw_fence_complete(&rq->submit); +} + static void __guc_signal_context_fence(struct intel_context *ce) { - struct i915_request *rq; + struct i915_request *rq, *rn; lockdep_assert_held(&ce->guc_state.lock); if (!list_empty(&ce->guc_state.fences)) trace_intel_context_fence_release(ce); - list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) - i915_sw_fence_complete(&rq->submit); + /* + * Use an IRQ to ensure locking order of sched_engine->lock -> + * ce->guc_state.lock is preserved. + */ + list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, + guc_fence_link) { + list_del(&rq->guc_fence_link); + irq_work_queue(&rq->submit_work); + } INIT_LIST_HEAD(&ce->guc_state.fences); } @@ -2013,6 +2712,8 @@ static void guc_signal_context_fence(struct intel_context *ce) { unsigned long flags; + GEM_BUG_ON(intel_context_is_child(ce)); + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_wait_for_deregister_to_register(ce); __guc_signal_context_fence(ce); @@ -2022,13 +2723,28 @@ static void guc_signal_context_fence(struct intel_context *ce) static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && !submission_disabled(ce_to_guc(ce)); } +static void guc_context_init(struct intel_context *ce) +{ + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + + ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); + set_bit(CONTEXT_GUC_INIT, &ce->flags); +} + static int guc_request_alloc(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); struct intel_guc *guc = ce_to_guc(ce); unsigned long flags; int ret; @@ -2057,14 +2773,17 @@ static int guc_request_alloc(struct i915_request *rq) rq->reserved_space -= GUC_REQUEST_SIZE; + if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) + guc_context_init(ce); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the - * guc_ids and creating horrible race conditions. This is especially bad - * when guc_ids are being stolen due to over subscription. By the time + * guc_id and creating horrible race conditions. This is especially bad + * when guc_id are being stolen due to over subscription. By the time * this function is reached, it is guaranteed that the guc_id will be * persistent until the generated request is retired. Thus, sealing these - * race conditions. It is still safe to fail here if guc_ids are + * race conditions. It is still safe to fail here if guc_id are * exhausted and return -EAGAIN to the user indicating that they can try * again in the future. * @@ -2074,7 +2793,7 @@ static int guc_request_alloc(struct i915_request *rq) * decremented on each retire. When it is zero, a lock around the * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. */ - if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) goto out; ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ @@ -2087,7 +2806,7 @@ static int guc_request_alloc(struct i915_request *rq) disable_submission(guc); goto out; /* GPU will be reset */ } - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); unpin_guc_id(guc, ce); return ret; } @@ -2102,22 +2821,16 @@ out: * schedule enable or context registration if either G2H is pending * respectfully. Once a G2H returns, the fence is released that is * blocking these requests (see guc_signal_context_fence). - * - * We can safely check the below fields outside of the lock as it isn't - * possible for these fields to transition from being clear to set but - * converse is possible, hence the need for the check within the lock. */ - if (likely(!context_wait_for_deregister_to_register(ce) && - !context_pending_disable(ce))) - return 0; - spin_lock_irqsave(&ce->guc_state.lock, flags); if (context_wait_for_deregister_to_register(ce) || context_pending_disable(ce)) { + init_irq_work(&rq->submit_work, submit_work_cb); i915_sw_fence_await(&rq->submit); list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); } + incr_context_committed_requests(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); return 0; @@ -2135,8 +2848,30 @@ static int guc_virtual_context_pre_pin(struct intel_context *ce, static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) { struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + int ret = __guc_context_pin(ce, engine, vaddr); + intel_engine_mask_t tmp, mask = ce->engine->mask; - return __guc_context_pin(ce, engine, vaddr); + if (likely(!ret)) + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_get(engine); + + return ret; +} + +static void guc_virtual_context_unpin(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; + struct intel_guc *guc = ce_to_guc(ce); + + GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_barrier(ce)); + + unpin_guc_id(guc, ce); + lrc_unpin(ce); + + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_put_async(engine); } static void guc_virtual_context_enter(struct intel_context *ce) @@ -2173,7 +2908,98 @@ static const struct intel_context_ops virtual_guc_context_ops = { .pre_pin = guc_virtual_context_pre_pin, .pin = guc_virtual_context_pin, - .unpin = guc_context_unpin, + .unpin = guc_virtual_context_unpin, + .post_unpin = guc_context_post_unpin, + + .ban = guc_context_ban, + + .cancel_request = guc_context_cancel_request, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .sched_disable = guc_context_sched_disable, + + .destroy = guc_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + +static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + struct intel_guc *guc = ce_to_guc(ce); + int ret; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + ret = pin_guc_id(guc, ce); + if (unlikely(ret < 0)) + return ret; + + return __guc_context_pin(ce, engine, vaddr); +} + +static int guc_child_context_pin(struct intel_context *ce, void *vaddr) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + GEM_BUG_ON(!intel_context_is_child(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + __intel_context_pin(ce->parallel.parent); + return __guc_context_pin(ce, engine, vaddr); +} + +static void guc_parent_context_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_barrier(ce)); + GEM_BUG_ON(!intel_context_is_parent(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + if (ce->parallel.last_rq) + i915_request_put(ce->parallel.last_rq); + unpin_guc_id(guc, ce); + lrc_unpin(ce); +} + +static void guc_child_context_unpin(struct intel_context *ce) +{ + GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_barrier(ce)); + GEM_BUG_ON(!intel_context_is_child(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + lrc_unpin(ce); +} + +static void guc_child_context_post_unpin(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_child(ce)); + GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + lrc_post_unpin(ce); + intel_context_unpin(ce->parallel.parent); +} + +static void guc_child_context_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + + __guc_context_destroy(ce); +} + +static const struct intel_context_ops virtual_parent_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_context_pre_pin, + .pin = guc_parent_context_pin, + .unpin = guc_parent_context_unpin, .post_unpin = guc_context_post_unpin, .ban = guc_context_ban, @@ -2190,6 +3016,110 @@ static const struct intel_context_ops virtual_guc_context_ops = { .get_sibling = guc_virtual_get_sibling, }; +static const struct intel_context_ops virtual_child_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_context_pre_pin, + .pin = guc_child_context_pin, + .unpin = guc_child_context_unpin, + .post_unpin = guc_child_context_post_unpin, + + .cancel_request = guc_context_cancel_request, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .destroy = guc_child_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + +/* + * The below override of the breadcrumbs is enabled when the user configures a + * context for parallel submission (multi-lrc, parent-child). + * + * The overridden breadcrumbs implements an algorithm which allows the GuC to + * safely preempt all the hw contexts configured for parallel submission + * between each BB. The contract between the i915 and GuC is if the parent + * context can be preempted, all the children can be preempted, and the GuC will + * always try to preempt the parent before the children. A handshake between the + * parent / children breadcrumbs ensures the i915 holds up its end of the deal + * creating a window to preempt between each set of BBs. + */ +static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +static u32 * +emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs); +static u32 * +emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs); + +static struct intel_context * +guc_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_engine_cs **siblings = NULL; + struct intel_context *parent = NULL, *ce, *err; + int i, j; + + siblings = kmalloc_array(num_siblings, + sizeof(*siblings), + GFP_KERNEL); + if (!siblings) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < width; ++i) { + for (j = 0; j < num_siblings; ++j) + siblings[j] = engines[i * num_siblings + j]; + + ce = intel_engine_create_virtual(siblings, num_siblings, + FORCE_VIRTUAL); + if (!ce) { + err = ERR_PTR(-ENOMEM); + goto unwind; + } + + if (i == 0) { + parent = ce; + parent->ops = &virtual_parent_context_ops; + } else { + ce->ops = &virtual_child_context_ops; + intel_context_bind_parent_child(parent, ce); + } + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + parent->engine->emit_bb_start = + emit_bb_start_parent_no_preempt_mid_batch; + parent->engine->emit_fini_breadcrumb = + emit_fini_breadcrumb_parent_no_preempt_mid_batch; + parent->engine->emit_fini_breadcrumb_dw = + 12 + 4 * parent->parallel.number_children; + for_each_child(parent, ce) { + ce->engine->emit_bb_start = + emit_bb_start_child_no_preempt_mid_batch; + ce->engine->emit_fini_breadcrumb = + emit_fini_breadcrumb_child_no_preempt_mid_batch; + ce->engine->emit_fini_breadcrumb_dw = 16; + } + + kfree(siblings); + return parent; + +unwind: + if (parent) + intel_context_put(parent); + kfree(siblings); + return err; +} + static bool guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) { @@ -2249,7 +3179,7 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine) static void guc_bump_inflight_request_prio(struct i915_request *rq, int prio) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); /* Short circuit function */ @@ -2259,7 +3189,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) return; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_FINI) { if (rq->guc_prio != GUC_PRIO_INIT) sub_context_inflight_prio(ce, rq->guc_prio); @@ -2267,16 +3197,16 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, add_context_inflight_prio(ce, rq->guc_prio); update_context_prio(ce); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_retire_inflight_request_prio(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); guc_prio_fini(rq, ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -2310,6 +3240,8 @@ static void guc_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void setup_hwsp(struct intel_engine_cs *engine) @@ -2385,9 +3317,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) * and even it did this code would be run again. */ - for_each_engine(engine, gt, id) - if (engine->kernel_context) - guc_kernel_context_pin(guc, engine->kernel_context); + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + list_for_each_entry(ce, &engine->pinned_contexts_list, + pinned_contexts_link) + guc_kernel_context_pin(guc, ce); + } } static void guc_release(struct intel_engine_cs *engine) @@ -2580,13 +3516,13 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return NULL; } - return ce; -} + if (unlikely(intel_context_is_child(ce))) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Context is child, desc_idx %u", desc_idx); + return NULL; + } -static void decr_outstanding_submission_g2h(struct intel_guc *guc) -{ - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) - wake_up_all(&guc->ct.wq); + return ce; } int intel_guc_deregister_done_process_msg(struct intel_guc *guc, @@ -2607,6 +3543,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; @@ -2622,6 +3565,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, intel_context_put(ce); } else if (context_destroyed(ce)) { /* Context has been destroyed */ + intel_gt_pm_put_async(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -2652,8 +3596,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, (!context_pending_enable(ce) && !context_pending_disable(ce)))) { drm_err(&guc_to_gt(guc)->i915->drm, - "Bad context sched_state 0x%x, 0x%x, desc_idx %u", - atomic_read(&ce->guc_sched_state_no_lock), + "Bad context sched_state 0x%x, desc_idx %u", ce->guc_state.sched_state, desc_idx); return -EPROTO; } @@ -2661,10 +3604,26 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_pending_enable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -2721,7 +3680,12 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); - if (likely(!intel_context_is_banned(ce))) { + /* + * XXX: Racey if request cancellation has occurred, see comment in + * __guc_reset_context(). + */ + if (likely(!intel_context_is_banned(ce) && + !context_blocked(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); } @@ -2797,33 +3761,47 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) struct intel_context *ce; struct i915_request *rq; unsigned long index; + unsigned long flags; /* Reset called during driver load? GuC not yet initialised! */ if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) continue; intel_engine_set_hung_context(engine, ce); /* Can only cope with one hang at a time... */ - return; + intel_context_put(ce); + xa_lock(&guc->context_lookup); + goto done; } +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } +done: + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_dump_active_requests(struct intel_engine_cs *engine, @@ -2839,23 +3817,34 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } - spin_lock_irqsave(&ce->guc_active.lock, flags); - intel_engine_dump_active_requests(&ce->guc_active.requests, + spin_lock(&ce->guc_state.lock); + intel_engine_dump_active_requests(&ce->guc_state.requests, hung_rq, m); - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock(&ce->guc_state.lock); + +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_submission_print_info(struct intel_guc *guc, @@ -2881,7 +3870,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc, priolist_for_each_request(rq, pl) drm_printf(p, "guc_id=%u, seqno=%llu\n", - rq->context->guc_id, + rq->context->guc_id.id, rq->fence.seqno); } spin_unlock_irqrestore(&sched_engine->lock, flags); @@ -2893,46 +3882,348 @@ static inline void guc_log_context_priority(struct drm_printer *p, { int i; - drm_printf(p, "\t\tPriority: %d\n", - ce->guc_prio); + drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; i < GUC_CLIENT_PRIORITY_NUM; ++i) { drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", - i, ce->guc_prio_count[i]); + i, ce->guc_state.prio_count[i]); } drm_printf(p, "\n"); } +static inline void guc_log_context(struct drm_printer *p, + struct intel_context *ce) +{ + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); + drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + drm_printf(p, "\t\tContext Pin Count: %u\n", + atomic_read(&ce->pin_count)); + drm_printf(p, "\t\tGuC ID Ref Count: %u\n", + atomic_read(&ce->guc_id.ref)); + drm_printf(p, "\t\tSchedule State: 0x%x\n\n", + ce->guc_state.sched_state); +} + void intel_guc_submission_print_context_info(struct intel_guc *guc, struct drm_printer *p) { struct intel_context *ce; unsigned long index; + unsigned long flags; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); - drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); - drm_printf(p, "\t\tContext Pin Count: %u\n", - atomic_read(&ce->pin_count)); - drm_printf(p, "\t\tGuC ID Ref Count: %u\n", - atomic_read(&ce->guc_id_ref)); - drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", - ce->guc_state.sched_state, - atomic_read(&ce->guc_sched_state_no_lock)); + GEM_BUG_ON(intel_context_is_child(ce)); + guc_log_context(p, ce); guc_log_context_priority(p, ce); + + if (intel_context_is_parent(ce)) { + struct guc_process_desc *desc = __get_process_desc(ce); + struct intel_context *child; + + drm_printf(p, "\t\tNumber children: %u\n", + ce->parallel.number_children); + drm_printf(p, "\t\tWQI Head: %u\n", + READ_ONCE(desc->head)); + drm_printf(p, "\t\tWQI Tail: %u\n", + READ_ONCE(desc->tail)); + drm_printf(p, "\t\tWQI Status: %u\n\n", + READ_ONCE(desc->wq_status)); + + if (ce->engine->emit_bb_start == + emit_bb_start_parent_no_preempt_mid_batch) { + u8 i; + + drm_printf(p, "\t\tChildren Go: %u\n\n", + get_children_go_value(ce)); + for (i = 0; i < ce->parallel.number_children; ++i) + drm_printf(p, "\t\tChildren Join: %u\n", + get_children_join_value(ce, i)); + } + + for_each_child(ce, child) + guc_log_context(p, child); + } + } + xa_unlock_irqrestore(&guc->context_lookup, flags); +} + +static inline u32 get_children_go_addr(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_parent(ce)); + + return i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce) + + offsetof(struct parent_scratch, go.semaphore); +} + +static inline u32 get_children_join_addr(struct intel_context *ce, + u8 child_index) +{ + GEM_BUG_ON(!intel_context_is_parent(ce)); + + return i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce) + + offsetof(struct parent_scratch, join[child_index].semaphore); +} + +#define PARENT_GO_BB 1 +#define PARENT_GO_FINI_BREADCRUMB 0 +#define CHILD_GO_BB 1 +#define CHILD_GO_FINI_BREADCRUMB 0 +static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + struct intel_context *ce = rq->context; + u32 *cs; + u8 i; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Wait on children */ + for (i = 0; i < ce->parallel.number_children; ++i) { + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = PARENT_GO_BB; + *cs++ = get_children_join_addr(ce, i); + *cs++ = 0; + } + + /* Turn off preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + + /* Tell children go */ + cs = gen8_emit_ggtt_write(cs, + CHILD_GO_BB, + get_children_go_addr(ce), + 0); + + /* Jump to batch */ + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + struct intel_context *ce = rq->context; + struct intel_context *parent = intel_context_to_parent(ce); + u32 *cs; + + GEM_BUG_ON(!intel_context_is_child(ce)); + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Signal parent */ + cs = gen8_emit_ggtt_write(cs, + PARENT_GO_BB, + get_children_join_addr(parent, + ce->parallel.child_index), + 0); + + /* Wait on parent for go */ + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = CHILD_GO_BB; + *cs++ = get_children_go_addr(parent); + *cs++ = 0; + + /* Turn off preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* Jump to batch */ + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 * +__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + u8 i; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + /* Wait on children */ + for (i = 0; i < ce->parallel.number_children; ++i) { + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = PARENT_GO_FINI_BREADCRUMB; + *cs++ = get_children_join_addr(ce, i); + *cs++ = 0; + } + + /* Turn on preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + /* Tell children go */ + cs = gen8_emit_ggtt_write(cs, + CHILD_GO_FINI_BREADCRUMB, + get_children_go_addr(ce), + 0); + + return cs; +} + +/* + * If this true, a submission of multi-lrc requests had an error and the + * requests need to be skipped. The front end (execuf IOCTL) should've called + * i915_request_skip which squashes the BB but we still need to emit the fini + * breadrcrumbs seqno write. At this point we don't know how many of the + * requests in the multi-lrc submission were generated so we can't do the + * handshake between the parent and children (e.g. if 4 requests should be + * generated but 2nd hit an error only 1 would be seen by the GuC backend). + * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error + * has occurred on any of the requests in submission / relationship. + */ +static inline bool skip_handshake(struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); +} + +static u32 * +emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + if (unlikely(skip_handshake(rq))) { + /* + * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, + * the -6 comes from the length of the emits below. + */ + memset(cs, 0, sizeof(u32) * + (ce->engine->emit_fini_breadcrumb_dw - 6)); + cs += ce->engine->emit_fini_breadcrumb_dw - 6; + } else { + cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); + } + + /* Emit fini breadcrumb */ + cs = gen8_emit_ggtt_write(cs, + rq->fence.seqno, + i915_request_active_timeline(rq)->hwsp_offset, + 0); + + /* User interrupt */ + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + + return cs; +} + +static u32 * +__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + struct intel_context *parent = intel_context_to_parent(ce); + + GEM_BUG_ON(!intel_context_is_child(ce)); + + /* Turn on preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + /* Signal parent */ + cs = gen8_emit_ggtt_write(cs, + PARENT_GO_FINI_BREADCRUMB, + get_children_join_addr(parent, + ce->parallel.child_index), + 0); + + /* Wait parent on for go */ + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = CHILD_GO_FINI_BREADCRUMB; + *cs++ = get_children_go_addr(parent); + *cs++ = 0; + + return cs; +} + +static u32 * +emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + + GEM_BUG_ON(!intel_context_is_child(ce)); + + if (unlikely(skip_handshake(rq))) { + /* + * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, + * the -6 comes from the length of the emits below. + */ + memset(cs, 0, sizeof(u32) * + (ce->engine->emit_fini_breadcrumb_dw - 6)); + cs += ce->engine->emit_fini_breadcrumb_dw - 6; + } else { + cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); } + + /* Emit fini breadcrumb */ + cs = gen8_emit_ggtt_write(cs, + rq->fence.seqno, + i915_request_active_timeline(rq)->hwsp_offset, + 0); + + /* User interrupt */ + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + + return cs; } static struct intel_context * -guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags) { struct guc_virtual_engine *ve; struct intel_guc *guc; @@ -2981,6 +4272,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) } ve->base.mask |= sibling->mask; + ve->base.logical_mask |= sibling->logical_mask; if (n != 0 && ve->base.class != sibling->class) { DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", @@ -3036,3 +4328,8 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#include "selftest_guc_multi_lrc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fc5387b410a2..ff4b6869b80b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); + err = PTR_ERR(vaddr); + goto unpin_out; } copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); - GEM_BUG_ON(copied < huc->fw.rsa_size); - i915_gem_object_unpin_map(vma->obj); + if (copied < huc->fw.rsa_size) { + err = -ENOMEM; + goto unpin_out; + } + huc->rsa_data = vma; return 0; + +unpin_out: + i915_vma_unpin_and_release(&vma, 0); + return err; } static void intel_huc_rsa_data_destroy(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c index 5733c15fd123..15998963b863 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c @@ -5,7 +5,7 @@ #include <drm/drm_print.h> -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" #include "intel_huc.h" #include "intel_huc_debugfs.h" @@ -21,11 +21,11 @@ static int huc_info_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(huc_info); void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "huc_info", &huc_info_fops, NULL }, }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 86c318516e14..2fef3b0bbe95 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -35,7 +35,7 @@ static void uc_expand_default_options(struct intel_uc *uc) } /* Intermediate platforms are HuC authentication only */ - if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) { + if (IS_ALDERLAKE_S(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c index 089d98662f46..c2f7924295e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -6,7 +6,7 @@ #include <linux/debugfs.h> #include <drm/drm_print.h> -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" #include "intel_guc_debugfs.h" #include "intel_huc_debugfs.h" #include "intel_uc.h" @@ -32,11 +32,11 @@ static int uc_usage_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(uc_usage); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(uc_usage); void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "usage", &uc_usage_fops, NULL }, }; struct dentry *root; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3a16d08608a5..3aa87be4f2e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -7,6 +7,7 @@ #include <linux/firmware.h> #include <drm/drm_print.h> +#include "gem/i915_gem_lmem.h" #include "intel_uc_fw.h" #include "intel_uc_fw_abi.h" #include "i915_drv.h" @@ -50,6 +51,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ @@ -370,7 +372,14 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size; - obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (HAS_LMEM(i915)) { + obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); + if (!IS_ERR(obj)) + obj->flags |= I915_BO_ALLOC_PM_EARLY; + } else { + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + } + if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fail; @@ -413,20 +422,25 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - struct i915_vma dummy = { - .node.start = uc_fw_ggtt_offset(uc_fw), - .node.size = obj->base.size, - .pages = obj->mm.pages, - .vm = &ggtt->vm, - }; + struct i915_vma *dummy = &uc_fw->dummy; + u32 pte_flags = 0; + + dummy->node.start = uc_fw_ggtt_offset(uc_fw); + dummy->node.size = obj->base.size; + dummy->pages = obj->mm.pages; + dummy->vm = &ggtt->vm; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size); /* uc_fw->obj cache domains were not controlled across suspend */ - drm_clflush_sg(dummy.pages); + if (i915_gem_object_has_struct_page(obj)) + drm_clflush_sg(dummy->pages); - ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); + if (i915_gem_object_is_lmem(obj)) + pte_flags |= PTE_LM; + + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); } static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) @@ -585,13 +599,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) */ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) { - struct sg_table *pages = uc_fw->obj->mm.pages; + struct intel_memory_region *mr = uc_fw->obj->mm.region; u32 size = min_t(u32, uc_fw->rsa_size, max_len); u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size; + struct sgt_iter iter; + size_t count = 0; + int idx; + /* Called during reset handling, must be atomic [no fs_reclaim] */ GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw)); - return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset); + idx = offset >> PAGE_SHIFT; + offset = offset_in_page(offset); + if (i915_gem_object_has_struct_page(uc_fw->obj)) { + struct page *page; + + for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) { + u32 len = min_t(u32, size, PAGE_SIZE - offset); + void *vaddr; + + if (idx > 0) { + idx--; + continue; + } + + vaddr = kmap_atomic(page); + memcpy(dst, vaddr + offset, len); + kunmap_atomic(vaddr); + + offset = 0; + dst += len; + size -= len; + count += len; + if (!size) + break; + } + } else { + dma_addr_t addr; + + for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) { + u32 len = min_t(u32, size, PAGE_SIZE - offset); + void __iomem *vaddr; + + if (idx > 0) { + idx--; + continue; + } + + vaddr = io_mapping_map_atomic_wc(&mr->iomap, + addr - mr->region.start); + memcpy_fromio(dst, vaddr + offset, len); + io_mapping_unmap_atomic(vaddr); + + offset = 0; + dst += len; + size -= len; + count += len; + if (!size) + break; + } + } + + return count; } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 99bb1fe1af66..1e00bf65639e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -10,6 +10,7 @@ #include "intel_uc_fw_abi.h" #include "intel_device_info.h" #include "i915_gem.h" +#include "i915_vma.h" struct drm_printer; struct drm_i915_private; @@ -75,6 +76,14 @@ struct intel_uc_fw { bool user_overridden; size_t size; struct drm_i915_gem_object *obj; + /** + * @dummy: A vma used in binding the uc fw to ggtt. We can't define this + * vma on the stack as it can lead to a stack overflow, so we define it + * here. Safe to have 1 copy per uc fw because the binding is single + * threaded as it done during driver load (inherently single threaded) + * or during a GT reset (mutex guarantees single threaded). + */ + struct i915_vma dummy; /* * The firmware build process will generate a version header file with major and diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index 000000000000..fb0e4a7bd8ca --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, + struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(&rq->submit, + &from->fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_guc_scrub_ctbs(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + int i; + struct i915_request *last[3] = {NULL, NULL, NULL}, *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine; + struct intel_context *ce; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + engine = intel_selftest_find_any_engine(gt); + + /* Submit requests and inject errors forcing G2H to be dropped */ + for (i = 0; i < 3; ++i) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + pr_err("Failed to create context, %d: %d\n", i, ret); + goto err; + } + + switch (i) { + case 0: + ce->drop_schedule_enable = true; + break; + case 1: + ce->drop_schedule_disable = true; + break; + case 2: + ce->drop_deregister = true; + break; + } + + rq = nop_user_request(ce, NULL); + intel_context_put(ce); + + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed to create request, %d: %d\n", i, ret); + goto err; + } + + last[i] = rq; + } + + for (i = 0; i < 3; ++i) { + ret = i915_request_wait(last[i], 0, HZ); + if (ret < 0) { + pr_err("Last request failed to complete: %d\n", ret); + goto err; + } + i915_request_put(last[i]); + last[i] = NULL; + } + + /* Force all H2G / G2H to be submitted / processed */ + intel_gt_retire_requests(gt); + msleep(500); + + /* Scrub missing G2H */ + intel_gt_handle_error(engine->gt, -1, 0, "selftest reset"); + + /* GT will not idle if G2H are lost */ + ret = intel_gt_wait_for_idle(gt, HZ); + if (ret < 0) { + pr_err("GT failed to idle: %d\n", ret); + goto err; + } + +err: + for (i = 0; i < 3; ++i) + if (last[i]) + i915_request_put(last[i]); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return ret; +} + +int intel_guc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_guc_scrub_ctbs), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c new file mode 100644 index 000000000000..50953c8e8b53 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2019 Intel Corporation + */ + +#include "selftests/igt_spinner.h" +#include "selftests/igt_reset.h" +#include "selftests/intel_scheduler_helpers.h" +#include "gt/intel_engine_heartbeat.h" +#include "gem/selftests/mock_context.h" + +static void logical_sort(struct intel_engine_cs **engines, int num_engines) +{ + struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1]; + int i, j; + + for (i = 0; i < num_engines; ++i) + for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) { + if (engines[j]->logical_mask & BIT(i)) { + sorted[i] = engines[j]; + break; + } + } + + memcpy(*engines, *sorted, + sizeof(struct intel_engine_cs *) * num_engines); +} + +static struct intel_context * +multi_lrc_create_parent(struct intel_gt *gt, u8 class, + unsigned long flags) +{ + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int i = 0; + + for_each_engine(engine, gt, id) { + if (engine->class != class) + continue; + + siblings[i++] = engine; + } + + if (i <= 1) + return ERR_PTR(0); + + logical_sort(siblings, i); + + return intel_engine_create_parallel(siblings, 1, i); +} + +static void multi_lrc_context_unpin(struct intel_context *ce) +{ + struct intel_context *child; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + for_each_child(ce, child) + intel_context_unpin(child); + intel_context_unpin(ce); +} + +static void multi_lrc_context_put(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_parent(ce)); + + /* + * Only the parent gets the creation ref put in the uAPI, the parent + * itself is responsible for creation ref put on the children. + */ + intel_context_put(ce); +} + +static struct i915_request * +multi_lrc_nop_request(struct intel_context *ce) +{ + struct intel_context *child; + struct i915_request *rq, *child_rq; + int i = 0; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + for_each_child(ce, child) { + child_rq = intel_context_create_request(child); + if (IS_ERR(child_rq)) + goto child_error; + + if (++i == ce->parallel.number_children) + set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, + &child_rq->fence.flags); + i915_request_add(child_rq); + } + + return rq; + +child_error: + i915_request_put(rq); + + return ERR_PTR(-ENOMEM); +} + +static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class) +{ + struct intel_context *parent; + struct i915_request *rq; + int ret; + + parent = multi_lrc_create_parent(gt, class, 0); + if (IS_ERR(parent)) { + pr_err("Failed creating contexts: %ld", PTR_ERR(parent)); + return PTR_ERR(parent); + } else if (!parent) { + pr_debug("Not enough engines in class: %d", class); + return 0; + } + + rq = multi_lrc_nop_request(parent); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed creating requests: %d", ret); + goto out; + } + + ret = intel_selftest_wait_for_rq(rq); + if (ret) + pr_err("Failed waiting on request: %d", ret); + + i915_request_put(rq); + + if (ret >= 0) { + ret = intel_gt_wait_for_idle(gt, HZ * 5); + if (ret < 0) + pr_err("GT failed to idle: %d\n", ret); + } + +out: + multi_lrc_context_unpin(parent); + multi_lrc_context_put(parent); + return ret; +} + +static int intel_guc_multi_lrc_basic(void *arg) +{ + struct intel_gt *gt = arg; + unsigned int class; + int ret; + + for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { + ret = __intel_guc_multi_lrc_basic(gt, class); + if (ret) + return ret; + } + + return 0; +} + +int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_guc_multi_lrc_basic), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} |