Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_engines.h  14
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_gt.c  47
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_gt_pm.h  14
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  7
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.h  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c  61
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.h  56
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context_types.h  153
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h  19
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c  150
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c  36
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.h  39
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h  31
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_execlists_submission.c  17
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c  55
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c  22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  104
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_debugfs.h (renamed from drivers/gpu/drm/i915/gt/debugfs_gt.h)  18
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c (renamed from drivers/gpu/drm/i915/gt/debugfs_engines.c)  10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h  14
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.c  7
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c  22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.h  14
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c (renamed from drivers/gpu/drm/i915/gt/debugfs_gt_pm.c)  197
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h  20
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_types.h  12
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c  9
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_llc.c  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c  93
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_migrate.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.c  176
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.h  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ppgtt.c  13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_region_lmem.c  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.c  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c  7
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c  22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.h  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.c  65
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.h  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c  10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.c  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c  262
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.h  2
-rw-r--r--  drivers/gpu/drm/i915/gt/mock_engine.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c  4
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_execlists.c  28
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  10
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_workarounds.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  1
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c  39
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.h  119
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  28
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c  60
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c  18
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c  13
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h  34
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c  8
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c  2299
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.c  14
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c  6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c  6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  93
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h  9
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc.c  127
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c  179
73 files changed, 3889 insertions, 1084 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h
deleted file mode 100644
index f69257eaa1cc..000000000000
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef DEBUGFS_ENGINES_H
-#define DEBUGFS_ENGINES_H
-
-struct intel_gt;
-struct dentry;
-
-void debugfs_engines_register(struct intel_gt *gt, struct dentry *root);
-
-#endif /* DEBUGFS_ENGINES_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
deleted file mode 100644
index 591eb60785db..000000000000
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#include <linux/debugfs.h>
-
-#include "debugfs_engines.h"
-#include "debugfs_gt.h"
-#include "debugfs_gt_pm.h"
-#include "intel_sseu_debugfs.h"
-#include "uc/intel_uc_debugfs.h"
-#include "i915_drv.h"
-
-void debugfs_gt_register(struct intel_gt *gt)
-{
- struct dentry *root;
-
- if (!gt->i915->drm.primary->debugfs_root)
- return;
-
- root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
- if (IS_ERR(root))
- return;
-
- debugfs_engines_register(gt, root);
- debugfs_gt_pm_register(gt, root);
- intel_sseu_debugfs_register(gt, root);
-
- intel_uc_debugfs_register(&gt->uc, root);
-}
-
-void intel_gt_debugfs_register_files(struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count, void *data)
-{
- while (count--) {
- umode_t mode = files->fops->write ? 0644 : 0444;
-
- if (!files->eval || files->eval(data))
- debugfs_create_file(files->name,
- mode, root, data,
- files->fops);
-
- files++;
- }
-}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
deleted file mode 100644
index 4cf5f5c9da7d..000000000000
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef DEBUGFS_GT_PM_H
-#define DEBUGFS_GT_PM_H
-
-struct intel_gt;
-struct dentry;
-
-void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root);
-
-#endif /* DEBUGFS_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 1aee5e6b1b23..890191f286e3 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -429,7 +429,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
mutex_init(&ppgtt->flush);
mutex_init(&ppgtt->pin_mutex);
- ppgtt_init(&ppgtt->base, gt);
+ ppgtt_init(&ppgtt->base, gt, 0);
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 6e0e52eeb87a..037a9a6e4889 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -548,6 +548,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
I915_GTT_PAGE_SIZE_2M)))) {
vaddr = px_vaddr(pd);
vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
+ clflush_cache_range(vaddr, PAGE_SIZE);
page_size = I915_GTT_PAGE_SIZE_64K;
/*
@@ -568,6 +569,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
for (i = 1; i < index; i += 16)
memset64(vaddr + i, encode, 15);
+ clflush_cache_range(vaddr, PAGE_SIZE);
}
}
@@ -751,7 +753,8 @@ err_pd:
* space.
*
*/
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct i915_ppgtt *ppgtt;
int err;
@@ -760,7 +763,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
if (!ppgtt)
return ERR_PTR(-ENOMEM);
- ppgtt_init(ppgtt, gt);
+ ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index b9028c2ad3c7..f541d19264b4 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -12,7 +12,9 @@ struct i915_address_space;
struct intel_gt;
enum i915_cache_level;
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
+
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 17ca4dc4d0cb..5634d14052bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
goto err_post_unpin;
+ intel_engine_pm_might_get(ce->engine);
+
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto err_unlock;
@@ -395,19 +397,22 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
spin_lock_init(&ce->guc_state.lock);
INIT_LIST_HEAD(&ce->guc_state.fences);
+ INIT_LIST_HEAD(&ce->guc_state.requests);
+
+ ce->guc_id.id = GUC_INVALID_LRC_ID;
+ INIT_LIST_HEAD(&ce->guc_id.link);
- spin_lock_init(&ce->guc_active.lock);
- INIT_LIST_HEAD(&ce->guc_active.requests);
+ INIT_LIST_HEAD(&ce->destroyed_link);
- ce->guc_id = GUC_INVALID_LRC_ID;
- INIT_LIST_HEAD(&ce->guc_id_link);
+ INIT_LIST_HEAD(&ce->parallel.child_list);
/*
* Initialize fence to be complete as this is expected to be complete
* unless there is a pending schedule disable outstanding.
*/
- i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify);
- i915_sw_fence_commit(&ce->guc_blocked);
+ i915_sw_fence_init(&ce->guc_state.blocked,
+ sw_fence_dummy_notify);
+ i915_sw_fence_commit(&ce->guc_state.blocked);
i915_active_init(&ce->active,
__intel_context_active, __intel_context_retire, 0);
@@ -415,13 +420,20 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
void intel_context_fini(struct intel_context *ce)
{
+ struct intel_context *child, *next;
+
if (ce->timeline)
intel_timeline_put(ce->timeline);
i915_vm_put(ce->vm);
+ /* Need to put the creation ref for the children */
+ if (intel_context_is_parent(ce))
+ for_each_child_safe(ce, child, next)
+ intel_context_put(child);
+
mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active);
- i915_sw_fence_fini(&ce->guc_blocked);
+ i915_sw_fence_fini(&ce->guc_state.blocked);
}
void i915_context_module_exit(void)
@@ -517,24 +529,53 @@ retry:
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
{
+ struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
unsigned long flags;
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
- spin_lock_irqsave(&ce->guc_active.lock, flags);
- list_for_each_entry_reverse(rq, &ce->guc_active.requests,
+ /*
+ * We search the parent list to find an active request on the submitted
+ * context. The parent list contains the requests for all the contexts
+ * in the relationship so we have to do a compare of each request's
+ * context.
+ */
+ spin_lock_irqsave(&parent->guc_state.lock, flags);
+ list_for_each_entry_reverse(rq, &parent->guc_state.requests,
sched.link) {
+ if (rq->context != ce)
+ continue;
if (i915_request_completed(rq))
break;
active = rq;
}
- spin_unlock_irqrestore(&ce->guc_active.lock, flags);
+ spin_unlock_irqrestore(&parent->guc_state.lock, flags);
return active;
}
+void intel_context_bind_parent_child(struct intel_context *parent,
+ struct intel_context *child)
+{
+ /*
+ * Callers responsibility to validate that this function is used
+ * correctly but we use GEM_BUG_ON here ensure that they do.
+ */
+ GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
+ GEM_BUG_ON(intel_context_is_pinned(parent));
+ GEM_BUG_ON(intel_context_is_child(parent));
+ GEM_BUG_ON(intel_context_is_pinned(child));
+ GEM_BUG_ON(intel_context_is_child(child));
+ GEM_BUG_ON(intel_context_is_parent(child));
+
+ parent->parallel.child_index = parent->parallel.number_children++;
+ list_add_tail(&child->parallel.child_link,
+ &parent->parallel.child_list);
+ child->parallel.parent = parent;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index c41098950746..246c37d72cd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce);
int intel_context_reconfigure_sseu(struct intel_context *ce,
const struct intel_sseu sseu);
+#define PARENT_SCRATCH_SIZE PAGE_SIZE
+
+static inline bool intel_context_is_child(struct intel_context *ce)
+{
+ return !!ce->parallel.parent;
+}
+
+static inline bool intel_context_is_parent(struct intel_context *ce)
+{
+ return !!ce->parallel.number_children;
+}
+
+static inline bool intel_context_is_pinned(struct intel_context *ce);
+
+static inline struct intel_context *
+intel_context_to_parent(struct intel_context *ce)
+{
+ if (intel_context_is_child(ce)) {
+ /*
+ * The parent holds ref count to the child so it is always safe
+ * for the parent to access the child, but the child has a
+ * pointer to the parent without a ref. To ensure this is safe
+ * the child should only access the parent pointer while the
+ * parent is pinned.
+ */
+ GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
+
+ return ce->parallel.parent;
+ } else {
+ return ce;
+ }
+}
+
+static inline bool intel_context_is_parallel(struct intel_context *ce)
+{
+ return intel_context_is_child(ce) || intel_context_is_parent(ce);
+}
+
+void intel_context_bind_parent_child(struct intel_context *parent,
+ struct intel_context *child);
+
+#define for_each_child(parent, ce)\
+ list_for_each_entry(ce, &(parent)->parallel.child_list,\
+ parallel.child_link)
+#define for_each_child_safe(parent, ce, cn)\
+ list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\
+ parallel.child_link)
+
/**
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
* @ce - the context
@@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce)
struct intel_timeline *tl = ce->timeline;
int err;
- err = mutex_lock_interruptible(&tl->mutex);
+ if (intel_context_is_parent(ce))
+ err = mutex_lock_interruptible_nested(&tl->mutex, 0);
+ else if (intel_context_is_child(ce))
+ err = mutex_lock_interruptible_nested(&tl->mutex,
+ ce->parallel.child_index + 1);
+ else
+ err = mutex_lock_interruptible(&tl->mutex);
if (err)
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2..9e0177dc5484 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,9 +55,13 @@ struct intel_context_ops {
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
- /* virtual engine/context interface */
+ /* virtual/parallel engine/context interface */
struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
- unsigned int count);
+ unsigned int count,
+ unsigned long flags);
+ struct intel_context *(*create_parallel)(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width);
struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
unsigned int sibling);
};
@@ -112,6 +116,8 @@ struct intel_context {
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9
+#define CONTEXT_GUC_INIT 10
+#define CONTEXT_PERMA_PIN 11
struct {
u64 timeout_us;
@@ -152,52 +158,141 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+ /**
+ * pinned_contexts_link: List link for the engine's pinned contexts.
+ * This is only used if this is a perma-pinned kernel context and
+ * the list is assumed to only be manipulated during driver load
+ * or unload time so no mutex protection currently.
+ */
+ struct list_head pinned_contexts_link;
+
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
struct {
- /** lock: protects everything in guc_state */
+ /** @lock: protects everything in guc_state */
spinlock_t lock;
/**
- * sched_state: scheduling state of this context using GuC
+ * @sched_state: scheduling state of this context using GuC
* submission
*/
- u16 sched_state;
+ u32 sched_state;
/*
- * fences: maintains of list of requests that have a submit
- * fence related to GuC submission
+ * @fences: maintains a list of requests that are currently
+ * being fenced until a GuC operation completes
*/
struct list_head fences;
+ /**
+ * @blocked: fence used to signal when the blocking of a
+ * context's submissions is complete.
+ */
+ struct i915_sw_fence blocked;
+ /** @number_committed_requests: number of committed requests */
+ int number_committed_requests;
+ /** @requests: list of active requests on this context */
+ struct list_head requests;
+ /** @prio: the context's current guc priority */
+ u8 prio;
+ /**
+ * @prio_count: a counter of the number requests in flight in
+ * each priority bucket
+ */
+ u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
} guc_state;
struct {
- /** lock: protects everything in guc_active */
- spinlock_t lock;
- /** requests: active requests on this context */
- struct list_head requests;
- } guc_active;
-
- /* GuC scheduling state flags that do not require a lock. */
- atomic_t guc_sched_state_no_lock;
-
- /* GuC LRC descriptor ID */
- u16 guc_id;
+ /**
+ * @id: handle which is used to uniquely identify this context
+ * with the GuC, protected by guc->submission_state.lock
+ */
+ u16 id;
+ /**
+ * @ref: the number of references to the guc_id, when
+ * transitioning in and out of zero protected by
+ * guc->submission_state.lock
+ */
+ atomic_t ref;
+ /**
+ * @link: in guc->guc_id_list when the guc_id has no refs but is
+ * still valid, protected by guc->submission_state.lock
+ */
+ struct list_head link;
+ } guc_id;
- /* GuC LRC descriptor reference count */
- atomic_t guc_id_ref;
+ /**
+ * @destroyed_link: link in guc->submission_state.destroyed_contexts, in
+ * list when context is pending to be destroyed (deregistered with the
+ * GuC), protected by guc->submission_state.lock
+ */
+ struct list_head destroyed_link;
- /*
- * GuC ID link - in list when unpinned but guc_id still valid in GuC
+ /** @parallel: sub-structure for parallel submission members */
+ struct {
+ union {
+ /**
+ * @child_list: parent's list of children
+ * contexts, no protection as immutable after context
+ * creation
+ */
+ struct list_head child_list;
+ /**
+ * @child_link: child's link into parent's list of
+ * children
+ */
+ struct list_head child_link;
+ };
+ /** @parent: pointer to parent if child */
+ struct intel_context *parent;
+ /**
+ * @last_rq: last request submitted on a parallel context, used
+ * to insert submit fences between requests in the parallel
+ * context
+ */
+ struct i915_request *last_rq;
+ /**
+ * @fence_context: fence context composite fence when doing
+ * parallel submission
+ */
+ u64 fence_context;
+ /**
+ * @seqno: seqno for composite fence when doing parallel
+ * submission
+ */
+ u32 seqno;
+ /** @number_children: number of children if parent */
+ u8 number_children;
+ /** @child_index: index into child_list if child */
+ u8 child_index;
+ /** @guc: GuC specific members for parallel submission */
+ struct {
+ /** @wqi_head: head pointer in work queue */
+ u16 wqi_head;
+ /** @wqi_tail: tail pointer in work queue */
+ u16 wqi_tail;
+ /**
+ * @parent_page: page in context state (ce->state) used
+ * by parent for work queue, process descriptor
+ */
+ u8 parent_page;
+ } guc;
+ } parallel;
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+ /**
+ * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
*/
- struct list_head guc_id_link;
+ bool drop_schedule_enable;
- /* GuC context blocked fence */
- struct i915_sw_fence guc_blocked;
+ /**
+ * @drop_schedule_disable: Force drop of schedule disable G2H for
+ * selftest
+ */
+ bool drop_schedule_disable;
- /*
- * GuC priority management
+ /**
+ * @drop_deregister: Force drop of deregister G2H for selftest
*/
- u8 guc_prio;
- u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM];
+ bool drop_deregister;
+#endif
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 87579affb952..08559ace0ada 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -2,6 +2,7 @@
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_
+#include <asm/cacheflush.h>
#include <drm/drm_util.h>
#include <linux/hashtable.h>
@@ -175,6 +176,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32))
+#define I915_GEM_HWS_PXP 0x60
+#define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_HWS_CSB_BUF0_INDEX 0x10
@@ -273,15 +276,25 @@ static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
static inline bool
intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return false;
return intel_engine_has_preemption(engine);
}
+#define FORCE_VIRTUAL BIT(0)
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
- unsigned int count);
+ unsigned int count, unsigned long flags);
+
+static inline struct intel_context *
+intel_engine_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_engines,
+ unsigned int width)
+{
+ GEM_BUG_ON(!engines[0]->cops->create_parallel);
+ return engines[0]->cops->create_parallel(engines, num_engines, width);
+}
static inline bool
intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
@@ -300,7 +313,7 @@ intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
static inline bool
intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return false;
if (intel_engine_is_virtual(engine))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 0d9105a31d84..ff6753ccb129 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
GEM_DEBUG_WARN_ON(iir);
}
-static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
+ u8 logical_instance)
{
const struct engine_info *info = &intel_engines[id];
struct drm_i915_private *i915 = gt->i915;
@@ -320,6 +321,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
+ INIT_LIST_HEAD(&engine->pinned_contexts_list);
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
@@ -334,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->class = info->class;
engine->instance = info->instance;
+ engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
engine->props.heartbeat_interval_ms =
@@ -398,7 +401,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
- if (GRAPHICS_VER(i915) >= 9)
+ if (GRAPHICS_VER(i915) >= 9 &&
+ engine->gt->info.sfc_mask & BIT(engine->instance))
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
}
@@ -474,18 +478,25 @@ void intel_engines_free(struct intel_gt *gt)
}
static
-bool gen11_vdbox_has_sfc(struct drm_i915_private *i915,
+bool gen11_vdbox_has_sfc(struct intel_gt *gt,
unsigned int physical_vdbox,
unsigned int logical_vdbox, u16 vdbox_mask)
{
+ struct drm_i915_private *i915 = gt->i915;
+
/*
* In Gen11, only even numbered logical VDBOXes are hooked
* up to an SFC (Scaler & Format Converter) unit.
* In Gen12, Even numbered physical instance always are connected
* to an SFC. Odd numbered physical instances have SFC only if
* previous even instance is fused off.
+ *
+ * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
+ * in the fuse register that tells us whether a specific SFC is present.
*/
- if (GRAPHICS_VER(i915) == 12)
+ if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
+ return false;
+ else if (GRAPHICS_VER(i915) == 12)
return (physical_vdbox % 2 == 0) ||
!(BIT(physical_vdbox - 1) & vdbox_mask);
else if (GRAPHICS_VER(i915) == 11)
@@ -512,7 +523,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
unsigned int logical_vdbox = 0;
unsigned int i;
- u32 media_fuse;
+ u32 media_fuse, fuse1;
u16 vdbox_mask;
u16 vebox_mask;
@@ -534,6 +545,13 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
GEN11_GT_VEBOX_DISABLE_SHIFT;
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1);
+ gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
+ } else {
+ gt->info.sfc_mask = ~0;
+ }
+
for (i = 0; i < I915_MAX_VCS; i++) {
if (!HAS_ENGINE(gt, _VCS(i))) {
vdbox_mask &= ~BIT(i);
@@ -546,7 +564,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
continue;
}
- if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask))
+ if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
gt->info.vdbox_sfc_access |= BIT(i);
logical_vdbox++;
}
@@ -572,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
return info->engine_mask;
}
+static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
+ u8 class, const u8 *map, u8 num_instances)
+{
+ int i, j;
+ u8 current_logical_id = 0;
+
+ for (j = 0; j < num_instances; ++j) {
+ for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+ if (!HAS_ENGINE(gt, i) ||
+ intel_engines[i].class != class)
+ continue;
+
+ if (intel_engines[i].instance == map[j]) {
+ logical_ids[intel_engines[i].instance] =
+ current_logical_id++;
+ break;
+ }
+ }
+ }
+}
+
+static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
+{
+ int i;
+ u8 map[MAX_ENGINE_INSTANCE + 1];
+
+ for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+ map[i] = i;
+ populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+}
+
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
* @gt: pointer to struct intel_gt
@@ -583,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0;
- unsigned int i;
+ unsigned int i, class;
+ u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
int err;
drm_WARN_ON(&i915->drm, engine_mask == 0);
@@ -593,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
if (i915_inject_probe_failure(i915))
return -ENODEV;
- for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
- if (!HAS_ENGINE(gt, i))
- continue;
+ for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+ setup_logical_ids(gt, logical_ids, class);
- err = intel_engine_setup(gt, i);
- if (err)
- goto cleanup;
+ for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+ u8 instance = intel_engines[i].instance;
- mask |= BIT(i);
+ if (intel_engines[i].class != class ||
+ !HAS_ENGINE(gt, i))
+ continue;
+
+ err = intel_engine_setup(gt, i,
+ logical_ids[instance]);
+ if (err)
+ goto cleanup;
+
+ mask |= BIT(i);
+ }
}
/*
@@ -875,6 +933,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
return ERR_PTR(err);
}
+ list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
+
/*
* Give our perma-pinned kernel timelines a separate lockdep class,
* so that we can use them from within the normal user timelines
@@ -897,6 +957,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce)
list_del(&ce->timeline->engine_link);
mutex_unlock(&hwsp->vm->mutex);
+ list_del(&ce->pinned_contexts_link);
intel_context_unpin(ce);
intel_context_put(ce);
}
@@ -1163,16 +1224,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
u32 mmio_base = engine->mmio_base;
int slice;
int subslice;
+ int iter;
memset(instdone, 0, sizeof(*instdone));
- switch (GRAPHICS_VER(i915)) {
- default:
+ if (GRAPHICS_VER(i915) >= 8) {
instdone->instdone =
intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
if (engine->id != RCS0)
- break;
+ return;
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
@@ -1182,21 +1243,39 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
instdone->slice_common_extra[1] =
intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
}
- for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
- instdone->sampler[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_SAMPLER_INSTDONE);
- instdone->row[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_ROW_INSTDONE);
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
+ instdone->sampler[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_SAMPLER_INSTDONE);
+ instdone->row[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_ROW_INSTDONE);
+ }
+ } else {
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
+ instdone->sampler[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_SAMPLER_INSTDONE);
+ instdone->row[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_ROW_INSTDONE);
+ }
}
- break;
- case 7:
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
+ for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice)
+ instdone->geom_svg[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ XEHPG_INSTDONE_GEOM_SVG);
+ }
+ } else if (GRAPHICS_VER(i915) >= 7) {
instdone->instdone =
intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
if (engine->id != RCS0)
- break;
+ return;
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
@@ -1204,22 +1283,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
instdone->row[0][0] =
intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
-
- break;
- case 6:
- case 5:
- case 4:
+ } else if (GRAPHICS_VER(i915) >= 4) {
instdone->instdone =
intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
if (engine->id == RCS0)
/* HACK: Using the wrong struct member */
instdone->slice_common =
intel_uncore_read(uncore, GEN4_INSTDONE1);
- break;
- case 3:
- case 2:
+ } else {
instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
- break;
}
}
@@ -1881,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
- unsigned int count)
+ unsigned int count, unsigned long flags)
{
if (count == 0)
return ERR_PTR(-EINVAL);
- if (count == 1)
+ if (count == 1 && !(flags & FORCE_VIRTUAL))
return intel_context_create(siblings[0]);
GEM_BUG_ON(!siblings[0]->cops->create_virtual);
- return siblings[0]->cops->create_virtual(siblings, count);
+ return siblings[0]->cops->create_virtual(siblings, count, flags);
}
struct i915_request *
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 74775ae961b2..a3698f611f45 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -207,7 +207,7 @@ out:
void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return;
next_heartbeat(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 1f07ac4e0672..a1334b48dde7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
unsigned long flags;
bool result = true;
+ /*
+ * This is execlist specific behaviour intended to ensure the GPU is
+ * idle by switching to a known 'safe' context. With GuC submission, the
+ * same idle guarantee is achieved by other means (disabling
+ * scheduling). Further, switching to a 'safe' context has no effect
+ * with GuC submission as the scheduler can just switch back again.
+ *
+ * FIXME: Move this backend scheduler specific behaviour into the
+ * scheduler backend.
+ */
+ if (intel_engine_uses_guc(engine))
+ return true;
+
/* GPU is pointing to the void, as good as in the kernel context. */
if (intel_gt_is_wedged(engine->gt))
return true;
@@ -298,6 +311,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
intel_engine_init_heartbeat(engine);
}
+/**
+ * intel_engine_reset_pinned_contexts - Reset the pinned contexts of
+ * an engine.
+ * @engine: The engine whose pinned contexts we want to reset.
+ *
+ * Typically the pinned context LMEM images lose or get their content
+ * corrupted on suspend. This function resets their images.
+ */
+void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+
+ list_for_each_entry(ce, &engine->pinned_contexts_list,
+ pinned_contexts_link) {
+ /* kernel context gets reset at __engine_unpark() */
+ if (ce == engine->kernel_context)
+ continue;
+
+ dbg_poison_ce(ce);
+ ce->ops->reset(ce);
+ }
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 70ea46d6cfb0..d68675925b79 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -6,9 +6,11 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
+#include "i915_drv.h"
#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
+#include "intel_gt_pm.h"
static inline bool
intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
@@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
return intel_wakeref_is_active(&engine->wakeref);
}
+static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+ __intel_wakeref_get(&engine->wakeref);
+}
+
static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
{
intel_wakeref_get(&engine->wakeref);
@@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
return intel_wakeref_get_if_active(&engine->wakeref);
}
+static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_is_virtual(engine)) {
+ intel_wakeref_might_get(&engine->wakeref);
+ } else {
+ struct intel_gt *gt = engine->gt;
+ struct intel_engine_cs *tengine;
+ intel_engine_mask_t tmp, mask = engine->mask;
+
+ for_each_engine_masked(tengine, gt, mask, tmp)
+ intel_wakeref_might_get(&tengine->wakeref);
+ }
+ intel_gt_pm_might_get(engine->gt);
+}
+
static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
{
intel_wakeref_put(&engine->wakeref);
@@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
}
+static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_is_virtual(engine)) {
+ intel_wakeref_might_put(&engine->wakeref);
+ } else {
+ struct intel_gt *gt = engine->gt;
+ struct intel_engine_cs *tengine;
+ intel_engine_mask_t tmp, mask = engine->mask;
+
+ for_each_engine_masked(tengine, gt, mask, tmp)
+ intel_wakeref_might_put(&tengine->wakeref);
+ }
+ intel_gt_pm_might_put(engine->gt);
+}
+
static inline struct i915_request *
intel_engine_create_kernel_request(struct intel_engine_cs *engine)
{
@@ -69,4 +106,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine)
void intel_engine_init__pm(struct intel_engine_cs *engine);
+void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine);
+
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ed91bcff20eb..e0f773585c29 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -67,8 +67,11 @@ struct intel_instdone {
/* The following exist only in the RCS engine */
u32 slice_common;
u32 slice_common_extra[2];
- u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
- u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+ u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
+ u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
+
+ /* Added in XeHPG */
+ u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
};
/*
@@ -266,6 +269,13 @@ struct intel_engine_cs {
unsigned int guc_id;
intel_engine_mask_t mask;
+ /**
+ * @logical_mask: logical mask of engine, reported to user space via
+ * query IOCTL and used to communicate with the GuC in logical space.
+ * The logical instance of a physical engine can change based on product
+ * and fusing.
+ */
+ intel_engine_mask_t logical_mask;
u8 class;
u8 instance;
@@ -304,6 +314,13 @@ struct intel_engine_cs {
struct intel_context *kernel_context; /* pinned */
+ /**
+ * pinned_contexts_list: List of pinned contexts. This list is only
+ * assumed to be manipulated during driver load- or unload time and
+ * does therefore not have any additional protection.
+ */
+ struct list_head pinned_contexts_list;
+
intel_engine_mask_t saturated; /* submitting semaphores too late? */
struct {
@@ -546,7 +563,7 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return false;
return engine->flags & I915_ENGINE_HAS_TIMESLICES;
@@ -578,4 +595,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
(instdone_has_subslice(dev_priv_, sseu_, slice_, \
subslice_)))
+
+#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
+ for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
+ (iter_) < GEN_MAX_SUBSLICES; \
+ (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
+ (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
+ for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
+
#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a4..bedb80057046 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
}
static struct intel_context *
-execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
+execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags);
static struct i915_request *
__active_request(const struct intel_timeline * const tl,
@@ -2140,10 +2141,6 @@ static void __execlists_unhold(struct i915_request *rq)
if (p->flags & I915_DEPENDENCY_WEAK)
continue;
- /* Propagate any change in error status */
- if (rq->fence.error)
- i915_request_set_error_once(w, rq->fence.error);
-
if (w->engine != rq->engine)
continue;
@@ -2565,7 +2562,7 @@ __execlists_context_pre_pin(struct intel_context *ce,
if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) {
lrc_init_state(ce, engine, *vaddr);
- __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size);
+ __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size);
}
return 0;
@@ -2791,6 +2788,8 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+
+ intel_engine_reset_pinned_contexts(engine);
}
static void enable_error_interrupt(struct intel_engine_cs *engine)
@@ -3341,7 +3340,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
if (can_preempt(engine)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
- if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (CONFIG_DRM_I915_TIMESLICE_DURATION)
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
}
}
@@ -3786,7 +3785,8 @@ unlock:
}
static struct intel_context *
-execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
+execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags)
{
struct virtual_engine *ve;
unsigned int n;
@@ -3879,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
ve->siblings[ve->num_siblings++] = sibling;
ve->base.mask |= sibling->mask;
+ ve->base.logical_mask |= sibling->logical_mask;
/*
* All physical engines must be compatible for their emission
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index de3ac58fceec..57c97554393b 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -644,7 +644,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
struct i915_ppgtt *ppgtt;
int err;
- ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+ ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
atomic_set(&ggtt->vm.open, 0);
- rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
flush_workqueue(ggtt->vm.i915->wq);
mutex_lock(&ggtt->vm.mutex);
@@ -814,6 +813,21 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
return 0;
}
+static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
+{
+ /*
+ * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
+ * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
+ */
+ GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
+ return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
+}
+
+static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
+{
+ return gen6_gttmmadr_size(i915) / 2;
+}
+
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
@@ -822,8 +836,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
u32 pte_flags;
int ret;
- /* For Modern GENs the PTEs and register space are split in the BAR */
- phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
+ GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
+ phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
/*
* On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
@@ -910,6 +924,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
size = gen8_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
@@ -1373,13 +1388,31 @@ err_st_alloc:
}
static struct scatterlist *
-remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+remap_pages(struct drm_i915_gem_object *obj,
+ unsigned int offset, unsigned int alignment_pad,
unsigned int width, unsigned int height,
unsigned int src_stride, unsigned int dst_stride,
struct sg_table *st, struct scatterlist *sg)
{
unsigned int row;
+ if (!width || !height)
+ return sg;
+
+ if (alignment_pad) {
+ st->nents++;
+
+ /*
+ * The DE ignores the PTEs for the padding tiles, the sg entry
+ * here is just a convenience to indicate how many padding PTEs
+ * to insert at this spot.
+ */
+ sg_set_page(sg, NULL, alignment_pad * 4096, 0);
+ sg_dma_address(sg) = 0;
+ sg_dma_len(sg) = alignment_pad * 4096;
+ sg = sg_next(sg);
+ }
+
for (row = 0; row < height; row++) {
unsigned int left = width * I915_GTT_PAGE_SIZE;
@@ -1439,6 +1472,7 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *st;
struct scatterlist *sg;
+ unsigned int gtt_offset = 0;
int ret = -ENOMEM;
int i;
@@ -1455,10 +1489,19 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
sg = st->sgl;
for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
- sg = remap_pages(obj, rem_info->plane[i].offset,
+ unsigned int alignment_pad = 0;
+
+ if (rem_info->plane_alignment)
+ alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset;
+
+ sg = remap_pages(obj,
+ rem_info->plane[i].offset, alignment_pad,
rem_info->plane[i].width, rem_info->plane[i].height,
rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
st, sg);
+
+ gtt_offset += alignment_pad +
+ rem_info->plane[i].dst_stride * rem_info->plane[i].height;
}
i915_sg_trim(st);
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 1c3af0fc0456..f8253012d166 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -28,10 +28,13 @@
#define INSTR_26_TO_24_MASK 0x7000000
#define INSTR_26_TO_24_SHIFT 24
+#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
+
/*
* Memory interface instructions used by the kernel
*/
-#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+#define MI_INSTR(opcode, flags) \
+ (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
#define MI_GLOBAL_GTT (1<<22)
@@ -57,6 +60,7 @@
#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0)
#define MI_SUSPEND_FLUSH_EN (1<<0)
#define MI_SET_APPID MI_INSTR(0x0e, 0)
+#define MI_SET_APPID_SESSION_ID(x) ((x) << 0)
#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0)
#define MI_OVERLAY_CONTINUE (0x0<<21)
#define MI_OVERLAY_ON (0x1<<21)
@@ -146,6 +150,7 @@
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
+#define MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22)
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
@@ -273,6 +278,19 @@
#define MI_MATH_REG_CF 0x33
/*
+ * Media instructions used by the kernel
+ */
+#define MEDIA_INSTR(pipe, op, sub_op, flags) \
+ (__INSTR(INSTR_RC_CLIENT) | (pipe) << INSTR_SUBCLIENT_SHIFT | \
+ (op) << INSTR_26_TO_24_SHIFT | (sub_op) << 16 | (flags))
+
+#define MFX_WAIT MEDIA_INSTR(1, 0, 0, 0)
+#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8)
+#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9)
+
+#define CRYPTO_KEY_EXCHANGE MEDIA_INSTR(2, 6, 9, 0)
+
+/*
* Commands used only by the command parser
*/
#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
@@ -328,8 +346,6 @@
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
-#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16))
-
#define COLOR_BLT ((0x2<<29)|(0x40<<22))
#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 62d40c986642..1cb1948ac959 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,7 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
-#include "debugfs_gt.h"
+#include "intel_gt_debugfs.h"
#include "gem/i915_gem_lmem.h"
#include "i915_drv.h"
@@ -15,12 +15,13 @@
#include "intel_gt_requests.h"
#include "intel_migrate.h"
#include "intel_mocs.h"
+#include "intel_pm.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_uncore.h"
-#include "intel_pm.h"
#include "shmem_utils.h"
+#include "pxp/intel_pxp.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -434,7 +435,7 @@ void intel_gt_driver_register(struct intel_gt *gt)
{
intel_rps_driver_register(&gt->rps);
- debugfs_gt_register(gt);
+ intel_gt_debugfs_register(gt);
}
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -481,7 +482,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
- return &i915_ppgtt_create(gt)->vm;
+ return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
else
return i915_vm_get(&gt->ggtt->vm);
}
@@ -660,6 +661,8 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
return err;
+ intel_gt_init_workarounds(gt);
+
/*
* This is just a security blanket to placate dragons.
* On some systems, we very sporadically observe that the first TLBs
@@ -682,6 +685,8 @@ int intel_gt_init(struct intel_gt *gt)
goto err_pm;
}
+ intel_set_mocs_index(gt);
+
err = intel_engines_init(gt);
if (err)
goto err_engines;
@@ -710,6 +715,8 @@ int intel_gt_init(struct intel_gt *gt)
intel_migrate_init(&gt->migrate, gt);
+ intel_pxp_init(&gt->pxp);
+
goto out_fw;
err_gt:
__intel_gt_disable(gt);
@@ -737,6 +744,8 @@ void intel_gt_driver_remove(struct intel_gt *gt)
intel_uc_driver_remove(&gt->uc);
intel_engines_release(gt);
+
+ intel_gt_flush_buffer_pool(gt);
}
void intel_gt_driver_unregister(struct intel_gt *gt)
@@ -745,12 +754,14 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
intel_rps_driver_unregister(&gt->rps);
+ intel_pxp_fini(&gt->pxp);
+
/*
* Upon unregistering the device to prevent any new users, cancel
* all in-flight requests so that we can quickly unbind the active
* resources.
*/
- intel_gt_set_wedged(gt);
+ intel_gt_set_wedged_on_fini(gt);
/* Scrub all HW state upon release */
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
@@ -765,6 +776,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
if (vm) /* FIXME being called twice on error paths :( */
i915_vm_put(vm);
+ intel_wa_list_free(&gt->wa_list);
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
intel_gt_fini_buffer_pool(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index aa0a59c5b614..acc49c56a9f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -245,8 +245,6 @@ void intel_gt_fini_buffer_pool(struct intel_gt *gt)
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
- intel_gt_flush_buffer_pool(gt);
-
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
new file mode 100644
index 000000000000..f103664b71d4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "i915_drv.h"
+#include "intel_gt_debugfs.h"
+#include "intel_gt_engines_debugfs.h"
+#include "intel_gt_pm_debugfs.h"
+#include "intel_sseu_debugfs.h"
+#include "pxp/intel_pxp_debugfs.h"
+#include "uc/intel_uc_debugfs.h"
+
+int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
+{
+ int ret = intel_gt_terminally_wedged(gt);
+
+ switch (ret) {
+ case -EIO:
+ *val = 1;
+ return 0;
+ case 0:
+ *val = 0;
+ return 0;
+ default:
+ return ret;
+ }
+}
+
+int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
+{
+ /* Flush any previous reset before applying for a new one */
+ wait_event(gt->reset.queue,
+ !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
+
+ intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
+ "Manually reset engine mask to %llx", val);
+ return 0;
+}
+
+/*
+ * keep the interface clean where the first parameter
+ * is a 'struct intel_gt *' instead of 'void *'
+ */
+static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
+{
+ return intel_gt_debugfs_reset_show(data, val);
+}
+
+static int __intel_gt_debugfs_reset_store(void *data, u64 val)
+{
+ return intel_gt_debugfs_reset_store(data, val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
+ __intel_gt_debugfs_reset_store, "%llu\n");
+
+static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct intel_gt_debugfs_file files[] = {
+ { "reset", &reset_fops, NULL },
+ };
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
+}
+
+void intel_gt_debugfs_register(struct intel_gt *gt)
+{
+ struct dentry *root;
+
+ if (!gt->i915->drm.primary->debugfs_root)
+ return;
+
+ root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+ if (IS_ERR(root))
+ return;
+
+ gt_debugfs_register(gt, root);
+
+ intel_gt_engines_debugfs_register(gt, root);
+ intel_gt_pm_debugfs_register(gt, root);
+ intel_sseu_debugfs_register(gt, root);
+
+ intel_uc_debugfs_register(&gt->uc, root);
+ intel_pxp_debugfs_register(&gt->pxp, root);
+}
+
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct intel_gt_debugfs_file *files,
+ unsigned long count, void *data)
+{
+ while (count--) {
+ umode_t mode = files->fops->write ? 0644 : 0444;
+
+ if (!files->eval || files->eval(data))
+ debugfs_create_file(files->name,
+ mode, root, data,
+ files->fops);
+
+ files++;
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
index f77540f727e9..e307ceb99031 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
@@ -3,14 +3,14 @@
* Copyright © 2019 Intel Corporation
*/
-#ifndef DEBUGFS_GT_H
-#define DEBUGFS_GT_H
+#ifndef INTEL_GT_DEBUGFS_H
+#define INTEL_GT_DEBUGFS_H
#include <linux/file.h>
struct intel_gt;
-#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \
+#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
return single_open(file, __name ## _show, inode->i_private); \
@@ -23,16 +23,20 @@ static const struct file_operations __name ## _fops = { \
.release = single_release, \
}
-void debugfs_gt_register(struct intel_gt *gt);
+void intel_gt_debugfs_register(struct intel_gt *gt);
-struct debugfs_gt_file {
+struct intel_gt_debugfs_file {
const char *name;
const struct file_operations *fops;
bool (*eval)(void *data);
};
void intel_gt_debugfs_register_files(struct dentry *root,
- const struct debugfs_gt_file *files,
+ const struct intel_gt_debugfs_file *files,
unsigned long count, void *data);
-#endif /* DEBUGFS_GT_H */
+/* functions that need to be accessed by the upper level non-gt interfaces */
+int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
+int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
+
+#endif /* INTEL_GT_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
index 5e3725e62241..8f9b874fdc9c 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
@@ -6,10 +6,10 @@
#include <drm/drm_print.h>
-#include "debugfs_engines.h"
-#include "debugfs_gt.h"
#include "i915_drv.h" /* for_each_engine! */
#include "intel_engine.h"
+#include "intel_gt_debugfs.h"
+#include "intel_gt_engines_debugfs.h"
static int engines_show(struct seq_file *m, void *data)
{
@@ -24,11 +24,11 @@ static int engines_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines);
-void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
+void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "engines", &engines_fops },
};
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h
new file mode 100644
index 000000000000..dda113452da9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_ENGINES_DEBUGFS_H
+#define INTEL_GT_ENGINES_DEBUGFS_H
+
+struct intel_gt;
+struct dentry;
+
+void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* INTEL_GT_ENGINES_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index b2de83be4d97..699a74582d32 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -13,6 +13,7 @@
#include "intel_lrc_reg.h"
#include "intel_uncore.h"
#include "intel_rps.h"
+#include "pxp/intel_pxp_irq.h"
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
@@ -64,6 +65,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(&gt->rps, iir);
+ if (instance == OTHER_KCR_INSTANCE)
+ return intel_pxp_irq_handler(&gt->pxp, iir);
+
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
}
@@ -196,6 +200,9 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+
+ intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_MASK, ~0);
}
void gen11_gt_irq_postinstall(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index dea8e2479897..524eaf678790 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -18,6 +18,9 @@
#include "intel_rc6.h"
#include "intel_rps.h"
#include "intel_wakeref.h"
+#include "pxp/intel_pxp_pm.h"
+
+#define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
@@ -262,6 +265,8 @@ int intel_gt_resume(struct intel_gt *gt)
intel_uc_resume(&gt->uc);
+ intel_pxp_resume(&gt->pxp);
+
user_forcewake(gt, false);
out_fw:
@@ -279,7 +284,7 @@ static void wait_for_suspend(struct intel_gt *gt)
if (!intel_gt_pm_is_awake(gt))
return;
- if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ if (intel_gt_wait_for_idle(gt, I915_GT_SUSPEND_IDLE_TIMEOUT) == -ETIME) {
/*
* Forcibly cancel outstanding work and leave
* the gpu quiet.
@@ -296,7 +301,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
user_forcewake(gt, true);
wait_for_suspend(gt);
- intel_uc_suspend(&gt->uc);
+ intel_pxp_suspend(&gt->pxp, false);
}
static suspend_state_t pm_suspend_target(void)
@@ -320,6 +325,8 @@ void intel_gt_suspend_late(struct intel_gt *gt)
GEM_BUG_ON(gt->awake);
+ intel_uc_suspend(&gt->uc);
+
/*
* On disabling the device, we want to turn off HW access to memory
* that we no longer own.
@@ -346,6 +353,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
+ intel_pxp_suspend(&gt->pxp, true);
intel_uc_runtime_suspend(&gt->uc);
GT_TRACE(gt, "\n");
@@ -353,11 +361,19 @@ void intel_gt_runtime_suspend(struct intel_gt *gt)
int intel_gt_runtime_resume(struct intel_gt *gt)
{
+ int ret;
+
GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
intel_ggtt_restore_fences(gt->ggtt);
- return intel_uc_runtime_resume(&gt->uc);
+ ret = intel_uc_runtime_resume(&gt->uc);
+ if (ret)
+ return ret;
+
+ intel_pxp_resume(&gt->pxp);
+
+ return 0;
}
static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index d0588d8aaa44..bc898df7a48c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
return intel_wakeref_get_if_active(&gt->wakeref);
}
+static inline void intel_gt_pm_might_get(struct intel_gt *gt)
+{
+ intel_wakeref_might_get(&gt->wakeref);
+}
+
static inline void intel_gt_pm_put(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
@@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(&gt->wakeref);
}
+static inline void intel_gt_pm_might_put(struct intel_gt *gt)
+{
+ intel_wakeref_might_put(&gt->wakeref);
+}
+
+#define with_intel_gt_pm(gt, tmp) \
+ for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+ intel_gt_pm_put(gt), tmp = 0)
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
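
A minimal usage sketch for the new with_intel_gt_pm() helper (not part of the patch; do_work() is a hypothetical stand-in): the macro takes a GT wakeref before the body runs and releases it when the single loop iteration ends, so the reference is dropped on every exit path.

static void example_scoped_pm(struct intel_gt *gt)
{
	int tmp;

	with_intel_gt_pm(gt, tmp)
		do_work(gt);	/* runs with the GT wakeref held */
}
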
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index d6f5836396f8..404dfa7673c6 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -6,18 +6,59 @@
#include <linux/seq_file.h>
-#include "debugfs_gt.h"
-#include "debugfs_gt_pm.h"
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
+#include "intel_gt_debugfs.h"
#include "intel_gt_pm.h"
+#include "intel_gt_pm_debugfs.h"
#include "intel_llc.h"
+#include "intel_pcode.h"
#include "intel_rc6.h"
#include "intel_rps.h"
#include "intel_runtime_pm.h"
-#include "intel_sideband.h"
#include "intel_uncore.h"
+#include "vlv_sideband.h"
+
+int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
+{
+ atomic_inc(&gt->user_wakeref);
+ intel_gt_pm_get(gt);
+ if (GRAPHICS_VER(gt->i915) >= 6)
+ intel_uncore_forcewake_user_get(gt->uncore);
+
+ return 0;
+}
+
+int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) >= 6)
+ intel_uncore_forcewake_user_put(gt->uncore);
+ intel_gt_pm_put(gt);
+ atomic_dec(&gt->user_wakeref);
+
+ return 0;
+}
+
+static int forcewake_user_open(struct inode *inode, struct file *file)
+{
+ struct intel_gt *gt = inode->i_private;
+
+ return intel_gt_pm_debugfs_forcewake_user_open(gt);
+}
+
+static int forcewake_user_release(struct inode *inode, struct file *file)
+{
+ struct intel_gt *gt = inode->i_private;
+
+ return intel_gt_pm_debugfs_forcewake_user_release(gt);
+}
+
+static const struct file_operations forcewake_user_fops = {
+ .owner = THIS_MODULE,
+ .open = forcewake_user_open,
+ .release = forcewake_user_release,
+};
static int fw_domains_show(struct seq_file *m, void *data)
{
@@ -36,7 +77,7 @@ static int fw_domains_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
static void print_rc6_res(struct seq_file *m,
const char *title,
@@ -238,11 +279,10 @@ static int drpc_show(struct seq_file *m, void *unused)
return err;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc);
-static int frequency_show(struct seq_file *m, void *unused)
+void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
{
- struct intel_gt *gt = m->private;
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_rps *rps = &gt->rps;
@@ -254,21 +294,21 @@ static int frequency_show(struct seq_file *m, void *unused)
u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
- seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
- seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
- seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
+ drm_printf(p, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
+ drm_printf(p, "Requested VID: %d\n", rgvswctl & 0x3f);
+ drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
MEMSTAT_VID_SHIFT);
- seq_printf(m, "Current P-state: %d\n",
+ drm_printf(p, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
u32 rpmodectl, freq_sts;
rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
- seq_printf(m, "Video Turbo Mode: %s\n",
+ drm_printf(p, "Video Turbo Mode: %s\n",
yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
- seq_printf(m, "HW control enabled: %s\n",
+ drm_printf(p, "HW control enabled: %s\n",
yesno(rpmodectl & GEN6_RP_ENABLE));
- seq_printf(m, "SW control enabled: %s\n",
+ drm_printf(p, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
@@ -276,25 +316,25 @@ static int frequency_show(struct seq_file *m, void *unused)
freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
- seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
- seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq);
+ drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+ drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq);
- seq_printf(m, "actual GPU freq: %d MHz\n",
+ drm_printf(p, "actual GPU freq: %d MHz\n",
intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
- seq_printf(m, "current GPU freq: %d MHz\n",
+ drm_printf(p, "current GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->cur_freq));
- seq_printf(m, "max GPU freq: %d MHz\n",
+ drm_printf(p, "max GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->max_freq));
- seq_printf(m, "min GPU freq: %d MHz\n",
+ drm_printf(p, "min GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->min_freq));
- seq_printf(m, "idle GPU freq: %d MHz\n",
+ drm_printf(p, "idle GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->idle_freq));
- seq_printf(m, "efficient (RPe) frequency: %d MHz\n",
+ drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
} else if (GRAPHICS_VER(i915) >= 6) {
u32 rp_state_limits;
@@ -309,13 +349,11 @@ static int frequency_show(struct seq_file *m, void *unused)
int max_freq;
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
- if (IS_GEN9_LP(i915)) {
- rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ rp_state_cap = intel_rps_read_state_cap(rps);
+ if (IS_GEN9_LP(i915))
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
- } else {
- rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+ else
gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
- }
/* RPSTAT1 is in the GT power well */
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
@@ -376,113 +414,121 @@ static int frequency_show(struct seq_file *m, void *unused)
}
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
- seq_printf(m, "Video Turbo Mode: %s\n",
+ drm_printf(p, "Video Turbo Mode: %s\n",
yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
- seq_printf(m, "HW control enabled: %s\n",
+ drm_printf(p, "HW control enabled: %s\n",
yesno(rpmodectl & GEN6_RP_ENABLE));
- seq_printf(m, "SW control enabled: %s\n",
+ drm_printf(p, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
- seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
if (GRAPHICS_VER(i915) <= 10)
- seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
+ drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
pm_isr, pm_iir);
- seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
+ drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
- seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
- seq_printf(m, "Render p-state ratio: %d\n",
+ drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ drm_printf(p, "Render p-state ratio: %d\n",
(gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
- seq_printf(m, "Render p-state VID: %d\n",
+ drm_printf(p, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
- seq_printf(m, "Render p-state limit: %d\n",
+ drm_printf(p, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
- seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
- seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
- seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
- seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
- seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
- seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %d (%lldns)\n",
+ drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
+ drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
+ drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
+ drm_printf(p, "CAGF: %dMHz\n", cagf);
+ drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
rpcurupei,
intel_gt_pm_interval_to_ns(gt, rpcurupei));
- seq_printf(m, "RP CUR UP: %d (%lldns)\n",
+ drm_printf(p, "RP CUR UP: %d (%lldns)\n",
rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
- seq_printf(m, "RP PREV UP: %d (%lldns)\n",
+ drm_printf(p, "RP PREV UP: %d (%lldns)\n",
rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
- seq_printf(m, "Up threshold: %d%%\n",
+ drm_printf(p, "Up threshold: %d%%\n",
rps->power.up_threshold);
- seq_printf(m, "RP UP EI: %d (%lldns)\n",
+ drm_printf(p, "RP UP EI: %d (%lldns)\n",
rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
- seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n",
+ drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
- seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n",
+ drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
rpcurdownei,
intel_gt_pm_interval_to_ns(gt, rpcurdownei));
- seq_printf(m, "RP CUR DOWN: %d (%lldns)\n",
+ drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
rpcurdown,
intel_gt_pm_interval_to_ns(gt, rpcurdown));
- seq_printf(m, "RP PREV DOWN: %d (%lldns)\n",
+ drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
rpprevdown,
intel_gt_pm_interval_to_ns(gt, rpprevdown));
- seq_printf(m, "Down threshold: %d%%\n",
+ drm_printf(p, "Down threshold: %d%%\n",
rps->power.down_threshold);
- seq_printf(m, "RP DOWN EI: %d (%lldns)\n",
+ drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
- seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n",
+ drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
max_freq *= (IS_GEN9_BC(i915) ||
GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
- seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+ drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(rps, max_freq));
max_freq = (rp_state_cap & 0xff00) >> 8;
max_freq *= (IS_GEN9_BC(i915) ||
GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
- seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+ drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(rps, max_freq));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
rp_state_cap >> 0) & 0xff;
max_freq *= (IS_GEN9_BC(i915) ||
GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
- seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(rps, max_freq));
- seq_printf(m, "Max overclocked frequency: %dMHz\n",
+ drm_printf(p, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(rps, rps->max_freq));
- seq_printf(m, "Current freq: %d MHz\n",
+ drm_printf(p, "Current freq: %d MHz\n",
intel_gpu_freq(rps, rps->cur_freq));
- seq_printf(m, "Actual freq: %d MHz\n", cagf);
- seq_printf(m, "Idle freq: %d MHz\n",
+ drm_printf(p, "Actual freq: %d MHz\n", cagf);
+ drm_printf(p, "Idle freq: %d MHz\n",
intel_gpu_freq(rps, rps->idle_freq));
- seq_printf(m, "Min freq: %d MHz\n",
+ drm_printf(p, "Min freq: %d MHz\n",
intel_gpu_freq(rps, rps->min_freq));
- seq_printf(m, "Boost freq: %d MHz\n",
+ drm_printf(p, "Boost freq: %d MHz\n",
intel_gpu_freq(rps, rps->boost_freq));
- seq_printf(m, "Max freq: %d MHz\n",
+ drm_printf(p, "Max freq: %d MHz\n",
intel_gpu_freq(rps, rps->max_freq));
- seq_printf(m,
+ drm_printf(p,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
} else {
- seq_puts(m, "no P-state info available\n");
+ drm_puts(p, "no P-state info available\n");
}
- seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
- seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
- seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
+ drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
+ drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
+ drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
intel_runtime_pm_put(uncore->rpm, wakeref);
+}
+
+static int frequency_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ intel_gt_pm_frequency_dump(gt, &p);
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency);
static int llc_show(struct seq_file *m, void *data)
{
@@ -535,7 +581,7 @@ static bool llc_eval(void *data)
return HAS_LLC(gt->i915);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc);
static const char *rps_power_to_str(unsigned int power)
{
@@ -614,14 +660,15 @@ static bool rps_eval(void *data)
return HAS_RPS(gt->i915);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
-void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
+void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "drpc", &drpc_fops, NULL },
{ "frequency", &frequency_fops, NULL },
{ "forcewake", &fw_domains_fops, NULL },
+ { "forcewake_user", &forcewake_user_fops, NULL},
{ "llc", &llc_fops, llc_eval },
{ "rps_boost", &rps_boost_fops, rps_eval },
};
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
new file mode 100644
index 000000000000..a8457887ec65
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_PM_DEBUGFS_H
+#define INTEL_GT_PM_DEBUGFS_H
+
+struct intel_gt;
+struct dentry;
+struct drm_printer;
+
+void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
+void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
+
+/* functions that need to be accessed by the upper level non-gt interfaces */
+int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
+int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
+
+#endif /* INTEL_GT_PM_DEBUGFS_H */
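
A hedged sketch of the intended upper-level use of the two exported forcewake helpers: a non-gt debugfs open/release pair that simply forwards to them. The i915_forcewake_* names and the gt lookup here are illustrative, not taken from this patch.

static int i915_forcewake_open(struct inode *inode, struct file *file)
{
	struct drm_i915_private *i915 = inode->i_private;

	return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt);
}

static int i915_forcewake_release(struct inode *inode, struct file *file)
{
	struct drm_i915_private *i915 = inode->i_private;

	return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt);
}
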
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1..14216cc471b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -26,6 +26,7 @@
#include "intel_rps_types.h"
#include "intel_migrate_types.h"
#include "intel_wakeref.h"
+#include "pxp/intel_pxp_types.h"
struct drm_i915_private;
struct i915_ggtt;
@@ -72,6 +73,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct i915_wa_list wa_list;
+
struct intel_gt_timelines {
spinlock_t lock; /* protects active_list */
struct list_head active_list;
@@ -184,6 +187,9 @@ struct intel_gt {
u8 num_engines;
+ /* General presence of SFC units */
+ u8 sfc_mask;
+
/* Media engine access to SFC per instance */
u8 vdbox_sfc_access;
@@ -192,6 +198,12 @@ struct intel_gt {
unsigned long mslice_mask;
} info;
+
+ struct {
+ u8 uc_index;
+ } mocs;
+
+ struct intel_pxp pxp;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e137dd32b5b8..67d14afa6623 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -28,7 +28,8 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
* used the passed in size for the page size, which should ensure it
* also has the same alignment.
*/
- obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0);
+ obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
+ vm->lmem_pt_obj_flags);
/*
* Ensure all paging structures for this vm share the same dma-resv
* object underneath, with the idea that one object_lock() will lock
@@ -155,7 +156,7 @@ void i915_vm_resv_release(struct kref *kref)
static void __i915_vm_release(struct work_struct *work)
{
struct i915_address_space *vm =
- container_of(work, struct i915_address_space, rcu.work);
+ container_of(work, struct i915_address_space, release_work);
vm->cleanup(vm);
i915_address_space_fini(vm);
@@ -171,7 +172,7 @@ void i915_vm_release(struct kref *kref)
GEM_BUG_ON(i915_is_ggtt(vm));
trace_i915_ppgtt_release(vm);
- queue_rcu_work(vm->i915->wq, &vm->rcu);
+ queue_work(vm->i915->wq, &vm->release_work);
}
void i915_address_space_init(struct i915_address_space *vm, int subclass)
@@ -185,7 +186,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
if (!kref_read(&vm->resv_ref))
kref_init(&vm->resv_ref);
- INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+ INIT_WORK(&vm->release_work, __i915_vm_release);
atomic_set(&vm->open, 1);
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index bc7153018ebd..bc6750263359 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -213,7 +213,7 @@ struct i915_vma_ops {
struct i915_address_space {
struct kref ref;
- struct rcu_work rcu;
+ struct work_struct release_work;
struct drm_mm mm;
struct intel_gt *gt;
@@ -260,6 +260,9 @@ struct i915_address_space {
u8 pd_shift;
u8 scratch_order;
+ /* Flags used when creating page-table objects for this vm */
+ unsigned long lmem_pt_obj_flags;
+
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
@@ -519,7 +522,8 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
}
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
int i915_ggtt_probe_hw(struct drm_i915_private *i915);
int i915_ggtt_init_hw(struct drm_i915_private *i915);
@@ -537,7 +541,8 @@ static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
int i915_ppgtt_init_hw(struct intel_gt *gt);
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index eb1a15deed22..08d7d5ae263a 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -3,12 +3,13 @@
* Copyright © 2019 Intel Corporation
*/
+#include <asm/tsc.h>
#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_llc.h"
-#include "intel_sideband.h"
+#include "intel_pcode.h"
struct ia_constants {
unsigned int min_gpu_freq;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bb4af4977920..56156cf18c41 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -226,6 +226,40 @@ static const u8 gen12_xcs_offsets[] = {
END
};
+static const u8 dg2_xcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END
+};
+
static const u8 gen8_rcs_offsets[] = {
NOP(1),
LRI(14, POSTED),
@@ -525,6 +559,49 @@ static const u8 xehp_rcs_offsets[] = {
END
};
+static const u8 dg2_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END
+};
+
#undef END
#undef REG16
#undef REG
@@ -543,7 +620,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
!intel_engine_has_relative_mmio(engine));
if (engine->class == RENDER_CLASS) {
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ return dg2_rcs_offsets;
+ else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
return xehp_rcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_rcs_offsets;
@@ -554,7 +633,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
else
return gen8_rcs_offsets;
} else {
- if (GRAPHICS_VER(engine->i915) >= 12)
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ return dg2_xcs_offsets;
+ else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_xcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 9)
return gen9_xcs_offsets;
@@ -861,7 +942,13 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
- obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
+ if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
+ ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+ context_size += PARENT_SCRATCH_SIZE;
+ }
+
+ obj = i915_gem_object_create_lmem(engine->i915, context_size,
+ I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 1dac21aa7e5c..afb1cce9a352 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -78,7 +78,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
* TODO: Add support for huge LMEM PTEs
*/
- vm = i915_ppgtt_create(gt);
+ vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(vm))
return ERR_CAST(vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d..15f9ada28a7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,8 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+ u8 uc_index;
+ u8 unused_entries_index;
};
/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -40,6 +42,8 @@ struct drm_i915_mocs_table {
#define L3_ESC(value) ((value) << 0)
#define L3_SCC(value) ((value) << 1)
#define _L3_CACHEABILITY(value) ((value) << 4)
+#define L3_GLBGO(value) ((value) << 6)
+#define L3_LKUP(value) ((value) << 7)
/* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
@@ -88,18 +92,25 @@ struct drm_i915_mocs_table {
*
* Entries not part of the following tables are undefined as far as
* userspace is concerned and shouldn't be relied upon. For Gen < 12
- * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * they will be initialized to PTE. Gen >= 12 doesn't have a setting for
+ * PTE; on those platforms (except TGL/RKL) unused entries are set to
+ * L3 WB to catch accidental use of reserved and unused MOCS indexes.
*
* The last few entries are reserved by the hardware. For ICL+ they
* should be initialized according to bspec and never used, for older
* platforms they should never be written to.
*
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
* interface for ICL+. For older platforms, they are part of kernel
* ABI. It is expected that, for specific hardware platform, existing
* entries will remain constant and the table will only be updated by
* adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12, except TGL and RKL, reserved and unspecified MOCS
+ * indices have been set to L3 WB. These reserved entries should never
+ * be used; they may be changed to lower-performance variants with better
+ * coherency in the future if more entries are needed.
+ * For TGL/RKL, all unspecified MOCS indexes are mapped to L3 UC.
*/
#define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -282,17 +293,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
};
static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
- /* Error */
- MOCS_ENTRY(0, 0, L3_0_DIRECT),
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
- /* Reserved */
- MOCS_ENTRY(2, 0, L3_0_DIRECT),
- MOCS_ENTRY(3, 0, L3_0_DIRECT),
- MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -314,6 +317,83 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
MOCS_ENTRY(63, 0, L3_1_UC),
};
+static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
+ GEN11_MOCS_ENTRIES,
+ /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+ MOCS_ENTRY(48,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + L3 */
+ MOCS_ENTRY(49,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + LLC */
+ MOCS_ENTRY(50,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* Implicitly enable L1 - HDC:L1 */
+ MOCS_ENTRY(51,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_1_UC),
+ /* HW Special Case (CCS) */
+ MOCS_ENTRY(60,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* HW Special Case (Displayable) */
+ MOCS_ENTRY(61,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+};
+
+static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = {
+ /* wa_1608975824 */
+ MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)),
+
+ /* UC - Coherent; GO:L3 */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)),
+ /* UC - Non-Coherent; GO:L3 */
+ MOCS_ENTRY(4, 0, L3_1_UC),
+
+ /* WB */
+ MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)),
+
+ /* HW Reserved - SW program but never use. */
+ MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)),
+ MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)),
+ MOCS_ENTRY(60, 0, L3_1_UC),
+ MOCS_ENTRY(61, 0, L3_1_UC),
+ MOCS_ENTRY(62, 0, L3_1_UC),
+ MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
+ /* UC - Coherent; GO:L3 */
+ MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+ /* WB - LC */
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
+ /* Wa_14011441408: Set Go to Memory for MOCS#0 */
+ MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+ /* WB - LC */
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -340,14 +420,45 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
{
unsigned int flags;
- if (IS_DG1(i915)) {
+ memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
+ table->unused_entries_index = I915_MOCS_PTE;
+ if (IS_DG2(i915)) {
+ if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) {
+ table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
+ table->table = dg2_mocs_table_g10_ax;
+ } else {
+ table->size = ARRAY_SIZE(dg2_mocs_table);
+ table->table = dg2_mocs_table;
+ }
+ table->uc_index = 1;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->unused_entries_index = 3;
+ } else if (IS_XEHPSDV(i915)) {
+ table->size = ARRAY_SIZE(xehpsdv_mocs_table);
+ table->table = xehpsdv_mocs_table;
+ table->uc_index = 2;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->unused_entries_index = 5;
+ } else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
+ table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
- } else if (GRAPHICS_VER(i915) >= 12) {
+ table->unused_entries_index = 5;
+ } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
+ /* For TGL/RKL, this can't be changed now for ABI reasons */
table->size = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->uc_index = 3;
+ } else if (GRAPHICS_VER(i915) >= 12) {
+ table->size = ARRAY_SIZE(gen12_mocs_table);
+ table->table = gen12_mocs_table;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->uc_index = 3;
+ table->unused_entries_index = 2;
} else if (GRAPHICS_VER(i915) == 11) {
table->size = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -393,16 +504,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
}
/*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from a MOCS entry. For an unused entry, the value
+ * of the entry at unused_entries_index (which defaults to
+ * I915_MOCS_PTE) is returned instead.
*/
static u32 get_entry_control(const struct drm_i915_mocs_table *table,
unsigned int index)
{
if (index < table->size && table->table[index].used)
return table->table[index].control_value;
-
- return table->table[I915_MOCS_PTE].control_value;
+ return table->table[table->unused_entries_index].control_value;
}
#define for_each_mocs(mocs, t, i) \
@@ -417,6 +528,8 @@ static void __init_mocs_table(struct intel_uncore *uncore,
unsigned int i;
u32 mocs;
+ drm_WARN_ONCE(&uncore->i915->drm, !table->unused_entries_index,
+ "Unused entries index should have been defined\n");
for_each_mocs(mocs, table, i)
intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
}
@@ -443,16 +556,16 @@ static void init_mocs_table(struct intel_engine_cs *engine,
}
/*
- * Get l3cc_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get l3cc_value from a MOCS entry. For an unused entry, the value of
+ * the entry at unused_entries_index (which defaults to I915_MOCS_PTE)
+ * is returned instead.
*/
static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
unsigned int index)
{
if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
-
- return table->table[I915_MOCS_PTE].l3cc_value;
+ return table->table[table->unused_entries_index].l3cc_value;
}
static u32 l3cc_combine(u16 low, u16 high)
@@ -468,10 +581,9 @@ static u32 l3cc_combine(u16 low, u16 high)
0; \
i++)
-static void init_l3cc_table(struct intel_engine_cs *engine,
+static void init_l3cc_table(struct intel_uncore *uncore,
const struct drm_i915_mocs_table *table)
{
- struct intel_uncore *uncore = engine->uncore;
unsigned int i;
u32 l3cc;
@@ -496,7 +608,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, &table);
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
- init_l3cc_table(engine, &table);
+ init_l3cc_table(engine->uncore, &table);
}
static u32 global_mocs_offset(void)
@@ -504,6 +616,14 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
}
+void intel_set_mocs_index(struct intel_gt *gt)
+{
+ struct drm_i915_mocs_table table;
+
+ get_mocs_settings(gt->i915, &table);
+ gt->mocs.uc_index = table.uc_index;
+}
+
void intel_mocs_init(struct intel_gt *gt)
{
struct drm_i915_mocs_table table;
@@ -515,6 +635,14 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, &table);
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+
+ /*
+ * Initialize the L3CC table as part of MOCS initialization to make
+ * sure the LNCFCMOCSx registers are programmed for the subsequent
+ * memory transactions, including GuC transactions.
+ */
+ if (flags & HAS_RENDER_L3CC)
+ init_l3cc_table(gt->uncore, &table);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
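
A hedged sketch of how the new intel_set_mocs_index() is expected to be used; the call site is not part of this hunk and the init function name below is illustrative. The idea is to cache the table's uc_index in gt->mocs early, so that the context-workaround code can later program BLIT_CCTL without re-deriving the MOCS table.

static void example_gt_init_mocs(struct intel_gt *gt)
{
	/* Cache the uncached MOCS index before engine workarounds run. */
	intel_set_mocs_index(gt);

	/* Later, gen12_ctx_gt_mocs_init() reads gt->mocs.uc_index. */
}
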
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index d83274f5163b..76db827210c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -36,5 +36,6 @@ struct intel_gt;
void intel_mocs_init(struct intel_gt *gt);
void intel_mocs_init_engine(struct intel_engine_cs *engine);
+void intel_set_mocs_index(struct intel_gt *gt);
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 886060f7e6fc..4396bfd630d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -155,19 +155,20 @@ int i915_ppgtt_init_hw(struct intel_gt *gt)
}
static struct i915_ppgtt *
-__ppgtt_create(struct intel_gt *gt)
+__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags)
{
if (GRAPHICS_VER(gt->i915) < 8)
return gen6_ppgtt_create(gt);
else
- return gen8_ppgtt_create(gt);
+ return gen8_ppgtt_create(gt, lmem_pt_obj_flags);
}
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct i915_ppgtt *ppgtt;
- ppgtt = __ppgtt_create(gt);
+ ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags);
if (IS_ERR(ppgtt))
return ppgtt;
@@ -298,7 +299,8 @@ int ppgtt_set_pages(struct i915_vma *vma)
return 0;
}
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct drm_i915_private *i915 = gt->i915;
@@ -306,6 +308,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
ppgtt->vm.i915 = i915;
ppgtt->vm.dma = i915->drm.dev;
ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+ ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags;
dma_resv_init(&ppgtt->vm._resv);
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 799d382eea79..43093dd2d0c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -9,8 +9,8 @@
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_pcode.h"
#include "intel_rc6.h"
-#include "intel_sideband.h"
/**
* DOC: RC6
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index a74b72f50cc9..afb35d2e5c73 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -32,7 +32,7 @@ static int init_fake_lmem_bar(struct intel_memory_region *mem)
mem->remap_addr = dma_map_resource(i915->drm.dev,
mem->region.start,
mem->fake_mappable.size,
- PCI_DMA_BIDIRECTIONAL,
+ DMA_BIDIRECTIONAL,
DMA_ATTR_FORCE_CONTIGUOUS);
if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) {
drm_mm_remove_node(&mem->fake_mappable);
@@ -62,7 +62,7 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem)
dma_unmap_resource(mem->i915->drm.dev,
mem->remap_addr,
mem->fake_mappable.size,
- PCI_DMA_BIDIRECTIONAL,
+ DMA_BIDIRECTIONAL,
DMA_ATTR_FORCE_CONTIGUOUS);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 7c4d5158e03b..2fdd52b62092 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -112,7 +112,8 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
- obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
+ obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 2958e2fae380..586dca1731ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -17,6 +17,7 @@
#include "intel_ring.h"
#include "shmem_utils.h"
#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -291,7 +292,9 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
- clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+ drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
+
+ intel_engine_reset_pinned_contexts(engine);
}
static void reset_prepare(struct intel_engine_cs *engine)
@@ -1265,7 +1268,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
int size, err;
if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
- return 0;
+ return NULL;
err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
if (err < 0)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 0a03fbed9f9b..5e275f8dda8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -11,8 +11,9 @@
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
+#include "intel_pcode.h"
#include "intel_rps.h"
-#include "intel_sideband.h"
+#include "vlv_sideband.h"
#include "../../../platform/x86/intel_ips.h"
#define BUSY_MAX_EI 20u /* ms */
@@ -994,20 +995,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
static void gen6_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 rp_state_cap = intel_rps_read_state_cap(rps);
/* All of these values are in units of 50MHz */
/* static values from HW: RP0 > RP1 > RPn (min_freq) */
if (IS_GEN9_LP(i915)) {
- u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
-
rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
rps->min_freq = (rp_state_cap >> 0) & 0xff;
} else {
- u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
-
rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
rps->min_freq = (rp_state_cap >> 16) & 0xff;
@@ -2144,6 +2141,19 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
return set_min_freq(rps, val);
}
+u32 intel_rps_read_state_cap(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ if (IS_XEHPSDV(i915))
+ return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
+ else if (IS_GEN9_LP(i915))
+ return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ else
+ return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+}
+
/* External interface for intel_ips.ko */
static struct drm_i915_private __rcu *ips_mchdev;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index 4213bcce1667..11960d64ca82 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
+u32 intel_rps_read_state_cap(struct intel_rps *rps);
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index bbd272943c3f..bdf09051b8a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
}
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
- u32 ss_mask)
+ u8 *subslice_mask, u32 ss_mask)
{
int offset = slice * sseu->ss_stride;
- memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
+ memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}
unsigned int
@@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
return total;
}
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
- u8 s_en, u32 ss_en, u16 eu_en)
+static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
+{
+ u32 ss_mask;
+
+ ss_mask = ss_en >> (s * sseu->max_subslices);
+ ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
+
+ return ss_mask;
+}
+
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
+ u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
int s, ss;
- /* ss_en represents entire subslice mask across all slices */
+ /* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
- sizeof(ss_en) * BITS_PER_BYTE);
+ sizeof(g_ss_en) * BITS_PER_BYTE);
for (s = 0; s < sseu->max_slices; s++) {
if ((s_en & BIT(s)) == 0)
@@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
sseu->slice_mask |= BIT(s);
- intel_sseu_set_subslices(sseu, s, ss_en);
+ /*
+ * XeHP introduces the concept of compute vs geometry DSS. To
+ * reduce variation between GENs around subslice usage, store
+ * separate masks for the geometry and compute enabled DSS since
+ * userspace will need to be able to query these masks
+ * independently. Also compute a total enabled subslice count
+ * for the purposes of selecting subslices to use in a
+ * particular GEM context.
+ */
+ intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
+ get_ss_stride_mask(sseu, s, c_ss_en));
+ intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
+ get_ss_stride_mask(sseu, s, g_ss_en));
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ get_ss_stride_mask(sseu, s,
+ g_ss_en | c_ss_en));
for (ss = 0; ss < sseu->max_subslices; ss++)
if (intel_sseu_has_subslice(sseu, s, ss))
@@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
{
struct sseu_dev_info *sseu = &gt->info.sseu;
struct intel_uncore *uncore = gt->uncore;
- u32 dss_en;
+ u32 g_dss_en, c_dss_en = 0;
u16 eu_en = 0;
u8 eu_en_fuse;
u8 s_en;
@@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
GEN11_GT_S_ENA_MASK;
- dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE);
+ g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+ c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
/* one bit per pair of EUs */
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
@@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
- gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
+ gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
/* TGL only supports slice-level power gating */
sseu->has_slice_pg = 1;
@@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
GEN11_EU_DIS_MASK);
- gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
+ gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);
/* ICL has no power gating restrictions. */
sseu->has_slice_pg = 1;
@@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
sseu_set_eus(sseu, 0, 1, ~disabled_mask);
}
- intel_sseu_set_subslices(sseu, 0, subslice_mask);
+ intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
sseu->eu_total = compute_eu_total(sseu);
@@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
/* skip disabled slice */
continue;
- intel_sseu_set_subslices(sseu, s, subslice_mask);
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ subslice_mask);
eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
/* skip disabled slice */
continue;
- intel_sseu_set_subslices(sseu, s, subslice_mask);
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
u8 eu_disabled_mask;
@@ -485,10 +514,9 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
}
fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
- switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
+ switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
default:
- MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
- HSW_F1_EU_DIS_SHIFT);
+ MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
fallthrough;
case HSW_F1_EU_DIS_10EUS:
sseu->eu_per_subslice = 10;
@@ -506,7 +534,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
sseu->eu_per_subslice);
for (s = 0; s < sseu->max_slices; s++) {
- intel_sseu_set_subslices(sseu, s, subslice_mask);
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
sseu_set_eus(sseu, s, ss,
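
A small illustration of the get_ss_stride_mask() arithmetic added above (not from the patch; the values are made up): each slice owns max_subslices consecutive bits of the packed enable word, so the helper shifts the word down by the slice offset and masks off everything else.

static void example_ss_stride_mask(void)
{
	const u8 max_subslices = 6;
	const u32 ss_en = (0x2f << 6) | 0x0e;	/* slice1 bits = 0x2f, slice0 bits = 0x0e */
	u32 slice1_mask;

	slice1_mask = ss_en >> (1 * max_subslices);	/* shift slice 1 down */
	slice1_mask &= GENMASK(max_subslices - 1, 0);	/* keep its 6 bits: 0x2f */
}
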
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 22fef98887c0..60882a74741e 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -26,9 +26,14 @@ struct drm_printer;
#define GEN_DSS_PER_CSLICE 8
#define GEN_DSS_PER_MSLICE 8
+#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
+#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
+
struct sseu_dev_info {
u8 slice_mask;
u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
@@ -78,6 +83,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
u8 mask;
int ss_idx = subslice / BITS_PER_BYTE;
+ if (slice >= sseu->max_slices ||
+ subslice >= sseu->max_subslices)
+ return false;
+
GEM_BUG_ON(ss_idx >= sseu->ss_stride);
mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
@@ -97,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
- u32 ss_mask);
+ u8 *subslice_mask, u32 ss_mask);
void intel_sseu_info_init(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index 1ba8b7da9d37..8bb3a91dad82 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -4,9 +4,9 @@
* Copyright © 2020 Intel Corporation
*/
-#include "debugfs_gt.h"
-#include "intel_sseu_debugfs.h"
#include "i915_drv.h"
+#include "intel_gt_debugfs.h"
+#include "intel_sseu_debugfs.h"
static void sseu_copy_subslices(const struct sseu_dev_info *sseu,
int slice, u8 *to_mask)
@@ -282,7 +282,7 @@ static int sseu_status_show(struct seq_file *m, void *unused)
return intel_sseu_status(m, gt);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(sseu_status);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status);
static int rcs_topology_show(struct seq_file *m, void *unused)
{
@@ -293,11 +293,11 @@ static int rcs_topology_show(struct seq_file *m, void *unused)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "sseu_status", &sseu_status_fops, NULL },
{ "rcs_topology", &rcs_topology_fops, NULL },
};
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 1257f4f11e66..438bbc7b8147 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline)
timeline->hwsp_map = vaddr;
timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
- clflush(vaddr + ofs);
+ drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);
return 0;
}
@@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl)
memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
WRITE_ONCE(*hwsp_seqno, tl->seqno);
- clflush(hwsp_seqno);
+ drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}
void intel_timeline_enter(struct intel_timeline *tl)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index aae609d7d85d..e1f362530889 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -644,6 +644,72 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}
+static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ /*
+ * This is a "fake" workaround defined by software to ensure we
+ * maintain reliable, backward-compatible behavior for userspace with
+ * regards to how nested MI_BATCH_BUFFER_START commands are handled.
+ *
+ * The per-context setting of MI_MODE[12] determines whether the bits
+ * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
+ * in the traditional manner or whether they should instead use a new
+ * tgl+ meaning that breaks backward compatibility, but allows nesting
+ * into 3rd-level batchbuffers. When this new capability was first
+ * added in TGL, it remained off by default unless a context
+ * intentionally opted in to the new behavior. However Xe_HPG now
+ * flips this on by default and requires that we explicitly opt out if
+ * we don't want the new behavior.
+ *
+ * From a SW perspective, we want to maintain the backward-compatible
+ * behavior for userspace, so we'll apply a fake workaround to set it
+ * back to the legacy behavior on platforms where the hardware default
+ * is to break compatibility. At the moment there is no Linux
+ * userspace that utilizes third-level batchbuffers, so this spares
+ * userspace from needing to make any changes; using the legacy
+ * meaning is the correct thing to do. If/when we have userspace
+ * consumers that want to utilize third-level batch nesting, we can
+ * provide a context parameter to allow them to opt-in.
+ */
+ wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
+}
+
+static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ u8 mocs;
+
+ /*
+ * Some blitter commands do not have a field for MOCS; those
+ * commands will use the MOCS index pointed to by BLIT_CCTL.
+ * The BLIT_CCTL registers need to be programmed to un-cached.
+ */
+ if (engine->class == COPY_ENGINE_CLASS) {
+ mocs = engine->gt->mocs.uc_index;
+ wa_write_clr_set(wal,
+ BLIT_CCTL(engine->mmio_base),
+ BLIT_CCTL_MASK,
+ BLIT_CCTL_MOCS(mocs, mocs));
+ }
+}
+
+/*
+ * gen12_ctx_gt_fake_wa_init() doesn't program an official workaround
+ * defined by the hardware team, but it programs general context registers.
+ * Adding this context register programming to the context workaround list
+ * allows us to use the wa framework for proper application and validation.
+ */
+static void
+gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ fakewa_disable_nestedbb_mode(engine, wal);
+
+ gen12_ctx_gt_mocs_init(engine, wal);
+}
+
static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
struct i915_wa_list *wal,
@@ -651,11 +717,19 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
{
struct drm_i915_private *i915 = engine->i915;
- if (engine->class != RENDER_CLASS)
- return;
-
wa_init_start(wal, name, engine->name);
+ /* Applies to all engines */
+ /*
+ * Fake workarounds are not actual workarounds but the
+ * programming of context registers using the workaround framework.
+ */
+ if (GRAPHICS_VER(i915) >= 12)
+ gen12_ctx_gt_fake_wa_init(engine, wal);
+
+ if (engine->class != RENDER_CLASS)
+ goto done;
+
if (IS_DG1(i915))
dg1_ctx_workarounds_init(engine, wal);
else if (GRAPHICS_VER(i915) == 12)
@@ -685,6 +759,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
else
MISSING_CASE(GRAPHICS_VER(i915));
+done:
wa_init_finish(wal);
}
@@ -729,7 +804,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
static void
-gen4_gt_workarounds_init(struct drm_i915_private *i915,
+gen4_gt_workarounds_init(struct intel_gt *gt,
struct i915_wa_list *wal)
{
/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
@@ -737,29 +812,29 @@ gen4_gt_workarounds_init(struct drm_i915_private *i915,
}
static void
-g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen4_gt_workarounds_init(i915, wal);
+ gen4_gt_workarounds_init(gt, wal);
/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}
static void
-ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- g4x_gt_workarounds_init(i915, wal);
+ g4x_gt_workarounds_init(gt, wal);
wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}
static void
-snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
}
static void
-ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
wa_masked_dis(wal,
@@ -775,7 +850,7 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* WaForceL3Serialization:vlv */
wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
@@ -788,7 +863,7 @@ vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* L3 caching of data atomics doesn't work -- disable it. */
wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
@@ -803,8 +878,10 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = gt->i915;
+
/* WaDisableKillLogic:bxt,skl,kbl */
if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,
@@ -829,9 +906,9 @@ gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal
}
static void
-skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
/* WaDisableGafsUnitClkGating:skl */
wa_write_or(wal,
@@ -839,19 +916,19 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
/* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0))
+ if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0))
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
static void
-kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
/* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_GT_STEP(i915, 0, STEP_C0))
+ if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0))
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -868,15 +945,15 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
}
static void
-cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
/* WaDisableGafsUnitClkGating:cfl */
wa_write_or(wal,
@@ -901,21 +978,21 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
}
-static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal,
+static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
unsigned int slice, unsigned int subslice)
{
- drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice);
+ drm_dbg(&gt->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice);
__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
}
static void
-icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
- const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
+ const struct sseu_dev_info *sseu = &gt->info.sseu;
unsigned int slice, subslice;
- GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
+ GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
slice = 0;
@@ -935,16 +1012,15 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
* then we can just rely on the default steering and won't need to
* worry about explicitly re-steering L3BANK reads later.
*/
- if (i915->gt.info.l3bank_mask & BIT(subslice))
- i915->gt.steering_table[L3BANK] = NULL;
+ if (gt->info.l3bank_mask & BIT(subslice))
+ gt->steering_table[L3BANK] = NULL;
- __add_mcr_wa(i915, wal, slice, subslice);
+ __add_mcr_wa(gt, wal, slice, subslice);
}
static void
xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = gt->i915;
const struct sseu_dev_info *sseu = &gt->info.sseu;
unsigned long slice, subslice = 0, slice_mask = 0;
u64 dss_mask = 0;
@@ -1008,7 +1084,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
WARN_ON(subslice > GEN_DSS_PER_GSLICE);
WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0);
- __add_mcr_wa(i915, wal, slice, subslice);
+ __add_mcr_wa(gt, wal, slice, subslice);
/*
* SQIDI ranges are special because they use different steering
@@ -1024,9 +1100,11 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
-icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- icl_wa_init_mcr(i915, wal);
+ struct drm_i915_private *i915 = gt->i915;
+
+ icl_wa_init_mcr(gt, wal);
/* WaModifyGamTlbPartitioning:icl */
wa_write_clr_set(wal,
@@ -1077,10 +1155,9 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
* the engine-specific workaround list.
*/
static void
-wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
+wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct intel_engine_cs *engine;
- struct intel_gt *gt = &i915->gt;
int id;
for_each_engine(engine, gt, id) {
@@ -1094,22 +1171,23 @@ wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-gen12_gt_workarounds_init(struct drm_i915_private *i915,
- struct i915_wa_list *wal)
+gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- icl_wa_init_mcr(i915, wal);
+ icl_wa_init_mcr(gt, wal);
/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
- wa_14011060649(i915, wal);
+ wa_14011060649(gt, wal);
/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}
static void
-tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen12_gt_workarounds_init(i915, wal);
+ struct drm_i915_private *i915 = gt->i915;
+
+ gen12_gt_workarounds_init(gt, wal);
/* Wa_1409420604:tgl */
if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
@@ -1130,9 +1208,11 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen12_gt_workarounds_init(i915, wal);
+ struct drm_i915_private *i915 = gt->i915;
+
+ gen12_gt_workarounds_init(gt, wal);
/* Wa_1607087056:dg1 */
if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
@@ -1154,60 +1234,62 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- xehp_init_mcr(&i915->gt, wal);
+ xehp_init_mcr(gt, wal);
}
static void
-gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
+gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = gt->i915;
+
if (IS_XEHPSDV(i915))
- xehpsdv_gt_workarounds_init(i915, wal);
+ xehpsdv_gt_workarounds_init(gt, wal);
else if (IS_DG1(i915))
- dg1_gt_workarounds_init(i915, wal);
+ dg1_gt_workarounds_init(gt, wal);
else if (IS_TIGERLAKE(i915))
- tgl_gt_workarounds_init(i915, wal);
+ tgl_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 12)
- gen12_gt_workarounds_init(i915, wal);
+ gen12_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 11)
- icl_gt_workarounds_init(i915, wal);
+ icl_gt_workarounds_init(gt, wal);
else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
- cfl_gt_workarounds_init(i915, wal);
+ cfl_gt_workarounds_init(gt, wal);
else if (IS_GEMINILAKE(i915))
- glk_gt_workarounds_init(i915, wal);
+ glk_gt_workarounds_init(gt, wal);
else if (IS_KABYLAKE(i915))
- kbl_gt_workarounds_init(i915, wal);
+ kbl_gt_workarounds_init(gt, wal);
else if (IS_BROXTON(i915))
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
else if (IS_SKYLAKE(i915))
- skl_gt_workarounds_init(i915, wal);
+ skl_gt_workarounds_init(gt, wal);
else if (IS_HASWELL(i915))
- hsw_gt_workarounds_init(i915, wal);
+ hsw_gt_workarounds_init(gt, wal);
else if (IS_VALLEYVIEW(i915))
- vlv_gt_workarounds_init(i915, wal);
+ vlv_gt_workarounds_init(gt, wal);
else if (IS_IVYBRIDGE(i915))
- ivb_gt_workarounds_init(i915, wal);
+ ivb_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 6)
- snb_gt_workarounds_init(i915, wal);
+ snb_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 5)
- ilk_gt_workarounds_init(i915, wal);
+ ilk_gt_workarounds_init(gt, wal);
else if (IS_G4X(i915))
- g4x_gt_workarounds_init(i915, wal);
+ g4x_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 4)
- gen4_gt_workarounds_init(i915, wal);
+ gen4_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) <= 8)
;
else
MISSING_CASE(GRAPHICS_VER(i915));
}
-void intel_gt_init_workarounds(struct drm_i915_private *i915)
+void intel_gt_init_workarounds(struct intel_gt *gt)
{
- struct i915_wa_list *wal = &i915->gt_wa_list;
+ struct i915_wa_list *wal = &gt->wa_list;
wa_init_start(wal, "GT", "global");
- gt_init_workarounds(i915, wal);
+ gt_init_workarounds(gt, wal);
wa_init_finish(wal);
}
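For orientation, here is a minimal sketch of how the per-GT workaround list is driven end to end after this change. It is illustrative only and not part of the patch; only the three exported helpers it calls are real, and the call sites live elsewhere in the driver.

static void example_gt_wa_flow(struct intel_gt *gt)
{
	/* Build the list once per GT (wa_init_start + per-platform init + wa_init_finish) */
	intel_gt_init_workarounds(gt);

	/* Program the registers, e.g. at driver init and after every GT reset */
	intel_gt_apply_workarounds(gt);

	/* Optionally read the registers back to confirm the values stuck */
	if (!intel_gt_verify_workarounds(gt, "example"))
		drm_dbg(&gt->i915->drm, "GT workarounds did not verify\n");
}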
@@ -1278,7 +1360,7 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
void intel_gt_apply_workarounds(struct intel_gt *gt)
{
- wa_list_apply(gt, &gt->i915->gt_wa_list);
+ wa_list_apply(gt, &gt->wa_list);
}
static bool wa_list_verify(struct intel_gt *gt,
@@ -1310,7 +1392,7 @@ static bool wa_list_verify(struct intel_gt *gt,
bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
- return wa_list_verify(gt, &gt->i915->gt_wa_list, from);
+ return wa_list_verify(gt, &gt->wa_list, from);
}
__maybe_unused
@@ -1604,6 +1686,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
i915_mmio_reg_offset(RING_NOPID(base)));
}
+/*
+ * engine_fake_wa_init() is a placeholder for programming registers
+ * that are not part of an official workaround defined by the
+ * hardware team.
+ * Adding the programming of those registers to the workaround list
+ * allows us to utilize the wa framework for proper application and
+ * verification.
+ */
+static void
+engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ u8 mocs;
+
+ /*
+ * RING_CMD_CCTL needs to be programmed to un-cached for
+ * memory writes and reads output by the Command
+ * Streamers on Gen12 onward platforms.
+ */
+ if (GRAPHICS_VER(engine->i915) >= 12) {
+ mocs = engine->gt->mocs.uc_index;
+ wa_masked_field_set(wal,
+ RING_CMD_CCTL(engine->mmio_base),
+ CMD_CCTL_MOCS_MASK,
+ CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
+ }
+}
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
@@ -2044,6 +2151,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
return;
+ engine_fake_wa_init(engine, wal);
+
if (engine->class == RENDER_CLASS)
rcs_engine_wa_init(engine, wal);
else
@@ -2067,12 +2176,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
wa_list_apply(engine->gt, &engine->wa_list);
}
-struct mcr_range {
- u32 start;
- u32 end;
-};
-
-static const struct mcr_range mcr_ranges_gen8[] = {
+static const struct i915_range mcr_ranges_gen8[] = {
{ .start = 0x5500, .end = 0x55ff },
{ .start = 0x7000, .end = 0x7fff },
{ .start = 0x9400, .end = 0x97ff },
@@ -2081,7 +2185,7 @@ static const struct mcr_range mcr_ranges_gen8[] = {
{},
};
-static const struct mcr_range mcr_ranges_gen12[] = {
+static const struct i915_range mcr_ranges_gen12[] = {
{ .start = 0x8150, .end = 0x815f },
{ .start = 0x9520, .end = 0x955f },
{ .start = 0xb100, .end = 0xb3ff },
@@ -2090,7 +2194,7 @@ static const struct mcr_range mcr_ranges_gen12[] = {
{},
};
-static const struct mcr_range mcr_ranges_xehp[] = {
+static const struct i915_range mcr_ranges_xehp[] = {
{ .start = 0x4000, .end = 0x4aff },
{ .start = 0x5200, .end = 0x52ff },
{ .start = 0x5400, .end = 0x7fff },
@@ -2109,7 +2213,7 @@ static const struct mcr_range mcr_ranges_xehp[] = {
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
- const struct mcr_range *mcr_ranges;
+ const struct i915_range *mcr_ranges;
int i;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
index 15abb68b6c00..9beaab77c7f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
@@ -24,7 +24,7 @@ static inline void intel_wa_list_free(struct i915_wa_list *wal)
void intel_engine_init_ctx_wa(struct intel_engine_cs *engine);
int intel_engine_emit_ctx_wa(struct i915_request *rq);
-void intel_gt_init_workarounds(struct drm_i915_private *i915);
+void intel_gt_init_workarounds(struct intel_gt *gt);
void intel_gt_apply_workarounds(struct intel_gt *gt);
bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 2c1af030310c..8b89215afe46 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -376,6 +376,8 @@ int mock_engine_init(struct intel_engine_cs *engine)
{
struct intel_context *ce;
+ INIT_LIST_HEAD(&engine->pinned_contexts_list);
+
engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK);
if (!engine->sched_engine)
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 317eebf086c3..6e6e4d747cca 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -290,7 +290,7 @@ static int live_heartbeat_fast(void *arg)
int err = 0;
/* Check that the heartbeat ticks at the desired rate. */
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return 0;
for_each_engine(engine, gt, id) {
@@ -352,7 +352,7 @@ static int live_heartbeat_off(void *arg)
int err = 0;
/* Check that we can turn off heartbeat and not interrupt VIP */
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return 0;
for_each_engine(engine, gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index f12ffe797639..b367ecfa42de 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -992,7 +992,7 @@ static int live_timeslice_preempt(void *arg)
* need to preempt the current task and replace it with another
* ready task.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
@@ -1122,7 +1122,7 @@ static int live_timeslice_rewind(void *arg)
* but only a few of those requests, forcing us to rewind the
* RING_TAIL of the original request.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
for_each_engine(engine, gt, id) {
@@ -1299,7 +1299,7 @@ static int live_timeslice_queue(void *arg)
* ELSP[1] is already occupied, so must rely on timeslicing to
* eject ELSP[0] in favour of the queue.)
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
@@ -1420,7 +1420,7 @@ static int live_timeslice_nopreempt(void *arg)
* We should not timeslice into a request that is marked with
* I915_REQUEST_NOPREEMPT.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
if (igt_spinner_init(&spin, gt))
@@ -2260,7 +2260,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
int err;
/* Preempt cancel non-preemptible spinner in ELSP0 */
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(arg->engine->gt))
@@ -2316,7 +2316,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
struct i915_request *rq;
int err;
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(engine->gt))
@@ -3375,7 +3375,7 @@ static int live_preempt_timeout(void *arg)
* Check that we force preemption to occur by cancelling the previous
* context if it refuses to yield the GPU.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(gt))
@@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
if (batch) {
struct i915_address_space *vm;
- vm = i915_gem_context_get_vm_rcu(ctx);
+ vm = i915_gem_context_get_eb_vm(ctx);
vma = i915_vma_instance(batch, vm, NULL);
i915_vm_put(vm);
if (IS_ERR(vma))
@@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
- ve[n] = intel_engine_create_virtual(siblings, nsibling);
+ ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve[n])) {
err = PTR_ERR(ve[n]);
nctx = n;
@@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
* restrict it to our desired engine within the virtual engine.
*/
- ve = intel_engine_create_virtual(siblings, nsibling);
+ ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
i915_request_add(rq);
}
- ce = intel_engine_create_virtual(siblings, nsibling);
+ ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,
/* XXX We do not handle oversubscription and fairness with normal rq */
for (n = 0; n < nsibling; n++) {
- ce = intel_engine_create_virtual(siblings, nsibling);
+ ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
if (err)
goto out_scratch;
- ve = intel_engine_create_virtual(siblings, nsibling);
+ ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_scratch;
@@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
- ve = intel_engine_create_virtual(siblings, nsibling);
+ ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_spin;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 2c1ed32ca5ac..7e2d99dd012d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -117,7 +117,7 @@ static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
struct intel_gt *gt = h->gt;
- struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
+ struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx);
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct i915_vma *hws, *vma;
@@ -789,7 +789,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (err)
pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n",
engine->name, rq->fence.context,
- rq->fence.seqno, rq->context->guc_id, err);
+ rq->fence.seqno, rq->context->guc_id.id, err);
}
skip:
@@ -1098,7 +1098,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
if (err)
pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n",
engine->name, rq->fence.context,
- rq->fence.seqno, rq->context->guc_id, err);
+ rq->fence.seqno, rq->context->guc_id.id, err);
}
count++;
@@ -1108,7 +1108,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n",
engine->name, test_name,
rq->fence.context,
- rq->fence.seqno, rq->context->guc_id);
+ rq->fence.seqno, rq->context->guc_id.id);
i915_request_put(rq);
GEM_TRACE_DUMP();
@@ -1596,7 +1596,7 @@ static int igt_reset_evict_ppgtt(void *arg)
if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
return 0;
- ppgtt = i915_ppgtt_create(gt);
+ ppgtt = i915_ppgtt_create(gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index e623ac45f4aa..962e91ba3be4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -66,7 +66,7 @@ reference_lists_init(struct intel_gt *gt, struct wa_lists *lists)
memset(lists, 0, sizeof(*lists));
wa_init_start(&lists->gt_wa_list, "GT_REF", "global");
- gt_init_workarounds(gt->i915, &lists->gt_wa_list);
+ gt_init_workarounds(gt, &lists->gt_wa_list);
wa_init_finish(&lists->gt_wa_list);
for_each_engine(engine, gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff582222aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+ INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index fbfcae727d7f..6e228343e8cb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -3,6 +3,7 @@
* Copyright © 2014-2019 Intel Corporation
*/
+#include "gem/i915_gem_lmem.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"
@@ -647,7 +648,14 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
u64 flags;
int ret;
- obj = i915_gem_object_create_shmem(gt->i915, size);
+ if (HAS_LMEM(gt->i915))
+ obj = i915_gem_object_create_lmem(gt->i915, size,
+ I915_BO_ALLOC_CPU_CLEAR |
+ I915_BO_ALLOC_CONTIGUOUS |
+ I915_BO_ALLOC_PM_EARLY);
+ else
+ obj = i915_gem_object_create_shmem(gt->i915, size);
+
if (IS_ERR(obj))
return ERR_CAST(obj);
@@ -748,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
}
}
}
+
+void intel_guc_write_barrier(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
+ /*
+ * Ensure intel_uncore_write_fw can be used rather than
+ * intel_uncore_write.
+ */
+ GEM_BUG_ON(guc->send_regs.fw_domains);
+
+ /*
+ * This register is used by the i915 and GuC for MMIO based
+ * communication. Once we are in this code CTBs are the only
+ * method the i915 uses to communicate with the GuC so it is
+ * safe to write to this register (a value of 0 is NOP for MMIO
+ * communication). If we ever start mixing CTBs and MMIOs a new
+ * register will have to be chosen. This function is also used
+ * to enforce ordering of a work queue item write and an update
+ * to the process descriptor. When a work queue is being used,
+ * CTBs are also the only mechanism of communication.
+ */
+ intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
+ } else {
+ /* wmb() sufficient for a barrier if in smem */
+ wmb();
+ }
+}
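As a usage sketch (illustrative, not part of the patch): any producer filling a buffer shared with the GuC must order its payload writes before the tail update the GuC polls on, which is how ct_write() uses this helper further down in the same patch. The function and buffer names below are hypothetical.

static void example_publish(struct intel_guc *guc, u32 *cmds, u32 *tail,
			    const u32 *payload, u32 len_dw, u32 new_tail)
{
	/* Write the H2G payload into the shared (lmem or smem) buffer */
	memcpy(cmds, payload, len_dw * sizeof(u32));

	/* lmem: flushing MMIO write; smem: plain wmb() */
	intel_guc_write_barrier(guc);

	/* Only now make the new tail visible to the GuC */
	WRITE_ONCE(*tail, new_tail);
}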
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2e27fe59786b..31cf9fb48c7e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -22,74 +22,155 @@
struct __guc_ads_blob;
-/*
- * Top level structure of GuC. It handles firmware loading and manages client
- * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList
- * submission.
+/**
+ * struct intel_guc - Top level structure of GuC.
+ *
+ * It handles firmware loading and manages the client pool. intel_guc owns an
+ * i915_sched_engine for submission.
*/
struct intel_guc {
+ /** @fw: the GuC firmware */
struct intel_uc_fw fw;
+ /** @log: sub-structure containing GuC log related data and objects */
struct intel_guc_log log;
+ /** @ct: the command transport communication channel */
struct intel_guc_ct ct;
+ /** @slpc: sub-structure containing SLPC related data and objects */
struct intel_guc_slpc slpc;
- /* Global engine used to submit requests to GuC */
+ /** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
+ /**
+ * @stalled_request: if GuC can't process a request for any reason, we
+ * save it until GuC restarts processing. No other request can be
+ * submitted until the stalled request is processed.
+ */
struct i915_request *stalled_request;
+ /**
+ * @submission_stall_reason: reason why submission is stalled
+ */
+ enum {
+ STALL_NONE,
+ STALL_REGISTER_CONTEXT,
+ STALL_MOVE_LRC_TAIL,
+ STALL_ADD_REQUEST,
+ } submission_stall_reason;
/* intel_guc_recv interrupt related state */
+ /** @irq_lock: protects GuC irq state */
spinlock_t irq_lock;
+ /**
+ * @msg_enabled_mask: mask of events that are processed when receiving
+ * an INTEL_GUC_ACTION_DEFAULT G2H message.
+ */
unsigned int msg_enabled_mask;
+ /**
+ * @outstanding_submission_g2h: number of outstanding GuC to Host
+ * responses related to GuC submission, used to determine if the GT is
+ * idle
+ */
atomic_t outstanding_submission_g2h;
+ /** @interrupts: pointers to GuC interrupt-managing functions. */
struct {
void (*reset)(struct intel_guc *guc);
void (*enable)(struct intel_guc *guc);
void (*disable)(struct intel_guc *guc);
} interrupts;
- /*
- * contexts_lock protects the pool of free guc ids and a linked list of
- * guc ids available to be stolen
+ /**
+ * @submission_state: sub-structure for submission state protected by
+ * single lock
+ */
+ struct {
+ /**
+ * @lock: protects everything in submission_state,
+ * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
+ * out of zero
+ */
+ spinlock_t lock;
+ /**
+ * @guc_ids: used to allocate new guc_ids, single-lrc
+ */
+ struct ida guc_ids;
+ /**
+ * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+ */
+ unsigned long *guc_ids_bitmap;
+ /**
+ * @guc_id_list: list of intel_context with valid guc_ids but no
+ * refs
+ */
+ struct list_head guc_id_list;
+ /**
+ * @destroyed_contexts: list of contexts waiting to be destroyed
+ * (deregistered with the GuC)
+ */
+ struct list_head destroyed_contexts;
+ /**
+ * @destroyed_worker: worker to deregister contexts, need as we
+ * need to take a GT PM reference and can't from destroy
+ * function as it might be in an atomic context (no sleeping)
+ */
+ struct work_struct destroyed_worker;
+ } submission_state;
+
+ /**
+ * @submission_supported: tracks whether we support GuC submission on
+ * the current platform
*/
- spinlock_t contexts_lock;
- struct ida guc_ids;
- struct list_head guc_id_list;
-
bool submission_supported;
+ /** @submission_selected: tracks whether the user enabled GuC submission */
bool submission_selected;
+ /**
+ * @rc_supported: tracks whether we support GuC rc on the current platform
+ */
bool rc_supported;
+ /** @rc_selected: tracks whether the user enabled GuC rc */
bool rc_selected;
+ /** @ads_vma: object allocated to hold the GuC ADS */
struct i915_vma *ads_vma;
+ /** @ads_blob: contents of the GuC ADS */
struct __guc_ads_blob *ads_blob;
+ /** @ads_regset_size: size of the save/restore regsets in the ADS */
u32 ads_regset_size;
+ /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool;
+ /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */
void *lrc_desc_pool_vaddr;
- /* guc_id to intel_context lookup */
+ /**
+ * @context_lookup: used to resolve intel_context from guc_id, if a
+ * context is present in this structure it is registered with the GuC
+ */
struct xarray context_lookup;
- /* Control params for fw initialization */
+ /** @params: Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS];
- /* GuC's FW specific registers used in MMIO send */
+ /** @send_regs: GuC's FW specific registers used for sending MMIO H2G */
struct {
u32 base;
unsigned int count;
enum forcewake_domains fw_domains;
} send_regs;
- /* register used to send interrupts to the GuC FW */
+ /** @notify_reg: register used to send interrupts to the GuC FW */
i915_reg_t notify_reg;
- /* Store msg (e.g. log flush) that we see while CTBs are disabled */
+ /**
+ * @mmio_msg: notification bitmask that the GuC writes in one of its
+ * registers when the CT channel is disabled, to be processed when the
+ * channel is back up.
+ */
u32 mmio_msg;
- /* To serialize the intel_guc_send actions */
+ /** @send_mutex: used to serialize the intel_guc_send actions */
struct mutex send_mutex;
};
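To illustrate how the new submission_state fields are meant to be used together, here is a simplified guc_id allocation sketch. It is assumption-laden: the real allocator in intel_guc_submission.c also handles stealing from guc_id_list and reference counting, and NUMBER_MULTI_LRC_GUC_ID is only introduced later in this patch.

static int example_assign_guc_id(struct intel_guc *guc, struct intel_context *ce,
				 bool multi_lrc, unsigned int width)
{
	unsigned long flags;
	int id;

	spin_lock_irqsave(&guc->submission_state.lock, flags);
	if (multi_lrc)
		/* multi-lrc ids must be contiguous, hence the bitmap */
		id = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
					     NUMBER_MULTI_LRC_GUC_ID,
					     order_base_2(width));
	else
		/* single-lrc ids come from the ida, above the multi-lrc range */
		id = ida_simple_get(&guc->submission_state.guc_ids,
				    NUMBER_MULTI_LRC_GUC_ID,
				    GUC_MAX_LRC_DESCRIPTORS, GFP_ATOMIC);
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);

	if (id < 0)
		return id;

	ce->guc_id.id = id;
	return 0;
}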
@@ -295,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc);
void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
+void intel_guc_write_barrier(struct intel_guc *guc);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 6926919bcac6..621c893a009f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
for_each_engine(engine, gt, id) {
u8 guc_class = engine_class_to_guc_class(engine->class);
- system_info->mapping_table[guc_class][engine->instance] =
+ system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
engine->instance;
}
}
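A worked example of the indexing change above (my reading of the patch, hedged): keying the table by ilog2(engine->logical_mask) means entries follow the logical instance numbering, so fused-off engines no longer leave holes.

/*
 * Example (assumed fusing): if only VCS0 and VCS2 exist, their logical
 * masks are BIT(0) and BIT(1), so the table becomes
 *
 *   mapping_table[GUC_VIDEO_CLASS][0] = 0;   // logical 0 -> physical VCS0
 *   mapping_table[GUC_VIDEO_CLASS][1] = 2;   // logical 1 -> physical VCS2
 *
 * instead of leaving slot 1 unset and writing slot 2, as the old
 * engine->instance indexing would have done.
 */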
@@ -349,6 +349,8 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt);
}
+#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
+#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
static int guc_prep_golden_context(struct intel_guc *guc,
struct __guc_ads_blob *blob)
{
@@ -396,7 +398,18 @@ static int guc_prep_golden_context(struct intel_guc *guc,
if (!blob)
continue;
- blob->ads.eng_state_size[guc_class] = real_size;
+ /*
+ * This interface is slightly confusing. We need to pass the
+ * base address of the full golden context and the size of just
+ * the engine state, which is the section of the context image
+ * that starts after the execlists context. This is required to
+ * allow the GuC to restore just the engine state when a
+ * watchdog reset occurs.
+ * We calculate the engine state size by removing the size of
+ * what comes before it in the context image (which is identical
+ * on all engines).
+ */
+ blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE;
blob->ads.golden_context_lrca[guc_class] = addr_ggtt;
addr_ggtt += alloc_size;
}
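As a quick sanity check on the sizes involved (assuming 4 KiB pages and LRC_PPHWSP_SZ == 1 page, which is how the driver defines it):

/*
 *   LR_HW_CONTEXT_SIZE = 80 * sizeof(u32)    =  320 bytes
 *   LRC_SKIP_SIZE      = 1 * 4096 + 320      = 4416 bytes
 *
 * so eng_state_size reported to the GuC is the golden image size minus
 * the leading 4416 bytes (PPHWSP page + execlists context header).
 */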
@@ -436,11 +449,6 @@ static void guc_init_golden_context(struct intel_guc *guc)
u8 engine_class, guc_class;
u8 *ptr;
- /* Skip execlist and PPGTT registers + HWSP */
- const u32 lr_hw_context_size = 80 * sizeof(u32);
- const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE +
- lr_hw_context_size;
-
if (!intel_uc_uses_guc_submission(&gt->uc))
return;
@@ -476,12 +484,12 @@ static void guc_init_golden_context(struct intel_guc *guc)
continue;
}
- GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size);
+ GEM_BUG_ON(blob->ads.eng_state_size[guc_class] !=
+ real_size - LRC_SKIP_SIZE);
GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt);
addr_ggtt += alloc_size;
- shmem_read(engine->default_state, skip_size, ptr + skip_size,
- real_size - skip_size);
+ shmem_read(engine->default_state, 0, ptr, real_size);
ptr += alloc_size;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 22b4733b55e2..a0cc34be7b56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -168,12 +168,15 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type,
FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr),
FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr),
};
+ int ret;
GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
GEM_BUG_ON(size % SZ_4K);
/* CT registration must go over MMIO */
- return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+
+ return ret > 0 ? -EPROTO : ret;
}
static int ct_register_buffer(struct intel_guc_ct *ct, u32 type,
@@ -188,8 +191,8 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 type,
err = guc_action_register_ct_buffer(ct_to_guc(ct), type,
desc_addr, buff_addr, size);
if (unlikely(err))
- CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n",
- guc_ct_buffer_type_to_str(type), err);
+ CT_ERROR(ct, "Failed to register %s buffer (%pe)\n",
+ guc_ct_buffer_type_to_str(type), ERR_PTR(err));
return err;
}
@@ -201,11 +204,14 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type)
FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB),
FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type),
};
+ int ret;
GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
/* CT deregistration must go over MMIO */
- return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+
+ return ret > 0 ? -EPROTO : ret;
}
static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
@@ -213,8 +219,8 @@ static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type);
if (unlikely(err))
- CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n",
- guc_ct_buffer_type_to_str(type), err);
+ CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n",
+ guc_ct_buffer_type_to_str(type), ERR_PTR(err));
return err;
}
@@ -377,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
return ++ct->requests.last_fence;
}
-static void write_barrier(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_gt *gt = guc_to_gt(guc);
-
- if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
- GEM_BUG_ON(guc->send_regs.fw_domains);
- /*
- * This register is used by the i915 and GuC for MMIO based
- * communication. Once we are in this code CTBs are the only
- * method the i915 uses to communicate with the GuC so it is
- * safe to write to this register (a value of 0 is NOP for MMIO
- * communication). If we ever start mixing CTBs and MMIOs a new
- * register will have to be chosen.
- */
- intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
- } else {
- /* wmb() sufficient for a barrier if in smem */
- wmb();
- }
-}
-
static int ct_write(struct intel_guc_ct *ct,
const u32 *action,
u32 len /* in dwords */,
@@ -468,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct,
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
*/
- write_barrier(ct);
+ intel_guc_write_barrier(ct_to_guc(ct));
/* update local copies */
ctb->tail = tail;
@@ -522,9 +506,6 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
#undef done
- if (unlikely(err))
- DRM_ERROR("CT: fence %u err %d\n", req->fence, err);
-
*status = req->status;
return err;
}
@@ -722,8 +703,11 @@ retry:
err = wait_for_ct_request_update(&request, status);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
- if (unlikely(err))
+ if (unlikely(err)) {
+ CT_ERROR(ct, "No response for request %#x (fence %u)\n",
+ action[0], request.fence);
goto unlink;
+ }
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) {
err = -EIO;
@@ -775,8 +759,8 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
if (unlikely(ret < 0)) {
- CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n",
- action[0], ret, status);
+ CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
+ action[0], ERR_PTR(ret), status);
} else if (unlikely(ret)) {
CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
action[0], ret, ret);
@@ -1042,9 +1026,9 @@ static void ct_incoming_request_worker_func(struct work_struct *w)
container_of(w, struct intel_guc_ct, requests.worker);
bool done;
- done = ct_process_incoming_requests(ct);
- if (!done)
- queue_work(system_unbound_wq, &ct->requests.worker);
+ do {
+ done = ct_process_incoming_requests(ct);
+ } while (!done);
}
static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index 887c8c8f35db..25f09a420561 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -5,14 +5,14 @@
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
+#include "gt/uc/intel_guc_ads.h"
+#include "gt/uc/intel_guc_ct.h"
+#include "gt/uc/intel_guc_slpc.h"
+#include "gt/uc/intel_guc_submission.h"
#include "intel_guc.h"
#include "intel_guc_debugfs.h"
#include "intel_guc_log_debugfs.h"
-#include "gt/uc/intel_guc_ct.h"
-#include "gt/uc/intel_guc_ads.h"
-#include "gt/uc/intel_guc_submission.h"
-#include "gt/uc/intel_guc_slpc.h"
static int guc_info_show(struct seq_file *m, void *data)
{
@@ -35,7 +35,7 @@ static int guc_info_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_info);
static int guc_registered_contexts_show(struct seq_file *m, void *data)
{
@@ -49,7 +49,7 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
static int guc_slpc_info_show(struct seq_file *m, void *unused)
{
@@ -62,7 +62,7 @@ static int guc_slpc_info_show(struct seq_file *m, void *unused)
return intel_guc_slpc_print_info(slpc, &p);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info);
static bool intel_eval_slpc_support(void *data)
{
@@ -73,7 +73,7 @@ static bool intel_eval_slpc_support(void *data)
void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "guc_info", &guc_info_fops, NULL },
{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 76fe766ad1bc..196424be0998 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -41,18 +41,21 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
}
/* Copy RSA signature from the fw image to HW for verification */
-static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
- struct intel_uncore *uncore)
+static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
{
u32 rsa[UOS_RSA_SCRATCH_COUNT];
size_t copied;
int i;
copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
- GEM_BUG_ON(copied < sizeof(rsa));
+ if (copied < sizeof(rsa))
+ return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
+
+ return 0;
}
/*
@@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc)
* by the DMA engine in one operation, whereas the RSA signature is
* loaded via MMIO.
*/
- guc_xfer_rsa(&guc->fw, uncore);
+ ret = guc_xfer_rsa(&guc->fw, uncore);
+ if (ret)
+ goto out;
/*
* Current uCode expects the code to be loaded at 8k; locations below
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..722933e26347 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,27 +52,27 @@
#define GUC_DOORBELL_INVALID 256
-#define GUC_WQ_SIZE (PAGE_SIZE * 2)
-
-/* Work queue item header definitions */
+/*
+ * Work queue item header definitions
+ *
+ * The work queue is a circular buffer used to submit complex (multi-lrc) submissions
+ * to the GuC. A work queue item is an entry in the circular buffer.
+ */
#define WQ_STATUS_ACTIVE 1
#define WQ_STATUS_SUSPENDED 2
#define WQ_STATUS_CMD_ERROR 3
#define WQ_STATUS_ENGINE_ID_NOT_USED 4
#define WQ_STATUS_SUSPENDED_FROM_RESET 5
-#define WQ_TYPE_SHIFT 0
-#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT)
-#define WQ_TARGET_SHIFT 10
-#define WQ_LEN_SHIFT 16
-#define WQ_NO_WCFLUSH_WAIT (1 << 27)
-#define WQ_PRESENT_WORKLOAD (1 << 28)
-
-#define WQ_RING_TAIL_SHIFT 20
-#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */
-#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
+#define WQ_TYPE_BATCH_BUF 0x1
+#define WQ_TYPE_PSEUDO 0x2
+#define WQ_TYPE_INORDER 0x3
+#define WQ_TYPE_NOOP 0x4
+#define WQ_TYPE_MULTI_LRC 0x5
+#define WQ_TYPE_MASK GENMASK(7, 0)
+#define WQ_LEN_MASK GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK GENMASK(28, 18)
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
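To show how the new mask-style work queue definitions above are intended to be consumed, here is a sketch; the exact dword layout of a multi-lrc work queue item is defined by the GuC interface, and the field placement below is my assumption for demonstration only.

#include <linux/bitfield.h>

static void example_wq_item_header(u32 *wqi, u32 guc_id, u32 ring_tail, u32 len_dw)
{
	/* dword 0: item type and length, packed with FIELD_PREP against the masks */
	wqi[0] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		 FIELD_PREP(WQ_LEN_MASK, len_dw);

	/* dword 1 (assumed): target guc_id and ring tail (in qwords) */
	wqi[1] = FIELD_PREP(WQ_GUC_ID_MASK, guc_id) |
		 FIELD_PREP(WQ_RING_TAIL_MASK, ring_tail);
}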
@@ -186,7 +186,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
- u32 reserved[30];
+ u32 reserved[36];
} __packed;
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
index 64e0b86bf258..46026c2c1722 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -6,7 +6,7 @@
#include <linux/fs.h>
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
#include "intel_guc.h"
#include "intel_guc_log.h"
#include "intel_guc_log_debugfs.h"
@@ -17,7 +17,7 @@ static int guc_log_dump_show(struct seq_file *m, void *data)
return intel_guc_log_dump(m->private, &p, false);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
{
@@ -25,7 +25,7 @@ static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
return intel_guc_log_dump(m->private, &p, true);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
static int guc_log_level_get(void *data, u64 *val)
{
@@ -109,7 +109,7 @@ static const struct file_operations guc_log_relay_fops = {
void intel_guc_log_debugfs_register(struct intel_guc_log *log,
struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "guc_log_dump", &guc_log_dump_fops, NULL },
{ "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL },
{ "guc_log_level", &guc_log_level_fops, NULL },
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 87d8dc8f51b9..38b47e73e35d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
@@ -28,21 +29,6 @@
/**
* DOC: GuC-based command submission
*
- * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
- * firmware is moving to an updated submission interface and we plan to
- * turn submission back on when that lands. The below documentation (and related
- * code) matches the old submission model and will be updated as part of the
- * upgrade to the new flow.
- *
- * GuC stage descriptor:
- * During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC. Currently, we only use one
- * descriptor. This stage descriptor lets the GuC know about the workqueue and
- * process descriptor. Theoretically, it also lets the GuC know about our HW
- * contexts (context ID, etc...), but we actually employ a kind of submission
- * where the GuC uses the LRCA sent via the work item instead. This is called
- * a "proxy" submission.
- *
* The Scratch registers:
* There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
* a value to the action register (SOFT_SCRATCH_0) along with any data. It then
@@ -51,14 +37,85 @@
* processes the request. The kernel driver polls waiting for this update and
* then proceeds.
*
- * Work Items:
- * There are several types of work items that the host may place into a
- * workqueue, each with its own requirements and limitations. Currently only
- * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
- * represents in-order queue. The kernel driver packs ring tail pointer and an
- * ELSP context descriptor dword into Work Item.
- * See guc_add_request()
+ * Command Transport buffers (CTBs):
+ * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
+ * - G2H) are a message interface between the i915 and GuC.
+ *
+ * Context registration:
+ * Before a context can be submitted it must be registered with the GuC via a
+ * H2G. A unique guc_id is associated with each context. The context is either
+ * registered at request creation time (normal operation) or at submission time
+ * (abnormal operation, e.g. after a reset).
+ *
+ * Context submission:
+ * The i915 updates the LRC tail value in memory. The i915 must enable the
+ * scheduling of the context within the GuC for the GuC to actually consider it.
+ * Therefore, the first time a disabled context is submitted we use a schedule
+ * enable H2G, while follow up submissions are done via the context submit H2G,
+ * which informs the GuC that a previously enabled context has new work
+ * available.
+ *
+ * Context unpin:
+ * To unpin a context a H2G is used to disable scheduling. When the
+ * corresponding G2H returns indicating the scheduling disable operation has
+ * completed it is safe to unpin the context. While a disable is in flight it
+ * isn't safe to resubmit the context so a fence is used to stall all future
+ * requests of that context until the G2H is returned.
+ *
+ * Context deregistration:
+ * Before a context can be destroyed or if we steal its guc_id we must
+ * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
+ * safe to submit anything to this guc_id until the deregister completes so a
+ * fence is used to stall all requests associated with this guc_id until the
+ * corresponding G2H returns indicating the guc_id has been deregistered.
+ *
+ * submission_state.guc_ids:
+ * Unique number associated with private GuC context data passed in during
+ * context registration / submission / deregistration. 64k available. Simple ida
+ * is used for allocation.
+ *
+ * Stealing guc_ids:
+ * If no guc_ids are available they can be stolen from another context at
+ * request creation time if that context is unpinned. If a guc_id can't be found
+ * we punt this problem to the user as we believe this is near impossible to hit
+ * during normal use cases.
+ *
+ * Locking:
+ * In the GuC submission code we have 3 basic spin locks which protect
+ * everything. Details about each below.
+ *
+ * sched_engine->lock
+ * This is the submission lock for all contexts that share an i915 scheduling
+ * engine (sched_engine), thus only one of the contexts which share a
+ * sched_engine can be submitting at a time. Currently only one sched_engine is
+ * used for all of GuC submission but that could change in the future.
+ *
+ * guc->submission_state.lock
+ * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
+ * list.
+ *
+ * ce->guc_state.lock
+ * Protects everything under ce->guc_state. Ensures that a context is in the
+ * correct state before issuing a H2G. e.g. We don't issue a schedule disable
+ * on a disabled context (bad idea), we don't issue a schedule enable when a
+ * schedule disable is in flight, etc... Also protects list of inflight requests
+ * on the context and the priority management state. Lock is individual to each
+ * context.
+ *
+ * Lock ordering rules:
+ * sched_engine->lock -> ce->guc_state.lock
+ * guc->submission_state.lock -> ce->guc_state.lock
*
+ * Reset races:
+ * When a full GT reset is triggered it is assumed that some G2H responses to
+ * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
+ * fatal as we do certain operations upon receiving a G2H (e.g. destroy
+ * contexts, release guc_ids, etc...). When this occurs we can scrub the
+ * context state and clean up appropriately; however, this is quite racy.
+ * To avoid races, the reset code must disable submission before scrubbing for
+ * the missing G2H, while the submission code must check for submission being
+ * disabled and skip sending H2Gs and updating context states when it is. Both
+ * sides must also make sure to hold the relevant locks.
*/
/* GuC Virtual Engine */
@@ -68,91 +125,56 @@ struct guc_virtual_engine {
};
static struct intel_context *
-guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
+guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags);
+
+static struct intel_context *
+guc_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width);
#define GUC_REQUEST_SIZE 64 /* bytes */
/*
- * Below is a set of functions which control the GuC scheduling state which do
- * not require a lock as all state transitions are mutually exclusive. i.e. It
- * is not possible for the context pinning code and submission, for the same
- * context, to be executing simultaneously. We still need an atomic as it is
- * possible for some of the bits to changing at the same time though.
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
*/
-#define SCHED_STATE_NO_LOCK_ENABLED BIT(0)
-#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1)
-#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2)
-static inline bool context_enabled(struct intel_context *ce)
-{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
- SCHED_STATE_NO_LOCK_ENABLED);
-}
-
-static inline void set_context_enabled(struct intel_context *ce)
-{
- atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_enabled(struct intel_context *ce)
-{
- atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_pending_enable(struct intel_context *ce)
-{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
- SCHED_STATE_NO_LOCK_PENDING_ENABLE);
-}
-
-static inline void set_context_pending_enable(struct intel_context *ce)
-{
- atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_pending_enable(struct intel_context *ce)
-{
- atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_registered(struct intel_context *ce)
-{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
- SCHED_STATE_NO_LOCK_REGISTERED);
-}
-
-static inline void set_context_registered(struct intel_context *ce)
-{
- atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_registered(struct intel_context *ce)
-{
- atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
- &ce->guc_sched_state_no_lock);
-}
+#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)
/*
* Below is a set of functions which control the GuC scheduling state which
- * require a lock, aside from the special case where the functions are called
- * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
- * path to be executing on the context.
+ * require a lock.
*/
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
#define SCHED_STATE_DESTROYED BIT(1)
#define SCHED_STATE_PENDING_DISABLE BIT(2)
#define SCHED_STATE_BANNED BIT(3)
-#define SCHED_STATE_BLOCKED_SHIFT 4
+#define SCHED_STATE_ENABLED BIT(4)
+#define SCHED_STATE_PENDING_ENABLE BIT(5)
+#define SCHED_STATE_REGISTERED BIT(6)
+#define SCHED_STATE_BLOCKED_SHIFT 7
#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
+
static inline void init_sched_state(struct intel_context *ce)
{
- /* Only should be called from guc_lrc_desc_pin() */
- atomic_set(&ce->guc_sched_state_no_lock, 0);
- ce->guc_state.sched_state = 0;
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
+}
+
+__maybe_unused
+static bool sched_state_is_init(struct intel_context *ce)
+{
+ /*
+ * XXX: Kernel contexts can have SCHED_STATE_REGISTERED after
+ * suspend.
+ */
+ return !(ce->guc_state.sched_state &
+ ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
}
static inline bool
@@ -165,7 +187,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce)
static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
- /* Only should be called from guc_lrc_desc_pin() without lock */
+ lockdep_assert_held(&ce->guc_state.lock);
ce->guc_state.sched_state |=
SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}
@@ -225,6 +247,57 @@ static inline void clr_context_banned(struct intel_context *ce)
ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}
+static inline bool context_enabled(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
+}
+
+static inline bool context_pending_enable(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void set_context_pending_enable(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void clr_context_pending_enable(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline bool context_registered(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
+}
+
+static inline void set_context_registered(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
+}
+
+static inline void clr_context_registered(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
+}
+
static inline u32 context_blocked(struct intel_context *ce)
{
return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -233,7 +306,6 @@ static inline u32 context_blocked(struct intel_context *ce)
static inline void incr_context_blocked(struct intel_context *ce)
{
- lockdep_assert_held(&ce->engine->sched_engine->lock);
lockdep_assert_held(&ce->guc_state.lock);
ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
@@ -243,7 +315,6 @@ static inline void incr_context_blocked(struct intel_context *ce)
static inline void decr_context_blocked(struct intel_context *ce)
{
- lockdep_assert_held(&ce->engine->sched_engine->lock);
lockdep_assert_held(&ce->guc_state.lock);
GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
@@ -251,14 +322,39 @@ static inline void decr_context_blocked(struct intel_context *ce)
ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
+static inline bool context_has_committed_requests(struct intel_context *ce)
+{
+ return !!ce->guc_state.number_committed_requests;
+}
+
+static inline void incr_context_committed_requests(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ++ce->guc_state.number_committed_requests;
+ GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
+}
+
+static inline void decr_context_committed_requests(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ --ce->guc_state.number_committed_requests;
+ GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
+}
+
+static struct intel_context *
+request_to_scheduling_context(struct i915_request *rq)
+{
+ return intel_context_to_parent(rq->context);
+}
+
static inline bool context_guc_id_invalid(struct intel_context *ce)
{
- return ce->guc_id == GUC_INVALID_LRC_ID;
+ return ce->guc_id.id == GUC_INVALID_LRC_ID;
}
static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
- ce->guc_id = GUC_INVALID_LRC_ID;
+ ce->guc_id.id = GUC_INVALID_LRC_ID;
}
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
@@ -271,6 +367,104 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
}
+/*
+ * When using multi-lrc submission a scratch memory area is reserved in the
+ * parent's context state for the process descriptor, work queue, and handshake
+ * between the parent + children contexts to insert safe preemption points
+ * between each of the BBs. Currently the scratch area is sized to a page.
+ *
+ * The layout of this scratch area is below:
+ * 0 guc_process_desc
+ * + sizeof(struct guc_process_desc) child go
+ * + CACHELINE_BYTES child join[0]
+ * ...
+ * + CACHELINE_BYTES child join[n - 1]
+ * ... unused
+ * PARENT_SCRATCH_SIZE / 2 work queue start
+ * ... work queue
+ * PARENT_SCRATCH_SIZE - 1 work queue end
+ */
+#define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
+#define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
+
+struct sync_semaphore {
+ u32 semaphore;
+ u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+struct parent_scratch {
+ struct guc_process_desc pdesc;
+
+ struct sync_semaphore go;
+ struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
+
+ u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) -
+ sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
+
+ u32 wq[WQ_SIZE / sizeof(u32)];
+};
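A rough stand-alone sketch of the same layout arithmetic, assuming a 4 KiB PARENT_SCRATCH_SIZE, 64-byte cachelines, a 48-byte process descriptor and MAX_ENGINE_INSTANCE of 7 (the real sizes come from the GuC ABI headers and may differ):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PARENT_SCRATCH_SIZE 4096u	/* assumed: one page */
	#define CACHELINE_BYTES     64u		/* assumed cacheline size */
	#define MAX_ENGINE_INSTANCE 7u		/* assumed */
	#define WQ_SIZE   (PARENT_SCRATCH_SIZE / 2)
	#define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)

	struct mock_process_desc { uint8_t bytes[48]; };	/* stand-in for guc_process_desc */

	struct sync_semaphore {
		uint32_t semaphore;
		uint8_t unused[CACHELINE_BYTES - sizeof(uint32_t)];
	};

	struct parent_scratch {
		struct mock_process_desc pdesc;
		struct sync_semaphore go;
		struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
		uint8_t unused[WQ_OFFSET - sizeof(struct mock_process_desc) -
			       sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
		uint32_t wq[WQ_SIZE / sizeof(uint32_t)];
	};

	int main(void)
	{
		/* The work queue must start exactly half way into the scratch page. */
		printf("wq offset = %zu (expected %u)\n",
		       offsetof(struct parent_scratch, wq), WQ_OFFSET);
		printf("total size = %zu (expected %u)\n",
		       sizeof(struct parent_scratch), PARENT_SCRATCH_SIZE);
		return 0;
	}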
+
+static u32 __get_parent_scratch_offset(struct intel_context *ce)
+{
+ GEM_BUG_ON(!ce->parallel.guc.parent_page);
+
+ return ce->parallel.guc.parent_page * PAGE_SIZE;
+}
+
+static u32 __get_wq_offset(struct intel_context *ce)
+{
+ BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
+
+ return __get_parent_scratch_offset(ce) + WQ_OFFSET;
+}
+
+static struct parent_scratch *
+__get_parent_scratch(struct intel_context *ce)
+{
+ BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
+ BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
+
+ /*
+ * Need to subtract LRC_STATE_OFFSET here as the
+ * parallel.guc.parent_page is the offset into ce->state while
+ * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
+ */
+ return (struct parent_scratch *)
+ (ce->lrc_reg_state +
+ ((__get_parent_scratch_offset(ce) -
+ LRC_STATE_OFFSET) / sizeof(u32)));
+}
+
+static struct guc_process_desc *
+__get_process_desc(struct intel_context *ce)
+{
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+
+ return &ps->pdesc;
+}
+
+static u32 *get_wq_pointer(struct guc_process_desc *desc,
+ struct intel_context *ce,
+ u32 wqi_size)
+{
+ /*
+ * Check for space in the work queue. Cache a copy of the head pointer in
+ * the intel_context structure in order to reduce the number of accesses
+ * to shared GPU memory, which may be across a PCIe bus.
+ */
+#define AVAILABLE_SPACE \
+ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
+ if (wqi_size > AVAILABLE_SPACE) {
+ ce->parallel.guc.wqi_head = READ_ONCE(desc->head);
+
+ if (wqi_size > AVAILABLE_SPACE)
+ return NULL;
+ }
+#undef AVAILABLE_SPACE
+
+ return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
+}
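The space check above is the usual power-of-two ring-buffer calculation; the shared head is only re-read when the cached copy reports insufficient space. A hedged user-space sketch of the same idea with CIRC_SPACE() expanded inline (the 64-byte queue size and variable names are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define WQ_SIZE 64u	/* illustrative; must be a power of two */

	/* Same formula as the kernel's CIRC_SPACE() for a power-of-two buffer. */
	static uint32_t circ_space(uint32_t tail, uint32_t head, uint32_t size)
	{
		return (head - tail - 1) & (size - 1);
	}

	int main(void)
	{
		uint32_t shared_head = 0;	/* stand-in for desc->head in GPU memory */
		uint32_t cached_head = 0;	/* stand-in for ce->parallel.guc.wqi_head */
		uint32_t tail = 0;
		uint32_t wqi_size = 24;

		/* First check uses the cached head and avoids touching shared memory. */
		if (wqi_size > circ_space(tail, cached_head, WQ_SIZE)) {
			/* Only now pay for a read across the bus. */
			cached_head = shared_head;
			if (wqi_size > circ_space(tail, cached_head, WQ_SIZE))
				printf("no space, back off and retry later\n");
		}

		printf("space available: %u bytes\n",
		       circ_space(tail, cached_head, WQ_SIZE));
		return 0;
	}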
+
static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
{
struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
@@ -352,20 +546,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
xa_unlock_irqrestore(&guc->context_lookup, flags);
}
+static void decr_outstanding_submission_g2h(struct intel_guc *guc)
+{
+ if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
+ wake_up_all(&guc->ct.wq);
+}
+
static int guc_submission_send_busy_loop(struct intel_guc *guc,
const u32 *action,
u32 len,
u32 g2h_len_dw,
bool loop)
{
- int err;
-
- err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ /*
+ * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
+ * so we don't handle the case where we don't get a reply because we
+ * aborted the send due to the channel being busy.
+ */
+ GEM_BUG_ON(g2h_len_dw && !loop);
- if (!err && g2h_len_dw)
+ if (g2h_len_dw)
atomic_inc(&guc->outstanding_submission_g2h);
- return err;
+ return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -421,15 +624,17 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
static int guc_lrc_desc_pin(struct intel_context *ce, bool loop);
-static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
int err = 0;
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
u32 action[3];
int len = 0;
u32 g2h_len_dw = 0;
bool enabled;
+ lockdep_assert_held(&rq->engine->sched_engine->lock);
+
/*
* Corner case where requests were sitting in the priority list or a
* request resubmitted after the context was banned.
@@ -437,41 +642,34 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
if (unlikely(intel_context_is_banned(ce))) {
i915_request_put(i915_request_mark_eio(rq));
intel_engine_signal_breadcrumbs(ce->engine);
- goto out;
+ return 0;
}
- GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
+ GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
GEM_BUG_ON(context_guc_id_invalid(ce));
- /*
- * Corner case where the GuC firmware was blown away and reloaded while
- * this context was pinned.
- */
- if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) {
- err = guc_lrc_desc_pin(ce, false);
- if (unlikely(err))
- goto out;
- }
+ spin_lock(&ce->guc_state.lock);
/*
* The request / context will be run on the hardware when scheduling
- * gets enabled in the unblock.
+ * gets enabled in the unblock. For multi-lrc we still submit the
+ * context to move the LRC tails.
*/
- if (unlikely(context_blocked(ce)))
+ if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
goto out;
- enabled = context_enabled(ce);
+ enabled = context_enabled(ce) || context_blocked(ce);
if (!enabled) {
action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
- action[len++] = ce->guc_id;
+ action[len++] = ce->guc_id.id;
action[len++] = GUC_CONTEXT_ENABLE;
set_context_pending_enable(ce);
intel_context_get(ce);
g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
} else {
action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
- action[len++] = ce->guc_id;
+ action[len++] = ce->guc_id.id;
}
err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
@@ -479,6 +677,18 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
trace_intel_context_sched_enable(ce);
atomic_inc(&guc->outstanding_submission_g2h);
set_context_enabled(ce);
+
+ /*
+ * Without multi-lrc, the KMD does the submission step (moving the
+ * LRC tail), so enabling scheduling is sufficient to submit the
+ * context. This isn't the case in multi-lrc submission as the
+ * GuC needs to move the tails, hence the need for another H2G
+ * to submit a multi-lrc context after enabling scheduling.
+ */
+ if (intel_context_is_parent(ce)) {
+ action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
+ err = intel_guc_send_nb(guc, action, len - 1, 0);
+ }
} else if (!enabled) {
clr_context_pending_enable(ce);
intel_context_put(ce);
@@ -487,9 +697,22 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
trace_i915_request_guc_submit(rq);
out:
+ spin_unlock(&ce->guc_state.lock);
return err;
}
+static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+{
+ int ret = __guc_add_request(guc, rq);
+
+ if (unlikely(ret == -EBUSY)) {
+ guc->stalled_request = rq;
+ guc->submission_stall_reason = STALL_ADD_REQUEST;
+ }
+
+ return ret;
+}
+
static inline void guc_set_lrc_tail(struct i915_request *rq)
{
rq->context->lrc_reg_state[CTX_RING_TAIL] =
@@ -501,6 +724,135 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
}
+static bool is_multi_lrc_rq(struct i915_request *rq)
+{
+ return intel_context_is_parallel(rq->context);
+}
+
+static bool can_merge_rq(struct i915_request *rq,
+ struct i915_request *last)
+{
+ return request_to_scheduling_context(rq) ==
+ request_to_scheduling_context(last);
+}
+
+static u32 wq_space_until_wrap(struct intel_context *ce)
+{
+ return (WQ_SIZE - ce->parallel.guc.wqi_tail);
+}
+
+static void write_wqi(struct guc_process_desc *desc,
+ struct intel_context *ce,
+ u32 wqi_size)
+{
+ BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
+
+ /*
+ * Ensure WQI are visible before updating tail
+ */
+ intel_guc_write_barrier(ce_to_guc(ce));
+
+ ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
+ (WQ_SIZE - 1);
+ WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail);
+}
+
+static int guc_wq_noop_append(struct intel_context *ce)
+{
+ struct guc_process_desc *desc = __get_process_desc(ce);
+ u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce));
+ u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
+
+ if (!wqi)
+ return -EBUSY;
+
+ GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+ *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+ FIELD_PREP(WQ_LEN_MASK, len_dw);
+ ce->parallel.guc.wqi_tail = 0;
+
+ return 0;
+}
+
+static int __guc_wq_item_append(struct i915_request *rq)
+{
+ struct intel_context *ce = request_to_scheduling_context(rq);
+ struct intel_context *child;
+ struct guc_process_desc *desc = __get_process_desc(ce);
+ unsigned int wqi_size = (ce->parallel.number_children + 4) *
+ sizeof(u32);
+ u32 *wqi;
+ u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+ int ret;
+
+ /* Ensure context is in correct state updating work queue */
+ GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
+ GEM_BUG_ON(context_guc_id_invalid(ce));
+ GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
+ GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id));
+
+ /* Insert NOOP if this work queue item will wrap the tail pointer. */
+ if (wqi_size > wq_space_until_wrap(ce)) {
+ ret = guc_wq_noop_append(ce);
+ if (ret)
+ return ret;
+ }
+
+ wqi = get_wq_pointer(desc, ce, wqi_size);
+ if (!wqi)
+ return -EBUSY;
+
+ GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+ *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+ FIELD_PREP(WQ_LEN_MASK, len_dw);
+ *wqi++ = ce->lrc.lrca;
+ *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
+ FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
+ *wqi++ = 0; /* fence_id */
+ for_each_child(ce, child)
+ *wqi++ = child->ring->tail / sizeof(u64);
+
+ write_wqi(desc, ce, wqi_size);
+
+ return 0;
+}
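A multi-LRC work queue item is therefore (number_children + 4) dwords: a header with type and length, the parent's LRC address, a dword packing the guc_id with the parent ring tail in qwords, a fence id, and one ring-tail dword per child. A rough stand-alone sketch of that packing, with made-up shift values standing in for the WQ_* masks (the real definitions live in the GuC ABI headers):

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative field positions only; the real masks come from the GuC ABI. */
	#define WQ_TYPE_SHIFT      0
	#define WQ_LEN_SHIFT       16
	#define WQ_GUC_ID_SHIFT    0
	#define WQ_RING_TAIL_SHIFT 16
	#define WQ_TYPE_MULTI_LRC  0x5

	static uint32_t build_multi_lrc_wqi(uint32_t *wqi, uint32_t lrca,
					    uint32_t guc_id, uint32_t parent_tail,
					    const uint32_t *child_tails,
					    uint32_t num_children)
	{
		uint32_t len_dw = num_children + 4 - 1;	/* length excludes the header */
		uint32_t i, n = 0;

		wqi[n++] = (WQ_TYPE_MULTI_LRC << WQ_TYPE_SHIFT) | (len_dw << WQ_LEN_SHIFT);
		wqi[n++] = lrca;				/* parent context image */
		wqi[n++] = (guc_id << WQ_GUC_ID_SHIFT) |
			   ((parent_tail / sizeof(uint64_t)) << WQ_RING_TAIL_SHIFT);
		wqi[n++] = 0;					/* fence_id, unused here */
		for (i = 0; i < num_children; i++)		/* one tail per child */
			wqi[n++] = child_tails[i] / sizeof(uint64_t);

		return n;	/* number of dwords written */
	}

	int main(void)
	{
		uint32_t item[8];
		uint32_t tails[2] = { 0x100, 0x180 };
		uint32_t n = build_multi_lrc_wqi(item, 0xdeadbeef, 42, 0x80, tails, 2);

		printf("wrote %u dwords, header %#x\n", n, item[0]);
		return 0;
	}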
+
+static int guc_wq_item_append(struct intel_guc *guc,
+ struct i915_request *rq)
+{
+ struct intel_context *ce = request_to_scheduling_context(rq);
+ int ret = 0;
+
+ if (likely(!intel_context_is_banned(ce))) {
+ ret = __guc_wq_item_append(rq);
+
+ if (unlikely(ret == -EBUSY)) {
+ guc->stalled_request = rq;
+ guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
+ }
+ }
+
+ return ret;
+}
+
+static bool multi_lrc_submit(struct i915_request *rq)
+{
+ struct intel_context *ce = request_to_scheduling_context(rq);
+
+ intel_ring_set_tail(rq->ring, rq->tail);
+
+ /*
+ * We expect the front end (execbuf IOCTL) to set this flag on the last
+ * request generated from a multi-BB submission. This indicates to the
+ * backend (GuC interface) that we should submit this context, thus
+ * submitting all the requests generated in parallel.
+ */
+ return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
+ intel_context_is_banned(ce);
+}
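Only the request flagged as the last of a multi-BB submission triggers the actual submission; earlier requests in the relationship just move their ring tails. A hedged sketch of that coalescing pattern, using illustrative stand-ins for the request structure and the I915_FENCE_FLAG_SUBMIT_PARALLEL flag:

	#include <stdbool.h>
	#include <stdio.h>

	struct mock_request {
		int context_id;
		bool last_in_parallel;	/* stand-in for I915_FENCE_FLAG_SUBMIT_PARALLEL */
	};

	/* Walk a batch of requests and only "submit" once per parallel group. */
	static void dequeue(const struct mock_request *rqs, int count)
	{
		int i;

		for (i = 0; i < count; i++) {
			/* Move the ring tail for every request... */
			printf("update tail for rq %d (ctx %d)\n", i, rqs[i].context_id);

			/* ...but send a single submission when the group is complete. */
			if (rqs[i].last_in_parallel)
				printf("submit ctx %d\n", rqs[i].context_id);
		}
	}

	int main(void)
	{
		struct mock_request rqs[] = {
			{ .context_id = 1, .last_in_parallel = false },
			{ .context_id = 1, .last_in_parallel = false },
			{ .context_id = 1, .last_in_parallel = true },
		};

		dequeue(rqs, 3);
		return 0;
	}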
+
static int guc_dequeue_one_context(struct intel_guc *guc)
{
struct i915_sched_engine * const sched_engine = guc->sched_engine;
@@ -514,7 +866,17 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
if (guc->stalled_request) {
submit = true;
last = guc->stalled_request;
- goto resubmit;
+
+ switch (guc->submission_stall_reason) {
+ case STALL_REGISTER_CONTEXT:
+ goto register_context;
+ case STALL_MOVE_LRC_TAIL:
+ goto move_lrc_tail;
+ case STALL_ADD_REQUEST:
+ goto add_request;
+ default:
+ MISSING_CASE(guc->submission_stall_reason);
+ }
}
while ((rb = rb_first_cached(&sched_engine->queue))) {
@@ -522,8 +884,8 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
struct i915_request *rq, *rn;
priolist_for_each_request_consume(rq, rn, p) {
- if (last && rq->context != last->context)
- goto done;
+ if (last && !can_merge_rq(rq, last))
+ goto register_context;
list_del_init(&rq->sched.link);
@@ -531,33 +893,84 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
trace_i915_request_in(rq, 0);
last = rq;
- submit = true;
+
+ if (is_multi_lrc_rq(rq)) {
+ /*
+ * We need to coalesce all multi-lrc requests in
+ * a relationship into a single H2G. We are
+ * guaranteed that all of these requests will be
+ * submitted sequentially.
+ */
+ if (multi_lrc_submit(rq)) {
+ submit = true;
+ goto register_context;
+ }
+ } else {
+ submit = true;
+ }
}
rb_erase_cached(&p->node, &sched_engine->queue);
i915_priolist_free(p);
}
-done:
+
+register_context:
if (submit) {
- guc_set_lrc_tail(last);
-resubmit:
+ struct intel_context *ce = request_to_scheduling_context(last);
+
+ if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
+ !intel_context_is_banned(ce))) {
+ ret = guc_lrc_desc_pin(ce, false);
+ if (unlikely(ret == -EPIPE)) {
+ goto deadlk;
+ } else if (ret == -EBUSY) {
+ guc->stalled_request = last;
+ guc->submission_stall_reason =
+ STALL_REGISTER_CONTEXT;
+ goto schedule_tasklet;
+ } else if (ret != 0) {
+ GEM_WARN_ON(ret); /* Unexpected */
+ goto deadlk;
+ }
+ }
+
+move_lrc_tail:
+ if (is_multi_lrc_rq(last)) {
+ ret = guc_wq_item_append(guc, last);
+ if (ret == -EBUSY) {
+ goto schedule_tasklet;
+ } else if (ret != 0) {
+ GEM_WARN_ON(ret); /* Unexpected */
+ goto deadlk;
+ }
+ } else {
+ guc_set_lrc_tail(last);
+ }
+
+add_request:
ret = guc_add_request(guc, last);
- if (unlikely(ret == -EPIPE))
+ if (unlikely(ret == -EPIPE)) {
+ goto deadlk;
+ } else if (ret == -EBUSY) {
+ goto schedule_tasklet;
+ } else if (ret != 0) {
+ GEM_WARN_ON(ret); /* Unexpected */
goto deadlk;
- else if (ret == -EBUSY) {
- tasklet_schedule(&sched_engine->tasklet);
- guc->stalled_request = last;
- return false;
}
}
guc->stalled_request = NULL;
+ guc->submission_stall_reason = STALL_NONE;
return submit;
deadlk:
sched_engine->tasklet.callback = NULL;
tasklet_disable_nosync(&sched_engine->tasklet);
return false;
+
+schedule_tasklet:
+ tasklet_schedule(&sched_engine->tasklet);
+ return false;
}
static void guc_submission_tasklet(struct tasklet_struct *t)
@@ -596,10 +1009,18 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
unsigned long index, flags;
bool pending_disable, pending_enable, deregister, destroyed, banned;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- /* Flush context */
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ /*
+ * Corner case where the ref count on the object is zero but a
+ * deregister G2H was lost. In this case we don't touch the ref
+ * count and finish the destroy of the context.
+ */
+ bool do_put = kref_get_unless_zero(&ce->ref);
+
+ xa_unlock(&guc->context_lookup);
+
+ spin_lock(&ce->guc_state.lock);
/*
* Once we are at this point submission_disabled() is guaranteed
@@ -615,11 +1036,16 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
banned = context_banned(ce);
init_sched_state(ce);
+ spin_unlock(&ce->guc_state.lock);
+
+ GEM_BUG_ON(!do_put && !destroyed);
+
if (pending_enable || destroyed || deregister) {
- atomic_dec(&guc->outstanding_submission_g2h);
+ decr_outstanding_submission_g2h(guc);
if (deregister)
guc_signal_context_fence(ce);
if (destroyed) {
+ intel_gt_pm_put_async(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -635,14 +1061,20 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
intel_engine_signal_breadcrumbs(ce->engine);
}
intel_context_sched_disable_unpin(ce);
- atomic_dec(&guc->outstanding_submission_g2h);
- spin_lock_irqsave(&ce->guc_state.lock, flags);
+ decr_outstanding_submission_g2h(guc);
+
+ spin_lock(&ce->guc_state.lock);
guc_blocked_fence_complete(ce);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ spin_unlock(&ce->guc_state.lock);
intel_context_put(ce);
}
+
+ if (do_put)
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
}
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
static inline bool
@@ -692,6 +1124,8 @@ static void guc_flush_submissions(struct intel_guc *guc)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+static void guc_flush_destroyed_contexts(struct intel_guc *guc);
+
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
int i;
@@ -710,6 +1144,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
guc_flush_submissions(guc);
+ guc_flush_destroyed_contexts(guc);
/*
* Handle any outstanding G2Hs before reset. Call IRQ handler directly
@@ -725,6 +1160,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
wait_for_reset(guc, &guc->outstanding_submission_g2h);
} while (!list_empty(&guc->ct.requests.incoming));
}
+
scrub_guc_desc_for_outstanding_g2h(guc);
}
@@ -796,16 +1232,14 @@ __unwind_incomplete_requests(struct intel_context *ce)
unsigned long flags;
spin_lock_irqsave(&sched_engine->lock, flags);
- spin_lock(&ce->guc_active.lock);
- list_for_each_entry_safe(rq, rn,
- &ce->guc_active.requests,
- sched.link) {
+ spin_lock(&ce->guc_state.lock);
+ list_for_each_entry_safe_reverse(rq, rn,
+ &ce->guc_state.requests,
+ sched.link) {
if (i915_request_completed(rq))
continue;
list_del_init(&rq->sched.link);
- spin_unlock(&ce->guc_active.lock);
-
__i915_request_unsubmit(rq);
/* Push the request back into the queue for later resubmission. */
@@ -816,64 +1250,111 @@ __unwind_incomplete_requests(struct intel_context *ce)
}
GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
- list_add_tail(&rq->sched.link, pl);
+ list_add(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-
- spin_lock(&ce->guc_active.lock);
}
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
static void __guc_reset_context(struct intel_context *ce, bool stalled)
{
+ bool local_stalled;
struct i915_request *rq;
+ unsigned long flags;
u32 head;
+ int i, number_children = ce->parallel.number_children;
+ bool skip = false;
+ struct intel_context *parent = ce;
+
+ GEM_BUG_ON(intel_context_is_child(ce));
intel_context_get(ce);
/*
- * GuC will implicitly mark the context as non-schedulable
- * when it sends the reset notification. Make sure our state
- * reflects this change. The context will be marked enabled
- * on resubmission.
+ * GuC will implicitly mark the context as non-schedulable when it sends
+ * the reset notification. Make sure our state reflects this change. The
+ * context will be marked enabled on resubmission.
+ *
+ * XXX: If the context is reset as a result of the request cancellation
+ * this G2H is received after the schedule disable complete G2H which is
+ * wrong as this creates a race between the request cancellation code
+ * re-submitting the context and this G2H handler. This is a bug in the
+ * GuC but can be worked around in the meantime by converting this to a
+ * NOP if a pending enable is in flight as this indicates that a request
+ * cancellation has occurred.
*/
- clr_context_enabled(ce);
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ if (likely(!context_pending_enable(ce)))
+ clr_context_enabled(ce);
+ else
+ skip = true;
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ if (unlikely(skip))
+ goto out_put;
- rq = intel_context_find_active_request(ce);
- if (!rq) {
- head = ce->ring->tail;
- stalled = false;
- goto out_replay;
- }
+ /*
+ * For each context in the relationship, find the hanging request and
+ * reset each context / request as needed
+ */
+ for (i = 0; i < number_children + 1; ++i) {
+ if (!intel_context_is_pinned(ce))
+ goto next_context;
- if (!i915_request_started(rq))
- stalled = false;
+ local_stalled = false;
+ rq = intel_context_find_active_request(ce);
+ if (!rq) {
+ head = ce->ring->tail;
+ goto out_replay;
+ }
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
- head = intel_ring_wrap(ce->ring, rq->head);
- __i915_request_reset(rq, stalled);
+ if (i915_request_started(rq))
+ local_stalled = true;
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ head = intel_ring_wrap(ce->ring, rq->head);
+
+ __i915_request_reset(rq, local_stalled && stalled);
out_replay:
- guc_reset_state(ce, head, stalled);
- __unwind_incomplete_requests(ce);
- intel_context_put(ce);
+ guc_reset_state(ce, head, local_stalled && stalled);
+next_context:
+ if (i != number_children)
+ ce = list_next_entry(ce, parallel.child_link);
+ }
+
+ __unwind_incomplete_requests(parent);
+out_put:
+ intel_context_put(parent);
}
void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
{
struct intel_context *ce;
unsigned long index;
+ unsigned long flags;
if (unlikely(!guc_submission_initialized(guc))) {
/* Reset called during driver load? GuC not yet initialised! */
return;
}
- xa_for_each(&guc->context_lookup, index, ce)
- if (intel_context_is_pinned(ce))
+ xa_lock_irqsave(&guc->context_lookup, flags);
+ xa_for_each(&guc->context_lookup, index, ce) {
+ if (!kref_get_unless_zero(&ce->ref))
+ continue;
+
+ xa_unlock(&guc->context_lookup);
+
+ if (intel_context_is_pinned(ce) &&
+ !intel_context_is_child(ce))
__guc_reset_context(ce, stalled);
+ intel_context_put(ce);
+
+ xa_lock(&guc->context_lookup);
+ }
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+
/* GuC is blown away, drop all references to contexts */
xa_destroy(&guc->context_lookup);
}
@@ -886,10 +1367,10 @@ static void guc_cancel_context_requests(struct intel_context *ce)
/* Mark all executing requests as skipped. */
spin_lock_irqsave(&sched_engine->lock, flags);
- spin_lock(&ce->guc_active.lock);
- list_for_each_entry(rq, &ce->guc_active.requests, sched.link)
+ spin_lock(&ce->guc_state.lock);
+ list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
i915_request_put(i915_request_mark_eio(rq));
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
@@ -948,11 +1429,25 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
{
struct intel_context *ce;
unsigned long index;
+ unsigned long flags;
- xa_for_each(&guc->context_lookup, index, ce)
- if (intel_context_is_pinned(ce))
+ xa_lock_irqsave(&guc->context_lookup, flags);
+ xa_for_each(&guc->context_lookup, index, ce) {
+ if (!kref_get_unless_zero(&ce->ref))
+ continue;
+
+ xa_unlock(&guc->context_lookup);
+
+ if (intel_context_is_pinned(ce) &&
+ !intel_context_is_child(ce))
guc_cancel_context_requests(ce);
+ intel_context_put(ce);
+
+ xa_lock(&guc->context_lookup);
+ }
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+
guc_cancel_sched_engine_requests(guc->sched_engine);
/* GuC is blown away, drop all references to contexts */
@@ -981,6 +1476,8 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
intel_gt_unpark_heartbeats(guc_to_gt(guc));
}
+static void destroyed_worker_func(struct work_struct *w);
+
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
@@ -1003,9 +1500,17 @@ int intel_guc_submission_init(struct intel_guc *guc)
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
- spin_lock_init(&guc->contexts_lock);
- INIT_LIST_HEAD(&guc->guc_id_list);
- ida_init(&guc->guc_ids);
+ spin_lock_init(&guc->submission_state.lock);
+ INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
+ ida_init(&guc->submission_state.guc_ids);
+ INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
+ INIT_WORK(&guc->submission_state.destroyed_worker,
+ destroyed_worker_func);
+
+ guc->submission_state.guc_ids_bitmap =
+ bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
+ if (!guc->submission_state.guc_ids_bitmap)
+ return -ENOMEM;
return 0;
}
@@ -1015,8 +1520,10 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->lrc_desc_pool)
return;
+ guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy(guc);
i915_sched_engine_put(guc->sched_engine);
+ bitmap_free(guc->submission_state.guc_ids_bitmap);
}
static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1027,21 +1534,28 @@ static inline void queue_request(struct i915_sched_engine *sched_engine,
list_add_tail(&rq->sched.link,
i915_sched_lookup_priolist(sched_engine, prio));
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ tasklet_hi_schedule(&sched_engine->tasklet);
}
static int guc_bypass_tasklet_submit(struct intel_guc *guc,
struct i915_request *rq)
{
- int ret;
+ int ret = 0;
__i915_request_submit(rq);
trace_i915_request_in(rq, 0);
- guc_set_lrc_tail(rq);
- ret = guc_add_request(guc, rq);
- if (ret == -EBUSY)
- guc->stalled_request = rq;
+ if (is_multi_lrc_rq(rq)) {
+ if (multi_lrc_submit(rq)) {
+ ret = guc_wq_item_append(guc, rq);
+ if (!ret)
+ ret = guc_add_request(guc, rq);
+ }
+ } else {
+ guc_set_lrc_tail(rq);
+ ret = guc_add_request(guc, rq);
+ }
if (unlikely(ret == -EPIPE))
disable_submission(guc);
@@ -1049,6 +1563,16 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc,
return ret;
}
+static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
+{
+ struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
+ struct intel_context *ce = request_to_scheduling_context(rq);
+
+ return submission_disabled(guc) || guc->stalled_request ||
+ !i915_sched_engine_is_empty(sched_engine) ||
+ !lrc_desc_registered(guc, ce->guc_id.id);
+}
+
static void guc_submit_request(struct i915_request *rq)
{
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
@@ -1058,8 +1582,7 @@ static void guc_submit_request(struct i915_request *rq)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&sched_engine->lock, flags);
- if (submission_disabled(guc) || guc->stalled_request ||
- !i915_sched_engine_is_empty(sched_engine))
+ if (need_tasklet(guc, rq))
queue_request(sched_engine, rq, rq_prio(rq));
else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
tasklet_hi_schedule(&sched_engine->tasklet);
@@ -1067,72 +1590,117 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
-static int new_guc_id(struct intel_guc *guc)
+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
- return ida_simple_get(&guc->guc_ids, 0,
- GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+ int ret;
+
+ GEM_BUG_ON(intel_context_is_child(ce));
+
+ if (intel_context_is_parent(ce))
+ ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID,
+ order_base_2(ce->parallel.number_children
+ + 1));
+ else
+ ret = ida_simple_get(&guc->submission_state.guc_ids,
+ NUMBER_MULTI_LRC_GUC_ID,
+ GUC_MAX_LRC_DESCRIPTORS,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+ __GFP_NOWARN);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ce->guc_id.id = ret;
+ return 0;
}
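The guc_id space is therefore partitioned: ids below GUC_MAX_LRC_DESCRIPTORS / 16 are reserved for multi-LRC parents and allocated as power-of-two aligned regions so the children receive consecutive ids, while everything above that range is handed out individually for single-LRC contexts. A small sketch of the partitioning arithmetic, assuming 65535 descriptors (the helper is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define GUC_MAX_LRC_DESCRIPTORS 65535u	/* assumed descriptor count */
	#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)

	/* Smallest power-of-two region that can hold parent + children ids. */
	static unsigned int ids_needed(unsigned int number_children)
	{
		unsigned int n = number_children + 1;
		unsigned int region = 1;

		while (region < n)
			region <<= 1;	/* same effect as 1 << order_base_2(n) */
		return region;
	}

	int main(void)
	{
		printf("multi-LRC ids:  [0, %u)\n", NUMBER_MULTI_LRC_GUC_ID);
		printf("single-LRC ids: [%u, %u)\n",
		       NUMBER_MULTI_LRC_GUC_ID, GUC_MAX_LRC_DESCRIPTORS);

		/* A parent with 3 children reserves a 4-id aligned region. */
		printf("parent + 3 children reserve %u consecutive ids\n",
		       ids_needed(3));
		return 0;
	}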
static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
+ GEM_BUG_ON(intel_context_is_child(ce));
+
if (!context_guc_id_invalid(ce)) {
- ida_simple_remove(&guc->guc_ids, ce->guc_id);
- reset_lrc_desc(guc, ce->guc_id);
+ if (intel_context_is_parent(ce))
+ bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+ ce->guc_id.id,
+ order_base_2(ce->parallel.number_children
+ + 1));
+ else
+ ida_simple_remove(&guc->submission_state.guc_ids,
+ ce->guc_id.id);
+ reset_lrc_desc(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
- if (!list_empty(&ce->guc_id_link))
- list_del_init(&ce->guc_id_link);
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
}
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
unsigned long flags;
- spin_lock_irqsave(&guc->contexts_lock, flags);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
__release_guc_id(guc, ce);
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
-static int steal_guc_id(struct intel_guc *guc)
+static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
- struct intel_context *ce;
- int guc_id;
+ struct intel_context *cn;
- lockdep_assert_held(&guc->contexts_lock);
+ lockdep_assert_held(&guc->submission_state.lock);
+ GEM_BUG_ON(intel_context_is_child(ce));
+ GEM_BUG_ON(intel_context_is_parent(ce));
- if (!list_empty(&guc->guc_id_list)) {
- ce = list_first_entry(&guc->guc_id_list,
+ if (!list_empty(&guc->submission_state.guc_id_list)) {
+ cn = list_first_entry(&guc->submission_state.guc_id_list,
struct intel_context,
- guc_id_link);
+ guc_id.link);
- GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
- GEM_BUG_ON(context_guc_id_invalid(ce));
+ GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
+ GEM_BUG_ON(context_guc_id_invalid(cn));
+ GEM_BUG_ON(intel_context_is_child(cn));
+ GEM_BUG_ON(intel_context_is_parent(cn));
- list_del_init(&ce->guc_id_link);
- guc_id = ce->guc_id;
- clr_context_registered(ce);
- set_context_guc_id_invalid(ce);
- return guc_id;
+ list_del_init(&cn->guc_id.link);
+ ce->guc_id = cn->guc_id;
+
+ spin_lock(&ce->guc_state.lock);
+ clr_context_registered(cn);
+ spin_unlock(&ce->guc_state.lock);
+
+ set_context_guc_id_invalid(cn);
+
+ return 0;
} else {
return -EAGAIN;
}
}
-static int assign_guc_id(struct intel_guc *guc, u16 *out)
+static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
int ret;
- lockdep_assert_held(&guc->contexts_lock);
+ lockdep_assert_held(&guc->submission_state.lock);
+ GEM_BUG_ON(intel_context_is_child(ce));
- ret = new_guc_id(guc);
+ ret = new_guc_id(guc, ce);
if (unlikely(ret < 0)) {
- ret = steal_guc_id(guc);
+ if (intel_context_is_parent(ce))
+ return -ENOSPC;
+
+ ret = steal_guc_id(guc, ce);
if (ret < 0)
return ret;
}
- *out = ret;
+ if (intel_context_is_parent(ce)) {
+ struct intel_context *child;
+ int i = 1;
+
+ for_each_child(ce, child)
+ child->guc_id.id = ce->guc_id.id + i++;
+ }
+
return 0;
}
@@ -1142,26 +1710,28 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
int ret = 0;
unsigned long flags, tries = PIN_GUC_ID_TRIES;
- GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+ GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
try_again:
- spin_lock_irqsave(&guc->contexts_lock, flags);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+
+ might_lock(&ce->guc_state.lock);
if (context_guc_id_invalid(ce)) {
- ret = assign_guc_id(guc, &ce->guc_id);
+ ret = assign_guc_id(guc, ce);
if (ret)
goto out_unlock;
ret = 1; /* Indicates newly assigned guc_id */
}
- if (!list_empty(&ce->guc_id_link))
- list_del_init(&ce->guc_id_link);
- atomic_inc(&ce->guc_id_ref);
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
+ atomic_inc(&ce->guc_id.ref);
out_unlock:
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
/*
- * -EAGAIN indicates no guc_ids are available, let's retire any
+ * -EAGAIN indicates no guc_id is available, let's retire any
* outstanding requests to see if that frees up a guc_id. If the first
* retire didn't help, insert a sleep with the timeslice duration before
* attempting to retire more requests. Double the sleep period each
@@ -1189,16 +1759,43 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
unsigned long flags;
- GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
+ GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
+ GEM_BUG_ON(intel_context_is_child(ce));
- if (unlikely(context_guc_id_invalid(ce)))
+ if (unlikely(context_guc_id_invalid(ce) ||
+ intel_context_is_parent(ce)))
return;
- spin_lock_irqsave(&guc->contexts_lock, flags);
- if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
- !atomic_read(&ce->guc_id_ref))
- list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
+ !atomic_read(&ce->guc_id.ref))
+ list_add_tail(&ce->guc_id.link,
+ &guc->submission_state.guc_id_list);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+}
+
+static int __guc_action_register_multi_lrc(struct intel_guc *guc,
+ struct intel_context *ce,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ struct intel_context *child;
+ u32 action[4 + MAX_ENGINE_INSTANCE];
+ int len = 0;
+
+ GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+ action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = guc_id;
+ action[len++] = ce->parallel.number_children + 1;
+ action[len++] = offset;
+ for_each_child(ce, child) {
+ offset += sizeof(struct guc_lrc_desc);
+ action[len++] = offset;
+ }
+
+ return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
static int __guc_action_register_context(struct intel_guc *guc,
@@ -1220,21 +1817,31 @@ static int register_context(struct intel_context *ce, bool loop)
{
struct intel_guc *guc = ce_to_guc(ce);
u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
- ce->guc_id * sizeof(struct guc_lrc_desc);
+ ce->guc_id.id * sizeof(struct guc_lrc_desc);
int ret;
+ GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- ret = __guc_action_register_context(guc, ce->guc_id, offset, loop);
- if (likely(!ret))
+ if (intel_context_is_parent(ce))
+ ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
+ offset, loop);
+ else
+ ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
+ loop);
+ if (likely(!ret)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
set_context_registered(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ }
return ret;
}
static int __guc_action_deregister_context(struct intel_guc *guc,
- u32 guc_id,
- bool loop)
+ u32 guc_id)
{
u32 action[] = {
INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
@@ -1243,33 +1850,38 @@ static int __guc_action_deregister_context(struct intel_guc *guc,
return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
G2H_LEN_DW_DEREGISTER_CONTEXT,
- loop);
+ true);
}
-static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
+static int deregister_context(struct intel_context *ce, u32 guc_id)
{
struct intel_guc *guc = ce_to_guc(ce);
+ GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_deregister(ce);
- return __guc_action_deregister_context(guc, guc_id, loop);
+ return __guc_action_deregister_context(guc, guc_id);
}
-static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
+static inline void clear_children_join_go_memory(struct intel_context *ce)
{
- switch (class) {
- case RENDER_CLASS:
- return mask >> RCS0;
- case VIDEO_ENHANCEMENT_CLASS:
- return mask >> VECS0;
- case VIDEO_DECODE_CLASS:
- return mask >> VCS0;
- case COPY_ENGINE_CLASS:
- return mask >> BCS0;
- default:
- MISSING_CASE(class);
- return 0;
- }
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+ int i;
+
+ ps->go.semaphore = 0;
+ for (i = 0; i < ce->parallel.number_children + 1; ++i)
+ ps->join[i].semaphore = 0;
+}
+
+static inline u32 get_children_go_value(struct intel_context *ce)
+{
+ return __get_parent_scratch(ce)->go.semaphore;
+}
+
+static inline u32 get_children_join_value(struct intel_context *ce,
+ u8 child_index)
+{
+ return __get_parent_scratch(ce)->join[child_index].semaphore;
}
static void guc_context_policy_init(struct intel_engine_cs *engine,
@@ -1285,22 +1897,20 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}
-static inline u8 map_i915_prio_to_guc_prio(int prio);
-
static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
struct intel_guc *guc = &engine->gt->uc.guc;
- u32 desc_idx = ce->guc_id;
+ u32 desc_idx = ce->guc_id.id;
struct guc_lrc_desc *desc;
- const struct i915_gem_context *ctx;
- int prio = I915_CONTEXT_DEFAULT_PRIORITY;
bool context_registered;
intel_wakeref_t wakeref;
+ struct intel_context *child;
int ret = 0;
GEM_BUG_ON(!engine->mask);
+ GEM_BUG_ON(!sched_state_is_init(ce));
/*
* Ensure LRC + CT vmas are in same region as write barrier is done
@@ -1311,25 +1921,53 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
context_registered = lrc_desc_registered(guc, desc_idx);
- rcu_read_lock();
- ctx = rcu_dereference(ce->gem_context);
- if (ctx)
- prio = ctx->sched.priority;
- rcu_read_unlock();
-
reset_lrc_desc(guc, desc_idx);
set_lrc_desc_registered(guc, desc_idx, ce);
desc = __get_lrc_desc(guc, desc_idx);
desc->engine_class = engine_class_to_guc_class(engine->class);
- desc->engine_submit_mask = adjust_engine_mask(engine->class,
- engine->mask);
+ desc->engine_submit_mask = engine->logical_mask;
desc->hw_context_desc = ce->lrc.lrca;
- ce->guc_prio = map_i915_prio_to_guc_prio(prio);
- desc->priority = ce->guc_prio;
+ desc->priority = ce->guc_state.prio;
desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
guc_context_policy_init(engine, desc);
- init_sched_state(ce);
+
+ /*
+ * If context is a parent, we need to register a process descriptor
+ * describing a work queue and register all child contexts.
+ */
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc *pdesc;
+
+ ce->parallel.guc.wqi_tail = 0;
+ ce->parallel.guc.wqi_head = 0;
+
+ desc->process_desc = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ desc->wq_addr = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ desc->wq_size = WQ_SIZE;
+
+ pdesc = __get_process_desc(ce);
+ memset(pdesc, 0, sizeof(*(pdesc)));
+ pdesc->stage_id = ce->guc_id.id;
+ pdesc->wq_base_addr = desc->wq_addr;
+ pdesc->wq_size_bytes = desc->wq_size;
+ pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+ for_each_child(ce, child) {
+ desc = __get_lrc_desc(guc, child->guc_id.id);
+
+ desc->engine_class =
+ engine_class_to_guc_class(engine->class);
+ desc->hw_context_desc = child->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init(engine, desc);
+ }
+
+ clear_children_join_go_memory(ce);
+ }
/*
* The context_lookup xarray is used to determine if the hardware
@@ -1340,26 +1978,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
* registering this context.
*/
if (context_registered) {
+ bool disabled;
+ unsigned long flags;
+
trace_intel_context_steal_guc_id(ce);
- if (!loop) {
+ GEM_BUG_ON(!loop);
+
+ /* Seal race with Reset */
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ disabled = submission_disabled(guc);
+ if (likely(!disabled)) {
set_context_wait_for_deregister_to_register(ce);
intel_context_get(ce);
- } else {
- bool disabled;
- unsigned long flags;
-
- /* Seal race with Reset */
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- disabled = submission_disabled(guc);
- if (likely(!disabled)) {
- set_context_wait_for_deregister_to_register(ce);
- intel_context_get(ce);
- }
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (unlikely(disabled)) {
- reset_lrc_desc(guc, desc_idx);
- return 0; /* Will get registered later */
- }
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ if (unlikely(disabled)) {
+ reset_lrc_desc(guc, desc_idx);
+ return 0; /* Will get registered later */
}
/*
@@ -1367,20 +2002,18 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
* context whose guc_id was stolen.
*/
with_intel_runtime_pm(runtime_pm, wakeref)
- ret = deregister_context(ce, ce->guc_id, loop);
- if (unlikely(ret == -EBUSY)) {
- clr_context_wait_for_deregister_to_register(ce);
- intel_context_put(ce);
- } else if (unlikely(ret == -ENODEV)) {
+ ret = deregister_context(ce, ce->guc_id.id);
+ if (unlikely(ret == -ENODEV))
ret = 0; /* Will get registered later */
- }
} else {
with_intel_runtime_pm(runtime_pm, wakeref)
ret = register_context(ce, loop);
- if (unlikely(ret == -EBUSY))
+ if (unlikely(ret == -EBUSY)) {
+ reset_lrc_desc(guc, desc_idx);
+ } else if (unlikely(ret == -ENODEV)) {
reset_lrc_desc(guc, desc_idx);
- else if (unlikely(ret == -ENODEV))
ret = 0; /* Will get registered later */
+ }
}
return ret;
@@ -1419,7 +2052,12 @@ static int guc_context_pre_pin(struct intel_context *ce,
static int guc_context_pin(struct intel_context *ce, void *vaddr)
{
- return __guc_context_pin(ce, ce->engine, vaddr);
+ int ret = __guc_context_pin(ce, ce->engine, vaddr);
+
+ if (likely(!ret && !intel_context_is_barrier(ce)))
+ intel_engine_pm_get(ce->engine);
+
+ return ret;
}
static void guc_context_unpin(struct intel_context *ce)
@@ -1428,6 +2066,9 @@ static void guc_context_unpin(struct intel_context *ce)
unpin_guc_id(guc, ce);
lrc_unpin(ce);
+
+ if (likely(!intel_context_is_barrier(ce)))
+ intel_engine_pm_put_async(ce->engine);
}
static void guc_context_post_unpin(struct intel_context *ce)
@@ -1440,7 +2081,7 @@ static void __guc_context_sched_enable(struct intel_guc *guc,
{
u32 action[] = {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
- ce->guc_id,
+ ce->guc_id.id,
GUC_CONTEXT_ENABLE
};
@@ -1456,12 +2097,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
{
u32 action[] = {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
- guc_id, /* ce->guc_id not stable */
+ guc_id, /* ce->guc_id.id not stable */
GUC_CONTEXT_DISABLE
};
GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
+ GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_sched_disable(ce);
guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
@@ -1472,24 +2114,24 @@ static void guc_blocked_fence_complete(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
- if (!i915_sw_fence_done(&ce->guc_blocked))
- i915_sw_fence_complete(&ce->guc_blocked);
+ if (!i915_sw_fence_done(&ce->guc_state.blocked))
+ i915_sw_fence_complete(&ce->guc_state.blocked);
}
static void guc_blocked_fence_reinit(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
- GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked));
+ GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
/*
* This fence is always complete unless a pending schedule disable is
* outstanding. We arm the fence here and complete it when we receive
* the pending schedule disable complete message.
*/
- i915_sw_fence_fini(&ce->guc_blocked);
- i915_sw_fence_reinit(&ce->guc_blocked);
- i915_sw_fence_await(&ce->guc_blocked);
- i915_sw_fence_commit(&ce->guc_blocked);
+ i915_sw_fence_fini(&ce->guc_state.blocked);
+ i915_sw_fence_reinit(&ce->guc_state.blocked);
+ i915_sw_fence_await(&ce->guc_state.blocked);
+ i915_sw_fence_commit(&ce->guc_state.blocked);
}
static u16 prep_context_pending_disable(struct intel_context *ce)
@@ -1501,35 +2143,30 @@ static u16 prep_context_pending_disable(struct intel_context *ce)
guc_blocked_fence_reinit(ce);
intel_context_get(ce);
- return ce->guc_id;
+ return ce->guc_id.id;
}
static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
- struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
unsigned long flags;
struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
intel_wakeref_t wakeref;
u16 guc_id;
bool enabled;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
- /*
- * Sync with submission path, increment before below changes to context
- * state.
- */
- spin_lock(&sched_engine->lock);
incr_context_blocked(ce);
- spin_unlock(&sched_engine->lock);
enabled = context_enabled(ce);
if (unlikely(!enabled || submission_disabled(guc))) {
if (enabled)
clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- return &ce->guc_blocked;
+ return &ce->guc_state.blocked;
}
/*
@@ -1545,26 +2182,41 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_sched_disable(guc, ce, guc_id);
- return &ce->guc_blocked;
+ return &ce->guc_state.blocked;
+}
+
+#define SCHED_STATE_MULTI_BLOCKED_MASK \
+ (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
+#define SCHED_STATE_NO_UNBLOCK \
+ (SCHED_STATE_MULTI_BLOCKED_MASK | \
+ SCHED_STATE_PENDING_DISABLE | \
+ SCHED_STATE_BANNED)
+
+static bool context_cant_unblock(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+
+ return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
+ context_guc_id_invalid(ce) ||
+ !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) ||
+ !intel_context_is_pinned(ce);
}
static void guc_context_unblock(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
- struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
unsigned long flags;
struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
intel_wakeref_t wakeref;
bool enable;
GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_child(ce));
spin_lock_irqsave(&ce->guc_state.lock, flags);
if (unlikely(submission_disabled(guc) ||
- !intel_context_is_pinned(ce) ||
- context_pending_disable(ce) ||
- context_blocked(ce) > 1)) {
+ context_cant_unblock(ce))) {
enable = false;
} else {
enable = true;
@@ -1573,13 +2225,7 @@ static void guc_context_unblock(struct intel_context *ce)
intel_context_get(ce);
}
- /*
- * Sync with submission path, decrement after above changes to context
- * state.
- */
- spin_lock(&sched_engine->lock);
decr_context_blocked(ce);
- spin_unlock(&sched_engine->lock);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
@@ -1592,16 +2238,29 @@ static void guc_context_unblock(struct intel_context *ce)
static void guc_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
{
+ struct intel_context *block_context =
+ request_to_scheduling_context(rq);
+
if (i915_sw_fence_signaled(&rq->submit)) {
- struct i915_sw_fence *fence = guc_context_block(ce);
+ struct i915_sw_fence *fence;
+ intel_context_get(ce);
+ fence = guc_context_block(block_context);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
true);
}
- guc_context_unblock(ce);
+
+ /*
+ * XXX: Racey if context is reset, see comment in
+ * __guc_reset_context().
+ */
+ flush_work(&ce_to_guc(ce)->ct.requests.worker);
+
+ guc_context_unblock(block_context);
+ intel_context_put(ce);
}
}
@@ -1626,6 +2285,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
intel_wakeref_t wakeref;
unsigned long flags;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
guc_flush_submissions(guc);
spin_lock_irqsave(&ce->guc_state.lock, flags);
@@ -1662,7 +2323,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
if (!context_guc_id_invalid(ce))
with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_set_preemption_timeout(guc,
- ce->guc_id,
+ ce->guc_id.id,
1);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
@@ -1675,40 +2336,24 @@ static void guc_context_sched_disable(struct intel_context *ce)
struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
intel_wakeref_t wakeref;
u16 guc_id;
- bool enabled;
- if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
- !lrc_desc_registered(guc, ce->guc_id)) {
- clr_context_enabled(ce);
- goto unpin;
- }
-
- if (!context_enabled(ce))
- goto unpin;
+ GEM_BUG_ON(intel_context_is_child(ce));
spin_lock_irqsave(&ce->guc_state.lock, flags);
/*
- * We have to check if the context has been disabled by another thread.
- * We also have to check if the context has been pinned again as another
- * pin operation is allowed to pass this function. Checking the pin
- * count, within ce->guc_state.lock, synchronizes this function with
- * guc_request_alloc ensuring a request doesn't slip through the
- * 'context_pending_disable' fence. Checking within the spin lock (can't
- * sleep) ensures another process doesn't pin this context and generate
- * a request before we set the 'context_pending_disable' flag here.
+ * We have to check if the context has been disabled by another thread,
+ * check if submission has been disabled to seal a race with reset and
+ * finally check if any more requests have been committed to the
+ * context, ensuring that a request doesn't slip through the
+ * 'context_pending_disable' fence.
*/
- enabled = context_enabled(ce);
- if (unlikely(!enabled || submission_disabled(guc))) {
- if (enabled)
- clr_context_enabled(ce);
+ if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
+ context_has_committed_requests(ce))) {
+ clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
goto unpin;
}
- if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) {
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- return;
- }
guc_id = prep_context_pending_disable(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
@@ -1724,21 +2369,41 @@ unpin:
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
+ struct intel_gt *gt = guc_to_gt(guc);
+ unsigned long flags;
+ bool disabled;
- GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
- GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
+ lockdep_assert_held(&guc->submission_state.lock);
+ GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
+ GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
+ GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
GEM_BUG_ON(context_enabled(ce));
- clr_context_registered(ce);
- deregister_context(ce, ce->guc_id, true);
+ /* Seal race with Reset */
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ disabled = submission_disabled(guc);
+ if (likely(!disabled)) {
+ __intel_gt_pm_get(gt);
+ set_context_destroyed(ce);
+ clr_context_registered(ce);
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ if (unlikely(disabled)) {
+ __release_guc_id(guc, ce);
+ __guc_context_destroy(ce);
+ return;
+ }
+
+ deregister_context(ce, ce->guc_id.id);
}
static void __guc_context_destroy(struct intel_context *ce)
{
- GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
- ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
- ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
- ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
+ GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
+ ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
+ ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
+ ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
+ GEM_BUG_ON(ce->guc_state.number_committed_requests);
lrc_fini(ce);
intel_context_fini(ce);
@@ -1756,76 +2421,86 @@ static void __guc_context_destroy(struct intel_context *ce)
}
}
+static void guc_flush_destroyed_contexts(struct intel_guc *guc)
+{
+ struct intel_context *ce, *cn;
+ unsigned long flags;
+
+ GEM_BUG_ON(!submission_disabled(guc) &&
+ guc_submission_initialized(guc));
+
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_for_each_entry_safe(ce, cn,
+ &guc->submission_state.destroyed_contexts,
+ destroyed_link) {
+ list_del_init(&ce->destroyed_link);
+ __release_guc_id(guc, ce);
+ __guc_context_destroy(ce);
+ }
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+}
+
+static void deregister_destroyed_contexts(struct intel_guc *guc)
+{
+ struct intel_context *ce, *cn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_for_each_entry_safe(ce, cn,
+ &guc->submission_state.destroyed_contexts,
+ destroyed_link) {
+ list_del_init(&ce->destroyed_link);
+ guc_lrc_desc_unpin(ce);
+ }
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+}
+
+static void destroyed_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc,
+ submission_state.destroyed_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ int tmp;
+
+ with_intel_gt_pm(gt, tmp)
+ deregister_destroyed_contexts(guc);
+}
+
static void guc_context_destroy(struct kref *kref)
{
struct intel_context *ce = container_of(kref, typeof(*ce), ref);
- struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
struct intel_guc *guc = ce_to_guc(ce);
- intel_wakeref_t wakeref;
unsigned long flags;
- bool disabled;
+ bool destroy;
/*
* If the guc_id is invalid this context has been stolen and we can free
* it immediately. Also can be freed immediately if the context is not
* registered with the GuC or the GuC is in the middle of a reset.
*/
- if (context_guc_id_invalid(ce)) {
- __guc_context_destroy(ce);
- return;
- } else if (submission_disabled(guc) ||
- !lrc_desc_registered(guc, ce->guc_id)) {
- release_guc_id(guc, ce);
- __guc_context_destroy(ce);
- return;
- }
-
- /*
- * We have to acquire the context spinlock and check guc_id again, if it
- * is valid it hasn't been stolen and needs to be deregistered. We
- * delete this context from the list of unpinned guc_ids available to
- * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
- * returns indicating this context has been deregistered the guc_id is
- * returned to the pool of available guc_ids.
- */
- spin_lock_irqsave(&guc->contexts_lock, flags);
- if (context_guc_id_invalid(ce)) {
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
- __guc_context_destroy(ce);
- return;
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
+ !lrc_desc_registered(guc, ce->guc_id.id);
+ if (likely(!destroy)) {
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
+ list_add_tail(&ce->destroyed_link,
+ &guc->submission_state.destroyed_contexts);
+ } else {
+ __release_guc_id(guc, ce);
}
-
- if (!list_empty(&ce->guc_id_link))
- list_del_init(&ce->guc_id_link);
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
-
- /* Seal race with Reset */
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- disabled = submission_disabled(guc);
- if (likely(!disabled))
- set_context_destroyed(ce);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (unlikely(disabled)) {
- release_guc_id(guc, ce);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ if (unlikely(destroy)) {
__guc_context_destroy(ce);
return;
}
/*
- * We defer GuC context deregistration until the context is destroyed
- * in order to save on CTBs. With this optimization ideally we only need
- * 1 CTB to register the context during the first pin and 1 CTB to
- * deregister the context when the context is destroyed. Without this
- * optimization, a CTB would be needed every pin & unpin.
- *
- * XXX: Need to acqiure the runtime wakeref as this can be triggered
- * from context_free_worker when runtime wakeref is not held.
- * guc_lrc_desc_unpin requires the runtime as a GuC register is written
- * in H2G CTB to deregister the context. A future patch may defer this
- * H2G CTB if the runtime wakeref is zero.
+ * We use a worker to issue the H2G to deregister the context as we may
+ * take the GT PM for the first time, which isn't allowed from an atomic
+ * context.
*/
- with_intel_runtime_pm(runtime_pm, wakeref)
- guc_lrc_desc_unpin(ce);
+ queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
}
static int guc_context_alloc(struct intel_context *ce)
@@ -1839,20 +2514,23 @@ static void guc_context_set_prio(struct intel_guc *guc,
{
u32 action[] = {
INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
- ce->guc_id,
+ ce->guc_id.id,
prio,
};
GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
prio > GUC_CLIENT_PRIORITY_NORMAL);
+ lockdep_assert_held(&ce->guc_state.lock);
- if (ce->guc_prio == prio || submission_disabled(guc) ||
- !context_registered(ce))
+ if (ce->guc_state.prio == prio || submission_disabled(guc) ||
+ !context_registered(ce)) {
+ ce->guc_state.prio = prio;
return;
+ }
guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
- ce->guc_prio = prio;
+ ce->guc_state.prio = prio;
trace_intel_context_set_prio(ce);
}
@@ -1871,25 +2549,25 @@ static inline u8 map_i915_prio_to_guc_prio(int prio)
static inline void add_context_inflight_prio(struct intel_context *ce,
u8 guc_prio)
{
- lockdep_assert_held(&ce->guc_active.lock);
- GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));
+ lockdep_assert_held(&ce->guc_state.lock);
+ GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
- ++ce->guc_prio_count[guc_prio];
+ ++ce->guc_state.prio_count[guc_prio];
/* Overflow protection */
- GEM_WARN_ON(!ce->guc_prio_count[guc_prio]);
+ GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
}
static inline void sub_context_inflight_prio(struct intel_context *ce,
u8 guc_prio)
{
- lockdep_assert_held(&ce->guc_active.lock);
- GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));
+ lockdep_assert_held(&ce->guc_state.lock);
+ GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
/* Underflow protection */
- GEM_WARN_ON(!ce->guc_prio_count[guc_prio]);
+ GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
- --ce->guc_prio_count[guc_prio];
+ --ce->guc_state.prio_count[guc_prio];
}
static inline void update_context_prio(struct intel_context *ce)
@@ -1900,10 +2578,10 @@ static inline void update_context_prio(struct intel_context *ce)
BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
- lockdep_assert_held(&ce->guc_active.lock);
+ lockdep_assert_held(&ce->guc_state.lock);
- for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) {
- if (ce->guc_prio_count[i]) {
+ for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
+ if (ce->guc_state.prio_count[i]) {
guc_context_set_prio(guc, ce, i);
break;
}
@@ -1918,13 +2596,14 @@ static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
static void add_to_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
+ GEM_BUG_ON(intel_context_is_child(ce));
GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
- spin_lock(&ce->guc_active.lock);
- list_move_tail(&rq->sched.link, &ce->guc_active.requests);
+ spin_lock(&ce->guc_state.lock);
+ list_move_tail(&rq->sched.link, &ce->guc_state.requests);
if (rq->guc_prio == GUC_PRIO_INIT) {
rq->guc_prio = new_guc_prio;
@@ -1936,12 +2615,12 @@ static void add_to_context(struct i915_request *rq)
}
update_context_prio(ce);
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
}
static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
{
- lockdep_assert_held(&ce->guc_active.lock);
+ lockdep_assert_held(&ce->guc_state.lock);
if (rq->guc_prio != GUC_PRIO_INIT &&
rq->guc_prio != GUC_PRIO_FINI) {
@@ -1953,9 +2632,11 @@ static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
static void remove_from_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
- spin_lock_irq(&ce->guc_active.lock);
+ GEM_BUG_ON(intel_context_is_child(ce));
+
+ spin_lock_irq(&ce->guc_state.lock);
list_del_init(&rq->sched.link);
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
@@ -1965,9 +2646,11 @@ static void remove_from_context(struct i915_request *rq)
guc_prio_fini(rq, ce);
- spin_unlock_irq(&ce->guc_active.lock);
+ decr_context_committed_requests(ce);
+
+ spin_unlock_irq(&ce->guc_state.lock);
- atomic_dec(&ce->guc_id_ref);
+ atomic_dec(&ce->guc_id.ref);
i915_request_notify_execute_cb_imm(rq);
}
@@ -1992,19 +2675,35 @@ static const struct intel_context_ops guc_context_ops = {
.destroy = guc_context_destroy,
.create_virtual = guc_create_virtual,
+ .create_parallel = guc_create_parallel,
};
+static void submit_work_cb(struct irq_work *wrk)
+{
+ struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
+
+ might_lock(&rq->engine->sched_engine->lock);
+ i915_sw_fence_complete(&rq->submit);
+}
+
static void __guc_signal_context_fence(struct intel_context *ce)
{
- struct i915_request *rq;
+ struct i915_request *rq, *rn;
lockdep_assert_held(&ce->guc_state.lock);
if (!list_empty(&ce->guc_state.fences))
trace_intel_context_fence_release(ce);
- list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link)
- i915_sw_fence_complete(&rq->submit);
+ /*
+ * Use irq_work to ensure the locking order of sched_engine->lock ->
+ * ce->guc_state.lock is preserved.
+ */
+ list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
+ guc_fence_link) {
+ list_del(&rq->guc_fence_link);
+ irq_work_queue(&rq->submit_work);
+ }
INIT_LIST_HEAD(&ce->guc_state.fences);
}
@@ -2013,6 +2712,8 @@ static void guc_signal_context_fence(struct intel_context *ce)
{
unsigned long flags;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
clr_context_wait_for_deregister_to_register(ce);
__guc_signal_context_fence(ce);
@@ -2022,13 +2723,28 @@ static void guc_signal_context_fence(struct intel_context *ce)
static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
{
return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
- !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) &&
+ !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) &&
!submission_disabled(ce_to_guc(ce));
}
+static void guc_context_init(struct intel_context *ce)
+{
+ const struct i915_gem_context *ctx;
+ int prio = I915_CONTEXT_DEFAULT_PRIORITY;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(ce->gem_context);
+ if (ctx)
+ prio = ctx->sched.priority;
+ rcu_read_unlock();
+
+ ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
+ set_bit(CONTEXT_GUC_INIT, &ce->flags);
+}
+
static int guc_request_alloc(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_guc *guc = ce_to_guc(ce);
unsigned long flags;
int ret;
@@ -2057,14 +2773,17 @@ static int guc_request_alloc(struct i915_request *rq)
rq->reserved_space -= GUC_REQUEST_SIZE;
+ if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
+ guc_context_init(ce);
+
/*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
- * guc_ids and creating horrible race conditions. This is especially bad
- * when guc_ids are being stolen due to over subscription. By the time
+ * guc_id and creating horrible race conditions. This is especially bad
+ * when guc_id are being stolen due to over subscription. By the time
* this function is reached, it is guaranteed that the guc_id will be
* persistent until the generated request is retired. Thus, sealing these
- * race conditions. It is still safe to fail here if guc_ids are
+ * race conditions. It is still safe to fail here if guc_id are
* exhausted and return -EAGAIN to the user indicating that they can try
* again in the future.
*
@@ -2074,7 +2793,7 @@ static int guc_request_alloc(struct i915_request *rq)
* decremented on each retire. When it is zero, a lock around the
* increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
*/
- if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
+ if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
goto out;
ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
@@ -2087,7 +2806,7 @@ static int guc_request_alloc(struct i915_request *rq)
disable_submission(guc);
goto out; /* GPU will be reset */
}
- atomic_dec(&ce->guc_id_ref);
+ atomic_dec(&ce->guc_id.ref);
unpin_guc_id(guc, ce);
return ret;
}
@@ -2102,22 +2821,16 @@ out:
* schedule enable or context registration if either G2H is pending
 * respectively. Once a G2H returns, the fence is released that is
* blocking these requests (see guc_signal_context_fence).
- *
- * We can safely check the below fields outside of the lock as it isn't
- * possible for these fields to transition from being clear to set but
- * converse is possible, hence the need for the check within the lock.
*/
- if (likely(!context_wait_for_deregister_to_register(ce) &&
- !context_pending_disable(ce)))
- return 0;
-
spin_lock_irqsave(&ce->guc_state.lock, flags);
if (context_wait_for_deregister_to_register(ce) ||
context_pending_disable(ce)) {
+ init_irq_work(&rq->submit_work, submit_work_cb);
i915_sw_fence_await(&rq->submit);
list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
}
+ incr_context_committed_requests(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
return 0;
@@ -2135,8 +2848,30 @@ static int guc_virtual_context_pre_pin(struct intel_context *ce,
static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
{
struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+ int ret = __guc_context_pin(ce, engine, vaddr);
+ intel_engine_mask_t tmp, mask = ce->engine->mask;
- return __guc_context_pin(ce, engine, vaddr);
+ if (likely(!ret))
+ for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
+ intel_engine_pm_get(engine);
+
+ return ret;
+}
+
+static void guc_virtual_context_unpin(struct intel_context *ce)
+{
+ intel_engine_mask_t tmp, mask = ce->engine->mask;
+ struct intel_engine_cs *engine;
+ struct intel_guc *guc = ce_to_guc(ce);
+
+ GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_barrier(ce));
+
+ unpin_guc_id(guc, ce);
+ lrc_unpin(ce);
+
+ for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
+ intel_engine_pm_put_async(engine);
}
static void guc_virtual_context_enter(struct intel_context *ce)
@@ -2173,7 +2908,98 @@ static const struct intel_context_ops virtual_guc_context_ops = {
.pre_pin = guc_virtual_context_pre_pin,
.pin = guc_virtual_context_pin,
- .unpin = guc_context_unpin,
+ .unpin = guc_virtual_context_unpin,
+ .post_unpin = guc_context_post_unpin,
+
+ .ban = guc_context_ban,
+
+ .cancel_request = guc_context_cancel_request,
+
+ .enter = guc_virtual_context_enter,
+ .exit = guc_virtual_context_exit,
+
+ .sched_disable = guc_context_sched_disable,
+
+ .destroy = guc_context_destroy,
+
+ .get_sibling = guc_virtual_get_sibling,
+};
+
+static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
+{
+ struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+ struct intel_guc *guc = ce_to_guc(ce);
+ int ret;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ ret = pin_guc_id(guc, ce);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return __guc_context_pin(ce, engine, vaddr);
+}
+
+static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
+{
+ struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ __intel_context_pin(ce->parallel.parent);
+ return __guc_context_pin(ce, engine, vaddr);
+}
+
+static void guc_parent_context_unpin(struct intel_context *ce)
+{
+ struct intel_guc *guc = ce_to_guc(ce);
+
+ GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_barrier(ce));
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ if (ce->parallel.last_rq)
+ i915_request_put(ce->parallel.last_rq);
+ unpin_guc_id(guc, ce);
+ lrc_unpin(ce);
+}
+
+static void guc_child_context_unpin(struct intel_context *ce)
+{
+ GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_barrier(ce));
+ GEM_BUG_ON(!intel_context_is_child(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ lrc_unpin(ce);
+}
+
+static void guc_child_context_post_unpin(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_child(ce));
+ GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ lrc_post_unpin(ce);
+ intel_context_unpin(ce->parallel.parent);
+}
+
+static void guc_child_context_destroy(struct kref *kref)
+{
+ struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
+ __guc_context_destroy(ce);
+}
+
+static const struct intel_context_ops virtual_parent_context_ops = {
+ .alloc = guc_virtual_context_alloc,
+
+ .pre_pin = guc_context_pre_pin,
+ .pin = guc_parent_context_pin,
+ .unpin = guc_parent_context_unpin,
.post_unpin = guc_context_post_unpin,
.ban = guc_context_ban,
@@ -2190,6 +3016,110 @@ static const struct intel_context_ops virtual_guc_context_ops = {
.get_sibling = guc_virtual_get_sibling,
};
+static const struct intel_context_ops virtual_child_context_ops = {
+ .alloc = guc_virtual_context_alloc,
+
+ .pre_pin = guc_context_pre_pin,
+ .pin = guc_child_context_pin,
+ .unpin = guc_child_context_unpin,
+ .post_unpin = guc_child_context_post_unpin,
+
+ .cancel_request = guc_context_cancel_request,
+
+ .enter = guc_virtual_context_enter,
+ .exit = guc_virtual_context_exit,
+
+ .destroy = guc_child_context_destroy,
+
+ .get_sibling = guc_virtual_get_sibling,
+};
+
+/*
+ * The below override of the breadcrumbs is enabled when the user configures a
+ * context for parallel submission (multi-lrc, parent-child).
+ *
+ * The overridden breadcrumbs implement an algorithm which allows the GuC to
+ * safely preempt all the hw contexts configured for parallel submission
+ * between each BB. The contract between the i915 and GuC is that if the parent
+ * context can be preempted, all the children can be preempted, and the GuC will
+ * always try to preempt the parent before the children. A handshake between the
+ * parent / children breadcrumbs ensures the i915 holds up its end of the deal,
+ * creating a window to preempt between each set of BBs.
+ */
+static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs);
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs);
+
+static struct intel_context *
+guc_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width)
+{
+ struct intel_engine_cs **siblings = NULL;
+ struct intel_context *parent = NULL, *ce, *err;
+ int i, j;
+
+ siblings = kmalloc_array(num_siblings,
+ sizeof(*siblings),
+ GFP_KERNEL);
+ if (!siblings)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < width; ++i) {
+ for (j = 0; j < num_siblings; ++j)
+ siblings[j] = engines[i * num_siblings + j];
+
+ ce = intel_engine_create_virtual(siblings, num_siblings,
+ FORCE_VIRTUAL);
+ if (!ce) {
+ err = ERR_PTR(-ENOMEM);
+ goto unwind;
+ }
+
+ if (i == 0) {
+ parent = ce;
+ parent->ops = &virtual_parent_context_ops;
+ } else {
+ ce->ops = &virtual_child_context_ops;
+ intel_context_bind_parent_child(parent, ce);
+ }
+ }
+
+ parent->parallel.fence_context = dma_fence_context_alloc(1);
+
+ parent->engine->emit_bb_start =
+ emit_bb_start_parent_no_preempt_mid_batch;
+ parent->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_parent_no_preempt_mid_batch;
+ parent->engine->emit_fini_breadcrumb_dw =
+ 12 + 4 * parent->parallel.number_children;
+ for_each_child(parent, ce) {
+ ce->engine->emit_bb_start =
+ emit_bb_start_child_no_preempt_mid_batch;
+ ce->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_child_no_preempt_mid_batch;
+ ce->engine->emit_fini_breadcrumb_dw = 16;
+ }
+
+ kfree(siblings);
+ return parent;
+
+unwind:
+ if (parent)
+ intel_context_put(parent);
+ kfree(siblings);
+ return err;
+}
+
static bool
guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
{
@@ -2249,7 +3179,7 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
static void guc_bump_inflight_request_prio(struct i915_request *rq,
int prio)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
/* Short circuit function */
@@ -2259,7 +3189,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
!new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
return;
- spin_lock(&ce->guc_active.lock);
+ spin_lock(&ce->guc_state.lock);
if (rq->guc_prio != GUC_PRIO_FINI) {
if (rq->guc_prio != GUC_PRIO_INIT)
sub_context_inflight_prio(ce, rq->guc_prio);
@@ -2267,16 +3197,16 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
add_context_inflight_prio(ce, rq->guc_prio);
update_context_prio(ce);
}
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
}
static void guc_retire_inflight_request_prio(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
- spin_lock(&ce->guc_active.lock);
+ spin_lock(&ce->guc_state.lock);
guc_prio_fini(rq, ce);
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
}
static void sanitize_hwsp(struct intel_engine_cs *engine)
@@ -2310,6 +3240,8 @@ static void guc_sanitize(struct intel_engine_cs *engine)
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+
+ intel_engine_reset_pinned_contexts(engine);
}
static void setup_hwsp(struct intel_engine_cs *engine)
@@ -2385,9 +3317,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
 * and even if it did this code would be run again.
*/
- for_each_engine(engine, gt, id)
- if (engine->kernel_context)
- guc_kernel_context_pin(guc, engine->kernel_context);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ list_for_each_entry(ce, &engine->pinned_contexts_list,
+ pinned_contexts_link)
+ guc_kernel_context_pin(guc, ce);
+ }
}
static void guc_release(struct intel_engine_cs *engine)
@@ -2580,13 +3516,13 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
return NULL;
}
- return ce;
-}
+ if (unlikely(intel_context_is_child(ce))) {
+ drm_err(&guc_to_gt(guc)->i915->drm,
+ "Context is child, desc_idx %u", desc_idx);
+ return NULL;
+ }
-static void decr_outstanding_submission_g2h(struct intel_guc *guc)
-{
- if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
- wake_up_all(&guc->ct.wq);
+ return ce;
}
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
@@ -2607,6 +3543,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
trace_intel_context_deregister_done(ce);
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (unlikely(ce->drop_deregister)) {
+ ce->drop_deregister = false;
+ return 0;
+ }
+#endif
+
if (context_wait_for_deregister_to_register(ce)) {
struct intel_runtime_pm *runtime_pm =
&ce->engine->gt->i915->runtime_pm;
@@ -2622,6 +3565,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
intel_context_put(ce);
} else if (context_destroyed(ce)) {
/* Context has been destroyed */
+ intel_gt_pm_put_async(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -2652,8 +3596,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
(!context_pending_enable(ce) &&
!context_pending_disable(ce)))) {
drm_err(&guc_to_gt(guc)->i915->drm,
- "Bad context sched_state 0x%x, 0x%x, desc_idx %u",
- atomic_read(&ce->guc_sched_state_no_lock),
+ "Bad context sched_state 0x%x, desc_idx %u",
ce->guc_state.sched_state, desc_idx);
return -EPROTO;
}
@@ -2661,10 +3604,26 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
trace_intel_context_sched_done(ce);
if (context_pending_enable(ce)) {
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (unlikely(ce->drop_schedule_enable)) {
+ ce->drop_schedule_enable = false;
+ return 0;
+ }
+#endif
+
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
clr_context_pending_enable(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
} else if (context_pending_disable(ce)) {
bool banned;
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (unlikely(ce->drop_schedule_disable)) {
+ ce->drop_schedule_disable = false;
+ return 0;
+ }
+#endif
+
/*
* Unpin must be done before __guc_signal_context_fence,
* otherwise a race exists between the requests getting
@@ -2721,7 +3680,12 @@ static void guc_handle_context_reset(struct intel_guc *guc,
{
trace_intel_context_reset(ce);
- if (likely(!intel_context_is_banned(ce))) {
+ /*
+ * XXX: Racey if request cancellation has occurred, see comment in
+ * __guc_reset_context().
+ */
+ if (likely(!intel_context_is_banned(ce) &&
+ !context_blocked(ce))) {
capture_error_state(guc, ce);
guc_context_replay(ce);
}
@@ -2797,33 +3761,47 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
struct intel_context *ce;
struct i915_request *rq;
unsigned long index;
+ unsigned long flags;
/* Reset called during driver load? GuC not yet initialised! */
if (unlikely(!guc_submission_initialized(guc)))
return;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- if (!intel_context_is_pinned(ce))
+ if (!kref_get_unless_zero(&ce->ref))
continue;
+ xa_unlock(&guc->context_lookup);
+
+ if (!intel_context_is_pinned(ce))
+ goto next;
+
if (intel_engine_is_virtual(ce->engine)) {
if (!(ce->engine->mask & engine->mask))
- continue;
+ goto next;
} else {
if (ce->engine != engine)
- continue;
+ goto next;
}
- list_for_each_entry(rq, &ce->guc_active.requests, sched.link) {
+ list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
continue;
intel_engine_set_hung_context(engine, ce);
/* Can only cope with one hang at a time... */
- return;
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
+ goto done;
}
+next:
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
}
+done:
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
@@ -2839,23 +3817,34 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
if (unlikely(!guc_submission_initialized(guc)))
return;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- if (!intel_context_is_pinned(ce))
+ if (!kref_get_unless_zero(&ce->ref))
continue;
+ xa_unlock(&guc->context_lookup);
+
+ if (!intel_context_is_pinned(ce))
+ goto next;
+
if (intel_engine_is_virtual(ce->engine)) {
if (!(ce->engine->mask & engine->mask))
- continue;
+ goto next;
} else {
if (ce->engine != engine)
- continue;
+ goto next;
}
- spin_lock_irqsave(&ce->guc_active.lock, flags);
- intel_engine_dump_active_requests(&ce->guc_active.requests,
+ spin_lock(&ce->guc_state.lock);
+ intel_engine_dump_active_requests(&ce->guc_state.requests,
hung_rq, m);
- spin_unlock_irqrestore(&ce->guc_active.lock, flags);
+ spin_unlock(&ce->guc_state.lock);
+
+next:
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
}
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
void intel_guc_submission_print_info(struct intel_guc *guc,
@@ -2881,7 +3870,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc,
priolist_for_each_request(rq, pl)
drm_printf(p, "guc_id=%u, seqno=%llu\n",
- rq->context->guc_id,
+ rq->context->guc_id.id,
rq->fence.seqno);
}
spin_unlock_irqrestore(&sched_engine->lock, flags);
@@ -2893,46 +3882,348 @@ static inline void guc_log_context_priority(struct drm_printer *p,
{
int i;
- drm_printf(p, "\t\tPriority: %d\n",
- ce->guc_prio);
+ drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
i < GUC_CLIENT_PRIORITY_NUM; ++i) {
drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
- i, ce->guc_prio_count[i]);
+ i, ce->guc_state.prio_count[i]);
}
drm_printf(p, "\n");
}
+static inline void guc_log_context(struct drm_printer *p,
+ struct intel_context *ce)
+{
+ drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
+ drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ drm_printf(p, "\t\tContext Pin Count: %u\n",
+ atomic_read(&ce->pin_count));
+ drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
+ atomic_read(&ce->guc_id.ref));
+ drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
+ ce->guc_state.sched_state);
+}
+
void intel_guc_submission_print_context_info(struct intel_guc *guc,
struct drm_printer *p)
{
struct intel_context *ce;
unsigned long index;
+ unsigned long flags;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id);
- drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
- drm_printf(p, "\t\tContext Pin Count: %u\n",
- atomic_read(&ce->pin_count));
- drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
- atomic_read(&ce->guc_id_ref));
- drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n",
- ce->guc_state.sched_state,
- atomic_read(&ce->guc_sched_state_no_lock));
+ GEM_BUG_ON(intel_context_is_child(ce));
+ guc_log_context(p, ce);
guc_log_context_priority(p, ce);
+
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc *desc = __get_process_desc(ce);
+ struct intel_context *child;
+
+ drm_printf(p, "\t\tNumber children: %u\n",
+ ce->parallel.number_children);
+ drm_printf(p, "\t\tWQI Head: %u\n",
+ READ_ONCE(desc->head));
+ drm_printf(p, "\t\tWQI Tail: %u\n",
+ READ_ONCE(desc->tail));
+ drm_printf(p, "\t\tWQI Status: %u\n\n",
+ READ_ONCE(desc->wq_status));
+
+ if (ce->engine->emit_bb_start ==
+ emit_bb_start_parent_no_preempt_mid_batch) {
+ u8 i;
+
+ drm_printf(p, "\t\tChildren Go: %u\n\n",
+ get_children_go_value(ce));
+ for (i = 0; i < ce->parallel.number_children; ++i)
+ drm_printf(p, "\t\tChildren Join: %u\n",
+ get_children_join_value(ce, i));
+ }
+
+ for_each_child(ce, child)
+ guc_log_context(p, child);
+ }
+ }
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+}
+
+static inline u32 get_children_go_addr(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce) +
+ offsetof(struct parent_scratch, go.semaphore);
+}
+
+static inline u32 get_children_join_addr(struct intel_context *ce,
+ u8 child_index)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce) +
+ offsetof(struct parent_scratch, join[child_index].semaphore);
+}
+
+#define PARENT_GO_BB 1
+#define PARENT_GO_FINI_BREADCRUMB 0
+#define CHILD_GO_BB 1
+#define CHILD_GO_FINI_BREADCRUMB 0
+static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ u32 *cs;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Wait on children */
+ for (i = 0; i < ce->parallel.number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_BB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_BB,
+ get_children_go_addr(ce),
+ 0);
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ struct intel_context *parent = intel_context_to_parent(ce);
+ u32 *cs;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_BB,
+ get_children_join_addr(parent,
+ ce->parallel.child_index),
+ 0);
+
+ /* Wait on parent for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_BB;
+ *cs++ = get_children_go_addr(parent);
+ *cs++ = 0;
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static u32 *
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ /* Wait on children */
+ for (i = 0; i < ce->parallel.number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_FINI_BREADCRUMB,
+ get_children_go_addr(ce),
+ 0);
+
+ return cs;
+}
+
+/*
+ * If this is true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execbuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadcrumb seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
+ * has occurred on any of the requests in the submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+ return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ if (unlikely(skip_handshake(rq))) {
+ /*
+ * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+ * the -6 comes from the length of the emits below.
+ */
+ memset(cs, 0, sizeof(u32) *
+ (ce->engine->emit_fini_breadcrumb_dw - 6));
+ cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ } else {
+ cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+ }
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
+static u32 *
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+ struct intel_context *parent = intel_context_to_parent(ce);
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_FINI_BREADCRUMB,
+ get_children_join_addr(parent,
+ ce->parallel.child_index),
+ 0);
+
+ /* Wait on parent for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_go_addr(parent);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ if (unlikely(skip_handshake(rq))) {
+ /*
+ * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
+ * the -6 comes from the length of the emits below.
+ */
+ memset(cs, 0, sizeof(u32) *
+ (ce->engine->emit_fini_breadcrumb_dw - 6));
+ cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ } else {
+ cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
}
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
}
static struct intel_context *
-guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
+guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags)
{
struct guc_virtual_engine *ve;
struct intel_guc *guc;
@@ -2981,6 +4272,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
}
ve->base.mask |= sibling->mask;
+ ve->base.logical_mask |= sibling->logical_mask;
if (n != 0 && ve->base.class != sibling->class) {
DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
@@ -3036,3 +4328,8 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
return false;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_guc.c"
+#include "selftest_guc_multi_lrc.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index fc5387b410a2..ff4b6869b80b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
- return PTR_ERR(vaddr);
+ err = PTR_ERR(vaddr);
+ goto unpin_out;
}
copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
- GEM_BUG_ON(copied < huc->fw.rsa_size);
-
i915_gem_object_unpin_map(vma->obj);
+ if (copied < huc->fw.rsa_size) {
+ err = -ENOMEM;
+ goto unpin_out;
+ }
+
huc->rsa_data = vma;
return 0;
+
+unpin_out:
+ i915_vma_unpin_and_release(&vma, 0);
+ return err;
}
static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
index 5733c15fd123..15998963b863 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
@@ -5,7 +5,7 @@
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
#include "intel_huc.h"
#include "intel_huc_debugfs.h"
@@ -21,11 +21,11 @@ static int huc_info_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(huc_info);
void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "huc_info", &huc_info_fops, NULL },
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 86c318516e14..2fef3b0bbe95 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -35,7 +35,7 @@ static void uc_expand_default_options(struct intel_uc *uc)
}
/* Intermediate platforms are HuC authentication only */
- if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) {
+ if (IS_ALDERLAKE_S(i915)) {
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
return;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
index 089d98662f46..c2f7924295e7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -6,7 +6,7 @@
#include <linux/debugfs.h>
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
#include "intel_guc_debugfs.h"
#include "intel_huc_debugfs.h"
#include "intel_uc.h"
@@ -32,11 +32,11 @@ static int uc_usage_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(uc_usage);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(uc_usage);
void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "usage", &uc_usage_fops, NULL },
};
struct dentry *root;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 3a16d08608a5..3aa87be4f2e4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -7,6 +7,7 @@
#include <linux/firmware.h>
#include <drm/drm_print.h>
+#include "gem/i915_gem_lmem.h"
#include "intel_uc_fw.h"
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
@@ -50,6 +51,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
+ fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \
fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
@@ -370,7 +372,14 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
uc_fw->private_data_size = css->private_data_size;
- obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ if (HAS_LMEM(i915)) {
+ obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
+ if (!IS_ERR(obj))
+ obj->flags |= I915_BO_ALLOC_PM_EARLY;
+ } else {
+ obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ }
+
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto fail;
@@ -413,20 +422,25 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- struct i915_vma dummy = {
- .node.start = uc_fw_ggtt_offset(uc_fw),
- .node.size = obj->base.size,
- .pages = obj->mm.pages,
- .vm = &ggtt->vm,
- };
+ struct i915_vma *dummy = &uc_fw->dummy;
+ u32 pte_flags = 0;
+
+ dummy->node.start = uc_fw_ggtt_offset(uc_fw);
+ dummy->node.size = obj->base.size;
+ dummy->pages = obj->mm.pages;
+ dummy->vm = &ggtt->vm;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size);
+ GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */
- drm_clflush_sg(dummy.pages);
+ if (i915_gem_object_has_struct_page(obj))
+ drm_clflush_sg(dummy->pages);
- ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
+ if (i915_gem_object_is_lmem(obj))
+ pte_flags |= PTE_LM;
+
+ ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
}
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
@@ -585,13 +599,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw)
*/
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
{
- struct sg_table *pages = uc_fw->obj->mm.pages;
+ struct intel_memory_region *mr = uc_fw->obj->mm.region;
u32 size = min_t(u32, uc_fw->rsa_size, max_len);
u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
+ struct sgt_iter iter;
+ size_t count = 0;
+ int idx;
+ /* Called during reset handling, must be atomic [no fs_reclaim] */
GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
- return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset);
+ idx = offset >> PAGE_SHIFT;
+ offset = offset_in_page(offset);
+ if (i915_gem_object_has_struct_page(uc_fw->obj)) {
+ struct page *page;
+
+ for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
+ u32 len = min_t(u32, size, PAGE_SIZE - offset);
+ void *vaddr;
+
+ if (idx > 0) {
+ idx--;
+ continue;
+ }
+
+ vaddr = kmap_atomic(page);
+ memcpy(dst, vaddr + offset, len);
+ kunmap_atomic(vaddr);
+
+ offset = 0;
+ dst += len;
+ size -= len;
+ count += len;
+ if (!size)
+ break;
+ }
+ } else {
+ dma_addr_t addr;
+
+ for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) {
+ u32 len = min_t(u32, size, PAGE_SIZE - offset);
+ void __iomem *vaddr;
+
+ if (idx > 0) {
+ idx--;
+ continue;
+ }
+
+ vaddr = io_mapping_map_atomic_wc(&mr->iomap,
+ addr - mr->region.start);
+ memcpy_fromio(dst, vaddr + offset, len);
+ io_mapping_unmap_atomic(vaddr);
+
+ offset = 0;
+ dst += len;
+ size -= len;
+ count += len;
+ if (!size)
+ break;
+ }
+ }
+
+ return count;
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 99bb1fe1af66..1e00bf65639e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -10,6 +10,7 @@
#include "intel_uc_fw_abi.h"
#include "intel_device_info.h"
#include "i915_gem.h"
+#include "i915_vma.h"
struct drm_printer;
struct drm_i915_private;
@@ -75,6 +76,14 @@ struct intel_uc_fw {
bool user_overridden;
size_t size;
struct drm_i915_gem_object *obj;
+ /**
+ * @dummy: A vma used in binding the uc fw to ggtt. We can't define this
+ * vma on the stack as it can lead to a stack overflow, so we define it
+ * here. Safe to have 1 copy per uc fw because the binding is single
+ * threaded as it is done during driver load (inherently single threaded)
+ * or during a GT reset (a mutex guarantees single threading).
+ */
+ struct i915_vma dummy;
/*
* The firmware build process will generate a version header file with major and
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
new file mode 100644
index 000000000000..fb0e4a7bd8ca
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "selftests/intel_scheduler_helpers.h"
+
+static struct i915_request *nop_user_request(struct intel_context *ce,
+ struct i915_request *from)
+{
+ struct i915_request *rq;
+ int ret;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ if (from) {
+ ret = i915_sw_fence_await_dma_fence(&rq->submit,
+ &from->fence, 0,
+ I915_FENCE_GFP);
+ if (ret < 0) {
+ i915_request_put(rq);
+ return ERR_PTR(ret);
+ }
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static int intel_guc_scrub_ctbs(void *arg)
+{
+ struct intel_gt *gt = arg;
+ int ret = 0;
+ int i;
+ struct i915_request *last[3] = {NULL, NULL, NULL}, *rq;
+ intel_wakeref_t wakeref;
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ engine = intel_selftest_find_any_engine(gt);
+
+ /* Submit requests and inject errors forcing G2H to be dropped */
+ for (i = 0; i < 3; ++i) {
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ pr_err("Failed to create context, %d: %d\n", i, ret);
+ goto err;
+ }
+
+ switch (i) {
+ case 0:
+ ce->drop_schedule_enable = true;
+ break;
+ case 1:
+ ce->drop_schedule_disable = true;
+ break;
+ case 2:
+ ce->drop_deregister = true;
+ break;
+ }
+
+ rq = nop_user_request(ce, NULL);
+ intel_context_put(ce);
+
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ pr_err("Failed to create request, %d: %d\n", i, ret);
+ goto err;
+ }
+
+ last[i] = rq;
+ }
+
+ for (i = 0; i < 3; ++i) {
+ ret = i915_request_wait(last[i], 0, HZ);
+ if (ret < 0) {
+ pr_err("Last request failed to complete: %d\n", ret);
+ goto err;
+ }
+ i915_request_put(last[i]);
+ last[i] = NULL;
+ }
+
+ /* Force all H2G / G2H to be submitted / processed */
+ intel_gt_retire_requests(gt);
+ msleep(500);
+
+ /* Scrub missing G2H */
+ intel_gt_handle_error(engine->gt, -1, 0, "selftest reset");
+
+ /* GT will not idle if G2H are lost */
+ ret = intel_gt_wait_for_idle(gt, HZ);
+ if (ret < 0) {
+ pr_err("GT failed to idle: %d\n", ret);
+ goto err;
+ }
+
+err:
+ for (i = 0; i < 3; ++i)
+ if (last[i])
+ i915_request_put(last[i]);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+
+ return ret;
+}
+
+int intel_guc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(intel_guc_scrub_ctbs),
+ };
+ struct intel_gt *gt = &i915->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return 0;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return 0;
+
+ return intel_gt_live_subtests(tests, gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
new file mode 100644
index 000000000000..50953c8e8b53
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_reset.h"
+#include "selftests/intel_scheduler_helpers.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gem/selftests/mock_context.h"
+
+static void logical_sort(struct intel_engine_cs **engines, int num_engines)
+{
+ struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1];
+ int i, j;
+
+ for (i = 0; i < num_engines; ++i)
+ for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) {
+ if (engines[j]->logical_mask & BIT(i)) {
+ sorted[i] = engines[j];
+ break;
+ }
+ }
+
+ memcpy(*engines, *sorted,
+ sizeof(struct intel_engine_cs *) * num_engines);
+}
+
+static struct intel_context *
+multi_lrc_create_parent(struct intel_gt *gt, u8 class,
+ unsigned long flags)
+{
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int i = 0;
+
+ for_each_engine(engine, gt, id) {
+ if (engine->class != class)
+ continue;
+
+ siblings[i++] = engine;
+ }
+
+ if (i <= 1)
+ return ERR_PTR(0);
+
+ logical_sort(siblings, i);
+
+ return intel_engine_create_parallel(siblings, 1, i);
+}
+
+static void multi_lrc_context_unpin(struct intel_context *ce)
+{
+ struct intel_context *child;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ for_each_child(ce, child)
+ intel_context_unpin(child);
+ intel_context_unpin(ce);
+}
+
+static void multi_lrc_context_put(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ /*
+ * Only the parent gets the creation ref put in the uAPI, the parent
+ * itself is responsible for creation ref put on the children.
+ */
+ intel_context_put(ce);
+}
+
+static struct i915_request *
+multi_lrc_nop_request(struct intel_context *ce)
+{
+ struct intel_context *child;
+ struct i915_request *rq, *child_rq;
+ int i = 0;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ for_each_child(ce, child) {
+ child_rq = intel_context_create_request(child);
+ if (IS_ERR(child_rq))
+ goto child_error;
+
+ if (++i == ce->parallel.number_children)
+ set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
+ &child_rq->fence.flags);
+ i915_request_add(child_rq);
+ }
+
+ return rq;
+
+child_error:
+ i915_request_put(rq);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
+{
+ struct intel_context *parent;
+ struct i915_request *rq;
+ int ret;
+
+ parent = multi_lrc_create_parent(gt, class, 0);
+ if (IS_ERR(parent)) {
+ pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
+ return PTR_ERR(parent);
+ } else if (!parent) {
+ pr_debug("Not enough engines in class: %d", class);
+ return 0;
+ }
+
+ rq = multi_lrc_nop_request(parent);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ pr_err("Failed creating requests: %d", ret);
+ goto out;
+ }
+
+ ret = intel_selftest_wait_for_rq(rq);
+ if (ret)
+ pr_err("Failed waiting on request: %d", ret);
+
+ i915_request_put(rq);
+
+ if (ret >= 0) {
+ ret = intel_gt_wait_for_idle(gt, HZ * 5);
+ if (ret < 0)
+ pr_err("GT failed to idle: %d\n", ret);
+ }
+
+out:
+ multi_lrc_context_unpin(parent);
+ multi_lrc_context_put(parent);
+ return ret;
+}
+
+static int intel_guc_multi_lrc_basic(void *arg)
+{
+ struct intel_gt *gt = arg;
+ unsigned int class;
+ int ret;
+
+ for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+ ret = __intel_guc_multi_lrc_basic(gt, class);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(intel_guc_multi_lrc_basic),
+ };
+ struct intel_gt *gt = &i915->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return 0;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return 0;
+
+ return intel_gt_live_subtests(tests, gt);
+}