summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_engine_cs.c
diff options
context:
space:
mode:
authorRodrigo Vivi <rodrigo.vivi@intel.com>2022-11-14 14:32:34 -0500
committerRodrigo Vivi <rodrigo.vivi@intel.com>2022-11-14 14:32:34 -0500
commit002c6ca75289a4ac4f6738213dd2d258704886e4 (patch)
treed99ec659974b2121cef3e06f54a97d719d0a94dd /drivers/gpu/drm/i915/gt/intel_engine_cs.c
parent801543b2593b04eef974a73d3ea03e7efbd5ffae (diff)
parentc02f20d38fb90eba606277874581db124ace42c4 (diff)
Merge drm/drm-next into drm-intel-next
Catch up on 6.1-rc cycle in order to solve the intel_backlight conflict on linux-next. Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c271
1 files changed, 180 insertions, 91 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index ee8a38d02e14..6ae8b07cfaa1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -488,6 +488,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
+ if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
+ __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
+ engine->class == RENDER_CLASS)
+ engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
+
+ /* features common between engines sharing EUs */
+ if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
+ engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
+ engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
+ }
+
engine->props.heartbeat_interval_ms =
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
engine->props.max_busywait_duration_ns =
@@ -499,20 +510,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
- /* Override to uninterruptible for OpenCL workloads. */
- if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
- engine->props.preempt_timeout_ms = 0;
-
- if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
- __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
- engine->class == RENDER_CLASS)
- engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
-
- /* features common between engines sharing EUs */
- if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
- engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
- engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
- }
+ /*
+ * Mid-thread pre-emption is not available in Gen12. Unfortunately,
+ * some compute workloads run quite long threads. That means they get
+ * reset due to not pre-empting in a timely manner. So, bump the
+ * pre-emption timeout value to be much higher for compute engines.
+ */
+ if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
+ engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
+
+ /* Cap properties according to any system limits */
+#define CLAMP_PROP(field) \
+ do { \
+ u64 clamp = intel_clamp_##field(engine, engine->props.field); \
+ if (clamp != engine->props.field) { \
+ drm_notice(&engine->i915->drm, \
+ "Warning, clamping %s to %lld to prevent overflow\n", \
+ #field, clamp); \
+ engine->props.field = clamp; \
+ } \
+ } while (0)
+
+ CLAMP_PROP(heartbeat_interval_ms);
+ CLAMP_PROP(max_busywait_duration_ns);
+ CLAMP_PROP(preempt_timeout_ms);
+ CLAMP_PROP(stop_timeout_ms);
+ CLAMP_PROP(timeslice_duration_ms);
+
+#undef CLAMP_PROP
engine->defaults = engine->props; /* never to change again */
@@ -536,6 +561,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
return 0;
}
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
+{
+ value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+ return value;
+}
+
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
+{
+ value = min(value, jiffies_to_nsecs(2));
+
+ return value;
+}
+
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+ /*
+ * NB: The GuC API only supports 32bit values. However, the limit is further
+ * reduced due to internal calculations which would otherwise overflow.
+ */
+ if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+ value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
+
+ value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+ return value;
+}
+
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+ value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+ return value;
+}
+
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
+{
+ /*
+ * NB: The GuC API only supports 32bit values. However, the limit is further
+ * reduced due to internal calculations which would otherwise overflow.
+ */
+ if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+ value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
+
+ value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+ return value;
+}
+
static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
@@ -656,16 +730,83 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt,
*/
if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
return false;
- else if (GRAPHICS_VER(i915) == 12)
+ else if (MEDIA_VER(i915) >= 12)
return (physical_vdbox % 2 == 0) ||
!(BIT(physical_vdbox - 1) & vdbox_mask);
- else if (GRAPHICS_VER(i915) == 11)
+ else if (MEDIA_VER(i915) == 11)
return logical_vdbox % 2 == 0;
- MISSING_CASE(GRAPHICS_VER(i915));
return false;
}
+static void engine_mask_apply_media_fuses(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ unsigned int logical_vdbox = 0;
+ unsigned int i;
+ u32 media_fuse, fuse1;
+ u16 vdbox_mask;
+ u16 vebox_mask;
+
+ if (MEDIA_VER(gt->i915) < 11)
+ return;
+
+ /*
+ * On newer platforms the fusing register is called 'enable' and has
+ * enable semantics, while on older platforms it is called 'disable'
+ * and bits have disable semantices.
+ */
+ media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
+ if (MEDIA_VER_FULL(i915) < IP_VER(12, 50))
+ media_fuse = ~media_fuse;
+
+ vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
+ vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
+ GEN11_GT_VEBOX_DISABLE_SHIFT;
+
+ if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) {
+ fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
+ gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
+ } else {
+ gt->info.sfc_mask = ~0;
+ }
+
+ for (i = 0; i < I915_MAX_VCS; i++) {
+ if (!HAS_ENGINE(gt, _VCS(i))) {
+ vdbox_mask &= ~BIT(i);
+ continue;
+ }
+
+ if (!(BIT(i) & vdbox_mask)) {
+ gt->info.engine_mask &= ~BIT(_VCS(i));
+ drm_dbg(&i915->drm, "vcs%u fused off\n", i);
+ continue;
+ }
+
+ if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
+ gt->info.vdbox_sfc_access |= BIT(i);
+ logical_vdbox++;
+ }
+ drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
+ vdbox_mask, VDBOX_MASK(gt));
+ GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
+
+ for (i = 0; i < I915_MAX_VECS; i++) {
+ if (!HAS_ENGINE(gt, _VECS(i))) {
+ vebox_mask &= ~BIT(i);
+ continue;
+ }
+
+ if (!(BIT(i) & vebox_mask)) {
+ gt->info.engine_mask &= ~BIT(_VECS(i));
+ drm_dbg(&i915->drm, "vecs%u fused off\n", i);
+ }
+ }
+ drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
+ vebox_mask, VEBOX_MASK(gt));
+ GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
+}
+
static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
@@ -674,7 +815,10 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
unsigned long ccs_mask;
unsigned int i;
- if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER(i915) < 11)
+ return;
+
+ if (hweight32(CCS_MASK(gt)) <= 1)
return;
ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
@@ -696,6 +840,10 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
unsigned long meml3_mask;
unsigned long quad;
+ if (!(GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60) &&
+ GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)))
+ return;
+
meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask);
@@ -729,75 +877,11 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
*/
static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
{
- struct drm_i915_private *i915 = gt->i915;
struct intel_gt_info *info = &gt->info;
- struct intel_uncore *uncore = gt->uncore;
- unsigned int logical_vdbox = 0;
- unsigned int i;
- u32 media_fuse, fuse1;
- u16 vdbox_mask;
- u16 vebox_mask;
-
- info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
-
- if (GRAPHICS_VER(i915) < 11)
- return info->engine_mask;
-
- /*
- * On newer platforms the fusing register is called 'enable' and has
- * enable semantics, while on older platforms it is called 'disable'
- * and bits have disable semantices.
- */
- media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
- if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
- media_fuse = ~media_fuse;
- vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
- vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
- GEN11_GT_VEBOX_DISABLE_SHIFT;
-
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
- fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1);
- gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
- } else {
- gt->info.sfc_mask = ~0;
- }
-
- for (i = 0; i < I915_MAX_VCS; i++) {
- if (!HAS_ENGINE(gt, _VCS(i))) {
- vdbox_mask &= ~BIT(i);
- continue;
- }
-
- if (!(BIT(i) & vdbox_mask)) {
- info->engine_mask &= ~BIT(_VCS(i));
- drm_dbg(&i915->drm, "vcs%u fused off\n", i);
- continue;
- }
-
- if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
- gt->info.vdbox_sfc_access |= BIT(i);
- logical_vdbox++;
- }
- drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
- vdbox_mask, VDBOX_MASK(gt));
- GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
-
- for (i = 0; i < I915_MAX_VECS; i++) {
- if (!HAS_ENGINE(gt, _VECS(i))) {
- vebox_mask &= ~BIT(i);
- continue;
- }
-
- if (!(BIT(i) & vebox_mask)) {
- info->engine_mask &= ~BIT(_VECS(i));
- drm_dbg(&i915->drm, "vecs%u fused off\n", i);
- }
- }
- drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
- vebox_mask, VEBOX_MASK(gt));
- GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
+ GEM_BUG_ON(!info->engine_mask);
+ engine_mask_apply_media_fuses(gt);
engine_mask_apply_compute_fuses(gt);
engine_mask_apply_copy_fuses(gt);
@@ -1266,8 +1350,13 @@ int intel_engines_init(struct intel_gt *gt)
return err;
err = setup(engine);
- if (err)
+ if (err) {
+ intel_engine_cleanup_common(engine);
return err;
+ }
+
+ /* The backend should now be responsible for cleanup */
+ GEM_BUG_ON(engine->release == NULL);
err = engine_init_common(engine);
if (err)
@@ -1546,11 +1635,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
for_each_ss_steering(iter, engine->gt, slice, subslice) {
instdone->sampler[slice][subslice] =
intel_gt_mcr_read(engine->gt,
- GEN7_SAMPLER_INSTDONE,
+ GEN8_SAMPLER_INSTDONE,
slice, subslice);
instdone->row[slice][subslice] =
intel_gt_mcr_read(engine->gt,
- GEN7_ROW_INSTDONE,
+ GEN8_ROW_INSTDONE,
slice, subslice);
}
@@ -1690,9 +1779,9 @@ bool intel_engine_irq_enable(struct intel_engine_cs *engine)
return false;
/* Caller disables interrupts */
- spin_lock(&engine->gt->irq_lock);
+ spin_lock(engine->gt->irq_lock);
engine->irq_enable(engine);
- spin_unlock(&engine->gt->irq_lock);
+ spin_unlock(engine->gt->irq_lock);
return true;
}
@@ -1703,9 +1792,9 @@ void intel_engine_irq_disable(struct intel_engine_cs *engine)
return;
/* Caller disables interrupts */
- spin_lock(&engine->gt->irq_lock);
+ spin_lock(engine->gt->irq_lock);
engine->irq_disable(engine);
- spin_unlock(&engine->gt->irq_lock);
+ spin_unlock(engine->gt->irq_lock);
}
void intel_engines_reset_default_submission(struct intel_gt *gt)