path: root/drivers/gpu/drm/i915/gt
author    Jani Nikula <jani.nikula@intel.com>  2022-04-11 16:01:56 +0300
committer Jani Nikula <jani.nikula@intel.com>  2022-04-11 16:01:56 +0300
commit    83970cd63b9f864525761137b500113ab0b49c94 (patch)
tree      747b97113d60666cbb44f8a5633b961b7eda3c86 /drivers/gpu/drm/i915/gt
parent    618f5df1f6a5a3f29fad824116da291a7d14ab5e (diff)
parent    3123109284176b1532874591f7c81f3837bbdc17 (diff)
Merge drm/drm-next into drm-intel-next
Sync up with v5.18-rc1, in particular to get 5e3094cfd9fb ("drm/i915/xehpsdv: Add has_flat_ccs to device info").

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 34
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 158
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 157
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h | 11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_user.c | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c | 25
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.h | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 48
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c | 30
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h | 35
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 12
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.h | 51
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_migrate.c | 196
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ppgtt.c | 17
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_region_lmem.c | 140
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_region_lmem.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.c | 17
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c | 138
-rw-r--r--  drivers/gpu/drm/i915/gt/shmem_utils.c | 32
-rw-r--r--  drivers/gpu/drm/i915/gt/shmem_utils.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.h | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 240
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 32
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 63
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c | 4
33 files changed, 1079 insertions, 439 deletions
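
A recurring pattern in the gen8_engine_cs.c hunks below is masking off render-only PIPE_CONTROL bits whenever the request runs on one of the new compute (CCS) engines. The following is a simplified, self-contained sketch of that idea only, not the kernel code itself: the emit_flush_flags() helper is hypothetical, and the flag values simply mirror the definitions quoted in the intel_gpu_commands.h hunk further down.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative values; the real definitions live in intel_gpu_commands.h */
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH   (1u << 0)
#define PIPE_CONTROL_CS_STALL            (1u << 20)
#define PIPE_CONTROL_TILE_CACHE_FLUSH    (1u << 28)

/* Union of bits that only make sense on the render (3D) pipeline */
#define PIPE_CONTROL_3D_FLAGS \
	(PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH)

/* Hypothetical helper: drop 3D-only bits when emitting on a CCS engine */
static uint32_t emit_flush_flags(uint32_t flags, bool is_compute_engine)
{
	if (is_compute_engine)
		flags &= ~PIPE_CONTROL_3D_FLAGS;
	return flags;
}

int main(void)
{
	uint32_t flags = PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_TILE_CACHE_FLUSH |
			 PIPE_CONTROL_DEPTH_CACHE_FLUSH;

	printf("render:  0x%08x\n", emit_flush_flags(flags, false));
	printf("compute: 0x%08x\n", emit_flush_flags(flags, true));
	return 0;
}
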
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 1f8cf4f790b2..b1b9c3fd7bf9 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -201,6 +201,8 @@ static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
+ struct intel_engine_cs *engine = rq->engine;
+
if (mode & EMIT_FLUSH) {
u32 flags = 0;
u32 *cs;
@@ -219,6 +221,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CS_STALL;
+ if (engine->class == COMPUTE_CLASS)
+ flags &= ~PIPE_CONTROL_3D_FLAGS;
+
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -246,6 +251,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CS_STALL;
+ if (engine->class == COMPUTE_CLASS)
+ flags &= ~PIPE_CONTROL_3D_FLAGS;
+
cs = intel_ring_begin(rq, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -618,19 +626,27 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
+ struct drm_i915_private *i915 = rq->engine->i915;
+ u32 flags = (PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
+
+ if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ /* Wa_1409600907 */
+ flags |= PIPE_CONTROL_DEPTH_STALL;
+
+ if (rq->engine->class == COMPUTE_CLASS)
+ flags &= ~PIPE_CONTROL_3D_FLAGS;
+
cs = gen12_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- /* Wa_1409600907:tgl */
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE);
+ flags);
return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c43e724afa9f..f574da00eff1 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
start, end, lvl);
} else {
unsigned int count;
+ unsigned int pte = gen8_pd_index(start, 0);
+ unsigned int num_ptes;
u64 *vaddr;
count = gen8_pt_count(start, end);
@@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
atomic_read(&pt->used));
GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
+ num_ptes = count;
+ if (pt->is_compact) {
+ GEM_BUG_ON(num_ptes % 16);
+ GEM_BUG_ON(pte % 16);
+ num_ptes /= 16;
+ pte /= 16;
+ }
+
vaddr = px_vaddr(pt);
- memset64(vaddr + gen8_pd_index(start, 0),
+ memset64(vaddr + pte,
vm->scratch[0]->encode,
- count);
+ num_ptes);
atomic_sub(count, &pt->used);
start += count;
@@ -453,6 +463,95 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
return idx;
}
+static void
+xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ struct sgt_dma *iter,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
+ unsigned int rem = sg_dma_len(iter->sg);
+ u64 start = vma_res->start;
+
+ GEM_BUG_ON(!i915_vm_is_4lvl(vm));
+
+ do {
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_address(vm, start);
+ struct i915_page_directory * const pd =
+ i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+ struct i915_page_table *pt =
+ i915_pt_entry(pd, __gen8_pte_index(start, 1));
+ gen8_pte_t encode = pte_encode;
+ unsigned int page_size;
+ gen8_pte_t *vaddr;
+ u16 index, max;
+
+ max = I915_PDES;
+
+ if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+ rem >= I915_GTT_PAGE_SIZE_2M &&
+ !__gen8_pte_index(start, 0)) {
+ index = __gen8_pte_index(start, 1);
+ encode |= GEN8_PDE_PS_2M;
+ page_size = I915_GTT_PAGE_SIZE_2M;
+
+ vaddr = px_vaddr(pd);
+ } else {
+ if (encode & GEN12_PPGTT_PTE_LM) {
+ GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
+ GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
+ GEM_BUG_ON(!IS_ALIGNED(iter->dma,
+ I915_GTT_PAGE_SIZE_64K));
+
+ index = __gen8_pte_index(start, 0) / 16;
+ page_size = I915_GTT_PAGE_SIZE_64K;
+
+ max /= 16;
+
+ vaddr = px_vaddr(pd);
+ vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+ pt->is_compact = true;
+ } else {
+ GEM_BUG_ON(pt->is_compact);
+ index = __gen8_pte_index(start, 0);
+ page_size = I915_GTT_PAGE_SIZE;
+ }
+
+ vaddr = px_vaddr(pt);
+ }
+
+ do {
+ GEM_BUG_ON(rem < page_size);
+ vaddr[index++] = encode | iter->dma;
+
+ start += page_size;
+ iter->dma += page_size;
+ rem -= page_size;
+ if (iter->dma >= iter->max) {
+ iter->sg = __sg_next(iter->sg);
+ if (!iter->sg)
+ break;
+
+ rem = sg_dma_len(iter->sg);
+ if (!rem)
+ break;
+
+ iter->dma = sg_dma_address(iter->sg);
+ iter->max = iter->dma + rem;
+
+ if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+ break;
+ }
+ } while (rem >= page_size && index < max);
+
+ vma_res->page_sizes_gtt |= page_size;
+ } while (iter->sg && sg_dma_len(iter->sg));
+}
+
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
struct sgt_dma *iter,
@@ -586,7 +685,10 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma_res);
if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
- gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+ if (HAS_64K_PAGES(vm->i915))
+ xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+ else
+ gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
} else {
u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
@@ -613,13 +715,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
gen8_pdp_for_page_index(vm, idx);
struct i915_page_directory *pd =
i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+ struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
gen8_pte_t *vaddr;
- vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+ GEM_BUG_ON(pt->is_compact);
+
+ vaddr = px_vaddr(pt);
vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}
+static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ u64 idx = offset >> GEN8_PTE_SHIFT;
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_index(vm, idx);
+ struct i915_page_directory *pd =
+ i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+ struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
+ gen8_pte_t *vaddr;
+
+ GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
+ GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
+
+ if (!pt->is_compact) {
+ vaddr = px_vaddr(pd);
+ vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
+ pt->is_compact = true;
+ }
+
+ vaddr = px_vaddr(pt);
+ vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+}
+
+static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ if (flags & PTE_LM)
+ return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
+ level, flags);
+
+ return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
+}
+
static int gen8_init_scratch(struct i915_address_space *vm)
{
u32 pte_flags;
@@ -819,7 +964,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->vm.insert_entries = gen8_ppgtt_insert;
- ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
+ if (HAS_64K_PAGES(gt->i915))
+ ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
+ else
+ ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
ppgtt->vm.clear_range = gen8_ppgtt_clear;
ppgtt->vm.foreach = gen8_ppgtt_foreach;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index be4b1e65442f..1c0ab05c3c40 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -265,6 +265,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
void intel_engine_destroy_pinned_context(struct intel_context *ce);
+void xehp_enable_ccs_engines(struct intel_engine_cs *engine);
+
#define ENGINE_PHYSICAL 0
#define ENGINE_MOCK 1
#define ENGINE_VIRTUAL 2
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 501e7b37bc26..7447411a5b26 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -23,6 +23,7 @@
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_gt_pm.h"
+#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_reset.h"
#include "intel_ring.h"
@@ -158,6 +159,34 @@ static const struct engine_info intel_engines[] = {
{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
},
},
+ [CCS0] = {
+ .class = COMPUTE_CLASS,
+ .instance = 0,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }
+ }
+ },
+ [CCS1] = {
+ .class = COMPUTE_CLASS,
+ .instance = 1,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }
+ }
+ },
+ [CCS2] = {
+ .class = COMPUTE_CLASS,
+ .instance = 2,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }
+ }
+ },
+ [CCS3] = {
+ .class = COMPUTE_CLASS,
+ .instance = 3,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
+ }
+ },
};
/**
@@ -182,6 +211,8 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
switch (class) {
+ case COMPUTE_CLASS:
+ fallthrough;
case RENDER_CLASS:
switch (GRAPHICS_VER(gt->i915)) {
default:
@@ -295,6 +326,50 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
GEM_DEBUG_WARN_ON(iir);
}
+static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
+{
+ u32 reset_domain;
+
+ if (ver >= 11) {
+ static const u32 engine_reset_domains[] = {
+ [RCS0] = GEN11_GRDOM_RENDER,
+ [BCS0] = GEN11_GRDOM_BLT,
+ [VCS0] = GEN11_GRDOM_MEDIA,
+ [VCS1] = GEN11_GRDOM_MEDIA2,
+ [VCS2] = GEN11_GRDOM_MEDIA3,
+ [VCS3] = GEN11_GRDOM_MEDIA4,
+ [VCS4] = GEN11_GRDOM_MEDIA5,
+ [VCS5] = GEN11_GRDOM_MEDIA6,
+ [VCS6] = GEN11_GRDOM_MEDIA7,
+ [VCS7] = GEN11_GRDOM_MEDIA8,
+ [VECS0] = GEN11_GRDOM_VECS,
+ [VECS1] = GEN11_GRDOM_VECS2,
+ [VECS2] = GEN11_GRDOM_VECS3,
+ [VECS3] = GEN11_GRDOM_VECS4,
+ [CCS0] = GEN11_GRDOM_RENDER,
+ [CCS1] = GEN11_GRDOM_RENDER,
+ [CCS2] = GEN11_GRDOM_RENDER,
+ [CCS3] = GEN11_GRDOM_RENDER,
+ };
+ GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
+ !engine_reset_domains[id]);
+ reset_domain = engine_reset_domains[id];
+ } else {
+ static const u32 engine_reset_domains[] = {
+ [RCS0] = GEN6_GRDOM_RENDER,
+ [BCS0] = GEN6_GRDOM_BLT,
+ [VCS0] = GEN6_GRDOM_MEDIA,
+ [VCS1] = GEN8_GRDOM_MEDIA2,
+ [VECS0] = GEN6_GRDOM_VECS,
+ };
+ GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
+ !engine_reset_domains[id]);
+ reset_domain = engine_reset_domains[id];
+ }
+
+ return reset_domain;
+}
+
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
u8 logical_instance)
{
@@ -330,38 +405,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
- if (GRAPHICS_VER(gt->i915) >= 11) {
- static const u32 engine_reset_domains[] = {
- [RCS0] = GEN11_GRDOM_RENDER,
- [BCS0] = GEN11_GRDOM_BLT,
- [VCS0] = GEN11_GRDOM_MEDIA,
- [VCS1] = GEN11_GRDOM_MEDIA2,
- [VCS2] = GEN11_GRDOM_MEDIA3,
- [VCS3] = GEN11_GRDOM_MEDIA4,
- [VCS4] = GEN11_GRDOM_MEDIA5,
- [VCS5] = GEN11_GRDOM_MEDIA6,
- [VCS6] = GEN11_GRDOM_MEDIA7,
- [VCS7] = GEN11_GRDOM_MEDIA8,
- [VECS0] = GEN11_GRDOM_VECS,
- [VECS1] = GEN11_GRDOM_VECS2,
- [VECS2] = GEN11_GRDOM_VECS3,
- [VECS3] = GEN11_GRDOM_VECS4,
- };
- GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
- !engine_reset_domains[id]);
- engine->reset_domain = engine_reset_domains[id];
- } else {
- static const u32 engine_reset_domains[] = {
- [RCS0] = GEN6_GRDOM_RENDER,
- [BCS0] = GEN6_GRDOM_BLT,
- [VCS0] = GEN6_GRDOM_MEDIA,
- [VCS1] = GEN8_GRDOM_MEDIA2,
- [VECS0] = GEN6_GRDOM_VECS,
- };
- GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
- !engine_reset_domains[id]);
- engine->reset_domain = engine_reset_domains[id];
- }
+ engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915),
+ id);
engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;
@@ -391,6 +436,12 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
+ /* features common between engines sharing EUs */
+ if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
+ engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
+ engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
+ }
+
engine->defaults = engine->props; /* never to change again */
engine->context_size = intel_engine_context_size(gt, engine->class);
@@ -543,6 +594,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt,
return false;
}
+static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_gt_info *info = &gt->info;
+ int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
+ unsigned long ccs_mask;
+ unsigned int i;
+
+ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ return;
+
+ ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
+ ss_per_ccs);
+ /*
+ * If all DSS in a quadrant are fused off, the corresponding CCS
+ * engine is not available for use.
+ */
+ for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
+ info->engine_mask &= ~BIT(_CCS(i));
+ drm_dbg(&i915->drm, "ccs%u fused off\n", i);
+ }
+}
+
/*
* Determine which engines are fused off in our particular hardware.
* Note that we have a catch-22 situation where we need to be able to access
@@ -624,6 +698,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
vebox_mask, VEBOX_MASK(gt));
GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
+ engine_mask_apply_compute_fuses(gt);
+
return info->engine_mask;
}
@@ -2020,6 +2096,23 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
return active;
}
+void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
+{
+ /*
+ * If there are any non-fused-off CCS engines, we need to enable CCS
+ * support in the RCU_MODE register. This only needs to be done once,
+ * so for simplicity we'll take care of this in the RCS engine's
+ * resume handler; since the RCS and all CCS engines belong to the
+ * same reset domain and are reset together, this will also take care
+ * of re-applying the setting after i915-triggered resets.
+ */
+ if (!CCS_MASK(engine->gt))
+ return;
+
+ intel_uncore_write(engine->uncore, GEN12_RCU_MODE,
+ _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 36365bdbe1ee..19ff8758e34d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -33,7 +33,8 @@
#define VIDEO_ENHANCEMENT_CLASS 2
#define COPY_ENGINE_CLASS 3
#define OTHER_CLASS 4
-#define MAX_ENGINE_CLASS 4
+#define COMPUTE_CLASS 5
+#define MAX_ENGINE_CLASS 5
#define MAX_ENGINE_INSTANCE 7
#define I915_MAX_SLICES 3
@@ -95,6 +96,7 @@ struct i915_ctx_workarounds {
#define I915_MAX_VCS 8
#define I915_MAX_VECS 4
+#define I915_MAX_CCS 4
/*
* Engine IDs definitions.
@@ -117,6 +119,11 @@ enum intel_engine_id {
VECS2,
VECS3,
#define _VECS(n) (VECS0 + (n))
+ CCS0,
+ CCS1,
+ CCS2,
+ CCS3,
+#define _CCS(n) (CCS0 + (n))
I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
@@ -517,6 +524,8 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
+#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
+#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
unsigned int flags;
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 9ce85a845105..b8c9b6b89003 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -47,6 +47,7 @@ static const u8 uabi_classes[] = {
[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
+ /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
};
static int engine_cmp(void *priv, const struct list_head *A,
@@ -139,6 +140,7 @@ const char *intel_engine_class_repr(u8 class)
[COPY_ENGINE_CLASS] = "bcs",
[VIDEO_DECODE_CLASS] = "vcs",
[VIDEO_ENHANCEMENT_CLASS] = "vecs",
+ [COMPUTE_CLASS] = "ccs",
};
if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class])
@@ -162,6 +164,7 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
[COPY_ENGINE_CLASS] = { BCS0, 1 },
[VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS },
[VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS },
+ [COMPUTE_CLASS] = { CCS0, I915_MAX_CCS },
};
if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map)))
@@ -190,7 +193,7 @@ static void add_legacy_ring(struct legacy_ring *ring,
void intel_engines_driver_register(struct drm_i915_private *i915)
{
struct legacy_ring ring = {};
- u8 uabi_instances[4] = {};
+ u8 uabi_instances[5] = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
LIST_HEAD(engines);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 42bfbafad32a..1c602d4ae297 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -666,9 +666,13 @@ static inline void execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- u64 desc = ce->lrc.desc;
+ u64 desc;
u32 tail, prev;
+ desc = ce->lrc.desc;
+ if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
+ desc |= lrc_desc_priority(rq_prio(rq));
+
/*
* WaIdleLiteRestore:bdw,skl
*
@@ -2908,6 +2912,9 @@ static int execlists_resume(struct intel_engine_cs *engine)
enable_execlists(engine);
+ if (engine->class == RENDER_CLASS)
+ xehp_enable_ccs_engines(engine);
+
return 0;
}
@@ -3481,7 +3488,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
- if (engine->class == RENDER_CLASS)
+ if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
rcs_submission_override(engine);
lrc_init_wa_ctx(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index f8253012d166..d112ffd56418 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -228,11 +228,14 @@
#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */
#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */
#define PIPE_CONTROL_FLUSH_L3 (1<<27)
+#define PIPE_CONTROL_AMFS_FLUSH (1<<25) /* gen12+ */
#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
#define PIPE_CONTROL_MMIO_WRITE (1<<23)
#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21)
#define PIPE_CONTROL_CS_STALL (1<<20)
+#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19)
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
+#define PIPE_CONTROL_PSD_SYNC (1<<17) /* gen11+ */
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
#define PIPE_CONTROL_QW_WRITE (1<<14)
@@ -254,6 +257,18 @@
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+/* 3D-related flags can't be set on compute engine */
+#define PIPE_CONTROL_3D_FLAGS (\
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+ PIPE_CONTROL_TILE_CACHE_FLUSH | \
+ PIPE_CONTROL_DEPTH_STALL | \
+ PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+ PIPE_CONTROL_PSD_SYNC | \
+ PIPE_CONTROL_AMFS_FLUSH | \
+ PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+ PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
/* Opcodes for MI_MATH_INSTR */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index e8403fa53909..8a2483ccbfb9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -65,8 +65,6 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
int err;
mem = intel_gt_setup_lmem(gt);
- if (mem == ERR_PTR(-ENODEV))
- mem = intel_gt_setup_fake_lmem(gt);
if (IS_ERR(mem)) {
err = PTR_ERR(mem);
if (err == -ENODEV)
@@ -458,7 +456,9 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
struct i915_vma *vma;
int ret;
- obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
+ obj = i915_gem_object_create_lmem(i915, size,
+ I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_GPU_ONLY);
if (IS_ERR(obj))
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
@@ -913,6 +913,25 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
return intel_uncore_read_fw(gt->uncore, reg);
}
+u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
+{
+ int type;
+ u8 sliceid, subsliceid;
+
+ for (type = 0; type < NUM_STEERING_TYPES; type++) {
+ if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
+ intel_gt_get_valid_steering(gt, type, &sliceid,
+ &subsliceid);
+ return intel_uncore_read_with_mcr_steering(gt->uncore,
+ reg,
+ sliceid,
+ subsliceid);
+ }
+ }
+
+ return intel_uncore_read(gt->uncore, reg);
+}
+
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 2dad46c3eff2..0f571c8ee22b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -85,6 +85,7 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt,
}
u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg);
+u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg);
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 983264e10e0a..e443ac4c8059 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -100,7 +100,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity)
if (unlikely(!intr))
return;
- if (class <= COPY_ENGINE_CLASS)
+ if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS)
return gen11_engine_irq_handler(gt, class, instance, intr);
if (class == OTHER_CLASS)
@@ -182,6 +182,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
/* Disable RCS, BCS, VCS and VECS class engines. */
intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0);
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0);
+ if (CCS_MASK(gt))
+ intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0);
@@ -195,6 +197,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0);
if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0);
+ if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1))
+ intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0);
+ if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
+ intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
@@ -225,6 +231,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
/* Enable RCS, BCS, VCS and VECS class interrupts. */
intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask);
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask);
+ if (CCS_MASK(gt))
+ intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask);
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
@@ -238,6 +246,11 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask);
if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
+ if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1))
+ intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask);
+ if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
+ intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask);
+
/*
* RPS interrupts will get enabled/disabled on demand when RPS itself
* is enabled/disabled.
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 1545867c6da1..201b507c9dde 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -465,6 +465,9 @@
#define GEN9_PGCTL_SSB_EU210_ACK (1 << 12)
#define GEN9_PGCTL_SSB_EU311_ACK (1 << 14)
+#define VF_PREEMPTION _MMIO(0x83a4)
+#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
+
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
#define GEN12_SQCM _MMIO(0x8724)
@@ -1057,6 +1060,7 @@
#define FLOW_CONTROL_ENABLE REG_BIT(15)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
+#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10)
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8)
#define STALL_DOP_GATING_DISABLE REG_BIT(5)
#define THROTTLE_12_5 REG_GENMASK(4, 2)
@@ -1324,6 +1328,9 @@
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
+#define GEN12_RCU_MODE _MMIO(0x14800)
+#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+
#define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
@@ -1448,6 +1455,10 @@
#define GEN11_KCR (19)
#define GEN11_GTPM (16)
#define GEN11_BCS (15)
+#define GEN12_CCS3 (7)
+#define GEN12_CCS2 (6)
+#define GEN12_CCS1 (5)
+#define GEN12_CCS0 (4)
#define GEN11_RCS0 (0)
#define GEN11_VECS(x) (31 - (x))
#define GEN11_VCS(x) (x)
@@ -1460,6 +1471,7 @@
#define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c)
#define GEN11_CRYPTO_RSVD_INTR_ENABLE _MMIO(0x190040)
#define GEN11_GUNIT_CSME_INTR_ENABLE _MMIO(0x190044)
+#define GEN12_CCS_RSVD_INTR_ENABLE _MMIO(0x190048)
#define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4))
#define GEN11_INTR_DATA_VALID (1 << 31)
@@ -1485,41 +1497,9 @@
#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec)
#define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0)
#define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4)
+#define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100)
+#define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104)
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
-enum {
- INTEL_ADVANCED_CONTEXT = 0,
- INTEL_LEGACY_32B_CONTEXT,
- INTEL_ADVANCED_AD_CONTEXT,
- INTEL_LEGACY_64B_CONTEXT
-};
-
-enum {
- FAULT_AND_HANG = 0,
- FAULT_AND_HALT, /* Debug only */
- FAULT_AND_STREAM,
- FAULT_AND_CONTINUE /* Unsupported */
-};
-
-#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
-#define GEN8_CTX_VALID (1 << 0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
-#define GEN8_CTX_FORCE_RESTORE (1 << 2)
-#define GEN8_CTX_L3LLC_COHERENT (1 << 5)
-#define GEN8_CTX_PRIVILEGE (1 << 8)
-#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
-#define GEN8_CTX_ID_SHIFT 32
-#define GEN8_CTX_ID_WIDTH 21
-#define GEN11_SW_CTX_ID_SHIFT 37
-#define GEN11_SW_CTX_ID_WIDTH 11
-#define GEN11_ENGINE_CLASS_SHIFT 61
-#define GEN11_ENGINE_CLASS_WIDTH 3
-#define GEN11_ENGINE_INSTANCE_SHIFT 48
-#define GEN11_ENGINE_INSTANCE_WIDTH 6
-#define XEHP_SW_CTX_ID_SHIFT 39
-#define XEHP_SW_CTX_ID_WIDTH 16
-#define XEHP_SW_COUNTER_SHIFT 58
-#define XEHP_SW_COUNTER_WIDTH 6
-
#endif /* __INTEL_GT_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2f9b6694ebdd..cc53b481a1c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -119,14 +119,19 @@ void __i915_vm_close(struct i915_address_space *vm)
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
- /* Keep the obj (and hence the vma) alive as _we_ destroy it */
- if (!kref_get_unless_zero(&obj->base.refcount))
+ if (!kref_get_unless_zero(&obj->base.refcount)) {
+ /*
+ * Unbind the dying vma to ensure the bound_list
+ * is completely drained. We leave the destruction to
+ * the object destructor.
+ */
+ atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+ WARN_ON(__i915_vma_unbind(vma));
continue;
+ }
- atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
- WARN_ON(__i915_vma_unbind(vma));
- __i915_vma_put(vma);
-
+ /* Keep the obj (and hence the vma) alive as _we_ destroy it */
+ i915_vma_destroy_locked(vma);
i915_gem_object_put(obj);
}
GEM_BUG_ON(!list_empty(&vm->bound_list));
@@ -237,6 +242,19 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
GEM_BUG_ON(!vm->total);
drm_mm_init(&vm->mm, 0, vm->total);
+
+ memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
+ ARRAY_SIZE(vm->min_alignment));
+
+ if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) &&
+ subclass == VM_CLASS_PPGTT) {
+ vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+ vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+ } else if (HAS_64K_PAGES(vm->i915)) {
+ vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+ vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+ }
+
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
INIT_LIST_HEAD(&vm->bound_list);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b12ecee49de0..ba7025388a5e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -29,6 +29,8 @@
#include "i915_selftest.h"
#include "i915_vma_resource.h"
#include "i915_vma_types.h"
+#include "i915_params.h"
+#include "intel_memory_region.h"
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -90,6 +92,8 @@ typedef u64 gen8_pte_t;
#define GEN12_GGTT_PTE_LM BIT_ULL(1)
+#define GEN12_PDE_64K BIT(6)
+
/*
* Cacheability Control is a 4-bit value. The low three bits are stored in bits
* 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
@@ -158,6 +162,7 @@ struct i915_page_table {
atomic_t used;
struct i915_page_table *stash;
};
+ bool is_compact;
};
struct i915_page_directory {
@@ -195,6 +200,14 @@ void *__px_vaddr(struct drm_i915_gem_object *p);
struct i915_vm_pt_stash {
/* preallocated chains of page tables/directories */
struct i915_page_table *pt[2];
+ /*
+ * Optionally override the alignment/size of the physical page that
+ * contains each PT. If not set defaults back to the usual
+ * I915_GTT_PAGE_SIZE_4K. This does not influence the other paging
+ * structures. MUST be a power-of-two. ONLY applicable on discrete
+ * platforms.
+ */
+ int pt_sz;
};
struct i915_vma_ops {
@@ -223,6 +236,7 @@ struct i915_address_space {
struct device *dma;
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
u64 reserved; /* size addr space reserved */
+ u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1];
unsigned int bind_async_flags;
@@ -386,6 +400,25 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm)
return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
}
+static inline u64 i915_vm_min_alignment(struct i915_address_space *vm,
+ enum intel_memory_type type)
+{
+ /* avoid INTEL_MEMORY_MOCK overflow */
+ if ((int)type >= ARRAY_SIZE(vm->min_alignment))
+ type = INTEL_MEMORY_SYSTEM;
+
+ return vm->min_alignment[type];
+}
+
+static inline u64 i915_vm_obj_min_alignment(struct i915_address_space *vm,
+ struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+ enum intel_memory_type type = mr ? mr->type : INTEL_MEMORY_SYSTEM;
+
+ return i915_vm_min_alignment(vm, type);
+}
+
static inline bool
i915_vm_has_cache_coloring(struct i915_address_space *vm)
{
@@ -572,7 +605,7 @@ void free_scratch(struct i915_address_space *vm);
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz);
-struct i915_page_table *alloc_pt(struct i915_address_space *vm);
+struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz);
struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
struct i915_page_directory *__alloc_pd(int npde);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 004e1216e654..07bef7128fdb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -623,7 +623,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
!intel_engine_has_relative_mmio(engine));
- if (engine->class == RENDER_CLASS) {
+ if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
@@ -1217,6 +1217,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_timestamp_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);
+ /* Wa_16013000631:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
+ IS_DG2_G11(ce->engine->i915))
+ if (ce->engine->class == COMPUTE_CLASS)
+ cs = gen8_emit_pipe_control(cs,
+ PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
+ 0);
+
return cs;
}
@@ -1619,7 +1627,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine)
unsigned int i;
int err;
- if (engine->class != RENDER_CLASS)
+ if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
return;
switch (GRAPHICS_VER(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 0b76f096b559..6e4f9f58fca5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -6,6 +6,9 @@
#ifndef __INTEL_LRC_H__
#define __INTEL_LRC_H__
+#include "i915_priolist_types.h"
+
+#include <linux/bitfield.h>
#include <linux/types.h>
struct drm_i915_gem_object;
@@ -69,4 +72,52 @@ void lrc_check_regs(const struct intel_context *ce,
void lrc_update_runtime(struct intel_context *ce);
+enum {
+ INTEL_ADVANCED_CONTEXT = 0,
+ INTEL_LEGACY_32B_CONTEXT,
+ INTEL_ADVANCED_AD_CONTEXT,
+ INTEL_LEGACY_64B_CONTEXT
+};
+
+enum {
+ FAULT_AND_HANG = 0,
+ FAULT_AND_HALT, /* Debug only */
+ FAULT_AND_STREAM,
+ FAULT_AND_CONTINUE /* Unsupported */
+};
+
+#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
+#define GEN8_CTX_VALID (1 << 0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
+#define GEN8_CTX_FORCE_RESTORE (1 << 2)
+#define GEN8_CTX_L3LLC_COHERENT (1 << 5)
+#define GEN8_CTX_PRIVILEGE (1 << 8)
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
+#define GEN12_CTX_PRIORITY_MASK GENMASK(10, 9)
+#define GEN12_CTX_PRIORITY_HIGH FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 2)
+#define GEN12_CTX_PRIORITY_NORMAL FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 1)
+#define GEN12_CTX_PRIORITY_LOW FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 0)
+#define GEN8_CTX_ID_SHIFT 32
+#define GEN8_CTX_ID_WIDTH 21
+#define GEN11_SW_CTX_ID_SHIFT 37
+#define GEN11_SW_CTX_ID_WIDTH 11
+#define GEN11_ENGINE_CLASS_SHIFT 61
+#define GEN11_ENGINE_CLASS_WIDTH 3
+#define GEN11_ENGINE_INSTANCE_SHIFT 48
+#define GEN11_ENGINE_INSTANCE_WIDTH 6
+#define XEHP_SW_CTX_ID_SHIFT 39
+#define XEHP_SW_CTX_ID_WIDTH 16
+#define XEHP_SW_COUNTER_SHIFT 58
+#define XEHP_SW_COUNTER_WIDTH 6
+
+static inline u32 lrc_desc_priority(int prio)
+{
+ if (prio > I915_PRIORITY_NORMAL)
+ return GEN12_CTX_PRIORITY_HIGH;
+ else if (prio < I915_PRIORITY_NORMAL)
+ return GEN12_CTX_PRIORITY_LOW;
+ else
+ return GEN12_CTX_PRIORITY_NORMAL;
+}
+
#endif /* __INTEL_LRC_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 18b44af56969..20444d6ceb3c 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -32,6 +32,38 @@ static bool engine_supports_migration(struct intel_engine_cs *engine)
return true;
}
+static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ void *data)
+{
+ struct insert_pte_data *d = data;
+
+ /*
+ * Insert a dummy PTE into every PT that will map to LMEM to ensure
+ * we have a correctly setup PDE structure for later use.
+ */
+ vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM);
+ GEM_BUG_ON(!pt->is_compact);
+ d->offset += SZ_2M;
+}
+
+static void xehpsdv_insert_pte(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ void *data)
+{
+ struct insert_pte_data *d = data;
+
+ /*
+ * We are playing tricks here, since the actual pt, from the hw
+ * pov, is only 256bytes with 32 entries, or 4096bytes with 512
+ * entries, but we are still guaranteed that the physical
+ * alignment is 64K underneath for the pt, and we are careful
+ * not to access the space in the void.
+ */
+ vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM);
+ d->offset += SZ_64K;
+}
+
static void insert_pte(struct i915_address_space *vm,
struct i915_page_table *pt,
void *data)
@@ -74,7 +106,32 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
* i.e. within the same non-preemptible window so that we do not switch
* to another migration context that overwrites the PTE.
*
- * TODO: Add support for huge LMEM PTEs
+ * This changes quite a bit on platforms with HAS_64K_PAGES support,
+ * where we instead have three windows, each CHUNK_SIZE in size. The
+ * first is reserved for mapping system-memory, and that just uses the
+ * 512 entry layout using 4K GTT pages. The other two windows just map
+ * lmem pages and must use the new compact 32 entry layout using 64K GTT
+ * pages, which ensures we can address any lmem object that the user
+ * throws at us. We then also use the xehpsdv_toggle_pdes as a way of
+ * just toggling the PDE bit(GEN12_PDE_64K) for us, to enable the
+ * compact layout for each of these page-tables, that fall within the
+ * [CHUNK_SIZE, 3 * CHUNK_SIZE) range.
+ *
+ * We lay the ppGTT out as:
+ *
+ * [0, CHUNK_SZ) -> first window/object, maps smem
+ * [CHUNK_SZ, 2 * CHUNK_SZ) -> second window/object, maps lmem src
+ * [2 * CHUNK_SZ, 3 * CHUNK_SZ) -> third window/object, maps lmem dst
+ *
+ * For the PTE window it's also quite different, since each PTE must
+ * point to some 64K page, one for each PT(since it's in lmem), and yet
+ * each is only <= 4096bytes, but since the unused space within that PTE
+ * range is never touched, this should be fine.
+ *
+ * So basically each PT now needs 64K of virtual memory, instead of 4K,
+ * which looks like:
+ *
+ * [3 * CHUNK_SZ, 3 * CHUNK_SZ + ((3 * CHUNK_SZ / SZ_2M) * SZ_64K)] -> PTE
*/
vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
@@ -86,6 +143,9 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
goto err_vm;
}
+ if (HAS_64K_PAGES(gt->i915))
+ stash.pt_sz = I915_GTT_PAGE_SIZE_64K;
+
/*
* Each engine instance is assigned its own chunk in the VM, so
* that we can run multiple instances concurrently
@@ -105,14 +165,20 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
* We copy in 8MiB chunks. Each PDE covers 2MiB, so we need
* 4x2 page directories for source/destination.
*/
- sz = 2 * CHUNK_SZ;
+ if (HAS_64K_PAGES(gt->i915))
+ sz = 3 * CHUNK_SZ;
+ else
+ sz = 2 * CHUNK_SZ;
d.offset = base + sz;
/*
* We need another page directory setup so that we can write
* the 8x512 PTE in each chunk.
*/
- sz += (sz >> 12) * sizeof(u64);
+ if (HAS_64K_PAGES(gt->i915))
+ sz += (sz / SZ_2M) * SZ_64K;
+ else
+ sz += (sz >> 12) * sizeof(u64);
err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz);
if (err)
@@ -133,7 +199,18 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
goto err_vm;
/* Now allow the GPU to rewrite the PTE via its own ppGTT */
- vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d);
+ if (HAS_64K_PAGES(gt->i915)) {
+ vm->vm.foreach(&vm->vm, base, d.offset - base,
+ xehpsdv_insert_pte, &d);
+ d.offset = base + CHUNK_SZ;
+ vm->vm.foreach(&vm->vm,
+ d.offset,
+ 2 * CHUNK_SZ,
+ xehpsdv_toggle_pdes, &d);
+ } else {
+ vm->vm.foreach(&vm->vm, base, d.offset - base,
+ insert_pte, &d);
+ }
}
return &vm->vm;
@@ -269,19 +346,38 @@ static int emit_pte(struct i915_request *rq,
u64 offset,
int length)
{
+ bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
const u64 encode = rq->context->vm->pte_encode(0, cache_level,
is_lmem ? PTE_LM : 0);
struct intel_ring *ring = rq->ring;
- int total = 0;
+ int pkt, dword_length;
+ u32 total = 0;
+ u32 page_size;
u32 *hdr, *cs;
- int pkt;
GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
+ page_size = I915_GTT_PAGE_SIZE;
+ dword_length = 0x400;
+
/* Compute the page directory offset for the target address range */
- offset >>= 12;
- offset *= sizeof(u64);
- offset += 2 * CHUNK_SZ;
+ if (has_64K_pages) {
+ GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M));
+
+ offset /= SZ_2M;
+ offset *= SZ_64K;
+ offset += 3 * CHUNK_SZ;
+
+ if (is_lmem) {
+ page_size = I915_GTT_PAGE_SIZE_64K;
+ dword_length = 0x40;
+ }
+ } else {
+ offset >>= 12;
+ offset *= sizeof(u64);
+ offset += 2 * CHUNK_SZ;
+ }
+
offset += (u64)rq->engine->instance << 32;
cs = intel_ring_begin(rq, 6);
@@ -289,7 +385,7 @@ static int emit_pte(struct i915_request *rq,
return PTR_ERR(cs);
/* Pack as many PTE updates as possible into a single MI command */
- pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5);
+ pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5);
pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);
hdr = cs;
@@ -299,6 +395,8 @@ static int emit_pte(struct i915_request *rq,
do {
if (cs - hdr >= pkt) {
+ int dword_rem;
+
*hdr += cs - hdr - 2;
*cs++ = MI_NOOP;
@@ -310,7 +408,18 @@ static int emit_pte(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5);
+ dword_rem = dword_length;
+ if (has_64K_pages) {
+ if (IS_ALIGNED(total, SZ_2M)) {
+ offset = round_up(offset, SZ_64K);
+ } else {
+ dword_rem = SZ_2M - (total & (SZ_2M - 1));
+ dword_rem /= page_size;
+ dword_rem *= 2;
+ }
+ }
+
+ pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5);
pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);
hdr = cs;
@@ -319,13 +428,15 @@ static int emit_pte(struct i915_request *rq,
*cs++ = upper_32_bits(offset);
}
+ GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size));
+
*cs++ = lower_32_bits(encode | it->dma);
*cs++ = upper_32_bits(encode | it->dma);
offset += 8;
- total += I915_GTT_PAGE_SIZE;
+ total += page_size;
- it->dma += I915_GTT_PAGE_SIZE;
+ it->dma += page_size;
if (it->dma >= it->max) {
it->sg = __sg_next(it->sg);
if (!it->sg || sg_dma_len(it->sg) == 0)
@@ -356,7 +467,8 @@ static bool wa_1209644611_applies(int ver, u32 size)
return height % 4 == 3 && height <= 8;
}
-static int emit_copy(struct i915_request *rq, int size)
+static int emit_copy(struct i915_request *rq,
+ u32 dst_offset, u32 src_offset, int size)
{
const int ver = GRAPHICS_VER(rq->engine->i915);
u32 instance = rq->engine->instance;
@@ -371,31 +483,31 @@ static int emit_copy(struct i915_request *rq, int size)
*cs++ = BLT_DEPTH_32 | PAGE_SIZE;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = CHUNK_SZ; /* dst offset */
+ *cs++ = dst_offset;
*cs++ = instance;
*cs++ = 0;
*cs++ = PAGE_SIZE;
- *cs++ = 0; /* src offset */
+ *cs++ = src_offset;
*cs++ = instance;
} else if (ver >= 8) {
*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = CHUNK_SZ; /* dst offset */
+ *cs++ = dst_offset;
*cs++ = instance;
*cs++ = 0;
*cs++ = PAGE_SIZE;
- *cs++ = 0; /* src offset */
+ *cs++ = src_offset;
*cs++ = instance;
} else {
GEM_BUG_ON(instance);
*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
- *cs++ = CHUNK_SZ; /* dst offset */
+ *cs++ = dst_offset;
*cs++ = PAGE_SIZE;
- *cs++ = 0; /* src offset */
+ *cs++ = src_offset;
}
intel_ring_advance(rq, cs);
@@ -423,6 +535,7 @@ intel_context_migrate_copy(struct intel_context *ce,
GEM_BUG_ON(ce->ring->size < SZ_64K);
do {
+ u32 src_offset, dst_offset;
int len;
rq = i915_request_create(ce);
@@ -450,15 +563,28 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0,
- CHUNK_SZ);
+ src_offset = 0;
+ dst_offset = CHUNK_SZ;
+ if (HAS_64K_PAGES(ce->engine->i915)) {
+ GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
+
+ src_offset = 0;
+ dst_offset = 0;
+ if (src_is_lmem)
+ src_offset = CHUNK_SZ;
+ if (dst_is_lmem)
+ dst_offset = 2 * CHUNK_SZ;
+ }
+
+ len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
+ src_offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
}
err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
- CHUNK_SZ, len);
+ dst_offset, len);
if (err < 0)
goto out_rq;
if (err < len) {
@@ -470,7 +596,7 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- err = emit_copy(rq, len);
+ err = emit_copy(rq, dst_offset, src_offset, len);
/* Arbitration is re-enabled between requests. */
out_rq:
@@ -488,14 +614,15 @@ out_ce:
return err;
}
-static int emit_clear(struct i915_request *rq, int size, u32 value)
+static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value)
{
const int ver = GRAPHICS_VER(rq->engine->i915);
- u32 instance = rq->engine->instance;
u32 *cs;
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
+ offset += (u64)rq->engine->instance << 32;
+
cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -505,17 +632,17 @@ static int emit_clear(struct i915_request *rq, int size, u32 value)
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = 0; /* offset */
- *cs++ = instance;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
*cs++ = value;
*cs++ = MI_NOOP;
} else {
- GEM_BUG_ON(instance);
+ GEM_BUG_ON(upper_32_bits(offset));
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = 0;
+ *cs++ = lower_32_bits(offset);
*cs++ = value;
}
@@ -542,6 +669,7 @@ intel_context_migrate_clear(struct intel_context *ce,
GEM_BUG_ON(ce->ring->size < SZ_64K);
do {
+ u32 offset;
int len;
rq = i915_request_create(ce);
@@ -569,7 +697,11 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
- len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ);
+ offset = 0;
+ if (HAS_64K_PAGES(ce->engine->i915) && is_lmem)
+ offset = CHUNK_SZ;
+
+ len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
@@ -579,7 +711,7 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
- err = emit_clear(rq, len, value);
+ err = emit_clear(rq, offset, len, value);
/* Arbitration is re-enabled between requests. */
out_rq:
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 48e6e2f87700..d91e2beb7517 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -12,7 +12,7 @@
#include "gen6_ppgtt.h"
#include "gen8_ppgtt.h"
-struct i915_page_table *alloc_pt(struct i915_address_space *vm)
+struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz)
{
struct i915_page_table *pt;
@@ -20,12 +20,13 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
if (unlikely(!pt))
return ERR_PTR(-ENOMEM);
- pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ pt->base = vm->alloc_pt_dma(vm, sz);
if (IS_ERR(pt->base)) {
kfree(pt);
return ERR_PTR(-ENOMEM);
}
+ pt->is_compact = false;
atomic_set(&pt->used, 0);
return pt;
}
@@ -220,17 +221,25 @@ int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
u64 size)
{
unsigned long count;
- int shift, n;
+ int shift, n, pt_sz;
shift = vm->pd_shift;
if (!shift)
return 0;
+ pt_sz = stash->pt_sz;
+ if (!pt_sz)
+ pt_sz = I915_GTT_PAGE_SIZE_4K;
+ else
+ GEM_BUG_ON(!IS_DGFX(vm->i915));
+
+ GEM_BUG_ON(!is_power_of_2(pt_sz));
+
count = pd_count(size, shift);
while (count--) {
struct i915_page_table *pt;
- pt = alloc_pt(vm);
+ pt = alloc_pt(vm, pt_sz);
if (IS_ERR(pt)) {
i915_vm_free_pt_stash(vm, stash);
return PTR_ERR(pt);
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index a04e0cf4a94b..6cecfdae07ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -14,60 +14,6 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
-static int init_fake_lmem_bar(struct intel_memory_region *mem)
-{
- struct drm_i915_private *i915 = mem->i915;
- struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
- unsigned long n;
- int ret;
-
- /* We want to 1:1 map the mappable aperture to our reserved region */
-
- mem->fake_mappable.start = 0;
- mem->fake_mappable.size = resource_size(&mem->region);
- mem->fake_mappable.color = I915_COLOR_UNEVICTABLE;
-
- ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable);
- if (ret)
- return ret;
-
- mem->remap_addr = dma_map_resource(i915->drm.dev,
- mem->region.start,
- mem->fake_mappable.size,
- DMA_BIDIRECTIONAL,
- DMA_ATTR_FORCE_CONTIGUOUS);
- if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) {
- drm_mm_remove_node(&mem->fake_mappable);
- return -EINVAL;
- }
-
- for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) {
- ggtt->vm.insert_page(&ggtt->vm,
- mem->remap_addr + (n << PAGE_SHIFT),
- n << PAGE_SHIFT,
- I915_CACHE_NONE, 0);
- }
-
- mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr,
- mem->fake_mappable.size);
-
- return 0;
-}
-
-static void release_fake_lmem_bar(struct intel_memory_region *mem)
-{
- if (!drm_mm_node_allocated(&mem->fake_mappable))
- return;
-
- drm_mm_remove_node(&mem->fake_mappable);
-
- dma_unmap_resource(mem->i915->drm.dev,
- mem->remap_addr,
- mem->fake_mappable.size,
- DMA_BIDIRECTIONAL,
- DMA_ATTR_FORCE_CONTIGUOUS);
-}
-
static int
region_lmem_release(struct intel_memory_region *mem)
{
@@ -75,7 +21,6 @@ region_lmem_release(struct intel_memory_region *mem)
ret = intel_region_ttm_fini(mem);
io_mapping_fini(&mem->iomap);
- release_fake_lmem_bar(mem);
return ret;
}
@@ -85,17 +30,10 @@ region_lmem_init(struct intel_memory_region *mem)
{
int ret;
- if (mem->i915->params.fake_lmem_start) {
- ret = init_fake_lmem_bar(mem);
- GEM_BUG_ON(ret);
- }
-
if (!io_mapping_init_wc(&mem->iomap,
mem->io_start,
- resource_size(&mem->region))) {
- ret = -EIO;
- goto out_no_io;
- }
+ mem->io_size))
+ return -EIO;
ret = intel_region_ttm_init(mem);
if (ret)
@@ -105,8 +43,6 @@ region_lmem_init(struct intel_memory_region *mem)
out_no_buddy:
io_mapping_fini(&mem->iomap);
-out_no_io:
- release_fake_lmem_bar(mem);
return ret;
}
@@ -117,50 +53,6 @@ static const struct intel_memory_region_ops intel_region_lmem_ops = {
.init_object = __i915_gem_ttm_object_init,
};
-struct intel_memory_region *
-intel_gt_setup_fake_lmem(struct intel_gt *gt)
-{
- struct drm_i915_private *i915 = gt->i915;
- struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- struct intel_memory_region *mem;
- resource_size_t mappable_end;
- resource_size_t io_start;
- resource_size_t start;
-
- if (!HAS_LMEM(i915))
- return ERR_PTR(-ENODEV);
-
- if (!i915->params.fake_lmem_start)
- return ERR_PTR(-ENODEV);
-
- GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt));
-
- /* Your mappable aperture belongs to me now! */
- mappable_end = pci_resource_len(pdev, 2);
- io_start = pci_resource_start(pdev, 2);
- start = i915->params.fake_lmem_start;
-
- mem = intel_memory_region_create(i915,
- start,
- mappable_end,
- PAGE_SIZE,
- io_start,
- INTEL_MEMORY_LOCAL,
- 0,
- &intel_region_lmem_ops);
- if (!IS_ERR(mem)) {
- drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n",
- &mem->region);
- drm_info(&i915->drm,
- "Intel graphics fake LMEM IO start: %llx\n",
- (u64)mem->io_start);
- drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n",
- (u64)resource_size(&mem->region));
- }
-
- return mem;
-}
-
static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
u64 *start, u32 *size)
{
@@ -207,8 +99,29 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
if (!IS_DGFX(i915))
return ERR_PTR(-ENODEV);
- /* Stolen starts from GSMBASE on DG1 */
- lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
+ if (HAS_FLAT_CCS(i915)) {
+ u64 tile_stolen, flat_ccs_base;
+
+ lmem_size = pci_resource_len(pdev, 2);
+ flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR);
+ flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K;
+
+ if (GEM_WARN_ON(lmem_size < flat_ccs_base))
+ return ERR_PTR(-ENODEV);
+
+ tile_stolen = lmem_size - flat_ccs_base;
+
+ /* If the FLAT_CCS_BASE_ADDR register is not populated, flag an error */
+ if (tile_stolen == lmem_size)
+ drm_err(&i915->drm,
+ "CCS_BASE_ADDR register did not have expected value\n");
+
+ lmem_size -= tile_stolen;
+ } else {
+ /* Stolen starts from GSMBASE without CCS */
+ lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE);
+ }
+
io_start = pci_resource_start(pdev, 2);
if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
@@ -221,6 +134,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size,
min_page_size,
io_start,
+ lmem_size,
INTEL_MEMORY_LOCAL,
0,
&intel_region_lmem_ops);
@@ -234,6 +148,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region);
drm_dbg(&i915->drm, "Local memory IO start: %pa\n",
&mem->io_start);
+ drm_info(&i915->drm, "Local memory IO size: %pa\n",
+ &mem->io_size);
drm_info(&i915->drm, "Local memory available: %pa\n",
&lmem_size);
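As a back-of-the-envelope illustration of the FLAT_CCS split in setup_lmem() above (a standalone sketch with hypothetical values, not part of the patch): only the memory below the CCS base remains usable local memory, and everything above it is accounted as tile stolen.

#include <stdint.h>
#include <stdio.h>

#define SZ_64K 0x10000ULL

int main(void)
{
	/* Hypothetical values: a 16 GiB BAR2 and a CCS base register that,
	 * once shifted, encodes a 15 GiB offset in 64K units. */
	uint64_t lmem_size = 16ULL << 30;                 /* pci_resource_len(pdev, 2) */
	uint64_t ccs_base_64k = (15ULL << 30) / SZ_64K;   /* already-shifted register value */
	uint64_t flat_ccs_base = ccs_base_64k * SZ_64K;
	uint64_t tile_stolen = lmem_size - flat_ccs_base;

	lmem_size -= tile_stolen;  /* usable local memory ends at the CCS base */
	printf("usable lmem: %llu MiB, tile stolen: %llu MiB\n",
	       (unsigned long long)(lmem_size >> 20),
	       (unsigned long long)(tile_stolen >> 20));
	return 0;
}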
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.h b/drivers/gpu/drm/i915/gt/intel_region_lmem.h
index 062d0542ae34..1438576b527a 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.h
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.h
@@ -10,7 +10,4 @@ struct intel_gt;
struct intel_memory_region *intel_gt_setup_lmem(struct intel_gt *gt);
-struct intel_memory_region *
-intel_gt_setup_fake_lmem(struct intel_gt *gt);
-
#endif /* !__INTEL_REGION_LMEM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index f691b7614437..a9c13b1a3018 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1491,7 +1491,7 @@ void intel_rps_enable(struct intel_rps *rps)
if (has_busy_stats(rps))
intel_rps_set_timer(rps);
- else if (GRAPHICS_VER(i915) >= 6)
+ else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
intel_rps_set_interrupts(rps);
else
/* Ironlake currently uses intel_ips.ko */ {}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 4ae8356dcc2a..614915ffbd37 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -34,7 +34,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
return total;
}
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+static u32
+_intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
+ const u8 *subslice_mask, u8 slice)
{
int i, offset = slice * sseu->ss_stride;
u32 mask = 0;
@@ -42,12 +44,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
GEM_BUG_ON(slice >= sseu->max_slices);
for (i = 0; i < sseu->ss_stride; i++)
- mask |= (u32)sseu->subslice_mask[offset + i] <<
- i * BITS_PER_BYTE;
+ mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
return mask;
}
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+ return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+}
+
+u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
+{
+ return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
+}
+
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
u8 *subslice_mask, u32 ss_mask)
{
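To make the byte-folding in _intel_sseu_get_subslices() above concrete, here is a standalone sketch (hypothetical stride and mask values, not driver code) that assembles a slice's subslice bytes into one u32 the same way:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

static uint32_t get_subslices(const uint8_t *subslice_mask,
			      unsigned int ss_stride, unsigned int slice)
{
	unsigned int i, offset = slice * ss_stride;
	uint32_t mask = 0;

	/* Each stored byte lands at its byte position within the u32. */
	for (i = 0; i < ss_stride; i++)
		mask |= (uint32_t)subslice_mask[offset + i] << i * BITS_PER_BYTE;

	return mask;
}

int main(void)
{
	/* Hypothetical ss_stride of 2: two mask bytes per slice. */
	const uint8_t mask[] = { 0xff, 0x03,    /* slice 0: subslices 0-9 */
				 0x0f, 0x00 };  /* slice 1: subslices 0-3 */

	printf("slice0=0x%x slice1=0x%x\n",
	       get_subslices(mask, 2, 0), get_subslices(mask, 2, 1));
	return 0;
}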
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 60882a74741e..8a79cd8eaab4 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
+u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
u8 *subslice_mask, u32 ss_mask);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 26038066e90b..c014b40d2e9f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -684,11 +684,10 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
- /* Wa_22012532006:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+ /* Wa_14014947963:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
+ IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
+ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
}
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -1344,12 +1343,6 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_1409757795:xehpsdv */
wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
- /* Wa_18011725039:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
- wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
- wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
- }
-
/* Wa_16011155590:xehpsdv */
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1386,19 +1379,12 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GAMTLBVEBOX0_CLKGATE_DIS);
}
- /* Wa_14012362059:xehpsdv */
- wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
-
/* Wa_16012725990:xehpsdv */
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
/* Wa_14011060649:xehpsdv */
wa_14011060649(gt, wal);
-
- /* Wa_14014368820:xehpsdv */
- wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
- GLOBAL_INVALIDATION_MODE);
}
static void
@@ -1440,10 +1426,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010680813:dg2_g10 */
- wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
-
/* Wa_14010948348:dg2_g10 */
wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
@@ -1490,16 +1472,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
}
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012362059:dg2 */
- wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
- }
-
- /* Wa_1509235366:dg2 */
- wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
- GLOBAL_INVALIDATION_MODE);
-
/* Wa_14014830051:dg2 */
wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
@@ -1508,14 +1480,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
* recommended tuning settings documented in the bspec's
* performance guide section.
*/
- wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
-
- /* Wa_18018781329:dg2 */
- wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
}
static void
@@ -1943,6 +1908,11 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
RING_FORCE_TO_NONPRIV_RANGE_4);
break;
+ case COMPUTE_CLASS:
+ /* Wa_16011157294:dg2_g10 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
+ whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
+ break;
default:
break;
}
@@ -2049,6 +2019,23 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
if (IS_DG2(i915)) {
/* Wa_14015227452:dg2 */
wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+ /* Wa_1509235366:dg2 */
+ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+ GLOBAL_INVALIDATION_MODE);
+
+ /*
+ * The following are not actually "workarounds" but rather
+ * recommended tuning settings documented in the bspec's
+ * performance guide section.
+ */
+ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+
+ /* Wa_18018781329:dg2 */
+ wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
}
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2149,6 +2136,24 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+ /* Wa_22012532006:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+ wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+ DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ /* Wa_14010680813:dg2_g10 */
+ wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
+ EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ /* Wa_14012362059:dg2 */
+ wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+ }
+
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
/*
@@ -2568,6 +2573,53 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
}
+/*
+ * The workarounds in this function apply to shared registers in
+ * the general render reset domain that aren't tied to a
+ * specific engine. Since all render+compute engines get reset
+ * together, and the contents of these registers are lost during
+ * the shared render domain reset, we'll define such workarounds
+ * here and then add them to just a single RCS or CCS engine's
+ * workaround list (whichever engine has the XXXX flag).
+ */
+static void
+general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct drm_i915_private *i915 = engine->i915;
+
+ if (IS_XEHPSDV(i915)) {
+ /* Wa_1409954639 */
+ wa_masked_en(wal,
+ GEN8_ROW_CHICKEN,
+ SYSTOLIC_DOP_CLOCK_GATING_DIS);
+
+ /* Wa_1607196519 */
+ wa_masked_en(wal,
+ GEN9_ROW_CHICKEN4,
+ GEN12_DISABLE_GRF_CLEAR);
+
+ /* Wa_14010670810:xehpsdv */
+ wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+
+ /* Wa_14010449647:xehpsdv */
+ wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* Wa_18011725039:xehpsdv */
+ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
+ wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+ wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+ }
+
+ /* Wa_14012362059:xehpsdv */
+ wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+
+ /* Wa_14014368820:xehpsdv */
+ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+ GLOBAL_INVALIDATION_MODE);
+ }
+}
+
static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
@@ -2576,6 +2628,14 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
engine_fake_wa_init(engine, wal);
+ /*
+ * These are common workarounds that just need to be applied
+ * to a single RCS/CCS engine's workaround list since
+ * they're reset as part of the general render domain reset.
+ */
+ if (engine->class == RENDER_CLASS)
+ general_render_compute_wa_init(engine, wal);
+
if (engine->class == RENDER_CLASS)
rcs_engine_wa_init(engine, wal);
else
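Many of the workaround entries above go through wa_masked_en()/wa_masked_dis(), which target the hardware's "masked" registers: the upper 16 bits of the written value act as a write enable for the corresponding lower 16 bits, so untouched bits keep their current value. A simplified standalone model of that encoding (an assumption-level sketch, not the driver's implementation):

#include <stdint.h>
#include <stdio.h>

/* Bits 31:16 select which of bits 15:0 take effect on the write. */
static uint32_t masked_en(uint16_t bits)
{
	return ((uint32_t)bits << 16) | bits;   /* enable the bits and set them */
}

static uint32_t masked_dis(uint16_t bits)
{
	return (uint32_t)bits << 16;            /* enable the bits, clear them */
}

int main(void)
{
	printf("enable  0x0004 -> write 0x%08x\n", masked_en(0x0004));
	printf("disable 0x0004 -> write 0x%08x\n", masked_dis(0x0004));
	return 0;
}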
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 0683b27a3890..402f085f3a02 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -3,6 +3,7 @@
* Copyright © 2020 Intel Corporation
*/
+#include <linux/iosys-map.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>
@@ -123,6 +124,37 @@ static int __shmem_rw(struct file *file, loff_t off,
return 0;
}
+int shmem_read_to_iosys_map(struct file *file, loff_t off,
+ struct iosys_map *map, size_t map_off, size_t len)
+{
+ unsigned long pfn;
+
+ for (pfn = off >> PAGE_SHIFT; len; pfn++) {
+ unsigned int this =
+ min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
+ struct page *page;
+ void *vaddr;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ vaddr = kmap(page);
+ iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off),
+ this);
+ mark_page_accessed(page);
+ kunmap(page);
+ put_page(page);
+
+ len -= this;
+ map_off += this;
+ off = 0;
+ }
+
+ return 0;
+}
+
int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
{
return __shmem_rw(file, off, dst, len, false);
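The chunking in shmem_read_to_iosys_map() above copies at most one backing page per iteration, with the first chunk trimmed to whatever remains of the page containing the start offset. A standalone sketch of just that arithmetic (hypothetical offset and length):

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define offset_in_page(p) ((unsigned long)(p) & (PAGE_SIZE - 1))

int main(void)
{
	unsigned long off = 1000;   /* hypothetical start offset into the file */
	size_t len = 10000;         /* hypothetical number of bytes to copy */
	size_t map_off = 0;

	while (len) {
		size_t this = PAGE_SIZE - offset_in_page(off);

		if (this > len)
			this = len;
		printf("copy %zu bytes to map offset %zu\n", this, map_off);

		len -= this;
		map_off += this;
		off = 0;   /* later chunks start at the beginning of each page */
	}
	return 0;
}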
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h
index c1669170c351..b2b04d88c6e5 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.h
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
+struct iosys_map;
struct drm_i915_gem_object;
struct file;
@@ -17,6 +18,8 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
void *shmem_pin_map(struct file *file);
void shmem_unpin_map(struct file *file, void *ptr);
+int shmem_read_to_iosys_map(struct file *file, loff_t off,
+ struct iosys_map *map, size_t map_off, size_t len);
int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
int shmem_write(struct file *file, loff_t off, void *src, size_t len);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 9d779de16613..bf7079480d47 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -6,8 +6,9 @@
#ifndef _INTEL_GUC_H_
#define _INTEL_GUC_H_
-#include <linux/xarray.h>
#include <linux/delay.h>
+#include <linux/iosys-map.h>
+#include <linux/xarray.h>
#include "intel_uncore.h"
#include "intel_guc_fw.h"
@@ -146,8 +147,8 @@ struct intel_guc {
/** @ads_vma: object allocated to hold the GuC ADS */
struct i915_vma *ads_vma;
- /** @ads_blob: contents of the GuC ADS */
- struct __guc_ads_blob *ads_blob;
+ /** @ads_map: contents of the GuC ADS */
+ struct iosys_map ads_map;
/** @ads_regset_size: size of the save/restore regsets in the ADS */
u32 ads_regset_size;
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 7e41175618f5..92cb88248391 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -57,9 +57,22 @@ struct __guc_ads_blob {
struct guc_gt_system_info system_info;
struct guc_engine_usage engine_usage;
/* From here on, location is dynamic! Refer to above diagram. */
- struct guc_mmio_reg regset[0];
+ struct guc_mmio_reg regset[];
} __packed;
+#define ads_blob_read(guc_, field_) \
+ iosys_map_rd_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, field_)
+
+#define ads_blob_write(guc_, field_, val_) \
+ iosys_map_wr_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, \
+ field_, val_)
+
+#define info_map_write(map_, field_, val_) \
+ iosys_map_wr_field(map_, 0, struct guc_gt_system_info, field_, val_)
+
+#define info_map_read(map_, field_) \
+ iosys_map_rd_field(map_, 0, struct guc_gt_system_info, field_)
+
static u32 guc_ads_regset_size(struct intel_guc *guc)
{
GEM_BUG_ON(!guc->ads_regset_size);
@@ -123,33 +136,37 @@ static u32 guc_ads_blob_size(struct intel_guc *guc)
guc_ads_private_data_size(guc);
}
-static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies)
+static void guc_policies_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
+ u32 global_flags = 0;
- policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
- policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI;
+ ads_blob_write(guc, policies.dpc_promote_time,
+ GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
+ ads_blob_write(guc, policies.max_num_work_items,
+ GLOBAL_POLICY_MAX_NUM_WI);
- policies->global_flags = 0;
if (i915->params.reset < 2)
- policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
+ global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
- policies->is_valid = 1;
+ ads_blob_write(guc, policies.global_flags, global_flags);
+ ads_blob_write(guc, policies.is_valid, 1);
}
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
struct drm_printer *dp)
{
- struct __guc_ads_blob *blob = guc->ads_blob;
-
- if (unlikely(!blob))
+ if (unlikely(iosys_map_is_null(&guc->ads_map)))
return;
drm_printf(dp, "Global scheduling policies:\n");
- drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time);
- drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items);
- drm_printf(dp, " Flags = %u\n", blob->policies.global_flags);
+ drm_printf(dp, " DPC promote time = %u\n",
+ ads_blob_read(guc, policies.dpc_promote_time));
+ drm_printf(dp, " Max num work items = %u\n",
+ ads_blob_read(guc, policies.max_num_work_items));
+ drm_printf(dp, " Flags = %u\n",
+ ads_blob_read(guc, policies.global_flags));
}
static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
@@ -164,29 +181,30 @@ static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
int intel_guc_global_policies_update(struct intel_guc *guc)
{
- struct __guc_ads_blob *blob = guc->ads_blob;
struct intel_gt *gt = guc_to_gt(guc);
+ u32 scheduler_policies;
intel_wakeref_t wakeref;
int ret;
- if (!blob)
+ if (iosys_map_is_null(&guc->ads_map))
return -EOPNOTSUPP;
- GEM_BUG_ON(!blob->ads.scheduler_policies);
+ scheduler_policies = ads_blob_read(guc, ads.scheduler_policies);
+ GEM_BUG_ON(!scheduler_policies);
- guc_policies_init(guc, &blob->policies);
+ guc_policies_init(guc);
if (!intel_guc_is_ready(guc))
return 0;
with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
- ret = guc_action_policies_update(guc, blob->ads.scheduler_policies);
+ ret = guc_action_policies_update(guc, scheduler_policies);
return ret;
}
static void guc_mapping_table_init(struct intel_gt *gt,
- struct guc_gt_system_info *system_info)
+ struct iosys_map *info_map)
{
unsigned int i, j;
struct intel_engine_cs *engine;
@@ -195,14 +213,14 @@ static void guc_mapping_table_init(struct intel_gt *gt,
/* Table must be set to invalid values for entries not used */
for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
- system_info->mapping_table[i][j] =
- GUC_MAX_INSTANCES_PER_CLASS;
+ info_map_write(info_map, mapping_table[i][j],
+ GUC_MAX_INSTANCES_PER_CLASS);
for_each_engine(engine, gt, id) {
u8 guc_class = engine_class_to_guc_class(engine->class);
- system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
- engine->instance;
+ info_map_write(info_map, mapping_table[guc_class][ilog2(engine->logical_mask)],
+ engine->instance);
}
}
@@ -317,6 +335,10 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+ if (engine->class == RENDER_CLASS &&
+ CCS_MASK(engine->gt))
+ ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true);
+
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
@@ -365,64 +387,69 @@ fail_regset_init:
return ret;
}
-static void guc_mmio_reg_state_init(struct intel_guc *guc,
- struct __guc_ads_blob *blob)
+static void guc_mmio_reg_state_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
- struct guc_mmio_reg *ads_registers;
enum intel_engine_id id;
u32 addr_ggtt, offset;
offset = guc_ads_regset_offset(guc);
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
- ads_registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
- memcpy(ads_registers, guc->ads_regset, guc->ads_regset_size);
+ iosys_map_memcpy_to(&guc->ads_map, offset, guc->ads_regset,
+ guc->ads_regset_size);
for_each_engine(engine, gt, id) {
u32 count = guc->ads_regset_count[id];
- struct guc_mmio_reg_set *ads_reg_set;
u8 guc_class;
/* Class index is checked in class converter */
GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
guc_class = engine_class_to_guc_class(engine->class);
- ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance];
if (!count) {
- ads_reg_set->address = 0;
- ads_reg_set->count = 0;
+ ads_blob_write(guc,
+ ads.reg_state_list[guc_class][engine->instance].address,
+ 0);
+ ads_blob_write(guc,
+ ads.reg_state_list[guc_class][engine->instance].count,
+ 0);
continue;
}
- ads_reg_set->address = addr_ggtt;
- ads_reg_set->count = count;
+ ads_blob_write(guc,
+ ads.reg_state_list[guc_class][engine->instance].address,
+ addr_ggtt);
+ ads_blob_write(guc,
+ ads.reg_state_list[guc_class][engine->instance].count,
+ count);
addr_ggtt += count * sizeof(struct guc_mmio_reg);
}
}
static void fill_engine_enable_masks(struct intel_gt *gt,
- struct guc_gt_system_info *info)
+ struct iosys_map *info_map)
{
- info->engine_enabled_masks[GUC_RENDER_CLASS] = 1;
- info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1;
- info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt);
- info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt);
+ info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1);
+ info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
+ info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1);
+ info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
+ info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
}
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
-static int guc_prep_golden_context(struct intel_guc *guc,
- struct __guc_ads_blob *blob)
+static int guc_prep_golden_context(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
u32 addr_ggtt, offset;
u32 total_size = 0, alloc_size, real_size;
u8 engine_class, guc_class;
- struct guc_gt_system_info *info, local_info;
+ struct guc_gt_system_info local_info;
+ struct iosys_map info_map;
/*
* Reserve the memory for the golden contexts and point GuC at it but
@@ -436,14 +463,15 @@ static int guc_prep_golden_context(struct intel_guc *guc,
* GuC will also validate that the LRC base + size fall within the
* allowed GGTT range.
*/
- if (blob) {
+ if (!iosys_map_is_null(&guc->ads_map)) {
offset = guc_ads_golden_ctxt_offset(guc);
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
- info = &blob->system_info;
+ info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
+ offsetof(struct __guc_ads_blob, system_info));
} else {
memset(&local_info, 0, sizeof(local_info));
- info = &local_info;
- fill_engine_enable_masks(gt, info);
+ iosys_map_set_vaddr(&info_map, &local_info);
+ fill_engine_enable_masks(gt, &info_map);
}
for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
@@ -452,14 +480,14 @@ static int guc_prep_golden_context(struct intel_guc *guc,
guc_class = engine_class_to_guc_class(engine_class);
- if (!info->engine_enabled_masks[guc_class])
+ if (!info_map_read(&info_map, engine_enabled_masks[guc_class]))
continue;
real_size = intel_engine_context_size(gt, engine_class);
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
- if (!blob)
+ if (iosys_map_is_null(&guc->ads_map))
continue;
/*
@@ -473,15 +501,18 @@ static int guc_prep_golden_context(struct intel_guc *guc,
* what comes before it in the context image (which is identical
* on all engines).
*/
- blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE;
- blob->ads.golden_context_lrca[guc_class] = addr_ggtt;
+ ads_blob_write(guc, ads.eng_state_size[guc_class],
+ real_size - LRC_SKIP_SIZE);
+ ads_blob_write(guc, ads.golden_context_lrca[guc_class],
+ addr_ggtt);
+
addr_ggtt += alloc_size;
}
- if (!blob)
- return total_size;
+ /* Make sure current size matches what we calculated previously */
+ if (guc->ads_golden_ctxt_size)
+ GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
- GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
return total_size;
}
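For the golden context sizing in guc_prep_golden_context() above, GuC only needs the engine state that follows the per-process HWSP and the execlist context header, hence the real_size - LRC_SKIP_SIZE term. A standalone arithmetic sketch (LRC_PPHWSP_SZ and the context image size here are assumed values for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096u
#define LRC_PPHWSP_SZ 1u                /* assumed: one per-process HWSP page */
#define LR_HW_CONTEXT_SIZE (80u * 4u)   /* from the patch: 80 dwords */
#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)

int main(void)
{
	unsigned int real_size = 20u * PAGE_SIZE;   /* hypothetical context image size */

	printf("eng_state_size = %u bytes (skipping the first %u bytes)\n",
	       real_size - LRC_SKIP_SIZE, LRC_SKIP_SIZE);
	return 0;
}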
@@ -505,18 +536,16 @@ static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_
static void guc_init_golden_context(struct intel_guc *guc)
{
- struct __guc_ads_blob *blob = guc->ads_blob;
struct intel_engine_cs *engine;
struct intel_gt *gt = guc_to_gt(guc);
- u32 addr_ggtt, offset;
- u32 total_size = 0, alloc_size, real_size;
+ unsigned long offset;
+ u32 addr_ggtt, total_size = 0, alloc_size, real_size;
u8 engine_class, guc_class;
- u8 *ptr;
if (!intel_uc_uses_guc_submission(&gt->uc))
return;
- GEM_BUG_ON(!blob);
+ GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
/*
* Go back and fill in the golden context data now that it is
@@ -524,15 +553,13 @@ static void guc_init_golden_context(struct intel_guc *guc)
*/
offset = guc_ads_golden_ctxt_offset(guc);
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
- ptr = ((u8 *)blob) + offset;
for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
if (engine_class == OTHER_CLASS)
continue;
guc_class = engine_class_to_guc_class(engine_class);
-
- if (!blob->system_info.engine_enabled_masks[guc_class])
+ if (!ads_blob_read(guc, system_info.engine_enabled_masks[guc_class]))
continue;
real_size = intel_engine_context_size(gt, engine_class);
@@ -543,24 +570,26 @@ static void guc_init_golden_context(struct intel_guc *guc)
if (!engine) {
drm_err(&gt->i915->drm, "No engine state recorded for class %d!\n",
engine_class);
- blob->ads.eng_state_size[guc_class] = 0;
- blob->ads.golden_context_lrca[guc_class] = 0;
+ ads_blob_write(guc, ads.eng_state_size[guc_class], 0);
+ ads_blob_write(guc, ads.golden_context_lrca[guc_class], 0);
continue;
}
- GEM_BUG_ON(blob->ads.eng_state_size[guc_class] !=
+ GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) !=
real_size - LRC_SKIP_SIZE);
- GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt);
+ GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt);
+
addr_ggtt += alloc_size;
- shmem_read(engine->default_state, 0, ptr, real_size);
- ptr += alloc_size;
+ shmem_read_to_iosys_map(engine->default_state, 0, &guc->ads_map,
+ offset, real_size);
+ offset += alloc_size;
}
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
}
-static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob)
+static void guc_capture_list_init(struct intel_guc *guc)
{
int i, j;
u32 addr_ggtt, offset;
@@ -572,11 +601,11 @@ static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *
for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
- blob->ads.capture_instance[i][j] = addr_ggtt;
- blob->ads.capture_class[i][j] = addr_ggtt;
+ ads_blob_write(guc, ads.capture_instance[i][j], addr_ggtt);
+ ads_blob_write(guc, ads.capture_class[i][j], addr_ggtt);
}
- blob->ads.capture_global[i] = addr_ggtt;
+ ads_blob_write(guc, ads.capture_global[i], addr_ggtt);
}
}
@@ -584,47 +613,52 @@ static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
- struct __guc_ads_blob *blob = guc->ads_blob;
+ struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
+ offsetof(struct __guc_ads_blob, system_info));
u32 base;
/* GuC scheduling policies */
- guc_policies_init(guc, &blob->policies);
+ guc_policies_init(guc);
/* System info */
- fill_engine_enable_masks(gt, &blob->system_info);
+ fill_engine_enable_masks(gt, &info_map);
- blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] =
- hweight8(gt->info.sseu.slice_mask);
- blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] =
- gt->info.vdbox_sfc_access;
+ ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED],
+ hweight8(gt->info.sseu.slice_mask));
+ ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK],
+ gt->info.vdbox_sfc_access);
if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) {
u32 distdbreg = intel_uncore_read(gt->uncore,
GEN12_DIST_DBS_POPULATED);
- blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] =
- ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
- GEN12_DOORBELLS_PER_SQIDI) + 1;
+ ads_blob_write(guc,
+ system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
+ ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
+ & GEN12_DOORBELLS_PER_SQIDI) + 1);
}
/* Golden contexts for re-initialising after a watchdog reset */
- guc_prep_golden_context(guc, blob);
+ guc_prep_golden_context(guc);
- guc_mapping_table_init(guc_to_gt(guc), &blob->system_info);
+ guc_mapping_table_init(guc_to_gt(guc), &info_map);
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
/* Capture list for hang debug */
- guc_capture_list_init(guc, blob);
+ guc_capture_list_init(guc);
/* ADS */
- blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
- blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
+ ads_blob_write(guc, ads.scheduler_policies, base +
+ offsetof(struct __guc_ads_blob, policies));
+ ads_blob_write(guc, ads.gt_system_info, base +
+ offsetof(struct __guc_ads_blob, system_info));
/* MMIO save/restore list */
- guc_mmio_reg_state_init(guc, blob);
+ guc_mmio_reg_state_init(guc);
/* Private Data */
- blob->ads.private_data = base + guc_ads_private_data_offset(guc);
+ ads_blob_write(guc, ads.private_data, base +
+ guc_ads_private_data_offset(guc));
i915_gem_object_flush_map(guc->ads_vma->obj);
}
@@ -638,6 +672,7 @@ static void __guc_ads_init(struct intel_guc *guc)
*/
int intel_guc_ads_create(struct intel_guc *guc)
{
+ void *ads_blob;
u32 size;
int ret;
@@ -653,7 +688,7 @@ int intel_guc_ads_create(struct intel_guc *guc)
guc->ads_regset_size = ret;
/* Likewise the golden contexts: */
- ret = guc_prep_golden_context(guc, NULL);
+ ret = guc_prep_golden_context(guc);
if (ret < 0)
return ret;
guc->ads_golden_ctxt_size = ret;
@@ -662,10 +697,15 @@ int intel_guc_ads_create(struct intel_guc *guc)
size = guc_ads_blob_size(guc);
ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
- (void **)&guc->ads_blob);
+ &ads_blob);
if (ret)
return ret;
+ if (i915_gem_object_is_lmem(guc->ads_vma->obj))
+ iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)ads_blob);
+ else
+ iosys_map_set_vaddr(&guc->ads_map, ads_blob);
+
__guc_ads_init(guc);
return 0;
@@ -686,7 +726,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc)
void intel_guc_ads_destroy(struct intel_guc *guc)
{
i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
- guc->ads_blob = NULL;
+ iosys_map_clear(&guc->ads_map);
kfree(guc->ads_regset);
}
@@ -698,8 +738,8 @@ static void guc_ads_private_data_reset(struct intel_guc *guc)
if (!size)
return;
- memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0,
- size);
+ iosys_map_memset(&guc->ads_map, guc_ads_private_data_offset(guc),
+ 0, size);
}
/**
@@ -722,18 +762,16 @@ void intel_guc_ads_reset(struct intel_guc *guc)
u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
{
- struct __guc_ads_blob *blob = guc->ads_blob;
- u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
- u32 offset = base + ptr_offset(blob, engine_usage);
-
- return offset;
+ return intel_guc_ggtt_offset(guc, guc->ads_vma) +
+ offsetof(struct __guc_ads_blob, engine_usage);
}
-struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
+struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
{
struct intel_guc *guc = &engine->gt->uc.guc;
- struct __guc_ads_blob *blob = guc->ads_blob;
u8 guc_class = engine_class_to_guc_class(engine->class);
+ size_t offset = offsetof(struct __guc_ads_blob,
+ engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
- return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
+ return IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offset);
}
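The ads_blob_read()/ads_blob_write() helpers introduced above sit on top of the iosys-map field accessors so the same code path works whether the ADS lives in system memory or in I/O-mapped lmem. A deliberately simplified, memory-only stand-in for the read-field idea (not the real iosys-map implementation, which also handles __iomem mappings):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct blob {
	uint32_t is_valid;
	uint32_t dpc_promote_time;
};

/* Copy a struct field out of a mapping by its offset instead of
 * dereferencing a struct pointer into the mapping directly. */
#define map_rd_field(base_, type_, field_, out_)                          \
	memcpy(&(out_), (const char *)(base_) + offsetof(type_, field_),  \
	       sizeof(out_))

int main(void)
{
	struct blob backing = { .is_valid = 1, .dpc_promote_time = 500000 };
	const void *map = &backing;   /* hypothetical mapping of the blob */
	uint32_t val;

	map_rd_field(map, struct blob, dpc_promote_time, val);
	printf("dpc_promote_time = %u\n", val);
	return 0;
}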
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
index e74c110facff..1c64f4d6ea21 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
@@ -7,6 +7,7 @@
#define _INTEL_GUC_ADS_H_
#include <linux/types.h>
+#include <linux/iosys-map.h>
struct intel_guc;
struct drm_printer;
@@ -18,7 +19,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
void intel_guc_ads_reset(struct intel_guc *guc);
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
struct drm_printer *p);
-struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
+struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine);
u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 6a4612a852e2..4b300b6cc0f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -46,8 +46,8 @@
#define GUC_VIDEO_CLASS 1
#define GUC_VIDEOENHANCE_CLASS 2
#define GUC_BLITTER_CLASS 3
-#define GUC_RESERVED_CLASS 4
-#define GUC_LAST_ENGINE_CLASS GUC_RESERVED_CLASS
+#define GUC_COMPUTE_CLASS 4
+#define GUC_LAST_ENGINE_CLASS GUC_COMPUTE_CLASS
#define GUC_MAX_ENGINE_CLASSES 16
#define GUC_MAX_INSTANCES_PER_CLASS 32
@@ -156,23 +156,37 @@ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \
)
+/* the GuC arrays don't include OTHER_CLASS */
+static u8 engine_class_guc_class_map[] = {
+ [RENDER_CLASS] = GUC_RENDER_CLASS,
+ [COPY_ENGINE_CLASS] = GUC_BLITTER_CLASS,
+ [VIDEO_DECODE_CLASS] = GUC_VIDEO_CLASS,
+ [VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS,
+ [COMPUTE_CLASS] = GUC_COMPUTE_CLASS,
+};
+
+static u8 guc_class_engine_class_map[] = {
+ [GUC_RENDER_CLASS] = RENDER_CLASS,
+ [GUC_BLITTER_CLASS] = COPY_ENGINE_CLASS,
+ [GUC_VIDEO_CLASS] = VIDEO_DECODE_CLASS,
+ [GUC_VIDEOENHANCE_CLASS] = VIDEO_ENHANCEMENT_CLASS,
+ [GUC_COMPUTE_CLASS] = COMPUTE_CLASS,
+};
+
static inline u8 engine_class_to_guc_class(u8 class)
{
- BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS);
- BUILD_BUG_ON(GUC_BLITTER_CLASS != COPY_ENGINE_CLASS);
- BUILD_BUG_ON(GUC_VIDEO_CLASS != VIDEO_DECODE_CLASS);
- BUILD_BUG_ON(GUC_VIDEOENHANCE_CLASS != VIDEO_ENHANCEMENT_CLASS);
+ BUILD_BUG_ON(ARRAY_SIZE(engine_class_guc_class_map) != MAX_ENGINE_CLASS + 1);
GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS);
- return class;
+ return engine_class_guc_class_map[class];
}
static inline u8 guc_class_to_engine_class(u8 guc_class)
{
+ BUILD_BUG_ON(ARRAY_SIZE(guc_class_engine_class_map) != GUC_LAST_ENGINE_CLASS + 1);
GEM_BUG_ON(guc_class > GUC_LAST_ENGINE_CLASS);
- GEM_BUG_ON(guc_class == GUC_RESERVED_CLASS);
- return guc_class;
+ return guc_class_engine_class_map[guc_class];
}
/* Work item for submitting workloads into work queue of GuC. */
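Because OTHER_CLASS has no GuC counterpart, the lookup tables above mean the class conversion is no longer a straight identity mapping. A standalone sketch of the round trip (the numeric engine-class values here are assumptions for illustration, not taken from this patch, and GUC_RENDER_CLASS is assumed to be 0):

#include <stdint.h>
#include <stdio.h>

/* Assumed driver-side numbering: render 0, video 1, video-enhance 2,
 * copy 3, other 4, compute 5. */
enum { RENDER = 0, VDEC = 1, VENH = 2, COPY = 3, OTHER = 4, COMPUTE = 5 };
enum { GUC_RENDER = 0, GUC_VDEC = 1, GUC_VENH = 2, GUC_COPY = 3, GUC_COMPUTE = 4 };

static const uint8_t to_guc[] = {
	[RENDER] = GUC_RENDER, [VDEC] = GUC_VDEC, [VENH] = GUC_VENH,
	[COPY] = GUC_COPY, [COMPUTE] = GUC_COMPUTE,
};

static const uint8_t to_engine[] = {
	[GUC_RENDER] = RENDER, [GUC_VDEC] = VDEC, [GUC_VENH] = VENH,
	[GUC_COPY] = COPY, [GUC_COMPUTE] = COMPUTE,
};

int main(void)
{
	/* Compute (5) collapses onto GuC class 4; OTHER (4) is never converted. */
	printf("compute -> guc %u -> engine %u\n",
	       to_guc[COMPUTE], to_engine[to_guc[COMPUTE]]);
	return 0;
}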
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 766744602d75..9f032c65a488 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -12,6 +12,7 @@
#include "intel_mchbar_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
{
@@ -116,7 +117,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
{
u32 request[] = {
GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
- SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
+ SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
id,
};
@@ -580,10 +581,10 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
{
+ struct intel_rps *rps = &slpc_to_gt(slpc)->rps;
u32 rp_state_cap;
- rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore,
- GEN6_RP_STATE_CAP);
+ rp_state_cap = intel_rps_read_state_cap(rps);
slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) *
GT_FREQUENCY_MULTIPLIER;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b3a429a92c0d..1ce7e04aa837 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -174,11 +174,8 @@ static inline void init_sched_state(struct intel_context *ce)
__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
- /*
- * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
- * suspend.
- */
- return !(ce->guc_state.sched_state &=
+ /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
+ return !(ce->guc_state.sched_state &
~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
}
@@ -1139,6 +1136,9 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
+#define record_read(map_, field_) \
+ iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
+
/*
* GuC updates shared memory and KMD reads it. Since this is not synchronized,
* we run into a race where the value read is inconsistent. Sometimes the
@@ -1153,17 +1153,17 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
static void __get_engine_usage_record(struct intel_engine_cs *engine,
u32 *last_in, u32 *id, u32 *total)
{
- struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+ struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
int i = 0;
do {
- *last_in = READ_ONCE(rec->last_switch_in_stamp);
- *id = READ_ONCE(rec->current_context_index);
- *total = READ_ONCE(rec->total_runtime);
+ *last_in = record_read(&rec_map, last_switch_in_stamp);
+ *id = record_read(&rec_map, current_context_index);
+ *total = record_read(&rec_map, total_runtime);
- if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
- READ_ONCE(rec->current_context_index) == *id &&
- READ_ONCE(rec->total_runtime) == *total)
+ if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
+ record_read(&rec_map, current_context_index) == *id &&
+ record_read(&rec_map, total_runtime) == *total)
break;
} while (++i < 6);
}
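The loop just above keeps re-reading the three fields until a second read returns the same values (giving up after six tries), because the GuC can update the shared record at any time. A single-threaded standalone sketch of that consistency check (nothing races in the sketch, so the first snapshot is always accepted):

#include <stdint.h>
#include <stdio.h>

struct usage_rec {
	uint32_t last_switch_in_stamp;
	uint32_t current_context_index;
	uint32_t total_runtime;
};

/* Stand-in for the GuC-owned record; in reality the firmware updates it
 * concurrently with these reads. */
static struct usage_rec rec = { 100, 2, 5000 };

static void get_usage(uint32_t *last_in, uint32_t *id, uint32_t *total)
{
	int i = 0;

	do {
		*last_in = rec.last_switch_in_stamp;
		*id = rec.current_context_index;
		*total = rec.total_runtime;

		/* Accept the snapshot only if nothing changed underneath us. */
		if (rec.last_switch_in_stamp == *last_in &&
		    rec.current_context_index == *id &&
		    rec.total_runtime == *total)
			break;
	} while (++i < 6);
}

int main(void)
{
	uint32_t last_in, id, total;

	get_usage(&last_in, &id, &total);
	printf("last_in=%u id=%u total=%u\n", last_in, id, total);
	return 0;
}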
@@ -1818,24 +1818,11 @@ int intel_guc_submission_init(struct intel_guc *guc)
*/
GEM_BUG_ON(!guc->lrc_desc_pool);
- xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
-
- spin_lock_init(&guc->submission_state.lock);
- INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
- ida_init(&guc->submission_state.guc_ids);
- INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
- INIT_WORK(&guc->submission_state.destroyed_worker,
- destroyed_worker_func);
- INIT_WORK(&guc->submission_state.reset_fail_worker,
- reset_fail_worker_func);
-
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap)
return -ENOMEM;
- spin_lock_init(&guc->timestamp.lock);
- INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
guc->timestamp.shift = gpm_timestamp_shift(gt);
@@ -3608,6 +3595,9 @@ static int guc_resume(struct intel_engine_cs *engine)
setup_hwsp(engine);
start_engine(engine);
+ if (engine->class == RENDER_CLASS)
+ xehp_enable_ccs_engines(engine);
+
return 0;
}
@@ -3789,7 +3779,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
guc_default_irqs(engine);
guc_init_breadcrumbs(engine);
- if (engine->class == RENDER_CLASS)
+ if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
rcs_submission_override(engine);
lrc_init_wa_ctx(engine);
@@ -3831,6 +3821,20 @@ static bool __guc_submission_selected(struct intel_guc *guc)
void intel_guc_submission_init_early(struct intel_guc *guc)
{
+ xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
+
+ spin_lock_init(&guc->submission_state.lock);
+ INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
+ ida_init(&guc->submission_state.guc_ids);
+ INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
+ INIT_WORK(&guc->submission_state.destroyed_worker,
+ destroyed_worker_func);
+ INIT_WORK(&guc->submission_state.reset_fail_worker,
+ reset_fail_worker_func);
+
+ spin_lock_init(&guc->timestamp.lock);
+ INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
+
guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
@@ -4022,10 +4026,9 @@ static void guc_handle_context_reset(struct intel_guc *guc,
capture_error_state(guc, ce);
guc_context_replay(ce);
} else {
- drm_err(&guc_to_gt(guc)->i915->drm,
- "Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d",
- ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce),
- context_blocked(ce));
+ drm_info(&guc_to_gt(guc)->i915->drm,
+ "Ignoring context reset notification of banned context 0x%04X on %s",
+ ce->guc_id.id, ce->engine->name);
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
index 1297ddbf7f88..812220a43df8 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -154,6 +154,10 @@ static int intel_guc_multi_lrc_basic(void *arg)
int ret;
for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+ /* We don't support breadcrumb handshake on these classes */
+ if (class == COMPUTE_CLASS || class == RENDER_CLASS)
+ continue;
+
ret = __intel_guc_multi_lrc_basic(gt, class);
if (ret)
return ret;