From f2ed8ef31366467930c98494535a044e33a1b3d4 Mon Sep 17 00:00:00 2001
From: Ramalingam C
Date: Tue, 15 Feb 2022 11:01:15 +0530
Subject: drm/i915/perf: Skip the i915_perf_init for dg2

i915_perf is not enabled for dg2 yet, hence skip the feature
initialization.

Signed-off-by: Ramalingam C
cc: Umesh Nerlige Ramappa
Reviewed-by: Matthew Auld
Link: https://patchwork.freedesktop.org/patch/msgid/20220215053115.6023-1-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/i915_perf.c | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2e8028e826b5..cb381fba15e3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4332,6 +4332,10 @@ void i915_perf_init(struct drm_i915_private *i915)
 
 	/* XXX const struct i915_perf_ops! */
 
+	/* i915_perf is not enabled for DG2 yet */
+	if (IS_DG2(i915))
+		return;
+
 	perf->oa_formats = oa_formats;
 	if (IS_HASWELL(i915)) {
 		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
-- cgit

From 5224f79096170bf7b92cc8fe42a12f44b91e5f62 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva"
Date: Mon, 14 Feb 2022 19:11:44 -0600
Subject: treewide: Replace zero-length arrays with flexible-array members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare
having a dynamically sized set of trailing elements in a structure.
Kernel code should always use “flexible array members”[1] for these
cases. The older style of one-element or zero-length arrays should
no longer be used[2].

This code was transformed with the help of Coccinelle:
(next-20220214$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script.cocci --include-headers --dir . > output.patch)

@@
identifier S, member, array;
type T1, T2;
@@

struct S {
  ...
  T1 member;
  T2 array[
- 0
  ];
};

UAPI and wireless changes were intentionally excluded from this patch
and will be sent out separately.

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays

Link: https://github.com/KSPP/linux/issues/78
Reviewed-by: Kees Cook
Signed-off-by: Gustavo A. R. Silva
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 1a1edae67e4e..3acee0060e23 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -51,7 +51,7 @@ struct __guc_ads_blob {
 	struct guc_gt_system_info system_info;
 	struct guc_engine_usage engine_usage;
 	/* From here on, location is dynamic! Refer to above diagram. */
-	struct guc_mmio_reg regset[0];
+	struct guc_mmio_reg regset[];
 } __packed;
 
 static u32 guc_ads_regset_size(struct intel_guc *guc)
-- cgit

From edf176f48d87fa25ca93f09362707cf5314bf7ee Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam
Date: Tue, 15 Feb 2022 15:55:31 -0800
Subject: drm/i915/dg2: Move misplaced 'ctx' & 'gt' wa's to engine wa list

Registers that belong to the shared render/compute reset domain need to
be placed on an engine workaround list to ensure that they are properly
re-applied whenever any RCS or CCS engine is reset, even if the
registers do not belong to a specific engine's MMIO range.
We have a number of workarounds today that are incorrectly implemented
on the 'gt' workaround list and need to be moved accordingly. We also
have one workaround (Wa_22012532006) that is incorrectly implemented on
the context workaround list, even though the register it is adjusting
is not part of the RCS engine's context image; it must also be moved.

We'll have some workaround refactoring coming in the near future that
deals with registers in the reset domain in a clearer way. But in the
meantime, we should just move these workarounds to rcs_engine_wa_init()
to place them on the RCS engine's workaround list. All production DG2
platforms will have an RCS engine (it's never fused off) so these
registers will be properly restored after a domain reset triggered via
an RCS engine _or_ a CCS engine.

Cc: Matt Roper
Signed-off-by: Srinivasan Shanmugam
Reviewed-by: Matt Roper
Signed-off-by: Matt Roper
Link: https://patchwork.freedesktop.org/patch/msgid/20220215235531.2236399-1-matthew.d.roper@intel.com
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 62 ++++++++++++++++-------------
 1 file changed, 35 insertions(+), 27 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 77ac294acc9d..1912a868bc0c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -681,12 +681,6 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 	/* Wa_16013271637:dg2 */
 	wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
 		     MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
-
-	/* Wa_22012532006:dg2 */
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
-	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
-		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
 }
 
 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -1438,10 +1432,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
-		/* Wa_14010680813:dg2_g10 */
-		wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
-			    EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
-
 		/* Wa_14010948348:dg2_g10 */
 		wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
 
@@ -1488,16 +1478,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 		wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
-	    IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
-		/* Wa_14012362059:dg2 */
-		wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
-	}
-
-	/* Wa_1509235366:dg2 */
-	wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
-		    GLOBAL_INVALIDATION_MODE);
-
 	/* Wa_14014830051:dg2 */
 	wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
 
@@ -1506,14 +1486,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 * recommended tuning settings documented in the bspec's
 	 * performance guide section.
 	 */
-	wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
 	wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
-
-	/* Wa_18018781329:dg2 */
-	wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
-	wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-	wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-	wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 }
 
 static void
@@ -2047,6 +2020,23 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	if (IS_DG2(i915)) {
 		/* Wa_14015227452:dg2 */
 		wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+		/* Wa_1509235366:dg2 */
+		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+			    GLOBAL_INVALIDATION_MODE);
+
+		/*
+		 * The following are not actually "workarounds" but rather
+		 * recommended tuning settings documented in the bspec's
+		 * performance guide section.
+		 */
+		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+
+		/* Wa_18018781329:dg2 */
+		wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2147,6 +2137,24 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
 		wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
 
+	/* Wa_22012532006:dg2 */
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+		/* Wa_14010680813:dg2_g10 */
+		wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
+			    EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
+	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+		/* Wa_14012362059:dg2 */
+		wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+	}
+
 	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
 	    IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
 		/*
-- cgit

From 8c26491f58538ffc647b813070ba493e35a48984 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi
Date: Thu, 17 Feb 2022 09:56:34 -0800
Subject: drm/i915: Kill the fake lmem support

This was useful for early development of lmem, but it's not used
anymore, so remove it.

v2: Remove unneeded fields from struct intel_memory_region

Cc: Chris Wilson
Cc: Matthew Auld
Signed-off-by: Lucas De Marchi
Reviewed-by: Matthew Auld
Link: https://patchwork.freedesktop.org/patch/msgid/20220217175634.4128754-1-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/Kconfig.unstable       |   8 --
 drivers/gpu/drm/i915/gt/intel_gt.c          |   2 -
 drivers/gpu/drm/i915/gt/intel_region_lmem.c | 112 +---------------------------
 drivers/gpu/drm/i915/gt/intel_region_lmem.h |   3 -
 drivers/gpu/drm/i915/i915_driver.c          |  15 ----
 drivers/gpu/drm/i915/i915_params.c          |   5 --
 drivers/gpu/drm/i915/i915_params.h          |   1 -
 drivers/gpu/drm/i915/intel_memory_region.h  |   5 --
 8 files changed, 2 insertions(+), 149 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/Kconfig.unstable b/drivers/gpu/drm/i915/Kconfig.unstable
index 0c2276155c2b..cf151a297ed7 100644
--- a/drivers/gpu/drm/i915/Kconfig.unstable
+++ b/drivers/gpu/drm/i915/Kconfig.unstable
@@ -19,11 +19,3 @@ config DRM_I915_UNSTABLE
 	  Recommended for driver developers _only_.
 
 	  If in the slightest bit of doubt, say "N".
-
-config DRM_I915_UNSTABLE_FAKE_LMEM
-	bool "Enable the experimental fake lmem"
-	depends on DRM_I915_UNSTABLE
-	default n
-	help
-	  Convert some system memory into a fake local memory region for
-	  testing.

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 3a355b50082d..db7fe97bd15f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -62,8 +62,6 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
 	int err;
 
 	mem = intel_gt_setup_lmem(gt);
-	if (mem == ERR_PTR(-ENODEV))
-		mem = intel_gt_setup_fake_lmem(gt);
 	if (IS_ERR(mem)) {
 		err = PTR_ERR(mem);
 		if (err == -ENODEV)

diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 21215a080088..2709675336de 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -12,60 +12,6 @@
 #include "gem/i915_gem_ttm.h"
 #include "gt/intel_gt.h"
 
-static int init_fake_lmem_bar(struct intel_memory_region *mem)
-{
-	struct drm_i915_private *i915 = mem->i915;
-	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
-	unsigned long n;
-	int ret;
-
-	/* We want to 1:1 map the mappable aperture to our reserved region */
-
-	mem->fake_mappable.start = 0;
-	mem->fake_mappable.size = resource_size(&mem->region);
-	mem->fake_mappable.color = I915_COLOR_UNEVICTABLE;
-
-	ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable);
-	if (ret)
-		return ret;
-
-	mem->remap_addr = dma_map_resource(i915->drm.dev,
-					   mem->region.start,
-					   mem->fake_mappable.size,
-					   DMA_BIDIRECTIONAL,
-					   DMA_ATTR_FORCE_CONTIGUOUS);
-	if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) {
-		drm_mm_remove_node(&mem->fake_mappable);
-		return -EINVAL;
-	}
-
-	for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) {
-		ggtt->vm.insert_page(&ggtt->vm,
-				     mem->remap_addr + (n << PAGE_SHIFT),
-				     n << PAGE_SHIFT,
-				     I915_CACHE_NONE, 0);
-	}
-
-	mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr,
-						      mem->fake_mappable.size);
-
-	return 0;
-}
-
-static void release_fake_lmem_bar(struct intel_memory_region *mem)
-{
-	if (!drm_mm_node_allocated(&mem->fake_mappable))
-		return;
-
-	drm_mm_remove_node(&mem->fake_mappable);
-
-	dma_unmap_resource(mem->i915->drm.dev,
-			   mem->remap_addr,
-			   mem->fake_mappable.size,
-			   DMA_BIDIRECTIONAL,
-			   DMA_ATTR_FORCE_CONTIGUOUS);
-}
-
 static int
 region_lmem_release(struct intel_memory_region *mem)
 {
@@ -73,7 +19,6 @@ region_lmem_release(struct intel_memory_region *mem)
 
 	ret = intel_region_ttm_fini(mem);
 	io_mapping_fini(&mem->iomap);
-	release_fake_lmem_bar(mem);
 
 	return ret;
 }
@@ -83,17 +28,10 @@ region_lmem_init(struct intel_memory_region *mem)
 {
 	int ret;
 
-	if (mem->i915->params.fake_lmem_start) {
-		ret = init_fake_lmem_bar(mem);
-		GEM_BUG_ON(ret);
-	}
-
 	if (!io_mapping_init_wc(&mem->iomap,
 				mem->io_start,
-				resource_size(&mem->region))) {
-		ret = -EIO;
-		goto out_no_io;
-	}
+				resource_size(&mem->region)))
+		return -EIO;
 
 	ret = intel_region_ttm_init(mem);
 	if (ret)
@@ -103,8 +41,6 @@ region_lmem_init(struct intel_memory_region *mem)
 
 out_no_buddy:
 	io_mapping_fini(&mem->iomap);
-out_no_io:
-	release_fake_lmem_bar(mem);
 
 	return ret;
 }
@@ -115,50 +51,6 @@ static const struct intel_memory_region_ops intel_region_lmem_ops = {
 	.init_object = __i915_gem_ttm_object_init,
 };
 
-struct intel_memory_region *
-intel_gt_setup_fake_lmem(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-	struct intel_memory_region *mem;
-	resource_size_t mappable_end;
-	resource_size_t io_start;
-	resource_size_t start;
-
-	if (!HAS_LMEM(i915))
-		return ERR_PTR(-ENODEV);
-
-	if (!i915->params.fake_lmem_start)
-		return ERR_PTR(-ENODEV);
-
-	GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt));
-
-	/* Your mappable aperture belongs to me now! */
-	mappable_end = pci_resource_len(pdev, 2);
-	io_start = pci_resource_start(pdev, 2);
-	start = i915->params.fake_lmem_start;
-
-	mem = intel_memory_region_create(i915,
-					 start,
-					 mappable_end,
-					 PAGE_SIZE,
-					 io_start,
-					 INTEL_MEMORY_LOCAL,
-					 0,
-					 &intel_region_lmem_ops);
-	if (!IS_ERR(mem)) {
-		drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n",
-			 &mem->region);
-		drm_info(&i915->drm,
-			 "Intel graphics fake LMEM IO start: %llx\n",
-			 (u64)mem->io_start);
-		drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n",
-			 (u64)resource_size(&mem->region));
-	}
-
-	return mem;
-}
-
 static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
 				     u64 *start, u32 *size)
 {

diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.h b/drivers/gpu/drm/i915/gt/intel_region_lmem.h
index 062d0542ae34..1438576b527a 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.h
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.h
@@ -10,7 +10,4 @@ struct intel_gt;
 
 struct intel_memory_region *intel_gt_setup_lmem(struct intel_gt *gt);
 
-struct intel_memory_region *
-intel_gt_setup_fake_lmem(struct intel_gt *gt);
-
 #endif /* !__INTEL_REGION_LMEM_H */

diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 5f2343389b5e..5dd81224ac81 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -829,21 +829,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5)
 		i915->drm.driver_features &= ~DRIVER_ATOMIC;
 
-	/*
-	 * Check if we support fake LMEM -- for now we only unleash this for
-	 * the live selftests(test-and-exit).
-	 */
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-	if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) {
-		if (GRAPHICS_VER(i915) >= 9 && i915_selftest.live < 0 &&
-		    i915->params.fake_lmem_start) {
-			mkwrite_device_info(i915)->memory_regions =
-				REGION_SMEM | REGION_LMEM | REGION_STOLEN_SMEM;
-			GEM_BUG_ON(!HAS_LMEM(i915));
-		}
-	}
-#endif
-
 	ret = pci_enable_device(pdev);
 	if (ret)
 		goto out_fini;

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 525ae832aa9a..eea355c2fc28 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -195,11 +195,6 @@ i915_param_named(enable_gvt, bool, 0400,
 	"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
 #endif
 
-#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)
-i915_param_named_unsafe(fake_lmem_start, ulong, 0400,
-	"Fake LMEM start offset (default: 0)");
-#endif
-
 #if CONFIG_DRM_I915_REQUEST_TIMEOUT
 i915_param_named_unsafe(request_timeout_ms, uint, 0600,
 			"Default request/fence/batch buffer expiration timeout.");

diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c9d53ff910a0..c779a6f85c7e 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -72,7 +72,6 @@ struct drm_printer;
 	param(int, fastboot, -1, 0600) \
 	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
-	param(unsigned long, fake_lmem_start, 0, IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) ? 0400 : 0) \
 	param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \

diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 5625c9c38993..06464b8865fc 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -67,9 +67,6 @@ struct intel_memory_region {
 	struct io_mapping iomap;
 	struct resource region;
 
-	/* For fake LMEM */
-	struct drm_mm_node fake_mappable;
-
 	resource_size_t io_start;
 	resource_size_t min_page_size;
 	resource_size_t total;
@@ -81,8 +78,6 @@ struct intel_memory_region {
 	char name[16];
 	bool private; /* not for userspace */
 
-	dma_addr_t remap_addr;
-
 	struct {
 		struct mutex lock; /* Protects access to objects */
 		struct list_head list;
-- cgit

From 132aaaf01788d5603a1358cd53a95e5367fd612b Mon Sep 17 00:00:00 2001
From: Ramalingam C
Date: Sat, 19 Feb 2022 00:17:42 +0530
Subject: drm/i915: add needs_compact_pt flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new platform flag, needs_compact_pt, to mark the requirement of
compact pt layout support for the ppGTT when using 64K GTT pages.

With this flag, has_64k_pages will only indicate the requirement of 64K
GTT page sizes or larger for device local memory access.

v6:
* minor doc formatting

Suggested-by: Matthew Auld
Signed-off-by: Ramalingam C
Signed-off-by: Robert Beckett
Reviewed-by: Thomas Hellström
Signed-off-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-6-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h          | 11 ++++++++---
 drivers/gpu/drm/i915/i915_pci.c          |  2 ++
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 3 files changed, 11 insertions(+), 3 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9b48dd9ccacd..12173d86f752 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1535,12 +1535,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
- * device local memory access. Also this flag implies that we require or
- * at least support the compact PT layout for the ppGTT when using the 64K
- * GTT pages.
+ * device local memory access.
  */
 #define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
 
+/*
+ * Set this flag when the platform doesn't allow both 64K pages and 4K pages
+ * in the same PT. This flag means we need to support the compact PT layout
+ * for the ppGTT when using the 64K GTT pages.
+ */
+#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt)
+
 #define HAS_IPC(dev_priv)		 (INTEL_INFO(dev_priv)->display.has_ipc)
 
 #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 55333ccd1e6d..5252b8030af9 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1028,6 +1028,7 @@ static const struct intel_device_info xehpsdv_info = {
 	PLATFORM(INTEL_XEHPSDV),
 	.display = { },
 	.has_64k_pages = 1,
+	.needs_compact_pt = 1,
 	.platform_engine_mask =
 		BIT(RCS0) | BIT(BCS0) |
 		BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
@@ -1047,6 +1048,7 @@ static const struct intel_device_info dg2_info = {
 	PLATFORM(INTEL_DG2),
 	.has_guc_deprivilege = 1,
 	.has_64k_pages = 1,
+	.needs_compact_pt = 1,
 	.platform_engine_mask =
 		BIT(RCS0) | BIT(BCS0) |
 		BIT(VECS0) | BIT(VECS1) |

diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 09ce63c8f78f..47ffd9a1bfa6 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -128,6 +128,7 @@ enum intel_ppgtt_type {
 	/* Keep has_* in alphabetical order */ \
 	func(has_64bit_reloc); \
 	func(has_64k_pages); \
+	func(needs_compact_pt); \
 	func(gpu_reset_clobbers_display); \
 	func(has_reset_engine); \
 	func(has_global_mocs); \
-- cgit

From 87bd701ee268a13babdfddea53894a416209123b Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Sat, 19 Feb 2022 00:17:43 +0530
Subject: drm/i915: enforce min GTT alignment for discrete cards
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For local-memory objects we need to align the GTT addresses to 64K,
both for the ppgtt and ggtt.

We need to support vm->min_alignment > 4K, depending on the vm itself
and the type of object we are inserting. With this in mind, update the
GTT selftests to take this into account.

For compact-pt we further align and pad lmem object GTT addresses to
2MB to ensure PDEs contain consistent page sizes as required by the HW.
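To see the arithmetic this padding relies on, here is a minimal, self-contained
C sketch -- not i915 code; the SZ_* values and the power-of-two round_up()
mirror the kernel definitions -- showing how an object's reservation gets
promoted under a 64K or 2M minimum alignment:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* power-of-two round_up(), as in the kernel macro */
	static uint64_t round_up_pow2(uint64_t x, uint64_t a)
	{
		return (x + a - 1) & ~(a - 1);
	}

	int main(void)
	{
		const uint64_t SZ_4K = 4096, SZ_64K = 65536, SZ_2M = 2097152;
		uint64_t obj_size = 3 * SZ_64K + SZ_4K; /* hypothetical lmem object */

		/* 64K-pages platform: GTT reservation padded to 64K */
		assert(round_up_pow2(obj_size, SZ_64K) == 4 * SZ_64K);

		/* compact-pt platform: reservation padded to a whole 2M PDE */
		assert(round_up_pow2(obj_size, SZ_2M) == SZ_2M);

		printf("64K pad: %llu, 2M pad: %llu\n",
		       (unsigned long long)round_up_pow2(obj_size, SZ_64K),
		       (unsigned long long)round_up_pow2(obj_size, SZ_2M));
		return 0;
	}

Padding the node out to the 2M alignment is what keeps a single PDE from ever
having to mix 64K and 4K entries.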
v3:
* use needs_compact_pt flag to discriminate between 64K and 64K with
  compact-pt
* add i915_vm_obj_min_alignment
* use i915_vm_obj_min_alignment to round up vma reservation if
  compact-pt instead of hard coding

v5:
* fix i915_vm_obj_min_alignment for internal objects which have no
  memory region

v6:
* tiled_blits_create correctly pick largest required alignment

v8:
* i915_vm_min_alignment protect against array overflow for mock region

Signed-off-by: Matthew Auld
Signed-off-by: Ramalingam C
Signed-off-by: Robert Beckett
Reviewed-by: Thomas Hellström
Cc: Joonas Lahtinen
Cc: Rodrigo Vivi
Signed-off-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-7-ramalingam.c@intel.com
---
 .../drm/i915/gem/selftests/i915_gem_client_blt.c | 21 +++--
 drivers/gpu/drm/i915/gt/intel_gtt.c              | 12 +++
 drivers/gpu/drm/i915/gt/intel_gtt.h              | 22 +++++
 drivers/gpu/drm/i915/i915_vma.c                  |  9 ++
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c    | 96 ++++++++++++++--------
 5 files changed, 119 insertions(+), 41 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index c8ff8bf0986d..3675d12a7d9a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -39,6 +39,7 @@ struct tiled_blits {
 	struct blit_buffer scratch;
 	struct i915_vma *batch;
 	u64 hole;
+	u64 align;
 	u32 width;
 	u32 height;
 };
@@ -410,14 +411,19 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
 		goto err_free;
 	}
 
-	hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+	t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL);
+	t->align = max(t->align,
+		       i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
+
+	hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
 	hole_size *= 2; /* room to maneuver */
-	hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+	hole_size += 2 * t->align; /* padding on either side */
 
 	mutex_lock(&t->ce->vm->mutex);
 	memset(&hole, 0, sizeof(hole));
 	err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
-					  hole_size, 0, I915_COLOR_UNEVICTABLE,
+					  hole_size, t->align,
+					  I915_COLOR_UNEVICTABLE,
 					  0, U64_MAX,
 					  DRM_MM_INSERT_BEST);
 	if (!err)
@@ -428,7 +434,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
 		goto err_put;
 	}
 
-	t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+	t->hole = hole.start + t->align;
 	pr_info("Using hole at %llx\n", t->hole);
 
 	err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
@@ -455,7 +461,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
 static int tiled_blits_prepare(struct tiled_blits *t,
 			       struct rnd_state *prng)
 {
-	u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+	u64 offset = round_up(t->width * t->height * 4, t->align);
 	u32 *map;
 	int err;
 	int i;
@@ -486,8 +492,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,
 
 static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 {
-	u64 offset =
-		round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+	u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
 	int err;
 
 	/* We want to check position invariant tiling across GTT eviction */
@@ -500,7 +505,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 
 	/* Reposition so that we overlap the old addresses, and slightly off */
 	err = tiled_blit(t,
-			 &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+			 &t->buffers[2], t->hole + t->align,
 			 &t->buffers[1], t->hole + 3 * offset / 2);
 	if (err)
 		return err;

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 46be4197b93f..df23ebdfc994 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -223,6 +223,18 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	GEM_BUG_ON(!vm->total);
 	drm_mm_init(&vm->mm, 0, vm->total);
+
+	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
+		 ARRAY_SIZE(vm->min_alignment));
+
+	if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915)) {
+		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+	} else if (HAS_64K_PAGES(vm->i915)) {
+		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+	}
+
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
 	INIT_LIST_HEAD(&vm->bound_list);

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8073438b67c8..6cd518a3277c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -29,6 +29,8 @@
 #include "i915_selftest.h"
 #include "i915_vma_resource.h"
 #include "i915_vma_types.h"
+#include "i915_params.h"
+#include "intel_memory_region.h"
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
@@ -223,6 +225,7 @@ struct i915_address_space {
 	struct device *dma;
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
+	u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1];
 
 	unsigned int bind_async_flags;
 
@@ -384,6 +387,25 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm)
 	return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
 }
 
+static inline u64 i915_vm_min_alignment(struct i915_address_space *vm,
+					enum intel_memory_type type)
+{
+	/* avoid INTEL_MEMORY_MOCK overflow */
+	if ((int)type >= ARRAY_SIZE(vm->min_alignment))
+		type = INTEL_MEMORY_SYSTEM;
+
+	return vm->min_alignment[type];
+}
+
+static inline u64 i915_vm_obj_min_alignment(struct i915_address_space *vm,
+					    struct drm_i915_gem_object *obj)
+{
+	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+	enum intel_memory_type type = mr ? mr->type : INTEL_MEMORY_SYSTEM;
+
+	return i915_vm_min_alignment(vm, type);
+}
+
 static inline bool
 i915_vm_has_cache_coloring(struct i915_address_space *vm)
 {

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 30307e34d2dc..52f43a465440 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -756,6 +756,14 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
+	alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
+	/*
+	 * for compact-pt we round up the reservation to prevent
+	 * any smaller pages being used within the same PDE
+	 */
+	if (NEEDS_COMPACT_PT(vma->vm->i915))
+		size = round_up(size, alignment);
+
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
@@ -768,6 +776,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	}
 
 	color = 0;
+
 	if (i915_vm_has_cache_coloring(vma->vm))
 		color = vma->obj->cache_level;

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index fba1c8be1649..b80788a2b7f9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -238,6 +238,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
 			 u64 hole_start, u64 hole_end,
 			 unsigned long end_time)
 {
+	const unsigned int min_alignment =
+		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
 	I915_RND_STATE(seed_prng);
 	struct i915_vma_resource *mock_vma_res;
 	unsigned int size;
@@ -251,9 +253,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		I915_RND_SUBSTATE(prng, seed_prng);
 		struct drm_i915_gem_object *obj;
 		unsigned int *order, count, n;
-		u64 hole_size;
+		u64 hole_size, aligned_size;
 
-		hole_size = (hole_end - hole_start) >> size;
+		aligned_size = max_t(u32, ilog2(min_alignment), size);
+		hole_size = (hole_end - hole_start) >> aligned_size;
 		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
 			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
 		count = hole_size >> 1;
@@ -274,8 +277,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		}
 		GEM_BUG_ON(!order);
 
-		GEM_BUG_ON(count * BIT_ULL(size) > vm->total);
-		GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end);
+		GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total);
+		GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end);
 
 		/* Ignore allocation failures (i.e. don't report them as
 		 * a test failure) as we are purposefully allocating very
@@ -298,10 +301,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		}
 
 		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
 			intel_wakeref_t wakeref;
 
-			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
+			GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total);
 
 			if (igt_timeout(end_time,
 					"%s timed out before %d/%d\n",
@@ -344,7 +347,7 @@ alloc_vm_end:
 			}
 
 			mock_vma_res->bi.pages = obj->mm.pages;
-			mock_vma_res->node_size = BIT_ULL(size);
+			mock_vma_res->node_size = BIT_ULL(aligned_size);
 			mock_vma_res->start = addr;
 
 			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
@@ -355,7 +358,7 @@ alloc_vm_end:
 
 		i915_random_reorder(order, count, &prng);
 		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
 			intel_wakeref_t wakeref;
 
 			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
@@ -399,8 +402,10 @@ static int fill_hole(struct i915_address_space *vm,
 {
 	const u64 hole_size = hole_end - hole_start;
 	struct drm_i915_gem_object *obj;
+	const unsigned int min_alignment =
+		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
 	const unsigned long max_pages =
-		min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT);
+		min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment));
 	const unsigned long max_step = max(int_sqrt(max_pages), 2UL);
 	unsigned long npages, prime, flags;
 	struct i915_vma *vma;
@@ -441,14 +446,17 @@ static int fill_hole(struct i915_address_space *vm,
 
 		offset = p->offset;
 		list_for_each_entry(obj, &objects, st_link) {
+			u64 aligned_size = round_up(obj->base.size,
+						    min_alignment);
+
 			vma = i915_vma_instance(obj, vm, NULL);
 			if (IS_ERR(vma))
 				continue;
 
 			if (p->step < 0) {
-				if (offset < hole_start + obj->base.size)
+				if (offset < hole_start + aligned_size)
 					break;
-				offset -= obj->base.size;
+				offset -= aligned_size;
 			}
 
 			err = i915_vma_pin(vma, 0, 0, offset | flags);
@@ -470,22 +478,25 @@ static int fill_hole(struct i915_address_space *vm,
 			i915_vma_unpin(vma);
 
 			if (p->step > 0) {
-				if (offset + obj->base.size > hole_end)
+				if (offset + aligned_size > hole_end)
 					break;
-				offset += obj->base.size;
+				offset += aligned_size;
 			}
 		}
 
 		offset = p->offset;
 		list_for_each_entry(obj, &objects, st_link) {
+			u64 aligned_size = round_up(obj->base.size,
+						    min_alignment);
+
 			vma = i915_vma_instance(obj, vm, NULL);
 			if (IS_ERR(vma))
 				continue;
 
 			if (p->step < 0) {
-				if (offset < hole_start + obj->base.size)
+				if (offset < hole_start + aligned_size)
 					break;
-				offset -= obj->base.size;
+				offset -= aligned_size;
 			}
 
 			if (!drm_mm_node_allocated(&vma->node) ||
@@ -506,22 +517,25 @@ static int fill_hole(struct i915_address_space *vm,
 			}
 
 			if (p->step > 0) {
-				if (offset + obj->base.size > hole_end)
+				if (offset + aligned_size > hole_end)
 					break;
-				offset += obj->base.size;
+				offset += aligned_size;
 			}
 		}
 
 		offset = p->offset;
 		list_for_each_entry_reverse(obj, &objects, st_link) {
+			u64 aligned_size = round_up(obj->base.size,
+						    min_alignment);
+
 			vma = i915_vma_instance(obj, vm, NULL);
 			if (IS_ERR(vma))
 				continue;
 
 			if (p->step < 0) {
-				if (offset < hole_start + obj->base.size)
+				if (offset < hole_start + aligned_size)
 					break;
-				offset -= obj->base.size;
+				offset -= aligned_size;
 			}
 
 			err = i915_vma_pin(vma, 0, 0, offset | flags);
@@ -543,22 +557,25 @@ static int fill_hole(struct i915_address_space *vm,
 			i915_vma_unpin(vma);
 
 			if (p->step > 0) {
-				if (offset + obj->base.size > hole_end)
+				if (offset + aligned_size > hole_end)
 					break;
-				offset += obj->base.size;
+				offset += aligned_size;
 			}
 		}
 
 		offset = p->offset;
 		list_for_each_entry_reverse(obj, &objects, st_link) {
+			u64 aligned_size = round_up(obj->base.size,
+						    min_alignment);
+
 			vma = i915_vma_instance(obj, vm, NULL);
 			if (IS_ERR(vma))
 				continue;
 
 			if (p->step < 0) {
-				if (offset < hole_start + obj->base.size)
+				if (offset < hole_start + aligned_size)
 					break;
-				offset -= obj->base.size;
+				offset -= aligned_size;
 			}
 
 			if (!drm_mm_node_allocated(&vma->node) ||
@@ -579,9 +596,9 @@ static int fill_hole(struct i915_address_space *vm,
 			}
 
 			if (p->step > 0) {
-				if (offset + obj->base.size > hole_end)
+				if (offset + aligned_size > hole_end)
 					break;
-				offset += obj->base.size;
+				offset += aligned_size;
 			}
 		}
 	}
@@ -611,6 +628,7 @@ static int walk_hole(struct i915_address_space *vm,
 	const u64 hole_size = hole_end - hole_start;
 	const unsigned long max_pages =
 		min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT);
+	unsigned long min_alignment;
 	unsigned long flags;
 	u64 size;
 
@@ -620,6 +638,8 @@ static int walk_hole(struct i915_address_space *vm,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	for_each_prime_number_from(size, 1, max_pages) {
 		struct drm_i915_gem_object *obj;
 		struct i915_vma *vma;
@@ -638,7 +658,7 @@ static int walk_hole(struct i915_address_space *vm,
 
 		for (addr = hole_start;
 		     addr + obj->base.size < hole_end;
-		     addr += obj->base.size) {
+		     addr += round_up(obj->base.size, min_alignment)) {
 			err = i915_vma_pin(vma, 0, 0, addr | flags);
 			if (err) {
 				pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n",
@@ -690,6 +710,7 @@ static int pot_hole(struct i915_address_space *vm,
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	unsigned int min_alignment;
 	unsigned long flags;
 	unsigned int pot;
 	int err = 0;
@@ -698,6 +719,8 @@ static int pot_hole(struct i915_address_space *vm,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
@@ -710,13 +733,13 @@ static int pot_hole(struct i915_address_space *vm,
 
 	/* Insert a pair of pages across every pot boundary within the hole */
 	for (pot = fls64(hole_end - 1) - 1;
-	     pot > ilog2(2 * I915_GTT_PAGE_SIZE);
+	     pot > ilog2(2 * min_alignment);
 	     pot--) {
 		u64 step = BIT_ULL(pot);
 		u64 addr;
 
-		for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE;
-		     addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE;
+		for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
+		     addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
 		     addr += step) {
 			err = i915_vma_pin(vma, 0, 0, addr | flags);
 			if (err) {
@@ -761,6 +784,7 @@ static int drunk_hole(struct i915_address_space *vm,
 		      unsigned long end_time)
 {
 	I915_RND_STATE(prng);
+	unsigned int min_alignment;
 	unsigned int size;
 	unsigned long flags;
 
@@ -768,15 +792,18 @@ static int drunk_hole(struct i915_address_space *vm,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	/* Keep creating larger objects until one cannot fit into the hole */
 	for (size = 12; (hole_end - hole_start) >> size; size++) {
 		struct drm_i915_gem_object *obj;
 		unsigned int *order, count, n;
 		struct i915_vma *vma;
-		u64 hole_size;
+		u64 hole_size, aligned_size;
 		int err = -ENODEV;
 
-		hole_size = (hole_end - hole_start) >> size;
+		aligned_size = max_t(u32, ilog2(min_alignment), size);
+		hole_size = (hole_end - hole_start) >> aligned_size;
 		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
 			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
 		count = hole_size >> 1;
@@ -816,7 +843,7 @@ static int drunk_hole(struct i915_address_space *vm,
 		GEM_BUG_ON(vma->size != BIT_ULL(size));
 
 		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
 
 			err = i915_vma_pin(vma, 0, 0, addr | flags);
 			if (err) {
@@ -868,11 +895,14 @@ static int __shrink_hole(struct i915_address_space *vm,
 {
 	struct drm_i915_gem_object *obj;
 	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	unsigned int min_alignment;
 	unsigned int order = 12;
 	LIST_HEAD(objects);
 	int err = 0;
 	u64 addr;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	/* Keep creating larger objects until one cannot fit into the hole */
 	for (addr = hole_start; addr < hole_end; ) {
 		struct i915_vma *vma;
@@ -913,7 +943,7 @@ static int __shrink_hole(struct i915_address_space *vm,
 		}
 
 		i915_vma_unpin(vma);
-		addr += size;
+		addr += round_up(size, min_alignment);
 
 		/*
 		 * Since we are injecting allocation faults at random intervals,
-- cgit

From 5189e3126eb136a2cffacc708f08ca4fe86ebcf4 Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Sat, 19 Feb 2022 00:17:44 +0530
Subject: drm/i915: support 64K GTT pages for discrete cards
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Discrete cards optimise 64K GTT pages for local-memory, since everything
should be allocated at 64K granularity. We say goodbye to sparse
entries, and instead get a compact 256B page-table for 64K pages, which
should be more cache friendly. 4K pages for local-memory are no longer
supported by the HW.
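The index arithmetic behind the compact layout is easy to sanity-check in
isolation. A standalone C sketch (not driver code; it assumes the gen8-style
split of 9 index bits per level above a 12-bit page shift, mirroring the
__gen8_pte_index()/16 trick used in the diff below):

	#include <assert.h>
	#include <stdint.h>

	/* 9 bits of index per level above the 4K page offset (gen8-style) */
	static unsigned int pte_index(uint64_t addr, unsigned int lvl)
	{
		return (addr >> (12 + 9 * lvl)) & 0x1ff;
	}

	int main(void)
	{
		uint64_t addr;

		/*
		 * With 64K pages the low 4K-granule index advances in steps
		 * of 16, so dividing it by 16 yields a dense 0..31 index into
		 * the compact 256B page-table (32 entries * 8 bytes).
		 */
		for (addr = 0; addr < 1ull << 21; addr += 1ull << 16)
			assert(pte_index(addr, 0) % 16 == 0 &&
			       pte_index(addr, 0) / 16 == ((addr >> 16) & 0x1f));

		/* both layouts span exactly one 2M PDE: 32 * 64K == 512 * 4K */
		assert(32ull << 16 == 512ull << 12);
		return 0;
	}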
v4: don't return uninitialized err in igt_ppgtt_compact
Reported-by: kernel test robot

Signed-off-by: Matthew Auld
Signed-off-by: Stuart Summers
Signed-off-by: Ramalingam C
Signed-off-by: Robert Beckett
Reviewed-by: Thomas Hellström
Cc: Joonas Lahtinen
Cc: Rodrigo Vivi
Signed-off-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-8-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/gem/selftests/huge_pages.c |  60 +++++++++++++
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c            | 108 +++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h             |   3 +
 drivers/gpu/drm/i915/gt/intel_ppgtt.c           |   1 +
 4 files changed, 169 insertions(+), 3 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index f36191ebf964..a7d9bdb85d70 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1478,6 +1478,65 @@ out:
 	return err;
 }
 
+static int igt_ppgtt_compact(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/*
+	 * Simple test to catch issues with compact 64K pages -- since the pt is
+	 * compacted to 256B that gives us 32 entries per pt, however since the
+	 * backing page for the pt is 4K, any extra entries we might incorrectly
+	 * write out should be ignored by the HW. If ever hit such a case this
+	 * test should catch it since some of our writes would land in scratch.
+	 */
+
+	if (!HAS_64K_PAGES(i915)) {
+		pr_info("device lacks compact 64K page support, skipping\n");
+		return 0;
+	}
+
+	if (!HAS_LMEM(i915)) {
+		pr_info("device lacks LMEM support, skipping\n");
+		return 0;
+	}
+
+	/* We want the range to cover multiple page-table boundaries. */
+	obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages_unlocked(obj);
+	if (err)
+		goto out_put;
+
+	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
+		pr_info("LMEM compact unable to allocate huge-page(s)\n");
+		goto out_unpin;
+	}
+
+	/*
+	 * Disable 2M GTT pages by forcing the page-size to 64K for the GTT
+	 * insertion.
+	 */
+	obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
+
+	err = igt_write_huge(i915, obj);
+	if (err)
+		pr_err("LMEM compact write-huge failed\n");
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out_put:
+	i915_gem_object_put(obj);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 static int igt_tmpfs_fallback(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -1735,6 +1794,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_tmpfs_fallback),
 		SUBTEST(igt_ppgtt_smoke_huge),
 		SUBTEST(igt_ppgtt_sanity_check),
+		SUBTEST(igt_ppgtt_compact),
 	};
 
 	if (!HAS_PPGTT(i915)) {

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c43e724afa9f..62471730266c 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 			   start, end, lvl);
 	} else {
 		unsigned int count;
+		unsigned int pte = gen8_pd_index(start, 0);
+		unsigned int num_ptes;
 		u64 *vaddr;
 
 		count = gen8_pt_count(start, end);
@@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 			    atomic_read(&pt->used));
 		GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
 
+		num_ptes = count;
+		if (pt->is_compact) {
+			GEM_BUG_ON(num_ptes % 16);
+			GEM_BUG_ON(pte % 16);
+			num_ptes /= 16;
+			pte /= 16;
+		}
+
 		vaddr = px_vaddr(pt);
-		memset64(vaddr + gen8_pd_index(start, 0),
+		memset64(vaddr + pte,
 			 vm->scratch[0]->encode,
-			 count);
+			 num_ptes);
 
 		atomic_sub(count, &pt->used);
 		start += count;
@@ -453,6 +463,95 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 	return idx;
 }
 
+static void
+xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
+			  struct i915_vma_resource *vma_res,
+			  struct sgt_dma *iter,
+			  enum i915_cache_level cache_level,
+			  u32 flags)
+{
+	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
+	unsigned int rem = sg_dma_len(iter->sg);
+	u64 start = vma_res->start;
+
+	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
+
+	do {
+		struct i915_page_directory * const pdp =
+			gen8_pdp_for_page_address(vm, start);
+		struct i915_page_directory * const pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+		struct i915_page_table *pt =
+			i915_pt_entry(pd, __gen8_pte_index(start, 1));
+		gen8_pte_t encode = pte_encode;
+		unsigned int page_size;
+		gen8_pte_t *vaddr;
+		u16 index, max;
+
+		max = I915_PDES;
+
+		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
+			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
+
+			vaddr = px_vaddr(pd);
+		} else {
+			if (encode & GEN12_PPGTT_PTE_LM) {
+				GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
+				GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
+				GEM_BUG_ON(!IS_ALIGNED(iter->dma,
+						       I915_GTT_PAGE_SIZE_64K));
+
+				index = __gen8_pte_index(start, 0) / 16;
+				page_size = I915_GTT_PAGE_SIZE_64K;
+
+				max /= 16;
+
+				vaddr = px_vaddr(pd);
+				vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+				pt->is_compact = true;
+			} else {
+				GEM_BUG_ON(pt->is_compact);
+				index = __gen8_pte_index(start, 0);
+				page_size = I915_GTT_PAGE_SIZE;
+			}
+
+			vaddr = px_vaddr(pt);
+		}
+
+		do {
+			GEM_BUG_ON(rem < page_size);
+			vaddr[index++] = encode | iter->dma;
+
+			start += page_size;
+			iter->dma += page_size;
+			rem -= page_size;
+			if (iter->dma >= iter->max) {
+				iter->sg = __sg_next(iter->sg);
+				if (!iter->sg)
+					break;
+
+				rem = sg_dma_len(iter->sg);
+				if (!rem)
+					break;
+
+				iter->dma = sg_dma_address(iter->sg);
+				iter->max = iter->dma + rem;
+
+				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+					break;
+			}
+		} while (rem >= page_size && index < max);
+
+		vma_res->page_sizes_gtt |= page_size;
+	} while (iter->sg && sg_dma_len(iter->sg));
+}
+
 static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 				   struct i915_vma_resource *vma_res,
 				   struct sgt_dma *iter,
@@ -586,7 +685,10 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
 	struct sgt_dma iter = sgt_dma(vma_res);
 
 	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+		if (HAS_64K_PAGES(vm->i915))
+			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+		else
+			gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
 	} else {
 		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
 

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 6cd518a3277c..5e038cef0d9f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -92,6 +92,8 @@ typedef u64 gen8_pte_t;
 
 #define GEN12_GGTT_PTE_LM	BIT_ULL(1)
 
+#define GEN12_PDE_64K BIT(6)
+
 /*
  * Cacheability Control is a 4-bit value. The low three bits are stored in bits
  * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
@@ -160,6 +162,7 @@ struct i915_page_table {
 		atomic_t used;
 		struct i915_page_table *stash;
 	};
+	bool is_compact;
 };
 
 struct i915_page_directory {

diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 48e6e2f87700..043652dc6892 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -26,6 +26,7 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	pt->is_compact = false;
 	atomic_set(&pt->used, 0);
 	return pt;
 }
-- cgit

From a413c99fc1e49db4db27f4bf0f7791011b4e2132 Mon Sep 17 00:00:00 2001
From: Robert Beckett
Date: Sat, 19 Feb 2022 00:17:45 +0530
Subject: drm/i915: add gtt misalignment test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a test to check the handling of misaligned offsets and sizes.

v4:
* remove spurious blank lines
* explicitly cast intel_region_id to intel_memory_type in misaligned_pin
Reported-by: kernel test robot

v6:
* use NEEDS_COMPACT_PT instead of hard coding for DG2

v7:
* use i915_vma_unbind_unlocked in misalignment test

v8:
* handle stolen smem region returning -ENODEV due to being
  uninitialized on some setups
* avoid trying to test bad alignments on single page hole regions

Signed-off-by: Robert Beckett
Reviewed-by: Thomas Hellström
Signed-off-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-9-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 126 ++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index b80788a2b7f9..ca4ed9dd909b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -26,9 +26,11 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_region.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
+#include "gt/intel_gtt.h"
 
 #include "i915_random.h"
 #include "i915_selftest.h"
@@ -1067,6 +1069,118 @@ err_purge:
 	return err;
 }
 
+static int misaligned_case(struct i915_address_space *vm, struct intel_memory_region *mr,
+			   u64 addr, u64 size, unsigned long flags)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err = 0;
+	u64 expected_vma_size, expected_node_size;
+	bool is_stolen = mr->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+			 mr->type == INTEL_MEMORY_STOLEN_LOCAL;
+
+	obj = i915_gem_object_create_region(mr, size, 0, 0);
+	if (IS_ERR(obj)) {
+		/* if iGVT-g or DMAR is active, stolen mem will be uninitialized */
+		if (PTR_ERR(obj) == -ENODEV && is_stolen)
+			return 0;
+		return PTR_ERR(obj);
+	}
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_put;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, addr | flags);
+	if (err)
+		goto err_put;
+	i915_vma_unpin(vma);
+
+	if (!drm_mm_node_allocated(&vma->node)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	if (i915_vma_misplaced(vma, 0, 0, addr | flags)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1));
+	expected_node_size = expected_vma_size;
+
+	if (NEEDS_COMPACT_PT(vm->i915) && i915_gem_object_is_lmem(obj)) {
+		/* compact-pt should expand lmem node to 2MB */
+		expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
+		expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+	}
+
+	if (vma->size != expected_vma_size || vma->node.size != expected_node_size) {
+		err = i915_vma_unbind_unlocked(vma);
+		err = -EBADSLT;
+		goto err_put;
+	}
+
+	err = i915_vma_unbind_unlocked(vma);
+	if (err)
+		goto err_put;
+
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+
+err_put:
+	i915_gem_object_put(obj);
+	cleanup_freed_objects(vm->i915);
+	return err;
+}
+
+static int misaligned_pin(struct i915_address_space *vm,
+			  u64 hole_start, u64 hole_end,
+			  unsigned long end_time)
+{
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	int err = 0;
+	u64 hole_size = hole_end - hole_start;
+
+	if (i915_is_ggtt(vm))
+		flags |= PIN_GLOBAL;
+
+	for_each_memory_region(mr, vm->i915, id) {
+		u64 min_alignment = i915_vm_min_alignment(vm, (enum intel_memory_type)id);
+		u64 size = min_alignment;
+		u64 addr = round_down(hole_start + (hole_size / 2), min_alignment);
+
+		/* avoid -ENOSPC on very small hole setups */
+		if (hole_size < 3 * min_alignment)
+			continue;
+
+		/* we can't test < 4k alignment due to flags being encoded in lower bits */
+		if (min_alignment != I915_GTT_PAGE_SIZE_4K) {
+			err = misaligned_case(vm, mr, addr + (min_alignment / 2), size, flags);
+			/* misaligned should error with -EINVAL*/
+			if (!err)
+				err = -EBADSLT;
+			if (err != -EINVAL)
+				return err;
+		}
+
+		/* test for vma->size expansion to min page size */
+		err = misaligned_case(vm, mr, addr, PAGE_SIZE, flags);
+		if (err)
+			return err;
+
+		/* test for intermediate size not expanding vma->size for large alignments */
+		err = misaligned_case(vm, mr, addr, size / 2, flags);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 			  int (*func)(struct i915_address_space *vm,
 				      u64 hole_start, u64 hole_end,
@@ -1136,6 +1250,11 @@ static int igt_ppgtt_shrink_boom(void *arg)
 	return exercise_ppgtt(arg, shrink_boom);
 }
 
+static int igt_ppgtt_misaligned_pin(void *arg)
+{
+	return exercise_ppgtt(arg, misaligned_pin);
+}
+
 static int sort_holes(void *priv, const struct list_head *A,
 		      const struct list_head *B)
 {
@@ -1208,6 +1327,11 @@ static int igt_ggtt_lowlevel(void *arg)
 	return exercise_ggtt(arg, lowlevel_hole);
 }
 
+static int igt_ggtt_misaligned_pin(void *arg)
+{
+	return exercise_ggtt(arg, misaligned_pin);
+}
+
 static int igt_ggtt_page(void *arg)
 {
 	const unsigned int count = PAGE_SIZE/sizeof(u32);
@@ -2180,12 +2304,14 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_ppgtt_fill),
 		SUBTEST(igt_ppgtt_shrink),
 		SUBTEST(igt_ppgtt_shrink_boom),
+		SUBTEST(igt_ppgtt_misaligned_pin),
 		SUBTEST(igt_ggtt_lowlevel),
 		SUBTEST(igt_ggtt_drunk),
 		SUBTEST(igt_ggtt_walk),
 		SUBTEST(igt_ggtt_pot),
 		SUBTEST(igt_ggtt_fill),
 		SUBTEST(igt_ggtt_page),
+		SUBTEST(igt_ggtt_misaligned_pin),
 		SUBTEST(igt_cs_tlb),
 	};
-- cgit

From 2cff4b9ed6171c61cc50c38b25534f7c3d30db3c Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Sat, 19 Feb 2022 00:17:46 +0530
Subject: drm/i915/gtt: allow overriding the pt alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On some platforms we have alignment restrictions when accessing LMEM
from the GTT. In the next few patches we need to be able to modify the
page-tables directly via the GTT itself.

Suggested-by: Ramalingam C
Signed-off-by: Matthew Auld
Cc: Thomas Hellström
Cc: Ramalingam C
Reviewed-by: Ramalingam C
Signed-off-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-10-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 10 +++++++++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c | 16 ++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 5e038cef0d9f..9d83c2d3959c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -200,6 +200,14 @@ void *__px_vaddr(struct drm_i915_gem_object *p);
 struct i915_vm_pt_stash {
 	/* preallocated chains of page tables/directories */
 	struct i915_page_table *pt[2];
+	/*
+	 * Optionally override the alignment/size of the physical page that
+	 * contains each PT. If not set defaults back to the usual
+	 * I915_GTT_PAGE_SIZE_4K. This does not influence the other paging
+	 * structures. MUST be a power-of-two. ONLY applicable on discrete
+	 * platforms.
+	 */
+	int pt_sz;
 };
 
 struct i915_vma_ops {
@@ -595,7 +603,7 @@ void free_scratch(struct i915_address_space *vm);
 struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
 struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz);
-struct i915_page_table *alloc_pt(struct i915_address_space *vm);
+struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz);
 struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
 struct i915_page_directory *__alloc_pd(int npde);

diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 043652dc6892..d91e2beb7517 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -12,7 +12,7 @@
 #include "gen6_ppgtt.h"
 #include "gen8_ppgtt.h"
 
-struct i915_page_table *alloc_pt(struct i915_address_space *vm)
+struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz)
 {
 	struct i915_page_table *pt;
 
@@ -20,7 +20,7 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 	if (unlikely(!pt))
 		return ERR_PTR(-ENOMEM);
 
-	pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+	pt->base = vm->alloc_pt_dma(vm, sz);
 	if (IS_ERR(pt->base)) {
 		kfree(pt);
 		return ERR_PTR(-ENOMEM);
@@ -221,17 +221,25 @@ int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
 			   u64 size)
 {
 	unsigned long count;
-	int shift, n;
+	int shift, n, pt_sz;
 
 	shift = vm->pd_shift;
 	if (!shift)
 		return 0;
 
+	pt_sz = stash->pt_sz;
+	if (!pt_sz)
+		pt_sz = I915_GTT_PAGE_SIZE_4K;
+	else
+		GEM_BUG_ON(!IS_DGFX(vm->i915));
+
+	GEM_BUG_ON(!is_power_of_2(pt_sz));
+
 	count = pd_count(size, shift);
 	while (count--) {
 		struct i915_page_table *pt;
 
-		pt = alloc_pt(vm);
+		pt = alloc_pt(vm, pt_sz);
 		if (IS_ERR(pt)) {
 			i915_vm_free_pt_stash(vm, stash);
 			return PTR_ERR(pt);
-- cgit

From 6f84aa1cd47cc0feb38da76999626051491316d7 Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Sat, 19 Feb 2022 00:17:47 +0530
Subject: drm/i915/gtt: add xehpsdv_ppgtt_insert_entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If this is LMEM then we get a 32 entry PT, with each PTE pointing to
some 64K block of memory, otherwise it's just the usual 512 entry PT.
This very much assumes the caller knows what they are doing.
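The two page-table geometries involved can be cross-checked with a few lines
of standalone C (the numbers -- 32 or 512 entries, 8-byte PTEs -- come from the
commit text above; this is an illustration, not driver code):

	#include <assert.h>
	#include <stdint.h>

	struct pt_geometry {
		unsigned int entries;	/* PTEs per page-table */
		uint64_t page;		/* bytes mapped per PTE */
		unsigned int pte_sz;	/* bytes per PTE */
	};

	int main(void)
	{
		const struct pt_geometry lmem = { 32, 64 * 1024, 8 };	/* compact */
		const struct pt_geometry smem = { 512, 4 * 1024, 8 };	/* classic */

		/* both variants cover exactly one 2M PDE worth of address space */
		assert(lmem.entries * lmem.page == 2ull * 1024 * 1024);
		assert(smem.entries * smem.page == 2ull * 1024 * 1024);

		/* the compact table occupies just 256B of its 4K backing page */
		assert(lmem.entries * lmem.pte_sz == 256);
		assert(smem.entries * smem.pte_sz == 4096);
		return 0;
	}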
Signed-off-by: Matthew Auld
Cc: Thomas Hellström
Cc: Ramalingam C
Reviewed-by: Ramalingam C
Signed-off-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-11-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 50 ++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)
(limited to 'drivers/gpu/drm/i915')

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 62471730266c..f574da00eff1 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -715,13 +715,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 		gen8_pdp_for_page_index(vm, idx);
 	struct i915_page_directory *pd =
 		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
 	gen8_pte_t *vaddr;
 
-	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+	GEM_BUG_ON(pt->is_compact);
+
+	vaddr = px_vaddr(pt);
 	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
 	clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
+static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
+					    dma_addr_t addr,
+					    u64 offset,
+					    enum i915_cache_level level,
+					    u32 flags)
+{
+	u64 idx = offset >> GEN8_PTE_SHIFT;
+	struct i915_page_directory * const pdp =
+		gen8_pdp_for_page_index(vm, idx);
+	struct i915_page_directory *pd =
+		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
+	gen8_pte_t *vaddr;
+
+	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
+	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
+
+	if (!pt->is_compact) {
+		vaddr = px_vaddr(pd);
+		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
+		pt->is_compact = true;
+	}
+
+	vaddr = px_vaddr(pt);
+	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+}
+
+static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
+				       dma_addr_t addr,
+				       u64 offset,
+				       enum i915_cache_level level,
+				       u32 flags)
+{
+	if (flags & PTE_LM)
+		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
+						       level, flags);
+
+	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
+}
+
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
 	u32 pte_flags;
@@ -921,7 +964,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
-	ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
+	if (HAS_64K_PAGES(gt->i915))
+		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
+	else
+		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
 	ppgtt->vm.foreach = gen8_ppgtt_foreach;
-- cgit

From 00e27ad85bc9842e2a775765597e6fe4b6beb584 Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Sat, 19 Feb 2022 00:17:48 +0530
Subject: drm/i915/migrate: add acceleration support for DG2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is all kinds of awkward since we now have to contend with using
64K GTT pages when mapping anything in LMEM (including the page-tables
themselves).

v2(Ram):
- Document the ppGTT layout and add a better description for the
  different windows.
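The address-space budget implied by the layout documented in the diff below can
be worked out in a short standalone C sketch (illustrative only; CHUNK_SZ is
assumed to be the driver's 8MiB copy window, per the "We copy in 8MiB chunks"
comment in migrate_vm()):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint64_t SZ_2M = 2ull << 20, SZ_64K = 64ull << 10;
		const uint64_t CHUNK_SZ = 8ull << 20; /* assumed 8MiB copy window */

		/* three mapping windows: smem, lmem src, lmem dst */
		uint64_t windows = 3 * CHUNK_SZ;

		/* one PT per 2M PDE, each PT exposed at a 64K-aligned slot */
		uint64_t pte_window = (windows / SZ_2M) * SZ_64K;

		printf("windows: %llu MiB, PTE window: %llu KiB\n",
		       (unsigned long long)(windows >> 20),
		       (unsigned long long)(pte_window >> 10));

		/* matches [3 * CHUNK_SZ, 3 * CHUNK_SZ + (3 * CHUNK_SZ / SZ_2M) * SZ_64K] */
		assert(windows / SZ_2M == 12);	/* 12 page-tables to expose */
		assert(pte_window == 12 * SZ_64K);
		return 0;
	}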
Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Ramalingam C Reviewed-by: Ramalingam C Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-12-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 196 ++++++++++++++++++++++++++------ 1 file changed, 164 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 18b44af56969..20444d6ceb3c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -32,6 +32,38 @@ static bool engine_supports_migration(struct intel_engine_cs *engine) return true; } +static void xehpsdv_toggle_pdes(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + /* + * Insert a dummy PTE into every PT that will map to LMEM to ensure + * we have a correctly setup PDE structure for later use. + */ + vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM); + GEM_BUG_ON(!pt->is_compact); + d->offset += SZ_2M; +} + +static void xehpsdv_insert_pte(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + /* + * We are playing tricks here, since the actual pt, from the hw + * pov, is only 256bytes with 32 entries, or 4096bytes with 512 + * entries, but we are still guaranteed that the physical + * alignment is 64K underneath for the pt, and we are careful + * not to access the space in the void. + */ + vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM); + d->offset += SZ_64K; +} + static void insert_pte(struct i915_address_space *vm, struct i915_page_table *pt, void *data) @@ -74,7 +106,32 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) * i.e. within the same non-preemptible window so that we do not switch * to another migration context that overwrites the PTE. * - * TODO: Add support for huge LMEM PTEs + * This changes quite a bit on platforms with HAS_64K_PAGES support, + * where we instead have three windows, each CHUNK_SIZE in size. The + * first is reserved for mapping system-memory, and that just uses the + * 512 entry layout using 4K GTT pages. The other two windows just map + * lmem pages and must use the new compact 32 entry layout using 64K GTT + * pages, which ensures we can address any lmem object that the user + * throws at us. We then also use the xehpsdv_toggle_pdes as a way of + * just toggling the PDE bit(GEN12_PDE_64K) for us, to enable the + * compact layout for each of these page-tables, that fall within the + * [CHUNK_SIZE, 3 * CHUNK_SIZE) range. + * + * We lay the ppGTT out as: + * + * [0, CHUNK_SZ) -> first window/object, maps smem + * [CHUNK_SZ, 2 * CHUNK_SZ) -> second window/object, maps lmem src + * [2 * CHUNK_SZ, 3 * CHUNK_SZ) -> third window/object, maps lmem dst + * + * For the PTE window it's also quite different, since each PTE must + * point to some 64K page, one for each PT(since it's in lmem), and yet + * each is only <= 4096bytes, but since the unused space within that PTE + * range is never touched, this should be fine. 
+ * + * So basically each PT now needs 64K of virtual memory, instead of 4K, + * which looks like: + * + * [3 * CHUNK_SZ, 3 * CHUNK_SZ + ((3 * CHUNK_SZ / SZ_2M) * SZ_64K)] -> PTE */ vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY); @@ -86,6 +143,9 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) goto err_vm; } + if (HAS_64K_PAGES(gt->i915)) + stash.pt_sz = I915_GTT_PAGE_SIZE_64K; + /* * Each engine instance is assigned its own chunk in the VM, so * that we can run multiple instances concurrently @@ -105,14 +165,20 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need * 4x2 page directories for source/destination. */ - sz = 2 * CHUNK_SZ; + if (HAS_64K_PAGES(gt->i915)) + sz = 3 * CHUNK_SZ; + else + sz = 2 * CHUNK_SZ; d.offset = base + sz; /* * We need another page directory setup so that we can write * the 8x512 PTE in each chunk. */ - sz += (sz >> 12) * sizeof(u64); + if (HAS_64K_PAGES(gt->i915)) + sz += (sz / SZ_2M) * SZ_64K; + else + sz += (sz >> 12) * sizeof(u64); err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz); if (err) @@ -133,7 +199,18 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) goto err_vm; /* Now allow the GPU to rewrite the PTE via its own ppGTT */ - vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); + if (HAS_64K_PAGES(gt->i915)) { + vm->vm.foreach(&vm->vm, base, d.offset - base, + xehpsdv_insert_pte, &d); + d.offset = base + CHUNK_SZ; + vm->vm.foreach(&vm->vm, + d.offset, + 2 * CHUNK_SZ, + xehpsdv_toggle_pdes, &d); + } else { + vm->vm.foreach(&vm->vm, base, d.offset - base, + insert_pte, &d); + } } return &vm->vm; @@ -269,19 +346,38 @@ static int emit_pte(struct i915_request *rq, u64 offset, int length) { + bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915); const u64 encode = rq->context->vm->pte_encode(0, cache_level, is_lmem ? 
PTE_LM : 0); struct intel_ring *ring = rq->ring; - int total = 0; + int pkt, dword_length; + u32 total = 0; + u32 page_size; u32 *hdr, *cs; - int pkt; GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + page_size = I915_GTT_PAGE_SIZE; + dword_length = 0x400; + /* Compute the page directory offset for the target address range */ - offset >>= 12; - offset *= sizeof(u64); - offset += 2 * CHUNK_SZ; + if (has_64K_pages) { + GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M)); + + offset /= SZ_2M; + offset *= SZ_64K; + offset += 3 * CHUNK_SZ; + + if (is_lmem) { + page_size = I915_GTT_PAGE_SIZE_64K; + dword_length = 0x40; + } + } else { + offset >>= 12; + offset *= sizeof(u64); + offset += 2 * CHUNK_SZ; + } + offset += (u64)rq->engine->instance << 32; cs = intel_ring_begin(rq, 6); @@ -289,7 +385,7 @@ static int emit_pte(struct i915_request *rq, return PTR_ERR(cs); /* Pack as many PTE updates as possible into a single MI command */ - pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); hdr = cs; @@ -299,6 +395,8 @@ static int emit_pte(struct i915_request *rq, do { if (cs - hdr >= pkt) { + int dword_rem; + *hdr += cs - hdr - 2; *cs++ = MI_NOOP; @@ -310,7 +408,18 @@ static int emit_pte(struct i915_request *rq, if (IS_ERR(cs)) return PTR_ERR(cs); - pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + dword_rem = dword_length; + if (has_64K_pages) { + if (IS_ALIGNED(total, SZ_2M)) { + offset = round_up(offset, SZ_64K); + } else { + dword_rem = SZ_2M - (total & (SZ_2M - 1)); + dword_rem /= page_size; + dword_rem *= 2; + } + } + + pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); hdr = cs; @@ -319,13 +428,15 @@ static int emit_pte(struct i915_request *rq, *cs++ = upper_32_bits(offset); } + GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size)); + *cs++ = lower_32_bits(encode | it->dma); *cs++ = upper_32_bits(encode | it->dma); offset += 8; - total += I915_GTT_PAGE_SIZE; + total += page_size; - it->dma += I915_GTT_PAGE_SIZE; + it->dma += page_size; if (it->dma >= it->max) { it->sg = __sg_next(it->sg); if (!it->sg || sg_dma_len(it->sg) == 0) @@ -356,7 +467,8 @@ static bool wa_1209644611_applies(int ver, u32 size) return height % 4 == 3 && height <= 8; } -static int emit_copy(struct i915_request *rq, int size) +static int emit_copy(struct i915_request *rq, + u32 dst_offset, u32 src_offset, int size) { const int ver = GRAPHICS_VER(rq->engine->i915); u32 instance = rq->engine->instance; @@ -371,31 +483,31 @@ static int emit_copy(struct i915_request *rq, int size) *cs++ = BLT_DEPTH_32 | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = dst_offset; *cs++ = instance; *cs++ = 0; *cs++ = PAGE_SIZE; - *cs++ = 0; /* src offset */ + *cs++ = src_offset; *cs++ = instance; } else if (ver >= 8) { *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = dst_offset; *cs++ = instance; *cs++ = 0; *cs++ = PAGE_SIZE; - *cs++ = 0; /* src offset */ + *cs++ = src_offset; *cs++ = instance; } else { GEM_BUG_ON(instance); *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; - *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = 
dst_offset; *cs++ = PAGE_SIZE; - *cs++ = 0; /* src offset */ + *cs++ = src_offset; } intel_ring_advance(rq, cs); @@ -423,6 +535,7 @@ intel_context_migrate_copy(struct intel_context *ce, GEM_BUG_ON(ce->ring->size < SZ_64K); do { + u32 src_offset, dst_offset; int len; rq = i915_request_create(ce); @@ -450,15 +563,28 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0, - CHUNK_SZ); + src_offset = 0; + dst_offset = CHUNK_SZ; + if (HAS_64K_PAGES(ce->engine->i915)) { + GEM_BUG_ON(!src_is_lmem && !dst_is_lmem); + + src_offset = 0; + dst_offset = 0; + if (src_is_lmem) + src_offset = CHUNK_SZ; + if (dst_is_lmem) + dst_offset = 2 * CHUNK_SZ; + } + + len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, + src_offset, CHUNK_SZ); if (len <= 0) { err = len; goto out_rq; } err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, - CHUNK_SZ, len); + dst_offset, len); if (err < 0) goto out_rq; if (err < len) { @@ -470,7 +596,7 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - err = emit_copy(rq, len); + err = emit_copy(rq, dst_offset, src_offset, len); /* Arbitration is re-enabled between requests. */ out_rq: @@ -488,14 +614,15 @@ out_ce: return err; } -static int emit_clear(struct i915_request *rq, int size, u32 value) +static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value) { const int ver = GRAPHICS_VER(rq->engine->i915); - u32 instance = rq->engine->instance; u32 *cs; GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + offset += (u64)rq->engine->instance << 32; + cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -505,17 +632,17 @@ static int emit_clear(struct i915_request *rq, int size, u32 value) *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = 0; /* offset */ - *cs++ = instance; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); *cs++ = value; *cs++ = MI_NOOP; } else { - GEM_BUG_ON(instance); + GEM_BUG_ON(upper_32_bits(offset)); *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = 0; + *cs++ = lower_32_bits(offset); *cs++ = value; } @@ -542,6 +669,7 @@ intel_context_migrate_clear(struct intel_context *ce, GEM_BUG_ON(ce->ring->size < SZ_64K); do { + u32 offset; int len; rq = i915_request_create(ce); @@ -569,7 +697,11 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ); + offset = 0; + if (HAS_64K_PAGES(ce->engine->i915) && is_lmem) + offset = CHUNK_SZ; + + len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ); if (len <= 0) { err = len; goto out_rq; @@ -579,7 +711,7 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - err = emit_clear(rq, len, value); + err = emit_clear(rq, offset, len, value); /* Arbitration is re-enabled between requests. */ out_rq: -- cgit From 5e3094cfd9fb313be3b8fbf9f91e92a30483bc28 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Sat, 19 Feb 2022 00:17:50 +0530 Subject: drm/i915/xehpsdv: Add has_flat_ccs to device info Platforms of XeHP and beyond support 3D surface (buffer) compression and various compression formats. This is accomplished by an additional compression control state (CCS) stored for each surface. 
Gen 12 devices(TGL family and DG1) stores compression states in a separate region of memory. It is managed by user-space and has an associated set of user-space managed page tables used by hardware for address translation. In Xe HP and beyond (XEHPSDV, DG2, etc), there is a new feature introduced i.e Flat CCS. It replaced AUX page tables with a flat indexed region of device memory for storing compression states. Cc: Joonas Lahtinen Cc: Matthew Auld Signed-off-by: CQ Tang Signed-off-by: Ramalingam C Reviewed-by: Matthew Auld Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-14-ramalingam.c@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 12173d86f752..6ffadf4b3f1a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1551,6 +1551,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) +/* + * Platform has the dedicated compression control state for each lmem surfaces + * stored in lmem to support the 3D and media compression formats. + */ +#define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs) + #define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) #define HAS_POOLED_EU(dev_priv) (INTEL_INFO(dev_priv)->has_pooled_eu) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 5252b8030af9..c1a3f1107022 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1003,6 +1003,7 @@ static const struct intel_device_info adl_p_info = { XE_HP_PAGE_SIZES, \ .dma_mask_size = 46, \ .has_64bit_reloc = 1, \ + .has_flat_ccs = 1, \ .has_global_mocs = 1, \ .has_gt_uc = 1, \ .has_llc = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 47ffd9a1bfa6..d3cce0220018 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -131,6 +131,7 @@ enum intel_ppgtt_type { func(needs_compact_pt); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ + func(has_flat_ccs); \ func(has_global_mocs); \ func(has_gt_uc); \ func(has_guc_deprivilege); \ -- cgit From 4b31b8e34460af9b2eff0d389a6caefcb694a1bc Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Sat, 19 Feb 2022 00:17:51 +0530 Subject: drm/i915/lmem: Enable lmem for platforms with Flat CCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A portion of device memory is reserved for Flat CCS so usable device memory will be reduced by size of Flat CCS. Size of Flat CCS is specified in “XEHPSDV_FLAT_CCS_BASE_ADDR”. So to get effective device memory we need to subtract total device memory by Flat CCS memory size. 
v2: Addressed the small bar related issue [Matt] Removed a reduntant check [Matt] v3: removed a variable s/DRM_ERROR/drm_err [Lucas] Cc: Matthew Auld Signed-off-by: Abdiel Janulgue Signed-off-by: Ramalingam C Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-15-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt.h | 1 + drivers/gpu/drm/i915/gt/intel_region_lmem.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 3 +++ 4 files changed, 46 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index db7fe97bd15f..beb33b1a9684 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -908,6 +908,25 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) return intel_uncore_read_fw(gt->uncore, reg); } +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read(gt->uncore, reg); +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 2dad46c3eff2..0f571c8ee22b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -85,6 +85,7 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, } u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 2709675336de..7f711c16501f 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -97,8 +97,29 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) if (!IS_DGFX(i915)) return ERR_PTR(-ENODEV); - /* Stolen starts from GSMBASE on DG1 */ - lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE); + if (HAS_FLAT_CCS(i915)) { + u64 tile_stolen, flat_ccs_base; + + lmem_size = pci_resource_len(pdev, 2); + flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; + + if (GEM_WARN_ON(lmem_size < flat_ccs_base)) + return ERR_PTR(-ENODEV); + + tile_stolen = lmem_size - flat_ccs_base; + + /* If the FLAT_CCS_BASE_ADDR register is not populated, flag an error */ + if (tile_stolen == lmem_size) + drm_err(&i915->drm, + "CCS_BASE_ADDR register did not have expected value\n"); + + lmem_size -= tile_stolen; + } else { + /* Stolen starts from GSMBASE without CCS */ + lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); + } + io_start = pci_resource_start(pdev, 2); if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f95bbb10b6f4..4b95c94084d9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -12645,6 +12645,9 @@ 
enum skl_power_gate { #define SGGI_DIS REG_BIT(15) #define SGR_DIS REG_BIT(13) +#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHPSDV_CCS_BASE_SHIFT 8 + /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) #define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ -- cgit From 64b2a6a054c40c04a4e48fd70002570654381f9c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 17 Feb 2022 18:02:23 +0530 Subject: drm/i915/gt: use get_reset_domain() helper We dont need to implement reset_domain in intel_engine _setup(), but can be done as a helper. Implemented as engine->reset_domain = get_reset_domain(). Cc: Rodrigo Vivi Signed-off-by: Tejas Upadhyay Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20220217123223.748184-1-tejaskumarx.surendrakumar.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 74 ++++++++++++++++++------------- 1 file changed, 42 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d1daa4cc2895..85164bba2b75 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -290,6 +290,46 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) GEM_DEBUG_WARN_ON(iir); } +static u32 get_reset_domain(u8 ver, enum intel_engine_id id) +{ + u32 reset_domain; + + if (ver >= 11) { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + reset_domain = engine_reset_domains[id]; + } else { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + reset_domain = engine_reset_domains[id]; + } + + return reset_domain; +} + static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, u8 logical_instance) { @@ -325,38 +365,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); - if (GRAPHICS_VER(gt->i915) >= 11) { - static const u32 engine_reset_domains[] = { - [RCS0] = GEN11_GRDOM_RENDER, - [BCS0] = GEN11_GRDOM_BLT, - [VCS0] = GEN11_GRDOM_MEDIA, - [VCS1] = GEN11_GRDOM_MEDIA2, - [VCS2] = GEN11_GRDOM_MEDIA3, - [VCS3] = GEN11_GRDOM_MEDIA4, - [VCS4] = GEN11_GRDOM_MEDIA5, - [VCS5] = GEN11_GRDOM_MEDIA6, - [VCS6] = GEN11_GRDOM_MEDIA7, - [VCS7] = GEN11_GRDOM_MEDIA8, - [VECS0] = GEN11_GRDOM_VECS, - [VECS1] = GEN11_GRDOM_VECS2, - [VECS2] = GEN11_GRDOM_VECS3, - [VECS3] = GEN11_GRDOM_VECS4, - }; - GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || - !engine_reset_domains[id]); - engine->reset_domain = engine_reset_domains[id]; - } else { - static const u32 engine_reset_domains[] = { - [RCS0] = GEN6_GRDOM_RENDER, - [BCS0] = GEN6_GRDOM_BLT, - [VCS0] = GEN6_GRDOM_MEDIA, - [VCS1] = 
GEN8_GRDOM_MEDIA2, - [VECS0] = GEN6_GRDOM_VECS, - }; - GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || - !engine_reset_domains[id]); - engine->reset_domain = engine_reset_domains[id]; - } + engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915), + id); engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; -- cgit From 9648f1c3739505557d94ff749a4f32192ea81fe3 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 16 Feb 2022 10:15:04 -0800 Subject: drm/i915/guc/slpc: Correct the param count for unset param SLPC unset param H2G only needs one parameter - the id of the param. Fixes: 025cb07bebfa ("drm/i915/guc/slpc: Cache platform frequency limits") Suggested-by: Umesh Nerlige Ramappa Signed-off-by: Vinay Belgaumkar Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220216181504.7155-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 13b27b8ff74e..ba21ace973da 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -110,7 +110,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) { u32 request[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), id, }; -- cgit From 0591ee6a5c428c2309f5fefcdbe40d9eb669634a Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 17 Feb 2022 11:15:25 -0800 Subject: drm/i915/guc/slpc: Use wrapper for reading RP_STATE_CAP This will ensure correct values for Gen12+ platforms. 
v2: Rebase Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Vinay Belgaumkar Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220216181504.7155-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ba21ace973da..8df6bc83dbad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_guc_slpc.h" #include "gt/intel_gt.h" +#include "gt/intel_rps.h" static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) { @@ -574,10 +575,10 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) static void slpc_get_rp_values(struct intel_guc_slpc *slpc) { + struct intel_rps *rps = &slpc_to_gt(slpc)->rps; u32 rp_state_cap; - rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore, - GEN6_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) * GT_FREQUENCY_MULTIPLIER; -- cgit From b9ef89392c2ac694a3e5624cde8f848fbf393818 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 22 Feb 2022 06:14:24 -0800 Subject: drm/i915/tgl: Simply subplatform detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past we had a need to differentiate TGL U and TGL Y, there was a different voltage swing table for each subplatform and some PCI ids of this subplatforms are shared but it turned out that it was a specification mistake and the voltage swing table was indeed the same but we went ahead with that patch because we needed to differentiate TGL U and Y from TGL H and by that time TGL H was embargoed so that was the perfect way to land it upstream. Now the embargo for TGL H is long past and now we even have INTEL_TGL_12_GT1_IDS with all TGL H ids, so we can drop this PCI root check and only rely in the PCI ids to differentiate TGL U and Y from TGL H that actually has code differences. Besides the simplification this will fix issues in virtualization environments where the PCI root is virtualized and don't have the same id as actual hardware. 
v2: - add and set INTEL_SUBPLATFORM_UY Cc: Fred Gao Cc: Tvrtko Ursulin Signed-off-by: José Roberto de Souza Reviewed-by: Tvrtko Ursulin Tested-by: Yu He Link: https://patchwork.freedesktop.org/patch/msgid/20220222141424.35165-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 11 ++++----- drivers/gpu/drm/i915/i915_reg.h | 6 ----- drivers/gpu/drm/i915/intel_device_info.c | 26 ++++++---------------- drivers/gpu/drm/i915/intel_device_info.h | 3 +++ drivers/gpu/drm/i915/intel_step.c | 2 +- 6 files changed, 16 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index e2dfb93a82bd..ed62c35e1518 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -1321,7 +1321,7 @@ tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (crtc_state->port_clock > 270000) { - if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + if (IS_TGL_UY(dev_priv)) { return intel_get_buf_trans(&tgl_uy_combo_phy_trans_dp_hbr2, n_entries); } else { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6ffadf4b3f1a..71d946b0249c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1342,11 +1342,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) -#define IS_TGL_U(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULT) - -#define IS_TGL_Y(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) +#define IS_TGL_UY(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_UY) #define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until)) @@ -1365,11 +1362,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, IS_DISPLAY_STEP(__i915, since, until)) #define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \ - ((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ + (IS_TGL_UY(__i915) && \ IS_GRAPHICS_STEP(__i915, since, until)) #define IS_TGL_GRAPHICS_STEP(__i915, since, until) \ - (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ + (IS_TIGERLAKE(__i915) && !IS_TGL_UY(__i915)) && \ IS_GRAPHICS_STEP(__i915, since, until)) #define IS_RKL_DISPLAY_STEP(p, since, until) \ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4b95c94084d9..8d28d1e29aaf 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -13012,12 +13012,6 @@ enum skl_power_gate { #define DSB_ENABLE (1 << 31) #define DSB_STATUS (1 << 0) -#define TGL_ROOT_DEVICE_ID 0x9A00 -#define TGL_ROOT_DEVICE_MASK 0xFF00 -#define TGL_ROOT_DEVICE_SKU_MASK 0xF -#define TGL_ROOT_DEVICE_SKU_ULX 0x2 -#define TGL_ROOT_DEVICE_SKU_ULT 0x4 - #define CLKREQ_POLICY _MMIO(0x101038) #define CLKREQ_POLICY_MEM_UP_OVRD REG_BIT(1) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 04fd266d70e2..3a713b46b573 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -170,6 +170,10 @@ static const u16 subplatform_portf_ids[] = { INTEL_ICL_PORT_F_IDS(0), }; +static const u16 subplatform_uy_ids[] = { + INTEL_TGL_12_GT2_IDS(0), +}; + static 
const u16 subplatform_rpls_ids[] = { INTEL_RPLS_IDS(0), }; @@ -210,30 +214,14 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_portf_ids, ARRAY_SIZE(subplatform_portf_ids))) { mask = BIT(INTEL_SUBPLATFORM_PORTF); + } else if (find_devid(devid, subplatform_uy_ids, + ARRAY_SIZE(subplatform_uy_ids))) { + mask = BIT(INTEL_SUBPLATFORM_UY); } else if (find_devid(devid, subplatform_rpls_ids, ARRAY_SIZE(subplatform_rpls_ids))) { mask = BIT(INTEL_SUBPLATFORM_RPL_S); } - if (IS_TIGERLAKE(i915)) { - struct pci_dev *root, *pdev = to_pci_dev(i915->drm.dev); - - root = list_first_entry(&pdev->bus->devices, typeof(*root), bus_list); - - drm_WARN_ON(&i915->drm, mask); - drm_WARN_ON(&i915->drm, (root->device & TGL_ROOT_DEVICE_MASK) != - TGL_ROOT_DEVICE_ID); - - switch (root->device & TGL_ROOT_DEVICE_SKU_MASK) { - case TGL_ROOT_DEVICE_SKU_ULX: - mask = BIT(INTEL_SUBPLATFORM_ULX); - break; - case TGL_ROOT_DEVICE_SKU_ULT: - mask = BIT(INTEL_SUBPLATFORM_ULT); - break; - } - } - GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); RUNTIME_INFO(i915)->platform_mask[pi] |= mask; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d3cce0220018..63bab7431f60 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -106,6 +106,9 @@ enum intel_platform { /* ICL */ #define INTEL_SUBPLATFORM_PORTF (0) +/* TGL */ +#define INTEL_SUBPLATFORM_UY (0) + /* DG2 */ #define INTEL_SUBPLATFORM_G10 0 #define INTEL_SUBPLATFORM_G11 1 diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index ac1a796b2808..4fd69ecd1481 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -165,7 +165,7 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_ROCKETLAKE(i915)) { revids = rkl_revids; size = ARRAY_SIZE(rkl_revids); - } else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) { + } else if (IS_TGL_UY(i915)) { revids = tgl_uy_revids; size = ARRAY_SIZE(tgl_uy_revids); } else if (IS_TIGERLAKE(i915)) { -- cgit From bbd57d16d5ddeb9d8995a6ded81a6879be6af928 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 18 Feb 2022 13:03:30 -0800 Subject: drm/i915/rps/tgl+: Remove RPS interrupt support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TGL+ and newer platforms don't support RPS up and low interruption limits. It is not used for broadwell and newer plaforms that supports execlist but here making sure that it is explicit not used even in debug scenarios. 
BSpec: 33301 BSpec: 52069 BSpec: 9520 HSD: 1405911647 Cc: Vinay Belgaumkar Signed-off-by: José Roberto de Souza Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20220218210330.48653-1-jose.souza@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 54e7df788dbf..ec06cc0c70c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1484,7 +1484,7 @@ void intel_rps_enable(struct intel_rps *rps) if (has_busy_stats(rps)) intel_rps_set_timer(rps); - else if (GRAPHICS_VER(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11) intel_rps_set_interrupts(rps); else /* Ironlake currently uses intel_ips.ko */ {} -- cgit From b8986c889e7ac26c57cb548f8f344456fa925a2f Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 23 Feb 2022 11:49:46 -0800 Subject: drm/i915: Check stolen memory size before calling drm_mm_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add check for zero usable stolen memory before calling drm_mm_init to support configurations where stolen memory exists but is fully reserved. Also skip memory test in cases that usable stolen is smaller than page size(amount mapped and used to test memory). v2: - skiping test if available memory is smaller than page size (Lucas) Cc: Ville Syrjälä Cc: Daniele Ceraolo Spurio Cc: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Steve Carbonari Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220223194946.725328-1-jose.souza@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 9 ++++++--- drivers/gpu/drm/i915/intel_memory_region.c | 8 ++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 26975d857776..c1c45bdf0341 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -495,13 +495,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; + i915->stolen_usable_size = + resource_size(&i915->dsm) - reserved_total; + drm_dbg(&i915->drm, "Memory reserved for graphics device: %lluK, usable: %lluK\n", (u64)resource_size(&i915->dsm) >> 10, - ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); + (u64)i915->stolen_usable_size >> 10); - i915->stolen_usable_size = - resource_size(&i915->dsm) - reserved_total; + if (i915->stolen_usable_size == 0) + return 0; /* Basic memrange allocator for stolen space. 
*/ drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index c70d7e286a51..16ab62d605b1 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -97,10 +97,14 @@ static int iomemtest(struct intel_memory_region *mem, bool test_all, const void *caller) { - resource_size_t last = resource_size(&mem->region) - PAGE_SIZE; - resource_size_t page; + resource_size_t last, page; int err; + if (resource_size(&mem->region) < PAGE_SIZE) + return 0; + + last = resource_size(&mem->region) - PAGE_SIZE; + /* * Quick test to check read/write access to the iomap (backing store). * -- cgit From 8fbf28934acfdac08073a2d5697c7cacae8d3997 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 21 Feb 2022 12:11:03 +0000 Subject: drm/i915/ttm: fixup the mock_bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running the mock selftests we currently blow up with: <6> [299.836278] i915: Running i915_gem_huge_page_mock_selftests/igt_mock_memory_region_huge_pages <1> [299.836356] BUG: kernel NULL pointer dereference, address: 00000000000000c8 <1> [299.836361] #PF: supervisor read access in kernel mode <1> [299.836364] #PF: error_code(0x0000) - not-present page <6> [299.836367] PGD 0 P4D 0 <4> [299.836369] Oops: 0000 [#1] PREEMPT SMP NOPTI <4> [299.836372] CPU: 1 PID: 1429 Comm: i915_selftest Tainted: G U 5.17.0-rc4-CI-CI_DRM_11227+ #1 <4> [299.836376] Hardware name: Intel(R) Client Systems NUC11TNHi5/NUC11TNBi5, BIOS TNTGL357.0042.2020.1221.1743 12/21/2020 <4> [299.836380] RIP: 0010:ttm_resource_init+0x57/0x90 [ttm] <4> [299.836392] RSP: 0018:ffffc90001e4f680 EFLAGS: 00010203 <4> [299.836395] RAX: 0000000000000000 RBX: ffffc90001e4f708 RCX: 0000000000000000 <4> [299.836398] RDX: ffff888116172528 RSI: ffffc90001e4f6f8 RDI: 0000000000000000 <4> [299.836401] RBP: ffffc90001e4f6f8 R08: 00000000000001b0 R09: ffff888116172528 <4> [299.836403] R10: 0000000000000001 R11: 00000000a4cb2e51 R12: ffffc90001e4fa90 <4> [299.836406] R13: ffff888116172528 R14: ffff888130d7f4b0 R15: ffff888130d7f400 <4> [299.836409] FS: 00007ff241684500(0000) GS:ffff88849fe80000(0000) knlGS:0000000000000000 <4> [299.836412] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [299.836416] CR2: 00000000000000c8 CR3: 0000000107b80001 CR4: 0000000000770ee0 <4> [299.836418] PKRU: 55555554 <4> [299.836420] Call Trace: <4> [299.836422] <4> [299.836423] i915_ttm_buddy_man_alloc+0x68/0x240 [i915] ttm_resource_init() now needs to access the bo->bdev, and also wants to store the bo reference. Try to keep both working. The mock_bo is a hack so we can interface directly with the ttm managers alloc() and free() hooks for our mock testing, without invoking other TTM features like eviction, moves, etc. 
v2: make sure we only touch res->bo if the alloc() returns successfully Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5123 Fixes: 0e05fc49c358 ("drm/ttm: add common accounting to the resource mgr v3") Signed-off-by: Matthew Auld Cc: Christian König Cc: Thomas Hellström Acked-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220221121103.2473831-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/intel_region_ttm.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index f2b888c16958..7dea07c579aa 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -200,11 +200,14 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, int ret; mock_bo.base.size = size; + mock_bo.bdev = &mem->i915->bdev; place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC) ret = -ENXIO; + if (!ret) + res->bo = NULL; /* Rather blow up, then some uaf */ return ret ? ERR_PTR(ret) : res; } @@ -219,6 +222,11 @@ void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res) { struct ttm_resource_manager *man = mem->region_private; + struct ttm_buffer_object mock_bo = {}; + + mock_bo.base.size = res->num_pages << PAGE_SHIFT; + mock_bo.bdev = &mem->i915->bdev; + res->bo = &mock_bo; man->func->free(man, res); } -- cgit From 1be6b46f731392267eeebef9d59600ff9999a987 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 10 Feb 2022 21:23:33 -0800 Subject: drm/i915/dg2: add Wa_14014947963 BSPEC: 46123 v2: Address review feedback [MattR] v3: move register definition to gt_regs [MattR] Cc: Matt Roper Signed-off-by: Clint Taylor Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220211052333.12306-1-clinton.a.taylor@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 18d158d77aba..d752db5669dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -465,6 +465,9 @@ #define GEN9_PGCTL_SSB_EU210_ACK (1 << 12) #define GEN9_PGCTL_SSB_EU311_ACK (1 << 14) +#define VF_PREEMPTION _MMIO(0x83a4) +#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) + #define GEN8_RC6_CTX_INFO _MMIO(0x8504) #define GEN12_SQCM _MMIO(0x8724) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b3067aed7f3e..0471d475e680 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -683,6 +683,11 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_16013271637:dg2 */ wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + + /* Wa_14014947963:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) + wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, -- cgit From 9659dd2b308bde4143855f5b57b0412be466eb8a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:34 -0800 Subject: 
drm/i915/gt: Add helper for shmem copy to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a variant of shmem_read() that takes a iosys_map pointer rather than a plain pointer as argument. It's mostly a copy __shmem_rw() but adapting the api and removing the write support since there's currently only need to use iosys_map as destination. Reworking __shmem_rw() to share the implementation was tempting, but finding a good balance between reuse and clarity pushed towards a little code duplication. Since the function is small, just add the similar function with a copy/paste/adapt approach. v2: Add an offset as argument and instead of using a map iterator, use the offset to keep track of where we are writing data to. Cc: Matt Roper Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/shmem_utils.c | 32 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/shmem_utils.h | 3 +++ 2 files changed, 35 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 0683b27a3890..402f085f3a02 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -3,6 +3,7 @@ * Copyright © 2020 Intel Corporation */ +#include #include #include #include @@ -123,6 +124,37 @@ static int __shmem_rw(struct file *file, loff_t off, return 0; } +int shmem_read_to_iosys_map(struct file *file, loff_t off, + struct iosys_map *map, size_t map_off, size_t len) +{ + unsigned long pfn; + + for (pfn = off >> PAGE_SHIFT; len; pfn++) { + unsigned int this = + min_t(size_t, PAGE_SIZE - offset_in_page(off), len); + struct page *page; + void *vaddr; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (IS_ERR(page)) + return PTR_ERR(page); + + vaddr = kmap(page); + iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off), + this); + mark_page_accessed(page); + kunmap(page); + put_page(page); + + len -= this; + map_off += this; + off = 0; + } + + return 0; +} + int shmem_read(struct file *file, loff_t off, void *dst, size_t len) { return __shmem_rw(file, off, dst, len, false); diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h index c1669170c351..b2b04d88c6e5 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.h +++ b/drivers/gpu/drm/i915/gt/shmem_utils.h @@ -8,6 +8,7 @@ #include +struct iosys_map; struct drm_i915_gem_object; struct file; @@ -17,6 +18,8 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj); void *shmem_pin_map(struct file *file); void shmem_unpin_map(struct file *file, void *ptr); +int shmem_read_to_iosys_map(struct file *file, loff_t off, + struct iosys_map *map, size_t map_off, size_t len); int shmem_read(struct file *file, loff_t off, void *dst, size_t len); int shmem_write(struct file *file, loff_t off, void *src, size_t len); -- cgit From 1c0b1175e6f3729c91835f179eb9c97b5067bb3a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:35 -0800 Subject: drm/i915/guc: Keep iosys_map of ads_blob around MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert intel_guc_ads_create() and initialization to use iosys_map rather than 
plain pointer and save it in the guc struct. This will help with additional updates to the ads_blob after the creation/initialization by abstracting the IO vs system memory. Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 +++- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 9d779de16613..f857e9190750 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -6,8 +6,9 @@ #ifndef _INTEL_GUC_H_ #define _INTEL_GUC_H_ -#include #include +#include +#include #include "intel_uncore.h" #include "intel_guc_fw.h" @@ -148,6 +149,7 @@ struct intel_guc { struct i915_vma *ads_vma; /** @ads_blob: contents of the GuC ADS */ struct __guc_ads_blob *ads_blob; + struct iosys_map ads_map; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 7e41175618f5..4ea842752bbc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -666,6 +666,11 @@ int intel_guc_ads_create(struct intel_guc *guc) if (ret) return ret; + if (i915_gem_object_is_lmem(guc->ads_vma->obj)) + iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)guc->ads_blob); + else + iosys_map_set_vaddr(&guc->ads_map, guc->ads_blob); + __guc_ads_init(guc); return 0; @@ -687,6 +692,7 @@ void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); guc->ads_blob = NULL; + iosys_map_clear(&guc->ads_map); kfree(guc->ads_regset); } -- cgit From 91a33f7e20700d58941b38bcf8eca0d0b281ddb0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:36 -0800 Subject: drm/i915/guc: Add read/write helpers for ADS blob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helpers on top of iosys_map_read_field() / iosys_map_write_field() functions so they always use the right arguments and make code easier to read. 
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-6-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 4ea842752bbc..b645df7d46b6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -60,6 +60,13 @@ struct __guc_ads_blob { struct guc_mmio_reg regset[0]; } __packed; +#define ads_blob_read(guc_, field_) \ + iosys_map_rd_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, field_) + +#define ads_blob_write(guc_, field_, val_) \ + iosys_map_wr_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, \ + field_, val_) + static u32 guc_ads_regset_size(struct intel_guc *guc) { GEM_BUG_ON(!guc->ads_regset_size); -- cgit From 219aada263f909d61443a8d1196592797c6e0281 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:37 -0800 Subject: drm/i915/guc: Convert golden context init to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now the map is saved during creation, so use it to initialize the golden context, reading from shmem and writing to either system or IO memory. v2: Do not use a map iterator: add an offset to keep track of destination Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-7-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index b645df7d46b6..5c52bee516cf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -512,18 +512,16 @@ static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_ static void guc_init_golden_context(struct intel_guc *guc) { - struct __guc_ads_blob *blob = guc->ads_blob; struct intel_engine_cs *engine; struct intel_gt *gt = guc_to_gt(guc); - u32 addr_ggtt, offset; - u32 total_size = 0, alloc_size, real_size; + unsigned long offset; + u32 addr_ggtt, total_size = 0, alloc_size, real_size; u8 engine_class, guc_class; - u8 *ptr; if (!intel_uc_uses_guc_submission(>->uc)) return; - GEM_BUG_ON(!blob); + GEM_BUG_ON(iosys_map_is_null(&guc->ads_map)); /* * Go back and fill in the golden context data now that it is @@ -531,15 +529,13 @@ static void guc_init_golden_context(struct intel_guc *guc) */ offset = guc_ads_golden_ctxt_offset(guc); addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; - ptr = ((u8 *)blob) + offset; for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { if (engine_class == OTHER_CLASS) continue; guc_class = engine_class_to_guc_class(engine_class); - - if (!blob->system_info.engine_enabled_masks[guc_class]) + if (!ads_blob_read(guc, system_info.engine_enabled_masks[guc_class])) continue; real_size = intel_engine_context_size(gt, engine_class); @@ -550,18 +546,20 @@ static void 
guc_init_golden_context(struct intel_guc *guc) if (!engine) { drm_err(>->i915->drm, "No engine state recorded for class %d!\n", engine_class); - blob->ads.eng_state_size[guc_class] = 0; - blob->ads.golden_context_lrca[guc_class] = 0; + ads_blob_write(guc, ads.eng_state_size[guc_class], 0); + ads_blob_write(guc, ads.golden_context_lrca[guc_class], 0); continue; } - GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != + GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) != real_size - LRC_SKIP_SIZE); - GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); + GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt); + addr_ggtt += alloc_size; - shmem_read(engine->default_state, 0, ptr, real_size); - ptr += alloc_size; + shmem_read_to_iosys_map(engine->default_state, 0, &guc->ads_map, + offset, real_size); + offset += alloc_size; } GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); -- cgit From 2dce68fa325ecf5be7007b9e8b13174262a185cf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:38 -0800 Subject: drm/i915/guc: Convert policies update to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use iosys_map to write the policies update so access to IO and system memory is abstracted away. Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-8-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 41 +++++++++++++++++------------- 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 5c52bee516cf..7906a4df5a53 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -130,33 +130,37 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) guc_ads_private_data_size(guc); } -static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies) +static void guc_policies_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_private *i915 = gt->i915; + u32 global_flags = 0; - policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; - policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; + ads_blob_write(guc, policies.dpc_promote_time, + GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); + ads_blob_write(guc, policies.max_num_work_items, + GLOBAL_POLICY_MAX_NUM_WI); - policies->global_flags = 0; if (i915->params.reset < 2) - policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; - policies->is_valid = 1; + ads_blob_write(guc, policies.global_flags, global_flags); + ads_blob_write(guc, policies.is_valid, 1); } void intel_guc_ads_print_policy_info(struct intel_guc *guc, struct drm_printer *dp) { - struct __guc_ads_blob *blob = guc->ads_blob; - - if (unlikely(!blob)) + if (unlikely(iosys_map_is_null(&guc->ads_map))) return; drm_printf(dp, "Global scheduling policies:\n"); - drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time); - drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items); - drm_printf(dp, " Flags = %u\n", blob->policies.global_flags); + drm_printf(dp, " DPC promote time = %u\n", + ads_blob_read(guc, 
policies.dpc_promote_time)); + drm_printf(dp, " Max num work items = %u\n", + ads_blob_read(guc, policies.max_num_work_items)); + drm_printf(dp, " Flags = %u\n", + ads_blob_read(guc, policies.global_flags)); } static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) @@ -171,23 +175,24 @@ static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) int intel_guc_global_policies_update(struct intel_guc *guc) { - struct __guc_ads_blob *blob = guc->ads_blob; struct intel_gt *gt = guc_to_gt(guc); + u32 scheduler_policies; intel_wakeref_t wakeref; int ret; - if (!blob) + if (iosys_map_is_null(&guc->ads_map)) return -EOPNOTSUPP; - GEM_BUG_ON(!blob->ads.scheduler_policies); + scheduler_policies = ads_blob_read(guc, ads.scheduler_policies); + GEM_BUG_ON(!scheduler_policies); - guc_policies_init(guc, &blob->policies); + guc_policies_init(guc); if (!intel_guc_is_ready(guc)) return 0; with_intel_runtime_pm(>->i915->runtime_pm, wakeref) - ret = guc_action_policies_update(guc, blob->ads.scheduler_policies); + ret = guc_action_policies_update(guc, scheduler_policies); return ret; } @@ -593,7 +598,7 @@ static void __guc_ads_init(struct intel_guc *guc) u32 base; /* GuC scheduling policies */ - guc_policies_init(guc, &blob->policies); + guc_policies_init(guc); /* System info */ fill_engine_enable_masks(gt, &blob->system_info); -- cgit From 4801b99588a2e022da50e43ec5f768707de3f862 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:39 -0800 Subject: drm/i915/guc: Convert engine record to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use iosys_map to read fields from the dma_blob so access to IO and system memory is abstracted away. Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-9-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 14 ++++++-------- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 3 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 17 ++++++++++------- 3 files changed, 18 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 7906a4df5a53..c61648ef3920 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -738,18 +738,16 @@ void intel_guc_ads_reset(struct intel_guc *guc) u32 intel_guc_engine_usage_offset(struct intel_guc *guc) { - struct __guc_ads_blob *blob = guc->ads_blob; - u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma); - u32 offset = base + ptr_offset(blob, engine_usage); - - return offset; + return intel_guc_ggtt_offset(guc, guc->ads_vma) + + offsetof(struct __guc_ads_blob, engine_usage); } -struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine) +struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine) { struct intel_guc *guc = &engine->gt->uc.guc; - struct __guc_ads_blob *blob = guc->ads_blob; u8 guc_class = engine_class_to_guc_class(engine->class); + size_t offset = offsetof(struct __guc_ads_blob, + engine_usage.engines[guc_class][ilog2(engine->logical_mask)]); - return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)]; + return IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offset); } diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index e74c110facff..1c64f4d6ea21 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -7,6 +7,7 @@ #define _INTEL_GUC_ADS_H_ #include <linux/types.h> +#include <linux/iosys-map.h> struct intel_guc; struct drm_printer; @@ -18,7 +19,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); void intel_guc_ads_print_policy_info(struct intel_guc *guc, struct drm_printer *p); -struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine); +struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine); u32 intel_guc_engine_usage_offset(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b3a429a92c0d..ab3cea352fb3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1139,6 +1139,9 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) *prev_start = ((u64)gt_stamp_hi << 32) | new_start; } +#define record_read(map_, field_) \ + iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_) + /* * GuC updates shared memory and KMD reads it. Since this is not synchronized, * we run into a race where the value read is inconsistent. Sometimes the @@ -1153,17 +1156,17 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine, u32 *last_in, u32 *id, u32 *total) { - struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); int i = 0; do { - *last_in = READ_ONCE(rec->last_switch_in_stamp); - *id = READ_ONCE(rec->current_context_index); - *total = READ_ONCE(rec->total_runtime); + *last_in = record_read(&rec_map, last_switch_in_stamp); + *id = record_read(&rec_map, current_context_index); + *total = record_read(&rec_map, total_runtime); - if (READ_ONCE(rec->last_switch_in_stamp) == *last_in && - READ_ONCE(rec->current_context_index) == *id && - READ_ONCE(rec->total_runtime) == *total) + if (record_read(&rec_map, last_switch_in_stamp) == *last_in && + record_read(&rec_map, current_context_index) == *id && + record_read(&rec_map, total_runtime) == *total) break; } while (++i < 6); } -- cgit From 98529e950d4cd25c05643f9811e2387df58bc6be Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:40 -0800 Subject: drm/i915/guc: Convert guc_ads_private_data_reset to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use iosys_map_memset() to zero the private data as ADS may be either on system or IO memory. 
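For readers unfamiliar with the helper: a simplified sketch of what iosys_map_memset() does internally, modeled on include/linux/iosys-map.h (condensed here, not verbatim kernel code):

	static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
					    int value, size_t len)
	{
		if (dst->is_iomem)
			memset_io(dst->vaddr_iomem + offset, value, len); /* IO memory, e.g. LMEM behind a PCI BAR */
		else
			memset(dst->vaddr + offset, value, len);          /* plain system memory */
	}

The same is_iomem dispatch underlies the other iosys_map helpers used throughout this series.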
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-10-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index c61648ef3920..d924486490c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -714,8 +714,8 @@ static void guc_ads_private_data_reset(struct intel_guc *guc) if (!size) return; - memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0, - size); + iosys_map_memset(&guc->ads_map, guc_ads_private_data_offset(guc), + 0, size); } /** -- cgit From d9a5696e7d52edf68776599f2a38b9aee1382be9 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:41 -0800 Subject: drm/i915/guc: Convert golden context prep to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the saved ads_map to prepare the golden context. One difference from the init context is that this function can be called before there is a gem object (and thus the guc->ads_map) to calculate the size of the golden context that should be allocated for that object. So in this case the function needs to be prepared for not having the system_info with enabled engines filled out. To accomplish that an info_map is prepared on the side to point either to the gem object or the local variable on the stack. This allows making fill_engine_enable_masks() always operate with an iosys_map argument. 
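Condensed from the hunk that follows, the two ways info_map ends up backed (gt is the struct intel_gt, local_info a stack variable; sketch only):

	struct guc_gt_system_info local_info;
	struct iosys_map info_map;

	if (!iosys_map_is_null(&guc->ads_map)) {
		/* ADS object exists: point info_map into the blob */
		info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
						 offsetof(struct __guc_ads_blob, system_info));
	} else {
		/* sizing pass, no object yet: back info_map with the stack copy */
		memset(&local_info, 0, sizeof(local_info));
		iosys_map_set_vaddr(&info_map, &local_info);
		fill_engine_enable_masks(gt, &info_map);
	}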
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-11-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 52 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index d924486490c1..0077a63832ad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -67,6 +67,12 @@ struct __guc_ads_blob { iosys_map_wr_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, \ field_, val_) +#define info_map_write(map_, field_, val_) \ + iosys_map_wr_field(map_, 0, struct guc_gt_system_info, field_, val_) + +#define info_map_read(map_, field_) \ + iosys_map_rd_field(map_, 0, struct guc_gt_system_info, field_) + static u32 guc_ads_regset_size(struct intel_guc *guc) { GEM_BUG_ON(!guc->ads_regset_size); @@ -417,24 +423,24 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc, } static void fill_engine_enable_masks(struct intel_gt *gt, - struct guc_gt_system_info *info) + struct iosys_map *info_map) { - info->engine_enabled_masks[GUC_RENDER_CLASS] = 1; - info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1; - info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); - info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); + info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1); + info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); + info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); + info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt)); } #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) #define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) -static int guc_prep_golden_context(struct intel_guc *guc, - struct __guc_ads_blob *blob) +static int guc_prep_golden_context(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); u32 addr_ggtt, offset; u32 total_size = 0, alloc_size, real_size; u8 engine_class, guc_class; - struct guc_gt_system_info *info, local_info; + struct guc_gt_system_info local_info; + struct iosys_map info_map; /* * Reserve the memory for the golden contexts and point GuC at it but @@ -448,14 +454,15 @@ static int guc_prep_golden_context(struct intel_guc *guc, * GuC will also validate that the LRC base + size fall within the * allowed GGTT range. 
*/ - if (blob) { + if (!iosys_map_is_null(&guc->ads_map)) { offset = guc_ads_golden_ctxt_offset(guc); addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; - info = &blob->system_info; + info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map, + offsetof(struct __guc_ads_blob, system_info)); } else { memset(&local_info, 0, sizeof(local_info)); - info = &local_info; - fill_engine_enable_masks(gt, info); + iosys_map_set_vaddr(&info_map, &local_info); + fill_engine_enable_masks(gt, &info_map); } for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { @@ -464,14 +471,14 @@ static int guc_prep_golden_context(struct intel_guc *guc, guc_class = engine_class_to_guc_class(engine_class); - if (!info->engine_enabled_masks[guc_class]) + if (!info_map_read(&info_map, engine_enabled_masks[guc_class])) continue; real_size = intel_engine_context_size(gt, engine_class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; - if (!blob) + if (iosys_map_is_null(&guc->ads_map)) continue; /* @@ -485,12 +492,15 @@ static int guc_prep_golden_context(struct intel_guc *guc, * what comes before it in the context image (which is identical * on all engines). */ - blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE; - blob->ads.golden_context_lrca[guc_class] = addr_ggtt; + ads_blob_write(guc, ads.eng_state_size[guc_class], + real_size - LRC_SKIP_SIZE); + ads_blob_write(guc, ads.golden_context_lrca[guc_class], + addr_ggtt); + addr_ggtt += alloc_size; } - if (!blob) + if (iosys_map_is_null(&guc->ads_map)) return total_size; GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); @@ -595,13 +605,15 @@ static void __guc_ads_init(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_private *i915 = gt->i915; struct __guc_ads_blob *blob = guc->ads_blob; + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map, + offsetof(struct __guc_ads_blob, system_info)); u32 base; /* GuC scheduling policies */ guc_policies_init(guc); /* System info */ - fill_engine_enable_masks(gt, &blob->system_info); + fill_engine_enable_masks(gt, &info_map); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = hweight8(gt->info.sseu.slice_mask); @@ -617,7 +629,7 @@ static void __guc_ads_init(struct intel_guc *guc) } /* Golden contexts for re-initialising after a watchdog reset */ - guc_prep_golden_context(guc, blob); + guc_prep_golden_context(guc); guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); @@ -663,7 +675,7 @@ int intel_guc_ads_create(struct intel_guc *guc) guc->ads_regset_size = ret; /* Likewise the golden contexts: */ - ret = guc_prep_golden_context(guc, NULL); + ret = guc_prep_golden_context(guc); if (ret < 0) return ret; guc->ads_golden_ctxt_size = ret; -- cgit From 58fb284c52b6262b9bd43420aa39124c39e2342a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:42 -0800 Subject: drm/i915/guc: Replace check for golden context size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the other places in this function, guc->ads_map is being protected from access when it's not yet set. However the last check is actually about guc->ads_golden_ctxt_size having been set before. These checks should always match as the size is initialized on the first call to guc_prep_golden_context(), but it's clearer if we have a single return and check for guc->ads_golden_ctxt_size. This is just a readability improvement, no change in behavior. 
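For context, the calling sequence that keeps the two checks equivalent, condensed from intel_guc_ads.c as converted above (sketch only, error handling trimmed):

	/* 1st call, from intel_guc_ads_create(): guc->ads_map is still null,
	 * so only the required size is computed and recorded. */
	ret = guc_prep_golden_context(guc);
	guc->ads_golden_ctxt_size = ret;

	/* 2nd call, from __guc_ads_init(): guc->ads_map is set, the contexts
	 * are written out and total_size must match the recorded size. */
	guc_prep_golden_context(guc);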
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-12-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 0077a63832ad..b739781bd133 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -500,10 +500,10 @@ static int guc_prep_golden_context(struct intel_guc *guc) addr_ggtt += alloc_size; } - if (iosys_map_is_null(&guc->ads_map)) - return total_size; + /* Make sure current size matches what we calculated previously */ + if (guc->ads_golden_ctxt_size) + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); - GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); return total_size; } -- cgit From c723b8ee3864bdf41cc560da958b7ef7f6138f0b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:43 -0800 Subject: drm/i915/guc: Convert mapping table to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use iosys_map to write the fields system_info.mapping_table[][]. Since we already have the info_map around where needed, just use it instead of going through guc->ads_map. Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-13-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index b739781bd133..c3c31b679e79 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -204,7 +204,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc) } static void guc_mapping_table_init(struct intel_gt *gt, - struct guc_gt_system_info *system_info) + struct iosys_map *info_map) { unsigned int i, j; struct intel_engine_cs *engine; @@ -213,14 +213,14 @@ static void guc_mapping_table_init(struct intel_gt *gt, /* Table must be set to invalid values for entries not used */ for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) - system_info->mapping_table[i][j] = - GUC_MAX_INSTANCES_PER_CLASS; + info_map_write(info_map, mapping_table[i][j], + GUC_MAX_INSTANCES_PER_CLASS); for_each_engine(engine, gt, id) { u8 guc_class = engine_class_to_guc_class(engine->class); - system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] = - engine->instance; + info_map_write(info_map, mapping_table[guc_class][ilog2(engine->logical_mask)], + engine->instance); } } @@ -631,7 +631,7 @@ static void __guc_ads_init(struct intel_guc *guc) /* Golden contexts for re-initialising after a watchdog reset */ guc_prep_golden_context(guc); - guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); + guc_mapping_table_init(guc_to_gt(guc), &info_map); base = intel_guc_ggtt_offset(guc, guc->ads_vma); -- cgit From f3d45c9d556bf6174258507e0e10519ab7f5679f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi 
Date: Wed, 16 Feb 2022 09:41:44 -0800 Subject: drm/i915/guc: Convert capture list to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use iosys_map to write the fields ads.capture_*. Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-14-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index c3c31b679e79..ec0ccdf98dfa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -580,7 +580,7 @@ static void guc_init_golden_context(struct intel_guc *guc) GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } -static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob) +static void guc_capture_list_init(struct intel_guc *guc) { int i, j; u32 addr_ggtt, offset; @@ -592,11 +592,11 @@ static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob * for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { - blob->ads.capture_instance[i][j] = addr_ggtt; - blob->ads.capture_class[i][j] = addr_ggtt; + ads_blob_write(guc, ads.capture_instance[i][j], addr_ggtt); + ads_blob_write(guc, ads.capture_class[i][j], addr_ggtt); } - blob->ads.capture_global[i] = addr_ggtt; + ads_blob_write(guc, ads.capture_global[i], addr_ggtt); } } @@ -636,7 +636,7 @@ static void __guc_ads_init(struct intel_guc *guc) base = intel_guc_ggtt_offset(guc, guc->ads_vma); /* Capture list for hang debug */ - guc_capture_list_init(guc, blob); + guc_capture_list_init(guc); /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); -- cgit From 5fc83950572a65d33ccd661cd9d8be7ab885d446 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:45 -0800 Subject: drm/i915/guc: Convert guc_mmio_reg_state_init to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the regset list is prepared, convert guc_mmio_reg_state_init() to use iosys_map to copy the array to the final location and initialize additional fields in ads.reg_state_list. v2: Just use an offset instead of temporary iosys_map. 
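The copy relies on iosys_map_memcpy_to(), which dispatches like the memset variant; roughly (modeled on include/linux/iosys-map.h, condensed, not verbatim):

	static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset,
					       const void *src, size_t len)
	{
		if (dst->is_iomem)
			memcpy_toio(dst->vaddr_iomem + dst_offset, src, len);
		else
			memcpy(dst->vaddr + dst_offset, src, len);
	}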
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-15-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index ec0ccdf98dfa..90cbb93a2945 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -383,40 +383,44 @@ fail_regset_init: return ret; } -static void guc_mmio_reg_state_init(struct intel_guc *guc, - struct __guc_ads_blob *blob) +static void guc_mmio_reg_state_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; - struct guc_mmio_reg *ads_registers; enum intel_engine_id id; u32 addr_ggtt, offset; offset = guc_ads_regset_offset(guc); addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; - ads_registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset); - memcpy(ads_registers, guc->ads_regset, guc->ads_regset_size); + iosys_map_memcpy_to(&guc->ads_map, offset, guc->ads_regset, + guc->ads_regset_size); for_each_engine(engine, gt, id) { u32 count = guc->ads_regset_count[id]; - struct guc_mmio_reg_set *ads_reg_set; u8 guc_class; /* Class index is checked in class converter */ GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS); guc_class = engine_class_to_guc_class(engine->class); - ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance]; if (!count) { - ads_reg_set->address = 0; - ads_reg_set->count = 0; + ads_blob_write(guc, + ads.reg_state_list[guc_class][engine->instance].address, + 0); + ads_blob_write(guc, + ads.reg_state_list[guc_class][engine->instance].count, + 0); continue; } - ads_reg_set->address = addr_ggtt; - ads_reg_set->count = count; + ads_blob_write(guc, + ads.reg_state_list[guc_class][engine->instance].address, + addr_ggtt); + ads_blob_write(guc, + ads.reg_state_list[guc_class][engine->instance].count, + count); addr_ggtt += count * sizeof(struct guc_mmio_reg); } @@ -643,7 +647,7 @@ static void __guc_ads_init(struct intel_guc *guc) blob->ads.gt_system_info = base + ptr_offset(blob, system_info); /* MMIO save/restore list */ - guc_mmio_reg_state_init(guc, blob); + guc_mmio_reg_state_init(guc); /* Private Data */ blob->ads.private_data = base + guc_ads_private_data_offset(guc); -- cgit From 691ebb1109c97da2943e2d753add9ad5fc63200c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:46 -0800 Subject: drm/i915/guc: Convert __guc_ads_init to iosys_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all the called functions from __guc_ads_init() are converted to use ads_map, stop using ads_blob in __guc_ads_init(). 
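For reference, what a single ads_blob_write() boils down to, given the wrapper introduced earlier in the series (a rough expansion, not verbatim):

	ads_blob_write(guc, ads.private_data, base + guc_ads_private_data_offset(guc));

	/* expands to approximately: */
	iosys_map_wr_field(&guc->ads_map, 0, struct __guc_ads_blob, ads.private_data,
			   base + guc_ads_private_data_offset(guc));

which computes offsetof(struct __guc_ads_blob, ads.private_data) and writes the value through memcpy_toio() or memcpy() depending on whether the map is backed by IO or system memory.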
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-16-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 90cbb93a2945..d0593063c0dc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -608,7 +608,6 @@ static void __guc_ads_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_private *i915 = gt->i915; - struct __guc_ads_blob *blob = guc->ads_blob; struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offsetof(struct __guc_ads_blob, system_info)); u32 base; @@ -619,17 +618,18 @@ static void __guc_ads_init(struct intel_guc *guc) /* System info */ fill_engine_enable_masks(gt, &info_map); - blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = - hweight8(gt->info.sseu.slice_mask); - blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] = - gt->info.vdbox_sfc_access; + ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED], + hweight8(gt->info.sseu.slice_mask)); + ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK], + gt->info.vdbox_sfc_access); if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) { u32 distdbreg = intel_uncore_read(gt->uncore, GEN12_DIST_DBS_POPULATED); - blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] = - ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) & - GEN12_DOORBELLS_PER_SQIDI) + 1; + ads_blob_write(guc, + system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], + ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) + & GEN12_DOORBELLS_PER_SQIDI) + 1); } /* Golden contexts for re-initialising after a watchdog reset */ @@ -643,14 +643,17 @@ static void __guc_ads_init(struct intel_guc *guc) /* ADS */ - blob->ads.scheduler_policies = base + ptr_offset(blob, policies); - blob->ads.gt_system_info = base + ptr_offset(blob, system_info); + ads_blob_write(guc, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(guc, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); /* MMIO save/restore list */ guc_mmio_reg_state_init(guc); /* Private Data */ - blob->ads.private_data = base + guc_ads_private_data_offset(guc); + ads_blob_write(guc, ads.private_data, base + + guc_ads_private_data_offset(guc)); i915_gem_object_flush_map(guc->ads_vma->obj); } -- cgit From 0df0c76cc3fd99d72bc1b18eae25ee3e8fb9d1f7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:47 -0800 Subject: drm/i915/guc: Remove plain ads_blob pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we access the content of the GuC ADS either through the iosys_map API or through a temporary buffer. Remove guc->ads_blob as there shouldn't be updates using the bare pointer anymore. 
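The reason the bare pointer has to go, in miniature: a plain struct store compiles to ordinary CPU writes, which are not valid on all architectures for IO memory, while the accessor picks the right primitive. An illustrative comparison:

	blob->ads.private_data = v;               /* correct for system memory only */
	ads_blob_write(guc, ads.private_data, v); /* correct for both lmem (IO) and smem */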
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-17-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 3 +-- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f857e9190750..bf7079480d47 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -147,8 +147,7 @@ struct intel_guc { /** @ads_vma: object allocated to hold the GuC ADS */ struct i915_vma *ads_vma; - /** @ads_blob: contents of the GuC ADS */ - struct __guc_ads_blob *ads_blob; + /** @ads_map: contents of the GuC ADS */ struct iosys_map ads_map; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index d0593063c0dc..847e00390b00 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -667,6 +667,7 @@ static void __guc_ads_init(struct intel_guc *guc) */ int intel_guc_ads_create(struct intel_guc *guc) { + void *ads_blob; u32 size; int ret; @@ -691,14 +692,14 @@ int intel_guc_ads_create(struct intel_guc *guc) size = guc_ads_blob_size(guc); ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, - (void **)&guc->ads_blob); + &ads_blob); if (ret) return ret; if (i915_gem_object_is_lmem(guc->ads_vma->obj)) - iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)guc->ads_blob); + iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)ads_blob); else - iosys_map_set_vaddr(&guc->ads_map, guc->ads_blob); + iosys_map_set_vaddr(&guc->ads_map, ads_blob); __guc_ads_init(guc); @@ -720,7 +721,6 @@ void intel_guc_ads_init_late(struct intel_guc *guc) void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); - guc->ads_blob = NULL; iosys_map_clear(&guc->ads_map); kfree(guc->ads_regset); } -- cgit From d2cc01e1794bd13199f7568298614f1bdcea1683 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 10:34:43 +0000 Subject: drm/i915: apply PM_EARLY for non-GTT mappable objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On DG2 we allow objects that are smaller than the min_page_size, under the premise that these are never mapped by the GTT, like with the paging structures. Currently the suspend-resume path will try to map such objects through the migration vm, which hits: [ 560.529217] kernel BUG at drivers/gpu/drm/i915/gt/intel_migrate.c:431! 
[ 560.536081] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 560.541629] CPU: 4 PID: 2062 Comm: rtcwake Tainted: G W 5.17.0-rc5-demarchi+ #175 [ 560.550716] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWR1.R00.X220.B00.2103302221 03/30/2021 [ 560.563627] RIP: 0010:emit_pte+0x2e7/0x380 [i915] [ 560.568665] Code: ee 02 48 89 69 04 83 c6 05 83 c0 05 39 f0 0f 4f c6 48 8b 73 08 39 d0 0f 4f c2 44 89 f2 4c 8d 4a ff 49 85 f1 0f 84 62 fe ff ff <0f> 0b 48 c7 03 00 00 00 00 4d 89 c6 8b 01 48 29 ce 48 8d 57 0c 48 [ 560.587691] RSP: 0018:ffffc9000104f8a0 EFLAGS: 00010206 [ 560.592906] RAX: 0000000000000040 RBX: ffffc9000104f908 RCX: ffffc900025114d0 [ 560.600024] RDX: 0000000000010000 RSI: 00000003f9fe2000 RDI: ffffc900025114dc [ 560.607458] RBP: 0000000001840000 R08: ffff88810f335540 R09: 000000000000ffff [ 560.614865] R10: 000000000000081b R11: 0000000000000001 R12: 000000000000081b [ 560.622300] R13: 0000000000000000 R14: 0000000000010000 R15: ffff888107c3e240 [ 560.629716] FS: 00007f5b7c086580(0000) GS:ffff88846dc00000(0000) knlGS:0000000000000000 [ 560.638090] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 560.644132] CR2: 00007f3ab0a133a8 CR3: 000000010a43e003 CR4: 00000000003706e0 [ 560.651590] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 560.659002] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 560.666438] Call Trace: [ 560.668885] [ 560.670983] intel_context_migrate_copy+0x1b1/0x4c0 [i915] [ 560.676794] __i915_ttm_move+0x628/0x790 [i915] [ 560.681704] ? dma_resv_iter_next+0x8f/0xb0 [ 560.686223] ? dma_resv_iter_first+0xe5/0x140 [ 560.690894] ? i915_deps_add_resv+0x4b/0x110 [i915] [ 560.696147] ? dma_resv_reserve_shared+0x161/0x310 [ 560.701228] i915_gem_obj_copy_ttm+0x10f/0x220 [i915] [ 560.706650] i915_ttm_backup+0x191/0x2f0 [i915] [ 560.711558] i915_gem_process_region+0x266/0x3b0 [i915] [ 560.717153] ? verify_cpu+0xf0/0x100 [ 560.721040] ? pci_pm_resume_early+0x20/0x20 [ 560.725603] i915_ttm_backup_region+0x47/0x70 [i915] [ 560.730927] i915_gem_backup_suspend+0x141/0x170 [i91 For now let's just force the memcpy path for such objects during suspend-resume. Fixes: 00e27ad85bc9 ("drm/i915/migrate: add acceleration support for DG2") Reported-by: Lucas De Marchi Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220225103443.225228-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_region.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index a4350227e9ae..6d65c5849874 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -67,6 +67,17 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!obj) return ERR_PTR(-ENOMEM); + /* + * Anything smaller than the min_page_size can't be freely inserted into + * the GTT, due to alignemnt restrictions. For such special objects, + * make sure we force memcpy based suspend-resume. In the future we can + * revisit this, either by allowing special mis-aligned objects in the + * migration path, or by mapping all of LMEM upfront using cheap 1G + * GTT entries. 
+ */ + if (default_page_size < mem->min_page_size) + flags |= I915_BO_ALLOC_PM_EARLY; + err = mem->ops->init_object(mem, obj, size, page_size, flags); if (err) goto err_object_free; -- cgit From 235582ca96214b50fa03ea77a5e85e59d94cf358 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:54:56 +0000 Subject: drm/i915: add io_size plumbing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With small LMEM-BAR we need to be able to differentiate between the total size of LMEM, and how much of it is CPU mappable. The end goal is to be able to utilize the entire range, even if part of is it not CPU accessible. v2: also update intelfb_create Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 8 +++++--- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 5 ++++- drivers/gpu/drm/i915/intel_memory_region.c | 6 ++++-- drivers/gpu/drm/i915/intel_memory_region.h | 2 ++ drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 ++++---- drivers/gpu/drm/i915/selftests/mock_region.c | 6 ++++-- drivers/gpu/drm/i915/selftests/mock_region.h | 3 ++- 11 files changed, 29 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index fd5bc7acf08d..2cd62a187df3 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -265,7 +265,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct intel_memory_region *mem = obj->mm.region; info->apertures->ranges[0].base = mem->io_start; - info->apertures->ranges[0].size = mem->total; + info->apertures->ranges[0].size = mem->io_size; /* Use fbdev's framebuffer from lmem for discrete */ info->fix.smem_start = diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4efa821f3cb1..3a1c782ed791 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -699,7 +699,7 @@ struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, { return intel_memory_region_create(i915, 0, totalram_pages() << PAGE_SHIFT, - PAGE_SIZE, 0, + PAGE_SIZE, 0, 0, type, instance, &shmem_region_ops); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 636cdf8a73b0..0bf8f61134af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -492,6 +492,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) /* Exclude the reserved region from driver use */ mem->region.end = reserved_base - 1; + mem->io_size = resource_size(&mem->region); /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. 
*/ @@ -751,7 +752,7 @@ static int init_stolen_lmem(struct intel_memory_region *mem) if (!io_mapping_init_wc(&mem->iomap, mem->io_start, - resource_size(&mem->region))) + mem->io_size)) return -EIO; /* @@ -806,7 +807,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, I915_GTT_PAGE_SIZE_4K; mem = intel_memory_region_create(i915, lmem_base, lmem_size, - min_page_size, io_start, + min_page_size, + io_start, lmem_size, type, instance, &i915_region_stolen_lmem_ops); if (IS_ERR(mem)) @@ -837,7 +839,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, mem = intel_memory_region_create(i915, intel_graphics_stolen_res.start, resource_size(&intel_graphics_stolen_res), - PAGE_SIZE, 0, type, instance, + PAGE_SIZE, 0, 0, type, instance, &i915_region_stolen_smem_ops); if (IS_ERR(mem)) return mem; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 8419096d4056..53c183f13a38 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1103,7 +1103,7 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915, mr = intel_memory_region_create(i915, 0, totalram_pages() << PAGE_SHIFT, - PAGE_SIZE, 0, + PAGE_SIZE, 0, 0, type, instance, &ttm_system_region_ops); if (IS_ERR(mr)) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 0528fe1fc9b3..dbbae53f820a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -500,7 +500,7 @@ static int igt_mock_memory_region_huge_pages(void *arg) int bit; int err = 0; - mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); if (IS_ERR(mem)) { pr_err("%s failed to create memory region\n", __func__); return PTR_ERR(mem); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index fc00888ca4b2..6cecfdae07ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -32,7 +32,7 @@ region_lmem_init(struct intel_memory_region *mem) if (!io_mapping_init_wc(&mem->iomap, mem->io_start, - resource_size(&mem->region))) + mem->io_size)) return -EIO; ret = intel_region_ttm_init(mem); @@ -134,6 +134,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size, min_page_size, io_start, + lmem_size, INTEL_MEMORY_LOCAL, 0, &intel_region_lmem_ops); @@ -147,6 +148,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region); drm_dbg(&i915->drm, "Local memory IO start: %pa\n", &mem->io_start); + drm_info(&i915->drm, "Local memory IO size: %pa\n", + &mem->io_size); drm_info(&i915->drm, "Local memory available: %pa\n", &lmem_size); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 16ab62d605b1..1c841f68169a 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -100,10 +100,10 @@ static int iomemtest(struct intel_memory_region *mem, resource_size_t last, page; int err; - if (resource_size(&mem->region) < PAGE_SIZE) + if (mem->io_size < PAGE_SIZE) return 0; - last = resource_size(&mem->region) - PAGE_SIZE; + last = mem->io_size - PAGE_SIZE; /* * Quick test to check read/write access to the iomap (backing store). 
@@ -221,6 +221,7 @@ intel_memory_region_create(struct drm_i915_private *i915, resource_size_t size, resource_size_t min_page_size, resource_size_t io_start, + resource_size_t io_size, u16 type, u16 instance, const struct intel_memory_region_ops *ops) @@ -235,6 +236,7 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->i915 = i915; mem->region = (struct resource)DEFINE_RES_MEM(start, size); mem->io_start = io_start; + mem->io_size = io_size; mem->min_page_size = min_page_size; mem->ops = ops; mem->total = size; diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 06464b8865fc..21dcbd620758 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -68,6 +68,7 @@ struct intel_memory_region { struct resource region; resource_size_t io_start; + resource_size_t io_size; resource_size_t min_page_size; resource_size_t total; resource_size_t avail; @@ -98,6 +99,7 @@ intel_memory_region_create(struct drm_i915_private *i915, resource_size_t size, resource_size_t min_page_size, resource_size_t io_start, + resource_size_t io_size, u16 type, u16 instance, const struct intel_memory_region_ops *ops); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 7acba1d2135e..247f65f02bbf 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -170,7 +170,7 @@ static int igt_mock_reserve(void *arg) if (!order) return 0; - mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); if (IS_ERR(mem)) { pr_err("failed to create memory region\n"); err = PTR_ERR(mem); @@ -383,7 +383,7 @@ static int igt_mock_splintered_region(void *arg) */ size = (SZ_4G - 1) & PAGE_MASK; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -471,7 +471,7 @@ static int igt_mock_max_segment(void *arg) */ size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -1188,7 +1188,7 @@ int intel_memory_region_mock_selftests(void) if (!i915) return -ENOMEM; - mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); if (IS_ERR(mem)) { pr_err("failed to create memory region\n"); err = PTR_ERR(mem); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 19bff8afcaaa..467eeae6d5f0 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -107,7 +107,8 @@ mock_region_create(struct drm_i915_private *i915, resource_size_t start, resource_size_t size, resource_size_t min_page_size, - resource_size_t io_start) + resource_size_t io_start, + resource_size_t io_size) { int instance = ida_alloc_max(&i915->selftest.mock_region_instances, TTM_NUM_MEM_TYPES - TTM_PL_PRIV - 1, @@ -117,6 +118,7 @@ mock_region_create(struct drm_i915_private *i915, return ERR_PTR(instance); return intel_memory_region_create(i915, start, size, min_page_size, - io_start, INTEL_MEMORY_MOCK, instance, + io_start, io_size, + INTEL_MEMORY_MOCK, instance, &mock_region_ops); } diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h 
b/drivers/gpu/drm/i915/selftests/mock_region.h index 329bf74dfaca..e36c3a433551 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.h +++ b/drivers/gpu/drm/i915/selftests/mock_region.h @@ -16,6 +16,7 @@ mock_region_create(struct drm_i915_private *i915, resource_size_t start, resource_size_t size, resource_size_t min_page_size, - resource_size_t io_start); + resource_size_t io_start, + resource_size_t io_size); #endif /* !__MOCK_REGION_H */ -- cgit From 3312a4ac8a464daa66b97452ec148b69c5959bec Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:54:57 +0000 Subject: drm/i915/ttm: require mappable by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On devices with non-mappable LMEM ensure we always allocate the pages within the mappable portion. For now we assume that all LMEM buffers will require CPU access, which is also inline with pretty much all current kernel internal users. In the next patch we will introduce a new flag to override this behaviour. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++++ drivers/gpu/drm/i915/intel_region_ttm.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 53c183f13a38..0363987dee97 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -130,6 +130,10 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr, if (flags & I915_BO_ALLOC_CONTIGUOUS) place->flags = TTM_PL_FLAG_CONTIGUOUS; + if (mr->io_size && mr->io_size < mr->total) { + place->fpfn = 0; + place->lpfn = mr->io_size >> PAGE_SHIFT; + } } static void diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 7dea07c579aa..5a40310d6fdd 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -199,6 +199,11 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret; + if (mem->io_size && mem->io_size < mem->total) { + place.fpfn = 0; + place.lpfn = mem->io_size >> PAGE_SHIFT; + } + mock_bo.base.size = size; mock_bo.bdev = &mem->i915->bdev; place.flags = flags; -- cgit From 30b9d1b3ef374403652fc10fa36b9a5f32cc274d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:54:58 +0000 Subject: drm/i915: add I915_BO_ALLOC_GPU_ONLY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user doesn't require CPU access for the buffer, then ALLOC_GPU_ONLY should be used, in order to prioritise allocating in the non-mappable portion of LMEM, on devices with small BAR. v2(Thomas): - The BO_ALLOC_TOPDOWN naming here is poor, since this is pure lies on systems that don't even have small BAR. A better name is GPU_ONLY, which is accurate regardless of the configuration. 
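Typical use, as in the intel_gt.c scratch allocation converted below: a kernel-internal buffer the CPU never touches simply adds the flag at creation time:

	obj = i915_gem_object_create_lmem(i915, size,
					  I915_BO_ALLOC_VOLATILE |
					  I915_BO_ALLOC_GPU_ONLY);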
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 17 ++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_region.c | 5 +++++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 ++++++++++--- drivers/gpu/drm/i915/gt/intel_gt.c | 4 +++- drivers/gpu/drm/i915/i915_vma.c | 3 +++ drivers/gpu/drm/i915/intel_region_ttm.c | 11 ++++++++--- drivers/gpu/drm/i915/selftests/mock_region.c | 7 +------ 8 files changed, 45 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0098a32490f0..fd54eb8f4826 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -319,16 +319,23 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_PM_VOLATILE BIT(4) /* Object needs to be restored early using memcpy during resume */ #define I915_BO_ALLOC_PM_EARLY BIT(5) +/* + * Object is likely never accessed by the CPU. This will prioritise the BO to be + * allocated in the non-mappable portion of lmem. This is merely a hint, and if + * dealing with userspace objects the CPU fault handler is free to ignore this. + */ +#define I915_BO_ALLOC_GPU_ONLY BIT(6) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ - I915_BO_ALLOC_PM_EARLY) -#define I915_BO_READONLY BIT(6) -#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(8) -#define I915_BO_WAS_BOUND_BIT 9 + I915_BO_ALLOC_PM_EARLY | \ + I915_BO_ALLOC_GPU_ONLY) +#define I915_BO_READONLY BIT(7) +#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! 
*/ +#define I915_BO_PROTECTED BIT(9) +#define I915_BO_WAS_BOUND_BIT 10 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 183b861620b8..97c820eee115 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -358,6 +358,9 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO); + if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_GPU_ONLY)) + return ERR_PTR(-EINVAL); + assert_object_held(obj); pinned = !(type & I915_MAP_OVERRIDE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 6d65c5849874..6cf94469d5a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -45,6 +45,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem, GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + if (WARN_ON_ONCE(flags & I915_BO_ALLOC_GPU_ONLY && + (flags & I915_BO_ALLOC_CPU_CLEAR || + flags & I915_BO_ALLOC_PM_EARLY))) + return ERR_PTR(-EINVAL); + if (!mem) return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 0363987dee97..a13e0204a139 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -129,10 +129,14 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr, place->mem_type = intel_region_to_ttm_type(mr); if (flags & I915_BO_ALLOC_CONTIGUOUS) - place->flags = TTM_PL_FLAG_CONTIGUOUS; + place->flags |= TTM_PL_FLAG_CONTIGUOUS; if (mr->io_size && mr->io_size < mr->total) { - place->fpfn = 0; - place->lpfn = mr->io_size >> PAGE_SHIFT; + if (flags & I915_BO_ALLOC_GPU_ONLY) { + place->flags |= TTM_PL_FLAG_TOPDOWN; + } else { + place->fpfn = 0; + place->lpfn = mr->io_size >> PAGE_SHIFT; + } } } @@ -890,6 +894,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) if (!obj) return VM_FAULT_SIGBUS; + if (obj->flags & I915_BO_ALLOC_GPU_ONLY) + return -EINVAL; + /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE)) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ee46f933d070..8a2483ccbfb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -456,7 +456,9 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) struct i915_vma *vma; int ret; - obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE); + obj = i915_gem_object_create_lmem(i915, size, + I915_BO_ALLOC_VOLATILE | + I915_BO_ALLOC_GPU_ONLY); if (IS_ERR(obj)) obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 3558b16a929c..dc28e6e3efef 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -540,6 +540,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void __iomem *ptr; int err; + if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY)) + return IO_ERR_PTR(-EINVAL); + if (!i915_gem_object_is_lmem(vma->obj)) { if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 5a40310d6fdd..6cfe9090ede7 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ 
b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -199,14 +199,19 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret; + if (flags & I915_BO_ALLOC_CONTIGUOUS) + place.flags |= TTM_PL_FLAG_CONTIGUOUS; if (mem->io_size && mem->io_size < mem->total) { - place.fpfn = 0; - place.lpfn = mem->io_size >> PAGE_SHIFT; + if (flags & I915_BO_ALLOC_GPU_ONLY) { + place.flags |= TTM_PL_FLAG_TOPDOWN; + } else { + place.fpfn = 0; + place.lpfn = mem->io_size >> PAGE_SHIFT; + } } mock_bo.base.size = size; mock_bo.bdev = &mem->i915->bdev; - place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC) diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 467eeae6d5f0..f64325491f35 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -22,17 +22,12 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, static int mock_region_get_pages(struct drm_i915_gem_object *obj) { - unsigned int flags; struct sg_table *pages; int err; - flags = 0; - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= TTM_PL_FLAG_CONTIGUOUS; - obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, obj->base.size, - flags); + obj->flags); if (IS_ERR(obj->mm.res)) return PTR_ERR(obj->mm.res); -- cgit From 26ffcbbef712f6fb52f16e6f7d5cde736b80d8c4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:54:59 +0000 Subject: drm/i915/buddy: track available visible size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the total amount of available visible memory, and also track per-resource the amount of used visible memory. For now this is useful for our debug output, and deciding if it is even worth calling into the buddy allocator. In the future tracking the per-resource visible usage will be useful for when deciding if we should attempt to evict certain buffers. v2: - s/place->lpfn/lpfn/, that way we can avoid scanning the list if the entire range is already mappable. - Move the end declaration inside the if block(Thomas). - Make sure to also account for reserved memory. 
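The clamping at the heart of the accounting can be read as a small helper, distilled here from the allocation hunk below (all counts in pages, updated under bman->lock; sketch only):

	/* pages of block [start, end) that fall inside the CPU-visible window */
	static unsigned long visible_pages(unsigned long start, unsigned long end,
					   unsigned long visible_size)
	{
		if (start >= visible_size)
			return 0;
		return min(end, visible_size) - start;
	}

With this, the manager maintains the invariant that visible_avail equals visible_size minus visible_reserved minus the sum of each resource's used_visible_size.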
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-4-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 68 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 8 +++- drivers/gpu/drm/i915/intel_region_ttm.c | 1 + 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 76d5211c25eb..e47a3d46c6ff 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -19,6 +19,9 @@ struct i915_ttm_buddy_manager { struct drm_buddy mm; struct list_head reserved; struct mutex lock; + unsigned long visible_size; + unsigned long visible_avail; + unsigned long visible_reserved; u64 default_page_size; }; @@ -87,6 +90,12 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, n_pages = size >> ilog2(mm->chunk_size); mutex_lock(&bman->lock); + if (lpfn <= bman->visible_size && n_pages > bman->visible_avail) { + mutex_unlock(&bman->lock); + err = -ENOSPC; + goto err_free_res; + } + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, (u64)lpfn << PAGE_SHIFT, (u64)n_pages << PAGE_SHIFT, @@ -107,6 +116,31 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, mutex_unlock(&bman->lock); } + if (lpfn <= bman->visible_size) { + bman_res->used_visible_size = bman_res->base.num_pages; + } else { + struct drm_buddy_block *block; + + list_for_each_entry(block, &bman_res->blocks, link) { + unsigned long start = + drm_buddy_block_offset(block) >> PAGE_SHIFT; + + if (start < bman->visible_size) { + unsigned long end = start + + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + + bman_res->used_visible_size += + min(end, bman->visible_size) - start; + } + } + } + + if (bman_res->used_visible_size) { + mutex_lock(&bman->lock); + bman->visible_avail -= bman_res->used_visible_size; + mutex_unlock(&bman->lock); + } + *res = &bman_res->base; return 0; @@ -128,6 +162,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, mutex_lock(&bman->lock); drm_buddy_free_list(&bman->mm, &bman_res->blocks); + bman->visible_avail += bman_res->used_visible_size; mutex_unlock(&bman->lock); ttm_resource_fini(man, res); @@ -143,6 +178,12 @@ static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man, mutex_lock(&bman->lock); drm_printf(printer, "default_page_size: %lluKiB\n", bman->default_page_size >> 10); + drm_printf(printer, "visible_avail: %lluMiB\n", + (u64)bman->visible_avail << PAGE_SHIFT >> 20); + drm_printf(printer, "visible_size: %lluMiB\n", + (u64)bman->visible_size << PAGE_SHIFT >> 20); + drm_printf(printer, "visible_reserved: %lluMiB\n", + (u64)bman->visible_reserved << PAGE_SHIFT >> 20); drm_buddy_print(&bman->mm, printer); @@ -164,6 +205,7 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { * @type: Memory type we want to manage * @use_tt: Set use_tt for the manager * @size: The size in bytes to manage + * @visible_size: The CPU visible size in bytes to manage * @default_page_size: The default minimum page size in bytes for allocations, * this must be at least as large as @chunk_size, and can be overridden by * setting the BO page_alignment, to be larger or smaller as needed. 
@@ -187,7 +229,7 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { */ int i915_ttm_buddy_man_init(struct ttm_device *bdev, unsigned int type, bool use_tt, - u64 size, u64 default_page_size, + u64 size, u64 visible_size, u64 default_page_size, u64 chunk_size) { struct ttm_resource_manager *man; @@ -206,6 +248,8 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev, INIT_LIST_HEAD(&bman->reserved); GEM_BUG_ON(default_page_size < chunk_size); bman->default_page_size = default_page_size; + bman->visible_size = visible_size >> PAGE_SHIFT; + bman->visible_avail = bman->visible_size; man = &bman->manager; man->use_tt = use_tt; @@ -250,6 +294,8 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) mutex_lock(&bman->lock); drm_buddy_free_list(mm, &bman->reserved); drm_buddy_fini(mm); + bman->visible_avail += bman->visible_reserved; + WARN_ON_ONCE(bman->visible_avail != bman->visible_size); mutex_unlock(&bman->lock); ttm_resource_manager_cleanup(man); @@ -273,6 +319,7 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, { struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); struct drm_buddy *mm = &bman->mm; + unsigned long fpfn = start >> PAGE_SHIFT; unsigned long flags = 0; int ret; @@ -284,8 +331,27 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, size, mm->chunk_size, &bman->reserved, flags); + + if (fpfn < bman->visible_size) { + unsigned long lpfn = fpfn + (size >> PAGE_SHIFT); + unsigned long visible = min(lpfn, bman->visible_size) - fpfn; + + bman->visible_reserved += visible; + bman->visible_avail -= visible; + } mutex_unlock(&bman->lock); return ret; } +/** + * i915_ttm_buddy_man_visible_size - Return the size of the CPU visible portion + * in pages. + * @man: The buddy allocator ttm manager + */ +u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + + return bman->visible_size; +} diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h index 72c90b432e87..35fe03a6a78c 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -21,6 +21,8 @@ struct drm_buddy; * @base: struct ttm_resource base class we extend * @blocks: the list of struct i915_buddy_block for this resource/allocation * @flags: DRM_BUDDY_*_ALLOCATION flags + * @used_visible_size: How much of this resource, if any, uses the CPU visible + * portion, in pages. 
* @mm: the struct i915_buddy_mm for this resource * * Extends the struct ttm_resource to manage an address space allocation with @@ -30,6 +32,7 @@ struct i915_ttm_buddy_resource { struct ttm_resource base; struct list_head blocks; unsigned long flags; + unsigned long used_visible_size; struct drm_buddy *mm; }; @@ -48,11 +51,14 @@ to_ttm_buddy_resource(struct ttm_resource *res) int i915_ttm_buddy_man_init(struct ttm_device *bdev, unsigned type, bool use_tt, - u64 size, u64 default_page_size, u64 chunk_size); + u64 size, u64 visible_size, + u64 default_page_size, u64 chunk_size); int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type); int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, u64 start, u64 size); +u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man); + #endif diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 6cfe9090ede7..737ef3f4ab54 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -87,6 +87,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) ret = i915_ttm_buddy_man_init(bdev, mem_type, false, resource_size(&mem->region), mem->io_size, mem->min_page_size, PAGE_SIZE); if (ret) return ret; -- cgit From f9eb742988e20fb86926de8ad18d7e93b53d1a62 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:55:00 +0000 Subject: drm/i915/buddy: adjust res->start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differentiate between mappable and non-mappable resources. Also, if this is an actual range allocation, ensure we set res->start to the starting pfn. Later, when we need to do non-mappable -> mappable moves, we want TTM to see that the current placement is not compatible, which should result in an actual move instead of being turned into a noop. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-5-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index e47a3d46c6ff..0ac6b2463fd5 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -141,6 +141,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, mutex_unlock(&bman->lock); } + if (place->lpfn - place->fpfn == n_pages) + bman_res->base.start = place->fpfn; + else if (lpfn <= bman->visible_size) + bman_res->base.start = 0; + else + bman_res->base.start = bman->visible_size; + *res = &bman_res->base; return 0; -- cgit From f199bf55104d480370515bc736418808e5cfa6b9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:55:01 +0000 Subject: drm/i915/buddy: tweak 2big check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we get -EINVAL, instead of the more useful -E2BIG, if the allocation doesn't fit within the pfn range, like with mappable lmem. The hugepages selftest, for example, needs this to know if a smaller size is needed. 
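A sketch of why the errno distinction matters to a caller probing for a workable size (hypothetical retry loop; the hugepages selftest follows a similar pattern, and igt_object_create(), ps and flags are borrowed from the surrounding selftests):

	for (;;) {
		obj = igt_object_create(mr, &objects, size, flags);
		if (!IS_ERR(obj))
			break;
		if (PTR_ERR(obj) == -E2BIG && size > ps) {
			size >>= 1;	/* can never fit the pfn range: shrink and retry */
			continue;
		}
		return PTR_ERR(obj);	/* e.g. -ENOSPC: it could fit, region is just full */
	}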
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-6-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 0ac6b2463fd5..92d49a3c378c 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -82,7 +82,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, lpfn = pages; } - if (size > mm->size) { + if (size > lpfn << PAGE_SHIFT) { err = -E2BIG; goto err_free_res; } -- cgit From 2d45f66887055e169173345a7e6249ecf3ee7e21 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Feb 2022 14:55:02 +0000 Subject: drm/i915/selftests: mock test io_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that mappable vs non-mappable matches our expectations. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220225145502.331818-7-matthew.auld@intel.com --- .../gpu/drm/i915/selftests/intel_memory_region.c | 143 +++++++++++++++++++++ 1 file changed, 143 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 247f65f02bbf..56dec9723601 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -17,6 +17,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" #include "gt/intel_engine_pm.h" @@ -512,6 +513,147 @@ out_put: return err; } +static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = obj->mm.region; + struct i915_ttm_buddy_resource *bman_res = + to_ttm_buddy_resource(obj->mm.res); + struct drm_buddy *mm = bman_res->mm; + struct drm_buddy_block *block; + u64 total; + + total = 0; + list_for_each_entry(block, &bman_res->blocks, link) { + u64 start = drm_buddy_block_offset(block); + u64 end = start + drm_buddy_block_size(mm, block); + + if (start < mr->io_size) + total += min_t(u64, end, mr->io_size) - start; + } + + return total; +} + +static int igt_mock_io_size(void *arg) +{ + struct intel_memory_region *mr = arg; + struct drm_i915_private *i915 = mr->i915; + struct drm_i915_gem_object *obj; + u64 mappable_theft_total; + u64 io_size; + u64 total; + u64 ps; + u64 rem; + u64 size; + I915_RND_STATE(prng); + LIST_HEAD(objects); + int err = 0; + + ps = SZ_4K; + if (i915_prandom_u64_state(&prng) & 1) + ps = SZ_64K; /* For something like DG2 */ + + div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total); + total = round_down(total, ps); + total = max_t(u64, total, SZ_1G); + + div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size); + io_size = round_down(io_size, ps); + io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */ + + pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n", + __func__, ps, io_size, total); + + mr = mock_region_create(i915, 0, total, ps, 0, io_size); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto 
out_err; + } + + mappable_theft_total = 0; + rem = total - io_size; + do { + div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size); + size = round_down(size, ps); + size = max(size, ps); + + obj = igt_object_create(mr, &objects, size, + I915_BO_ALLOC_GPU_ONLY); + if (IS_ERR(obj)) { + pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n", + __func__, rem, size); + err = PTR_ERR(obj); + goto out_close; + } + + mappable_theft_total += igt_object_mappable_total(obj); + rem -= size; + } while (rem); + + pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n", + __func__, + (u64)mappable_theft_total >> 20, + (u64)io_size >> 20, + (u64)total >> 20); + + /* + * Even if we allocate all of the non-mappable portion, we should still + * be able to dip into the mappable portion. + */ + obj = igt_object_create(mr, &objects, io_size, + I915_BO_ALLOC_GPU_ONLY); + if (IS_ERR(obj)) { + pr_err("%s allocation unexpectedly failed\n", __func__); + err = PTR_ERR(obj); + goto out_close; + } + + close_objects(mr, &objects); + + rem = io_size; + do { + div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size); + size = round_down(size, ps); + size = max(size, ps); + + obj = igt_object_create(mr, &objects, size, 0); + if (IS_ERR(obj)) { + pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n", + __func__, rem, size); + err = PTR_ERR(obj); + goto out_close; + } + + if (igt_object_mappable_total(obj) != size) { + pr_err("%s allocation is not mappable(size=%llx)\n", + __func__, size); + err = -EINVAL; + goto out_close; + } + rem -= size; + } while (rem); + + /* + * We assume CPU access is required by default, which should result in a + * failure here, even though the non-mappable portion is free. + */ + obj = igt_object_create(mr, &objects, ps, 0); + if (!IS_ERR(obj)) { + pr_err("%s allocation unexpectedly succeeded\n", __func__); + err = -EINVAL; + goto out_close; + } + +out_close: + close_objects(mr, &objects); + intel_memory_region_destroy(mr); +out_err: + if (err == -ENOMEM) + err = 0; + + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -1179,6 +1321,7 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), SUBTEST(igt_mock_max_segment), + SUBTEST(igt_mock_io_size), }; struct intel_memory_region *mem; struct drm_i915_private *i915; -- cgit From 1b279f6ad467535c3b8a66b4edefaca2cdd5bdc3 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 16 Feb 2022 10:15:04 -0800 Subject: drm/i915/guc/slpc: Correct the param count for unset param SLPC unset param H2G only needs one parameter - the id of the param. 
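The bug is easier to see with the message layout spelled out. A rough standalone sketch — the bit positions and event IDs here are made up for illustration; the real packing is done by the SLPC_EVENT()/FIELD_PREP() macros in intel_guc_fwif.h — showing that the event's argc field counts only the dwords that follow the event header:

#include <stdint.h>
#include <stdio.h>

/* Illustrative packing only, not the real field layout. */
#define SLPC_EVENT(id, argc)    (((uint32_t)(id) << 8) | (uint32_t)(argc))

#define EVENT_PARAMETER_SET     0x6     /* hypothetical event IDs */
#define EVENT_PARAMETER_UNSET   0x7

int main(void)
{
        /* unset: a single trailing dword, the parameter id */
        uint32_t unset[] = { SLPC_EVENT(EVENT_PARAMETER_UNSET, 1), 42 };

        /* set: two trailing dwords, the parameter id and its value */
        uint32_t set[] = { SLPC_EVENT(EVENT_PARAMETER_SET, 2), 42, 300 };

        printf("unset hdr=%#x set hdr=%#x\n", unset[0], set[0]);
        return 0;
}

Passing 2 for the unset case declared a second argument dword that the request never actually carried.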
Fixes: 025cb07bebfa ("drm/i915/guc/slpc: Cache platform frequency limits") Suggested-by: Umesh Nerlige Ramappa Signed-off-by: Vinay Belgaumkar Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220216181504.7155-1-vinay.belgaumkar@intel.com (cherry picked from commit 9648f1c3739505557d94ff749a4f32192ea81fe3) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 13b27b8ff74e..ba21ace973da 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -110,7 +110,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) { u32 request[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), id, }; -- cgit From 08783aa7693f55619859f4f63f384abf17cb58c5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 24 Feb 2022 15:21:42 +0200 Subject: drm/i915: s/JSP2/ICP2/ PCH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This JSP2 PCH actually seems to be some special Apple specific ICP variant rather than a JSP. Make it so. Or at least all the references to it seem to be some Apple ICL machines. Didn't manage to find these PCI IDs in any public chipset docs unfortunately. The only thing we're losing here with this JSP->ICP change is Wa_14011294188, but based on the HSD that isn't actually needed on any ICP based design (including JSP), only TGP based stuff (including MCC) really need it. The documented w/a just never made that distinction because Windows didn't want to differentiate between JSP and MCC (not sure how they handle hpd/ddc/etc. then though...). 
Cc: stable@vger.kernel.org Cc: Matt Roper Cc: Vivek Kasireddy Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4226 Fixes: 943682e3bd19 ("drm/i915: Introduce Jasper Lake PCH") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220224132142.12927-1-ville.syrjala@linux.intel.com Acked-by: Vivek Kasireddy Tested-by: Tomas Bzatek (cherry picked from commit 53581504a8e216d435f114a4f2596ad0dfd902fc) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_pch.c | 2 +- drivers/gpu/drm/i915/intel_pch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index da8f82c2342f..fc8a68f3a2ed 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -108,6 +108,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* Comet Lake V PCH is based on KBP, which is SPT compatible */ return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: + case INTEL_PCH_ICP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n"); drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); return PCH_ICP; @@ -123,7 +124,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) !IS_GEN9_BC(dev_priv)); return PCH_TGP; case INTEL_PCH_JSP_DEVICE_ID_TYPE: - case INTEL_PCH_JSP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n"); drm_WARN_ON(&dev_priv->drm, !IS_JSL_EHL(dev_priv)); return PCH_JSP; diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 6bff77521094..4ba0f1967cca 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -50,11 +50,11 @@ enum intel_pch { #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 #define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 +#define INTEL_PCH_ICP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 #define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380 #define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 -#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_ADP_DEVICE_ID_TYPE 0x7A80 #define INTEL_PCH_ADP2_DEVICE_ID_TYPE 0x5180 #define INTEL_PCH_ADP3_DEVICE_ID_TYPE 0x7A00 -- cgit From c03d982670335d732e5d623af31c126387ae126a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 22 Feb 2022 14:32:09 +0100 Subject: drm/i915: Clarify vma lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unclear what reference the initial vma kref reference refers to. A vma can have multiple weak references, the object vma list, the vm's bound list and the GT's closed_list, and the initial vma reference can be put from lookups of all these lists. With the current implementation this means that any holder of yet another vma refcount (currently only i915_gem_object_unbind()) needs to be holding two of either *) An object refcount, *) A vm open count *) A vma open count in order for us to not risk leaking a reference by having the initial vma reference being put twice. Address this by re-introducing i915_vma_destroy() which removes all weak references of the vma and *then* puts the initial vma refcount. This makes a strong vma reference hold on to the vma unconditionally. 
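Reduced to its bones, the resulting lifetime pattern looks like the generic kref sketch below (illustrative stand-in names, not the actual i915 code — see the i915_vma.c hunks further down for the real force_unbind()/release_references() split):

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>

/* A node reachable through weak lists, pinned only by strong refs. */
struct node {
        struct kref ref;                /* initial ref taken at creation */
        struct list_head weak_link;     /* e.g. a vm bound_list entry */
};

static void node_release(struct kref *ref)
{
        kfree(container_of(ref, struct node, ref));
}

/*
 * Remove every weak reference first, then put the initial reference
 * exactly once. A concurrent lookup must have taken a strong reference
 * (kref_get_unless_zero()) and so keeps the node alive past this call.
 */
static void node_destroy(struct node *node)
{
        list_del_init(&node->weak_link);
        kref_put(&node->ref, node_release);
}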
Perhaps a better name would be i915_vma_revoke() or i915_vma_zombify(), since other callers may still hold a refcount, but with the prospect of being able to replace the vma refcount with the object lock in the near future, let's stick with i915_vma_destroy(). Finally this commit fixes a race in that previously i915_vma_release() and now i915_vma_destroy() could destroy a vma without taking the vm->mutex after an advisory check that the vma mm_node was not allocated. This would race with the ungrab_vma() function creating a trace similar to the below one. This was fixed in one of the __i915_vma_put() callsites in commit bc1922e5d349 ("drm/i915: Fix a race between vma / object destruction and unbinding") but although not seemingly triggered by CI, that is not sufficient. This patch is needed to fix that properly. [823.012188] Console: switching to colour dummy device 80x25 [823.012422] [IGT] gem_ppgtt: executing [823.016667] [IGT] gem_ppgtt: starting subtest blt-vs-render-ctx0 [852.436465] stack segment: 0000 [#1] PREEMPT SMP NOPTI [852.436480] CPU: 0 PID: 3200 Comm: gem_ppgtt Not tainted 5.16.0-CI-CI_DRM_11115+ #1 [852.436489] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.2422.A00.2110131104 10/13/2021 [852.436499] RIP: 0010:ungrab_vma+0x9/0x80 [i915] [852.436711] Code: ef e8 4b 85 cf e0 e8 36 a3 d6 e0 8b 83 f8 9c 00 00 85 c0 75 e1 5b 5d 41 5c 41 5d c3 e9 d6 fd 14 00 55 53 48 8b af c0 00 00 00 <8b> 45 00 85 c0 75 03 5b 5d c3 48 8b 85 a0 02 00 00 48 89 fb 48 8b [852.436727] RSP: 0018:ffffc90006db7880 EFLAGS: 00010246 [852.436734] RAX: 0000000000000000 RBX: ffffc90006db7598 RCX: 0000000000000000 [852.436742] RDX: ffff88815349e898 RSI: ffff88815349e858 RDI: ffff88810a284140 [852.436748] RBP: 6b6b6b6b6b6b6b6b R08: ffff88815349e898 R09: ffff88815349e8e8 [852.436754] R10: 0000000000000001 R11: 0000000051ef1141 R12: ffff88810a284140 [852.436762] R13: 0000000000000000 R14: ffff88815349e868 R15: ffff88810a284458 [852.436770] FS: 00007f5c04b04e40(0000) GS:ffff88849f000000(0000) knlGS:0000000000000000 [852.436781] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [852.436788] CR2: 00007f5c04b38fe0 CR3: 000000010a6e8001 CR4: 0000000000770ef0 [852.436797] PKRU: 55555554 [852.436801] Call Trace: [852.436806] [852.436811] i915_gem_evict_for_node+0x33c/0x3c0 [i915] [852.437014] i915_gem_gtt_reserve+0x106/0x130 [i915] [852.437211] i915_vma_pin_ww+0x8f4/0xb60 [i915] [852.437412] eb_validate_vmas+0x688/0x860 [i915] [852.437596] i915_gem_do_execbuffer+0xc0e/0x25b0 [i915] [852.437770] ? deactivate_slab+0x5f2/0x7d0 [852.437778] ? _raw_spin_unlock_irqrestore+0x50/0x60 [852.437789] ? i915_gem_execbuffer2_ioctl+0xc6/0x2c0 [i915] [852.437944] ? init_object+0x49/0x80 [852.437950] ? __lock_acquire+0x5e6/0x2580 [852.437963] i915_gem_execbuffer2_ioctl+0x116/0x2c0 [i915] [852.438129] ? i915_gem_do_execbuffer+0x25b0/0x25b0 [i915] [852.438300] drm_ioctl_kernel+0xac/0x140 [852.438310] drm_ioctl+0x201/0x3d0 [852.438316] ? 
i915_gem_do_execbuffer+0x25b0/0x25b0 [i915] [852.438490] __x64_sys_ioctl+0x6a/0xa0 [852.438498] do_syscall_64+0x37/0xb0 [852.438507] entry_SYSCALL_64_after_hwframe+0x44/0xae [852.438515] RIP: 0033:0x7f5c0415b317 [852.438523] Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4b 2d 00 f7 d8 64 89 01 48 [852.438542] RSP: 002b:00007ffd765039a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [852.438553] RAX: ffffffffffffffda RBX: 000055e4d7829dd0 RCX: 00007f5c0415b317 [852.438562] RDX: 00007ffd76503a00 RSI: 00000000c0406469 RDI: 0000000000000017 [852.438571] RBP: 00007ffd76503a00 R08: 0000000000000000 R09: 0000000000000081 [852.438579] R10: 00000000ffffff7f R11: 0000000000000246 R12: 00000000c0406469 [852.438587] R13: 0000000000000017 R14: 00007ffd76503a00 R15: 0000000000000000 [852.438598] [852.438602] Modules linked in: snd_hda_codec_hdmi i915 mei_hdcp x86_pkg_temp_thermal snd_hda_intel snd_intel_dspcfg drm_buddy coretemp crct10dif_pclmul crc32_pclmul snd_hda_codec ttm ghash_clmulni_intel snd_hwdep snd_hda_core e1000e drm_dp_helper ptp snd_pcm mei_me drm_kms_helper pps_core mei syscopyarea sysfillrect sysimgblt fb_sys_fops prime_numbers intel_lpss_pci smsc75xx usbnet mii [852.440310] ---[ end trace e52cdd2fe4fd911c ]--- v2: Fix typos in the commit message. Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") Fixes: bc1922e5d349 ("drm/i915: Fix a race between vma / object destruction and unbinding") Cc: Maarten Lankhorst Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220222133209.587978-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 14 +--- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 4 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 17 +++-- drivers/gpu/drm/i915/i915_vma.c | 74 ++++++++++++++++++---- drivers/gpu/drm/i915/i915_vma.h | 3 + 5 files changed, 79 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 2d593d573ef1..372bc220faeb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -272,12 +272,6 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) if (!list_empty(&obj->vma.list)) { struct i915_vma *vma; - /* - * Note that the vma keeps an object reference while - * it is active, so it *should* not sleep while we - * destroy it. Our debug code errs insits it *might*. - * For the moment, play along. - */ spin_lock(&obj->vma.lock); while ((vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, @@ -285,13 +279,7 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) GEM_BUG_ON(vma->obj != obj); spin_unlock(&obj->vma.lock); - /* Verify that the vma is unbound under the vm mutex. 
*/ - mutex_lock(&vma->vm->mutex); - atomic_and(~I915_VMA_PIN_MASK, &vma->flags); - __i915_vma_unbind(vma); - mutex_unlock(&vma->vm->mutex); - - __i915_vma_put(vma); + i915_vma_destroy(vma); spin_lock(&obj->vma.lock); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 8ae1a1530bd8..f98719a1ab86 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -169,7 +169,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, out: i915_gem_object_lock(obj, NULL); - __i915_vma_put(vma); + i915_vma_destroy(vma); i915_gem_object_unlock(obj); return err; } @@ -266,7 +266,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, return err; i915_gem_object_lock(obj, NULL); - __i915_vma_put(vma); + i915_vma_destroy(vma); i915_gem_object_unlock(obj); if (igt_timeout(end_time, diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index c548c193cd35..4bcdfcab3642 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -107,14 +107,19 @@ void __i915_vm_close(struct i915_address_space *vm) list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - /* Keep the obj (and hence the vma) alive as _we_ destroy it */ - if (!kref_get_unless_zero(&obj->base.refcount)) + if (!kref_get_unless_zero(&obj->base.refcount)) { + /* + * Unbind the dying vma to ensure the bound_list + * is completely drained. We leave the destruction to + * the object destructor. + */ + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); continue; + } - atomic_and(~I915_VMA_PIN_MASK, &vma->flags); - WARN_ON(__i915_vma_unbind(vma)); - __i915_vma_put(vma); - + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + i915_vma_destroy_locked(vma); i915_gem_object_put(obj); } GEM_BUG_ON(!list_empty(&vm->bound_list)); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index dc28e6e3efef..94fcdb7bd21d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1621,15 +1621,27 @@ void i915_vma_reopen(struct i915_vma *vma) void i915_vma_release(struct kref *ref) { struct i915_vma *vma = container_of(ref, typeof(*vma), ref); + + i915_vm_put(vma->vm); + i915_active_fini(&vma->active); + GEM_WARN_ON(vma->resource); + i915_vma_free(vma); +} + +static void force_unbind(struct i915_vma *vma) +{ + if (!drm_mm_node_allocated(&vma->node)) + return; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); +} + +static void release_references(struct i915_vma *vma) +{ struct drm_i915_gem_object *obj = vma->obj; - if (drm_mm_node_allocated(&vma->node)) { - mutex_lock(&vma->vm->mutex); - atomic_and(~I915_VMA_PIN_MASK, &vma->flags); - WARN_ON(__i915_vma_unbind(vma)); - mutex_unlock(&vma->vm->mutex); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - } GEM_BUG_ON(i915_vma_is_active(vma)); spin_lock(&obj->vma.lock); @@ -1639,11 +1651,49 @@ void i915_vma_release(struct kref *ref) spin_unlock(&obj->vma.lock); __i915_vma_remove_closed(vma); - i915_vm_put(vma->vm); - i915_active_fini(&vma->active); - GEM_WARN_ON(vma->resource); - i915_vma_free(vma); + __i915_vma_put(vma); +} + +/** + * i915_vma_destroy_locked - Remove all weak reference to the vma and put + * the initial reference. 
+ *
+ * This function should be called when it's decided the vma isn't needed
+ * anymore. The caller must assure that it doesn't race with another lookup
+ * plus destroy, typically by taking an appropriate reference.
+ *
+ * Current callsites are
+ * - __i915_gem_object_pages_fini()
+ * - __i915_vm_close() - Blocks the above function by taking a reference on
+ * the object.
+ * - __i915_vma_parked() - Blocks the above functions by taking an open-count on
+ * the vm and a reference on the object.
+ *
+ * Because of locks taken during destruction, a vma is also guaranteed to
+ * stay alive while the following locks are held if it was looked up while
+ * holding one of the locks:
+ * - vm->mutex
+ * - obj->vma.lock
+ * - gt->closed_lock
+ *
+ * A vma user can also temporarily keep the vma alive while holding a vma
+ * reference.
+ */
+void i915_vma_destroy_locked(struct i915_vma *vma)
+{
+ lockdep_assert_held(&vma->vm->mutex);
+
+ force_unbind(vma);
+ release_references(vma);
+}
+
+void i915_vma_destroy(struct i915_vma *vma)
+{
+ mutex_lock(&vma->vm->mutex);
+ force_unbind(vma);
+ mutex_unlock(&vma->vm->mutex);
+ release_references(vma);
}

void i915_vma_parked(struct intel_gt *gt)
@@ -1677,7 +1727,7 @@ void i915_vma_parked(struct intel_gt *gt)
 if (i915_gem_object_trylock(obj, NULL)) {
 INIT_LIST_HEAD(&vma->closed_link);
- __i915_vma_put(vma);
+ i915_vma_destroy(vma);
 i915_gem_object_unlock(obj);
 } else { /* back you go.. */
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 011af044ad4f..67ae7341c7e0 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -236,6 +236,9 @@ static inline void __i915_vma_put(struct i915_vma *vma)
 {
 kref_put(&vma->ref, i915_vma_release);
 }

+void i915_vma_destroy_locked(struct i915_vma *vma);
+void i915_vma_destroy(struct i915_vma *vma);
+
 #define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)

 static inline void i915_vma_lock(struct i915_vma *vma)
-- cgit
From 9373505967ffc1b7b8331a21ba86ea436c09b981 Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Mon, 28 Feb 2022 12:36:04 +0000
Subject: drm/i915/ttm: make eviction mappable aware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we need to make room for some mappable object, then we should only
victimize objects that have one or more pages that occupy the visible
portion of LMEM. Let's also create a new priority hint for objects that
are placed in mappable memory, where we know that CPU access was
requested, that way we hopefully victimize these last.
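For the range-allocation case this reduces to a standard half-open interval overlap test; in isolation (hypothetical names, the real loop walks the buddy blocks in the i915_ttm_eviction_valuable() hunk below):

#include <stdbool.h>

/*
 * Evicting the victim block [block_fpfn, block_lpfn) can only help a
 * range request [fpfn, lpfn) when the two intervals intersect.
 */
static bool block_helps(unsigned long fpfn, unsigned long lpfn,
                        unsigned long block_fpfn, unsigned long block_lpfn)
{
        return fpfn < block_lpfn && lpfn > block_fpfn;
}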
v2(Thomas): s/TTM_PL_PRIV/I915_PL_LMEM0/ Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220228123607.580432-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 65 ++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index a13e0204a139..3c2f044b9c6b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -7,8 +7,10 @@ #include #include +#include #include "i915_drv.h" +#include "i915_ttm_buddy_manager.h" #include "intel_memory_region.h" #include "intel_region_ttm.h" @@ -22,6 +24,7 @@ #define I915_TTM_PRIO_PURGE 0 #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 +#define I915_TTM_PRIO_NEEDS_CPU_ACCESS 3 /* * Size of struct ttm_place vector in on-stack struct ttm_placement allocs @@ -339,6 +342,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource *res = bo->resource; if (!obj) return false; @@ -352,7 +356,48 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, return false; /* Will do for now. Our pinned objects are still on TTM's LRU lists */ - return i915_gem_object_evictable(obj); + if (!i915_gem_object_evictable(obj)) + return false; + + switch (res->mem_type) { + case I915_PL_LMEM0: { + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, res->mem_type); + struct i915_ttm_buddy_resource *bman_res = + to_ttm_buddy_resource(res); + struct drm_buddy *mm = bman_res->mm; + struct drm_buddy_block *block; + + if (!place->fpfn && !place->lpfn) + return true; + + GEM_BUG_ON(!place->lpfn); + + /* + * If we just want something mappable then we can quickly check + * if the current victim resource is using any of the CPU + * visible portion. + */ + if (!place->fpfn && + place->lpfn == i915_ttm_buddy_man_visible_size(man)) + return bman_res->used_visible_size > 0; + + /* Real range allocation */ + list_for_each_entry(block, &bman_res->blocks, link) { + unsigned long fpfn = + drm_buddy_block_offset(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + + if (place->fpfn < lpfn && place->lpfn > fpfn) + return true; + } + return false; + } default: + break; + } + + return true; } static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, @@ -852,7 +897,23 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) } else if (!i915_gem_object_has_pages(obj)) { bo->priority = I915_TTM_PRIO_NO_PAGES; } else { - bo->priority = I915_TTM_PRIO_HAS_PAGES; + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + + /* + * If we need to place an LMEM resource which doesn't need CPU + * access then we should try not to victimize mappable objects + * first, since we likely end up stealing more of the mappable + * portion. And likewise when we try to find space for a mappble + * object, we know not to ever victimize objects that don't + * occupy any mappable pages. 
+ */ + if (i915_ttm_cpu_maps_iomem(bo->resource) && + i915_ttm_buddy_man_visible_size(man) < man->size && + !(obj->flags & I915_BO_ALLOC_GPU_ONLY)) + bo->priority = I915_TTM_PRIO_NEEDS_CPU_ACCESS; + else + bo->priority = I915_TTM_PRIO_HAS_PAGES; } ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); -- cgit From 503725c2d9865533b2894f7363be16c762260b7e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 28 Feb 2022 12:36:05 +0000 Subject: drm/i915/ttm: mappable migration on fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The end goal is to have userspace tell the kernel what buffers will require CPU access, however if we ever reach the CPU fault handler, and the current resource is not mappable, then we should attempt to migrate the buffer to the mappable portion of LMEM, or even system memory, if the allowable placements permit it. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220228123607.580432-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 54 +++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 3c2f044b9c6b..45cc5837ce00 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -638,11 +638,24 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) i915_ttm_purge(obj); } +static bool i915_ttm_resource_mappable(struct ttm_resource *res) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + + if (!i915_ttm_cpu_maps_iomem(res)) + return true; + + return bman_res->used_visible_size == bman_res->base.num_pages; +} + static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { if (!i915_ttm_cpu_maps_iomem(mem)) return 0; + if (!i915_ttm_resource_mappable(mem)) + return -EINVAL; + mem->bus.caching = ttm_write_combined; mem->bus.is_iomem = true; @@ -781,14 +794,15 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) * Gem forced migration using the i915_ttm_migrate() op, is allowed even * to regions that are not in the object's list of allowable placements. 
*/ -static int i915_ttm_migrate(struct drm_i915_gem_object *obj, - struct intel_memory_region *mr) +static int __i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr, + unsigned int flags) { struct ttm_place requested; struct ttm_placement placement; int ret; - i915_ttm_place_from_region(mr, &requested, obj->flags); + i915_ttm_place_from_region(mr, &requested, flags); placement.num_placement = 1; placement.num_busy_placement = 1; placement.placement = &requested; @@ -811,6 +825,12 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj, return 0; } +static int i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr) +{ + return __i915_ttm_migrate(obj, mr, obj->flags); +} + static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st) { @@ -955,9 +975,6 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) if (!obj) return VM_FAULT_SIGBUS; - if (obj->flags & I915_BO_ALLOC_GPU_ONLY) - return -EINVAL; - /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE)) @@ -972,6 +989,31 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (!i915_ttm_resource_mappable(bo->resource)) { + int err = -ENODEV; + int i; + + for (i = 0; i < obj->mm.n_placements; i++) { + struct intel_memory_region *mr = obj->mm.placements[i]; + unsigned int flags; + + if (!mr->io_size && mr->type != INTEL_MEMORY_SYSTEM) + continue; + + flags = obj->flags; + flags &= ~I915_BO_ALLOC_GPU_ONLY; + err = __i915_ttm_migrate(obj, mr, flags); + if (!err) + break; + } + + if (err) { + drm_dbg(dev, "Unable to make resource CPU accessible\n"); + dma_resv_unlock(bo->base.resv); + return VM_FAULT_SIGBUS; + } + } + if (drm_dev_enter(dev, &idx)) { ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, TTM_BO_VM_NUM_PREFAULT); -- cgit From 6e0c5bf0cc1369df0057bccb3fc1af3b38d07e32 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 28 Feb 2022 12:36:06 +0000 Subject: drm/i915/selftests: handle allocation failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have to contend with non-mappable LMEM, then we need to ensure the object fits within the mappable portion, like in the selftests, where we later try to CPU access the pages. However if it can't then we need to gracefully handle this, without throwing an error. Also it looks like TTM will return -ENOMEM, in ttm_bo_mem_space() after exhausting all possible placements. 
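The hugepages hunk below relies on a simple back-off loop; sketched standalone (try_pin() is a hypothetical stand-in for i915_gem_object_pin_pages_unlocked()):

#include <errno.h>
#include <stdint.h>

extern int try_pin(uint64_t size);      /* stand-in for the real call */

static int pin_with_backoff(uint64_t size, uint64_t min_size)
{
        int err;

        for (;;) {
                err = try_pin(size);
                /* Only "does not fit" style errors justify shrinking. */
                if (err != -ENXIO && err != -E2BIG && err != -ENOMEM)
                        return err;
                if (size <= min_size)
                        return err;     /* nothing smaller left to try */
                size >>= 1;             /* halve and retry */
        }
}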
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220228123607.580432-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dbbae53f820a..7a84fa68a99c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1345,7 +1345,7 @@ try_again: err = i915_gem_object_pin_pages_unlocked(obj); if (err) { - if (err == -ENXIO || err == -E2BIG) { + if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) { i915_gem_object_put(obj); size >>= 1; goto try_again; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 56dec9723601..ba32893e0873 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -822,8 +822,14 @@ static int igt_lmem_create_with_ps(void *arg) i915_gem_object_lock(obj, NULL); err = i915_gem_object_pin_pages(obj); - if (err) + if (err) { + if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) { + pr_info("%s not enough lmem for ps(%u) err=%d\n", + __func__, ps, err); + err = 0; + } goto out_put; + } daddr = i915_gem_object_get_dma_address(obj, 0); if (!IS_ALIGNED(daddr, ps)) { -- cgit From fb87550d2517878ebcd112f080865a2dc38e9bae Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 28 Feb 2022 12:36:07 +0000 Subject: drm/i915/selftests: exercise mmap migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercise each of the migration scenarios, verifying that the final placement and buffer contents match our expectations. v2(Thomas): Replace for_i915_gem_ww() block with simpler object_lock() v3: - For testing purposes allow forcing the io_size such that we can exercise the allocation + migration path on devices that don't have the small BAR limit. 
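The io_size forcing mentioned in v3 amounts to the following kernel-style sketch (rounddown_pow_of_two() is the real helper from <linux/log2.h>; the halving and PAGE_SIZE cutoff mirror the selftest hunk below):

#include <linux/log2.h>
#include <linux/mm.h>

/*
 * Pretend the BAR is smaller than it really is: a bit less than half
 * of the full io_size, rounded down to a power of two. Returns 0 when
 * the region is too small to bother testing.
 */
static u64 forced_io_size(u64 io_size)
{
        u64 forced = rounddown_pow_of_two(io_size >> 1);

        return forced < PAGE_SIZE ? 0 : forced;
}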
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220228123607.580432-4-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 329 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 10 + drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 5 + 3 files changed, 344 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index f98719a1ab86..a132e241c3ee 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -8,10 +8,13 @@ #include "gem/i915_gem_internal.h" #include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_migrate.h" +#include "i915_ttm_buddy_manager.h" #include "huge_gem_object.h" #include "i915_selftest.h" @@ -1001,6 +1004,331 @@ static int igt_mmap(void *arg) return 0; } +static void igt_close_objects(struct drm_i915_private *i915, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + i915_gem_object_lock(obj, NULL); + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + /* No polluting the memory region between tests */ + __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); + list_del(&obj->st_link); + i915_gem_object_put(obj); + } + + cond_resched(); + + i915_gem_drain_freed_objects(i915); +} + +static void igt_make_evictable(struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + + list_for_each_entry(obj, objects, st_link) { + i915_gem_object_lock(obj, NULL); + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + i915_gem_object_unlock(obj); + } + + cond_resched(); +} + +static int igt_fill_mappable(struct intel_memory_region *mr, + struct list_head *objects) +{ + u64 size, total; + int err; + + total = 0; + size = mr->io_size; + do { + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_region(mr, size, 0, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close; + } + + list_add(&obj->st_link, objects); + + err = i915_gem_object_pin_pages_unlocked(obj); + if (err) { + if (err != -ENXIO && err != -ENOMEM) + goto err_close; + + if (size == mr->min_page_size) { + err = 0; + break; + } + + size >>= 1; + continue; + } + + total += obj->base.size; + } while (1); + + pr_info("%s filled=%lluMiB\n", __func__, total >> 20); + return 0; + +err_close: + igt_close_objects(mr->i915, objects); + return err; +} + +static int ___igt_mmap_migrate(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + unsigned long addr, + bool unfaultable) +{ + struct vm_area_struct *area; + int err = 0, i; + + pr_info("igt_mmap(%s, %d) @ %lx\n", + obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr); + + mmap_read_lock(current->mm); + area = vma_lookup(current->mm, addr); + mmap_read_unlock(current->mm); + if (!area) { + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); + err = -EINVAL; + goto out_unmap; + } + + for (i = 0; i < obj->base.size / sizeof(u32); i++) { + u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); + u32 x; + + if (get_user(x, ux)) { + err = -EFAULT; + if (!unfaultable) { + pr_err("%s: Unable to read from mmap, 
offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); + goto out_unmap; + } + + continue; + } + + if (unfaultable) { + pr_err("%s: Faulted unmappable memory\n", + obj->mm.region->name); + err = -EINVAL; + goto out_unmap; + } + + if (x != expand32(POISON_INUSE)) { + pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", + obj->mm.region->name, + i * sizeof(x), x, expand32(POISON_INUSE)); + err = -EINVAL; + goto out_unmap; + } + + x = expand32(POISON_FREE); + if (put_user(x, ux)) { + pr_err("%s: Unable to write to mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); + err = -EFAULT; + goto out_unmap; + } + } + + if (unfaultable) { + if (err == -EFAULT) + err = 0; + } else { + obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; + err = wc_check(obj); + } +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +#define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0) +#define IGT_MMAP_MIGRATE_FILL (1 << 1) +#define IGT_MMAP_MIGRATE_EVICTABLE (1 << 2) +#define IGT_MMAP_MIGRATE_UNFAULTABLE (1 << 3) +static int __igt_mmap_migrate(struct intel_memory_region **placements, + int n_placements, + struct intel_memory_region *expected_mr, + unsigned int flags) +{ + struct drm_i915_private *i915 = placements[0]->i915; + struct drm_i915_gem_object *obj; + struct i915_request *rq = NULL; + unsigned long addr; + LIST_HEAD(objects); + u64 offset; + int err; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + placements, + n_placements); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (flags & IGT_MMAP_MIGRATE_TOPDOWN) + obj->flags |= I915_BO_ALLOC_GPU_ONLY; + + err = __assign_mmap_offset(obj, I915_MMAP_TYPE_FIXED, &offset, NULL); + if (err) + goto out_put; + + /* + * This will eventually create a GEM context, due to opening dummy drm + * file, which needs a tiny amount of mappable device memory for the top + * level paging structures(and perhaps scratch), so make sure we + * allocate early, to avoid tears. 
+ */ + addr = igt_mmap_offset(i915, offset, obj->base.size, + PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) { + err = addr; + goto out_put; + } + + if (flags & IGT_MMAP_MIGRATE_FILL) { + err = igt_fill_mappable(placements[0], &objects); + if (err) + goto out_put; + } + + err = i915_gem_object_lock(obj, NULL); + if (err) + goto out_put; + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_unlock(obj); + goto out_put; + } + + err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL, + obj->mm.pages->sgl, obj->cache_level, + i915_gem_object_is_lmem(obj), + expand32(POISON_INUSE), &rq); + i915_gem_object_unpin_pages(obj); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_gem_object_set_moving_fence(obj, &rq->fence); + i915_request_put(rq); + } + i915_gem_object_unlock(obj); + if (err) + goto out_put; + + if (flags & IGT_MMAP_MIGRATE_EVICTABLE) + igt_make_evictable(&objects); + + err = ___igt_mmap_migrate(i915, obj, addr, + flags & IGT_MMAP_MIGRATE_UNFAULTABLE); + if (!err && obj->mm.region != expected_mr) { + pr_err("%s region mismatch %s\n", __func__, expected_mr->name); + err = -EINVAL; + } + +out_put: + i915_gem_object_put(obj); + igt_close_objects(i915, &objects); + return err; +} + +static int igt_mmap_migrate(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *system = i915->mm.regions[INTEL_REGION_SMEM]; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct intel_memory_region *mixed[] = { mr, system }; + struct intel_memory_region *single[] = { mr }; + struct ttm_resource_manager *man = mr->region_private; + resource_size_t saved_io_size; + int err; + + if (mr->private) + continue; + + if (!mr->io_size) + continue; + + /* + * For testing purposes let's force small BAR, if not already + * present. + */ + saved_io_size = mr->io_size; + if (mr->io_size == mr->total) { + resource_size_t io_size = mr->io_size; + + io_size = rounddown_pow_of_two(io_size >> 1); + if (io_size < PAGE_SIZE) + continue; + + mr->io_size = io_size; + i915_ttm_buddy_man_force_visible_size(man, + io_size >> PAGE_SHIFT); + } + + /* + * Allocate in the mappable portion, should be no suprises here. + */ + err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), mr, 0); + if (err) + goto out_io_size; + + /* + * Allocate in the non-mappable portion, but force migrating to + * the mappable portion on fault (LMEM -> LMEM) + */ + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr, + IGT_MMAP_MIGRATE_TOPDOWN | + IGT_MMAP_MIGRATE_FILL | + IGT_MMAP_MIGRATE_EVICTABLE); + if (err) + goto out_io_size; + + /* + * Allocate in the non-mappable portion, but force spilling into + * system memory on fault (LMEM -> SMEM) + */ + err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), system, + IGT_MMAP_MIGRATE_TOPDOWN | + IGT_MMAP_MIGRATE_FILL); + if (err) + goto out_io_size; + + /* + * Allocate in the non-mappable portion, but since the mappable + * portion is already full, and we can't spill to system memory, + * then we should expect the fault to fail. 
+ */ + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr, + IGT_MMAP_MIGRATE_TOPDOWN | + IGT_MMAP_MIGRATE_FILL | + IGT_MMAP_MIGRATE_UNFAULTABLE); +out_io_size: + mr->io_size = saved_io_size; + i915_ttm_buddy_man_force_visible_size(man, + mr->io_size >> PAGE_SHIFT); + if (err) + return err; + } + + return 0; +} + static const char *repr_mmap_type(enum i915_mmap_type type) { switch (type) { @@ -1426,6 +1754,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), SUBTEST(igt_mmap), + SUBTEST(igt_mmap_migrate), SUBTEST(igt_mmap_access), SUBTEST(igt_mmap_revoke), SUBTEST(igt_mmap_gpu), diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 92d49a3c378c..129f668f21ff 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -362,3 +362,13 @@ u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man) return bman->visible_size; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man, + u64 size) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + + bman->visible_size = size; +} +#endif diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h index 35fe03a6a78c..52d9586d242c 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -61,4 +61,9 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man); +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man, + u64 size); +#endif + #endif -- cgit From eee5215bd784eee5f8520f2deebf437c3fa95abb Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 17 Feb 2022 13:29:42 -0800 Subject: drm/i915/guc: Fix flag query helper function to not modify state A flag query helper was actually writing to the flags word rather than just reading. Fix that. Also update the function's comment as it was out of date. NB: No need for a 'Fixes' tag. The test was only ever used inside a BUG_ON during context registration. Rather than asserting that the condition was true, it was making the condition true. So, in theory, there was no consequence because we should never have hit a BUG_ON anyway. Which means the write should always have been a no-op. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220217212942.629922-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ab3cea352fb3..25cac44583d9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -174,11 +174,8 @@ static inline void init_sched_state(struct intel_context *ce) __maybe_unused static bool sched_state_is_init(struct intel_context *ce) { - /* - * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after - * suspend. - */ - return !(ce->guc_state.sched_state &= + /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. 
*/ + return !(ce->guc_state.sched_state & ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); } -- cgit From e068ef3fd5a3574359b80ff823089ca59057f9c8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 14 Feb 2022 17:11:23 -0800 Subject: drm/i915/guc: Initialize GuC submission locks and queues early Move initialization of submission-related spinlock, lists and workers to init_early. This fixes an issue where if the GuC init fails we might still try to get the lock in the context cleanup code. Note that it is safe to call the GuC context cleanup code even if the init failed because all contexts are initialized with an invalid GuC ID, which will cause the GuC side of the cleanup to be skipped, so it is easier to just make sure the variables are initialized than to special case the cleanup to handle the case when they're not. References: https://gitlab.freedesktop.org/drm/intel/-/issues/4932 Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220215011123.734572-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 27 ++++++++++++----------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 25cac44583d9..b0de9110884a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1818,24 +1818,11 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->lrc_desc_pool); - xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); - - spin_lock_init(&guc->submission_state.lock); - INIT_LIST_HEAD(&guc->submission_state.guc_id_list); - ida_init(&guc->submission_state.guc_ids); - INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); - INIT_WORK(&guc->submission_state.destroyed_worker, - destroyed_worker_func); - INIT_WORK(&guc->submission_state.reset_fail_worker, - reset_fail_worker_func); - guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); if (!guc->submission_state.guc_ids_bitmap) return -ENOMEM; - spin_lock_init(&guc->timestamp.lock); - INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; guc->timestamp.shift = gpm_timestamp_shift(gt); @@ -3831,6 +3818,20 @@ static bool __guc_submission_selected(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { + xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); + + spin_lock_init(&guc->submission_state.lock); + INIT_LIST_HEAD(&guc->submission_state.guc_id_list); + ida_init(&guc->submission_state.guc_ids); + INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); + INIT_WORK(&guc->submission_state.destroyed_worker, + destroyed_worker_func); + INIT_WORK(&guc->submission_state.reset_fail_worker, + reset_fail_worker_func); + + spin_lock_init(&guc->timestamp.lock); + INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS; guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); -- cgit From e2a1e7abaee128020fde54d386ec8959b1e9eb61 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 24 Feb 2022 17:52:32 -0800 Subject: drm/i915/guc: Do not complain about stale reset notifications 
It is possible for reset notifications to arrive for a context that is in the process of being banned. So don't flag these as an error, just report it as informational (because it is still useful to know that resets are happening even if they are being ignored). v2: Better wording for the message (review feedback from Tvrtko). v3: Fix rebase issue (review feedback from Daniele). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220225015232.1939497-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b0de9110884a..810910e8042e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4023,10 +4023,9 @@ static void guc_handle_context_reset(struct intel_guc *guc, capture_error_state(guc, ce); guc_context_replay(ce); } else { - drm_err(&guc_to_gt(guc)->i915->drm, - "Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d", - ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce), - context_blocked(ce)); + drm_info(&guc_to_gt(guc)->i915->drm, + "Ignoring context reset notification of banned context 0x%04X on %s", + ce->guc_id.id, ce->engine->name); } } -- cgit From a8b2b8b06e10224c547d90ea97c483b4de511b22 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 14 Feb 2022 19:59:08 +0100 Subject: drm/i915: Depend on !PREEMPT_RT. There are a few sections in the driver which are not compatible with PREEMPT_RT. They trigger warnings and can lead to deadlocks at runtime. Disable the i915 driver on a PREEMPT_RT enabled kernel. This way PREEMPT_RT itself can be enabled without needing to address the i915 issues first. The RT related patches are still in RT queue and will be handled later. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/YgqmfKhwU5spS069@linutronix.de --- drivers/gpu/drm/i915/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 2ac220bfd0ed..63db8bcf03bf 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -3,6 +3,7 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM depends on X86 && PCI + depends on !PREEMPT_RT select INTEL_GTT select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular -- cgit From 944823c9463916dd53f365e9aa07f23360968080 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:37 -0800 Subject: drm/i915/xehp: Define compute class and engine Introduce a Compute Command Streamer (CCS), which has access to the media and GPGPU pipelines (but not the 3D pipeline). To begin with, define the compute class/engine common functions, based on the existing render ones. v2: - Add kerneldoc for drm_i915_gem_engine_class since we're adding a new element to it. (Daniel) - Make engine class <-> guc class converters use lookup tables to make it more clear/explicit how the IDs map. (Tvrtko) v3: - Don't update uapi for now; we'll just include the driver-internal changes for the time being. 
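The v2 change matters because the two ID spaces no longer line up: i915 numbers COMPUTE_CLASS as 5 (with OTHER_CLASS holding 4), while the GuC reuses 4 for GUC_COMPUTE_CLASS, so the old identity conversion guarded by BUILD_BUG_ON()s can no longer work. Condensed from the intel_guc_fwif.h hunk below:

static u8 engine_class_guc_class_map[] = {
        [RENDER_CLASS]            = GUC_RENDER_CLASS,       /* 0 -> 0 */
        [VIDEO_DECODE_CLASS]      = GUC_VIDEO_CLASS,        /* 1 -> 1 */
        [VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS, /* 2 -> 2 */
        [COPY_ENGINE_CLASS]       = GUC_BLITTER_CLASS,      /* 3 -> 3 */
        [COMPUTE_CLASS]           = GUC_COMPUTE_CLASS,      /* 5 -> 4 */
};

/* OTHER_CLASS (4) deliberately has no entry; callers must never pass it. */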
Bspec: 46167, 45544 Original-author: Michel Thierry Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 28 ++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 +++++++- drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 ++++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 32 ++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ 6 files changed, 71 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e855c801ba28..3190b7b462a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -156,6 +156,34 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } }, }, + [CCS0] = { + .class = COMPUTE_CLASS, + .instance = 0, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE } + } + }, + [CCS1] = { + .class = COMPUTE_CLASS, + .instance = 1, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE } + } + }, + [CCS2] = { + .class = COMPUTE_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE } + } + }, + [CCS3] = { + .class = COMPUTE_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE } + } + }, }; /** diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 36365bdbe1ee..f4533ccafbaf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -33,7 +33,8 @@ #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 -#define MAX_ENGINE_CLASS 4 +#define COMPUTE_CLASS 5 +#define MAX_ENGINE_CLASS 5 #define MAX_ENGINE_INSTANCE 7 #define I915_MAX_SLICES 3 @@ -95,6 +96,7 @@ struct i915_ctx_workarounds { #define I915_MAX_VCS 8 #define I915_MAX_VECS 4 +#define I915_MAX_CCS 4 /* * Engine IDs definitions. 
@@ -117,6 +119,11 @@ enum intel_engine_id { VECS2, VECS3, #define _VECS(n) (VECS0 + (n)) + CCS0, + CCS1, + CCS2, + CCS3, +#define _CCS(n) (CCS0 + (n)) I915_NUM_ENGINES #define INVALID_ENGINE ((enum intel_engine_id)-1) }; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 9ce85a845105..b8c9b6b89003 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -47,6 +47,7 @@ static const u8 uabi_classes[] = { [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, + /* TODO: Add COMPUTE_CLASS mapping once ABI is available */ }; static int engine_cmp(void *priv, const struct list_head *A, @@ -139,6 +140,7 @@ const char *intel_engine_class_repr(u8 class) [COPY_ENGINE_CLASS] = "bcs", [VIDEO_DECODE_CLASS] = "vcs", [VIDEO_ENHANCEMENT_CLASS] = "vecs", + [COMPUTE_CLASS] = "ccs", }; if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class]) @@ -162,6 +164,7 @@ static int legacy_ring_idx(const struct legacy_ring *ring) [COPY_ENGINE_CLASS] = { BCS0, 1 }, [VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS }, [VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS }, + [COMPUTE_CLASS] = { CCS0, I915_MAX_CCS }, }; if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) @@ -190,7 +193,7 @@ static void add_legacy_ring(struct legacy_ring *ring, void intel_engines_driver_register(struct drm_i915_private *i915) { struct legacy_ring ring = {}; - u8 uabi_instances[4] = {}; + u8 uabi_instances[5] = {}; struct list_head *it, *next; struct rb_node **p, *prev; LIST_HEAD(engines); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d752db5669dd..530807bfe9a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1452,6 +1452,10 @@ #define GEN11_KCR (19) #define GEN11_GTPM (16) #define GEN11_BCS (15) +#define GEN12_CCS3 (7) +#define GEN12_CCS2 (6) +#define GEN12_CCS1 (5) +#define GEN12_CCS0 (4) #define GEN11_RCS0 (0) #define GEN11_VECS(x) (31 - (x)) #define GEN11_VCS(x) (x) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 6a4612a852e2..4b300b6cc0f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -46,8 +46,8 @@ #define GUC_VIDEO_CLASS 1 #define GUC_VIDEOENHANCE_CLASS 2 #define GUC_BLITTER_CLASS 3 -#define GUC_RESERVED_CLASS 4 -#define GUC_LAST_ENGINE_CLASS GUC_RESERVED_CLASS +#define GUC_COMPUTE_CLASS 4 +#define GUC_LAST_ENGINE_CLASS GUC_COMPUTE_CLASS #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 @@ -156,23 +156,37 @@ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \ ) +/* the GuC arrays don't include OTHER_CLASS */ +static u8 engine_class_guc_class_map[] = { + [RENDER_CLASS] = GUC_RENDER_CLASS, + [COPY_ENGINE_CLASS] = GUC_BLITTER_CLASS, + [VIDEO_DECODE_CLASS] = GUC_VIDEO_CLASS, + [VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS, + [COMPUTE_CLASS] = GUC_COMPUTE_CLASS, +}; + +static u8 guc_class_engine_class_map[] = { + [GUC_RENDER_CLASS] = RENDER_CLASS, + [GUC_BLITTER_CLASS] = COPY_ENGINE_CLASS, + [GUC_VIDEO_CLASS] = VIDEO_DECODE_CLASS, + [GUC_VIDEOENHANCE_CLASS] = VIDEO_ENHANCEMENT_CLASS, + [GUC_COMPUTE_CLASS] = COMPUTE_CLASS, +}; + static inline u8 engine_class_to_guc_class(u8 class) { - BUILD_BUG_ON(GUC_RENDER_CLASS 
!= RENDER_CLASS); - BUILD_BUG_ON(GUC_BLITTER_CLASS != COPY_ENGINE_CLASS); - BUILD_BUG_ON(GUC_VIDEO_CLASS != VIDEO_DECODE_CLASS); - BUILD_BUG_ON(GUC_VIDEOENHANCE_CLASS != VIDEO_ENHANCEMENT_CLASS); + BUILD_BUG_ON(ARRAY_SIZE(engine_class_guc_class_map) != MAX_ENGINE_CLASS + 1); GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS); - return class; + return engine_class_guc_class_map[class]; } static inline u8 guc_class_to_engine_class(u8 guc_class) { + BUILD_BUG_ON(ARRAY_SIZE(guc_class_engine_class_map) != GUC_LAST_ENGINE_CLASS + 1); GEM_BUG_ON(guc_class > GUC_LAST_ENGINE_CLASS); - GEM_BUG_ON(guc_class == GUC_RESERVED_CLASS); - return guc_class; + return guc_class_engine_class_map[guc_class]; } /* Work item for submitting workloads into work queue of GuC. */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cfd569684fa7..4da10e131216 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -971,6 +971,10 @@ #define GEN11_VEBOX2_RING_BASE 0x1d8000 #define XEHP_VEBOX3_RING_BASE 0x1e8000 #define XEHP_VEBOX4_RING_BASE 0x1f8000 +#define GEN12_COMPUTE0_RING_BASE 0x1a000 +#define GEN12_COMPUTE1_RING_BASE 0x1c000 +#define GEN12_COMPUTE2_RING_BASE 0x1e000 +#define GEN12_COMPUTE3_RING_BASE 0x26000 #define BLT_RING_BASE 0x22000 -- cgit From 4b88ad503d6d2ea11891a355e656bf428ec815e6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:38 -0800 Subject: drm/i915/xehp: CCS shares the render reset domain The reset domain is shared between render and all compute engines, so resetting one will affect the others. Note: Before performing a reset on an RCS or CCS engine, the GuC will attempt to preempt-to-idle the other non-hung RCS/CCS engines to avoid impacting other clients (since some shared modules will be reset). If other engines are executing non-preemptable workloads, the impact is unavoidable and some work may be lost. Bspec: 52549 Original-author: Michel Thierry Cc: Tvrtko Ursulin Cc: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3190b7b462a9..3150c0847f65 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -341,6 +341,10 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id) [VECS1] = GEN11_GRDOM_VECS2, [VECS2] = GEN11_GRDOM_VECS3, [VECS3] = GEN11_GRDOM_VECS4, + [CCS0] = GEN11_GRDOM_RENDER, + [CCS1] = GEN11_GRDOM_RENDER, + [CCS2] = GEN11_GRDOM_RENDER, + [CCS3] = GEN11_GRDOM_RENDER, }; GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || !engine_reset_domains[id]); -- cgit From 505c4857fb13fb0ea88a42b843c91d0b9f8231fe Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:39 -0800 Subject: drm/i915/xehp: Add Compute CS IRQ handlers Add execlists and GuC interrupts for compute CS into existing IRQ handlers. All compute command streamers belong to the same compute class, so the only change needed to enable their interrupts is to program their GT engine interrupt mask registers. CCS0 shares the register with CCS1, while CCS2 and CCS3 are in a new one. BSpec: 50844, 54029, 54030, 53223, 53224. 
Original-author: Michel Thierry Cc: Tvrtko Ursulin Cc: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 15 ++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 983264e10e0a..e443ac4c8059 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -100,7 +100,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity) if (unlikely(!intr)) return; - if (class <= COPY_ENGINE_CLASS) + if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS) return gen11_engine_irq_handler(gt, class, instance, intr); if (class == OTHER_CLASS) @@ -182,6 +182,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt) /* Disable RCS, BCS, VCS and VECS class engines. */ intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0); + if (CCS_MASK(gt)) + intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0); /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0); @@ -195,6 +197,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0); + if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1)) + intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0); + if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) + intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); @@ -225,6 +231,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) /* Enable RCS, BCS, VCS and VECS class interrupts. */ intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask); intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask); + if (CCS_MASK(gt)) + intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); @@ -238,6 +246,11 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1)) + intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) + intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask); + /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 530807bfe9a0..69b826a3c381 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1468,6 +1468,7 @@ #define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) #define GEN11_CRYPTO_RSVD_INTR_ENABLE _MMIO(0x190040) #define GEN11_GUNIT_CSME_INTR_ENABLE _MMIO(0x190044) +#define GEN12_CCS_RSVD_INTR_ENABLE _MMIO(0x190048) #define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) #define GEN11_INTR_DATA_VALID (1 << 31) @@ -1493,6 +1494,8 @@ #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) #define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) #define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4) +#define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100) +#define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104) #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 51417e9b740f..d134838b3458 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1241,6 +1241,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) #define VEBOX_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS) +#define CCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS) /* * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution -- cgit From 803efd297e315859ac7830445699f01eeb1f7822 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 1 Mar 2022 15:15:40 -0800 Subject: drm/i915/xehp: compute engine pipe_control CCS will reuse the RCS functions for breadcrumb and flush emission. However, CCS pipe_control has additional programming restrictions: - Command Streamer Stall Enable must be always set - Post Sync Operations must not be set to Write PS Depth Count - 3D-related bits must not be set v2: - Drop unwanted blank line. 
(Lucas) Bspec: 47112 Cc: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 34 ++++++++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 15 ++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 1f8cf4f790b2..b1b9c3fd7bf9 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -201,6 +201,8 @@ static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { + struct intel_engine_cs *engine = rq->engine; + if (mode & EMIT_FLUSH) { u32 flags = 0; u32 *cs; @@ -219,6 +221,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CS_STALL; + if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_FLAGS; + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -246,6 +251,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CS_STALL; + if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_FLAGS; + cs = intel_ring_begin(rq, 8 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -618,19 +626,27 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + struct drm_i915_private *i915 = rq->engine->i915; + u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + /* Wa_1409600907 */ + flags |= PIPE_CONTROL_DEPTH_STALL; + + if (rq->engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_FLAGS; + cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + flags); return gen12_emit_fini_breadcrumb_tail(rq, cs); } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index f8253012d166..d112ffd56418 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -228,11 +228,14 @@ #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */ #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */ #define PIPE_CONTROL_FLUSH_L3 (1<<27) +#define PIPE_CONTROL_AMFS_FLUSH (1<<25) /* gen12+ */ #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_PSD_SYNC (1<<17) /* gen11+ */ #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE 
(1<<14) @@ -254,6 +257,18 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ +/* 3D-related flags can't be set on compute engine */ +#define PIPE_CONTROL_3D_FLAGS (\ + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_STALL | \ + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ + PIPE_CONTROL_PSD_SYNC | \ + PIPE_CONTROL_AMFS_FLUSH | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) + #define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) #define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) /* Opcodes for MI_MATH_INSTR */ -- cgit From c674c5b9342e5cb0f3d9e9bcaf37dbe2087845e5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:41 -0800 Subject: drm/i915/xehp: CCS should use RCS setup functions The compute engine handles the same commands the render engine can (except 3D pipeline), so it makes sense that CCS is more similar to RCS than non-render engines. The CCS context state (lrc) is also similar to the render one, so reuse it. Note that the compute engine has its own CTX_R_PWR_CLK_STATE register. In order to avoid having multiple RCS && CCS checks, add the following engine flag: - I915_ENGINE_HAS_RCS_REG_STATE - use the render (larger) reg state ctx. BSpec: 46260 Original-author: Michel Thierry Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 8 +++++--- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index bd60d42238fb..7609db87df05 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -885,7 +885,9 @@ out_file: return err; } -static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma) +static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, + struct i915_vma *vma, + struct intel_engine_cs *engine) { u32 *cmd; @@ -896,7 +898,7 @@ static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *v return PTR_ERR(cmd); *cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE)); + *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); *cmd++ = lower_32_bits(vma->node.start); *cmd++ = upper_32_bits(vma->node.start); *cmd = MI_BATCH_BUFFER_END; @@ -957,7 +959,7 @@ retry: if (err) goto err_vma; - err = rpcs_query_batch(rpcs, vma); + err = rpcs_query_batch(rpcs, vma, ce->engine); if (err) goto err_batch; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3150c0847f65..edba18c942cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -208,6 +208,8 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); 
switch (class) { + case COMPUTE_CLASS: + fallthrough; case RENDER_CLASS: switch (GRAPHICS_VER(gt->i915)) { default: @@ -431,6 +433,10 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) engine->props.preempt_timeout_ms = 0; + /* features common between engines sharing EUs */ + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) + engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; + engine->defaults = engine->props; /* never to change again */ engine->context_size = intel_engine_context_size(gt, engine->class); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index f4533ccafbaf..5fa5f21bbf2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -524,6 +524,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) +#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) unsigned int flags; /* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 961d795220a3..47fca5ebfa76 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3480,7 +3480,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); - if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine); lrc_init_wa_ctx(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 004e1216e654..d333400d29fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -623,7 +623,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 && !intel_engine_has_relative_mmio(engine)); - if (engine->class == RENDER_CLASS) { + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) @@ -1619,7 +1619,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) unsigned int i; int err; - if (engine->class != RENDER_CLASS) + if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) return; switch (GRAPHICS_VER(engine->i915)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 810910e8042e..a5c17bb4edfe 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3776,7 +3776,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) guc_default_irqs(engine); guc_init_breadcrumbs(engine); - if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine); lrc_init_wa_ctx(engine); -- cgit From f4c1fdb93992ffc55899f38ddebcc0e1c390226e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:42 -0800 Subject: drm/i915: Move context descriptor fields to intel_lrc.h This is a more appropriate header for these definitions. v2: - Cleanup whitespace. 
(Lucas) Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 34 ------------------------------- drivers/gpu/drm/i915/gt/intel_lrc.h | 34 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index edba18c942cf..b0982a9e4476 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -21,6 +21,7 @@ #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_gt_pm.h" +#include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_reset.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 69b826a3c381..84f189738a68 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1499,38 +1499,4 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) -enum { - INTEL_ADVANCED_CONTEXT = 0, - INTEL_LEGACY_32B_CONTEXT, - INTEL_ADVANCED_AD_CONTEXT, - INTEL_LEGACY_64B_CONTEXT -}; - -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; - -#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) -#define GEN8_CTX_VALID (1 << 0) -#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) -#define GEN8_CTX_FORCE_RESTORE (1 << 2) -#define GEN8_CTX_L3LLC_COHERENT (1 << 5) -#define GEN8_CTX_PRIVILEGE (1 << 8) -#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ID_SHIFT 32 -#define GEN8_CTX_ID_WIDTH 21 -#define GEN11_SW_CTX_ID_SHIFT 37 -#define GEN11_SW_CTX_ID_WIDTH 11 -#define GEN11_ENGINE_CLASS_SHIFT 61 -#define GEN11_ENGINE_CLASS_WIDTH 3 -#define GEN11_ENGINE_INSTANCE_SHIFT 48 -#define GEN11_ENGINE_INSTANCE_WIDTH 6 -#define XEHP_SW_CTX_ID_SHIFT 39 -#define XEHP_SW_CTX_ID_WIDTH 16 -#define XEHP_SW_COUNTER_SHIFT 58 -#define XEHP_SW_COUNTER_WIDTH 6 - #endif /* __INTEL_GT_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 0b76f096b559..bb0e6c5b9922 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -69,4 +69,38 @@ void lrc_check_regs(const struct intel_context *ce, void lrc_update_runtime(struct intel_context *ce); +enum { + INTEL_ADVANCED_CONTEXT = 0, + INTEL_LEGACY_32B_CONTEXT, + INTEL_ADVANCED_AD_CONTEXT, + INTEL_LEGACY_64B_CONTEXT +}; + +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) +#define GEN8_CTX_VALID (1 << 0) +#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) +#define GEN8_CTX_FORCE_RESTORE (1 << 2) +#define GEN8_CTX_L3LLC_COHERENT (1 << 5) +#define GEN8_CTX_PRIVILEGE (1 << 8) +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_CLASS_WIDTH 3 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define GEN11_ENGINE_INSTANCE_WIDTH 6 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 +#define XEHP_SW_COUNTER_SHIFT 58 +#define XEHP_SW_COUNTER_WIDTH 6 + #endif /* __INTEL_LRC_H__ */ -- cgit From 
adfadb5638bf32e97326ec05ae379be561e13677 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:43 -0800 Subject: drm/i915/xehp: Define context scheduling attributes in lrc descriptor In Dual Context mode the EUs are shared between render and compute command streamers. The hardware provides a field in the lrc descriptor to indicate the prioritization of the thread dispatch associated to the corresponding context. The context priority is set to 'low' at creation time and relies on the existing context priority to set it to low/normal/high. Bspec: 46145, 46260 Original-author: Michel Thierry Cc: Tvrtko Ursulin Signed-off-by: Aravind Iddamsetty Signed-off-by: Prasad Nallani Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 +++++- drivers/gpu/drm/i915/gt/intel_lrc.h | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b0982a9e4476..2136c56d3abc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -435,8 +435,10 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->props.preempt_timeout_ms = 0; /* features common between engines sharing EUs */ - if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; + engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; + } engine->defaults = engine->props; /* never to change again */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 5fa5f21bbf2d..19ff8758e34d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -525,6 +525,7 @@ struct intel_engine_cs { #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) #define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) +#define I915_ENGINE_HAS_EU_PRIORITY BIT(10) unsigned int flags; /* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 47fca5ebfa76..c8407cc96c42 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -665,9 +665,13 @@ static inline void execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc.desc; + u64 desc; u32 tail, prev; + desc = ce->lrc.desc; + if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) + desc |= lrc_desc_priority(rq_prio(rq)); + /* * WaIdleLiteRestore:bdw,skl * diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index bb0e6c5b9922..6e4f9f58fca5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -6,6 +6,9 @@ #ifndef __INTEL_LRC_H__ #define __INTEL_LRC_H__ +#include "i915_priolist_types.h" + +#include #include struct drm_i915_gem_object; @@ -90,6 +93,10 @@ enum { #define GEN8_CTX_L3LLC_COHERENT (1 << 5) #define GEN8_CTX_PRIVILEGE (1 << 8) #define 
GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define GEN12_CTX_PRIORITY_MASK GENMASK(10, 9) +#define GEN12_CTX_PRIORITY_HIGH FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 2) +#define GEN12_CTX_PRIORITY_NORMAL FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 1) +#define GEN12_CTX_PRIORITY_LOW FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 0) #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 #define GEN11_SW_CTX_ID_SHIFT 37 @@ -103,4 +110,14 @@ enum { #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 +static inline u32 lrc_desc_priority(int prio) +{ + if (prio > I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_HIGH; + else if (prio < I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_LOW; + else + return GEN12_CTX_PRIORITY_NORMAL; +} + #endif /* __INTEL_LRC_H__ */ -- cgit From 87cb6d80f2d196427e64d2e6179ee9b1a3609dce Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 16:15:54 -0800 Subject: drm/i915/xehp: Enable ccs/dual-ctx in RCU_MODE We have to specify in the Render Control Unit Mode register when CCS is enabled. v2: - Move RCU_MODE programming to a helper function. (Tvrtko) - Clean up and clarify comments. (Tvrtko) - Add RCU_MODE to the GuC save/restore list. (Daniele) v3: - Move this patch before the GuC ADS update to enable compute engines; the definition of RCU_MODE and its insertion into the save/restore list moves to this patch. (Daniele) v4: - Call xehp_enable_ccs_engines() directly in guc_resume() and execlists_resume() rather than adding an extra layer of wrapping to the engine->resume() vfunc. (Umesh) Bspec: 46034 Original-author: Michel Thierry Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Vinay Belgaumkar Cc: Umesh Nerlige Ramappa Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20220302001554.1836066-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 3 +++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 4 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +++ 6 files changed, 32 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index be4b1e65442f..1c0ab05c3c40 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -265,6 +265,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, void intel_engine_destroy_pinned_context(struct intel_context *ce); +void xehp_enable_ccs_engines(struct intel_engine_cs *engine); + #define ENGINE_PHYSICAL 0 #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 2136c56d3abc..92f4cf9833ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -2070,6 +2070,23 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) return active; } +void xehp_enable_ccs_engines(struct intel_engine_cs *engine) +{ + /* + * If there are any non-fused-off CCS engines, we need to enable CCS + * support in the RCU_MODE register. 
This only needs to be done once, + * so for simplicity we'll take care of this in the RCS engine's + * resume handler; since the RCS and all CCS engines belong to the + * same reset domain and are reset together, this will also take care + * of re-applying the setting after i915-triggered resets. + */ + if (!CCS_MASK(engine->gt)) + return; + + intel_uncore_write(engine->uncore, GEN12_RCU_MODE, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "mock_engine.c" #include "selftest_engine.c" diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index c8407cc96c42..3e0c81f06bd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2911,6 +2911,9 @@ static int execlists_resume(struct intel_engine_cs *engine) enable_execlists(engine); + if (engine->class == RENDER_CLASS) + xehp_enable_ccs_engines(engine); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 84f189738a68..e629443e07ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1327,6 +1327,9 @@ #define ECOBITS_PPGTT_CACHE64B (3 << 8) #define ECOBITS_PPGTT_CACHE4B (0 << 8) +#define GEN12_RCU_MODE _MMIO(0x14800) +#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) + #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 847e00390b00..29fbe4681ca7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -335,6 +335,10 @@ static int guc_mmio_regset_init(struct temp_regset *regset, ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); + if (engine->class == RENDER_CLASS && + CCS_MASK(engine->gt)) + ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a5c17bb4edfe..1ce7e04aa837 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3595,6 +3595,9 @@ static int guc_resume(struct intel_engine_cs *engine) setup_hwsp(engine); start_engine(engine); + if (engine->class == RENDER_CLASS) + xehp_enable_ccs_engines(engine); + return 0; } -- cgit From ea4ca894a160002f4488324ec39083d992cc7163 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 1 Mar 2022 15:15:45 -0800 Subject: drm/i915/xehp/guc: enable compute engine inside GuC Tell GuC that CCS is enabled by setting the CCS mask in its ADS. 
Cc: Vinay Belgaumkar Original-author: Michel Thierry Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-10-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 29fbe4681ca7..9bb551b83e7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -434,6 +434,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt, struct iosys_map *info_map) { info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1); + info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt)); -- cgit From e393e2aa0ad7ae0d187de93f4cbcfc480d28b5f6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 1 Mar 2022 15:15:46 -0800 Subject: drm/i915/xehp: Don't support parallel submission on compute / render A different emit breadcrumbs ring programming is required for compute / render and we don't have UMD user so just reject parallel submission for these engine classes. Signed-off-by: Matthew Brost Signed-off-by: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-11-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 ++++++++++ drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c | 4 ++++ 2 files changed, 14 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index bc6d59df064d..9ae294eb7fb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -670,6 +670,16 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, goto out_err; } + /* + * We don't support breadcrumb handshake on these + * classes + */ + if (siblings[n]->class == RENDER_CLASS || + siblings[n]->class == COMPUTE_CLASS) { + err = -EINVAL; + goto out_err; + } + if (n) { if (prev_engine.engine_class != ci.engine_class) { diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c index 1297ddbf7f88..812220a43df8 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -154,6 +154,10 @@ static int intel_guc_multi_lrc_basic(void *arg) int ret; for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { + /* We don't support breadcrumb handshake on these classes */ + if (class == COMPUTE_CLASS || class == RENDER_CLASS) + continue; + ret = __intel_guc_multi_lrc_basic(gt, class); if (ret) return ret; -- cgit From 88ed07cb2737e15b7ea412dd8ab37de2397cccdf Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 1 Mar 2022 21:20:08 -0800 Subject: drm/i915/xehp: handle fused off CCS engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HW resources are divided across the active CCS engines at the compute slice level, with each CCS having priority on one of the 
cslices. If a compute slice has no enabled DSS, its paired compute engine is not usable in full parallel execution because the other ones already fully saturate the HW, so consider it fused off. v2 (José): - moved it to its own function - fixed definition of ccs_mask v3 (Matt): - Replace fls() condition with a simple IP version test v4 (Matt): - Don't try to calculate a ccs_mask using intel_slicemask_from_dssmask() until we've determined that we're running on an Xe_HP platform where the logic makes sense (and won't overflow). Cc: Stuart Summers Cc: Vinay Belgaumkar Cc: Ashutosh Dixit Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220302052008.1884985-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu.c | 17 ++++++++++++++--- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +++- 3 files changed, 42 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 92f4cf9833ee..e1aa78b20d2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -592,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, return false; } +static void engine_mask_apply_compute_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = >->info; + int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS; + unsigned long ccs_mask; + unsigned int i; + + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + return; + + ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), + ss_per_ccs); + /* + * If all DSS in a quadrant are fused off, the corresponding CCS + * engine is not available for use. + */ + for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { + info->engine_mask &= ~BIT(_CCS(i)); + drm_dbg(&i915->drm, "ccs%u fused off\n", i); + } +} + /* * Determine which engines are fused off in our particular hardware. 
* Note that we have a catch-22 situation where we need to be able to access @@ -673,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) vebox_mask, VEBOX_MASK(gt)); GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); + engine_mask_apply_compute_fuses(gt); + return info->engine_mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 29118c652811..4ac0bbaf0c31 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +static u32 +_intel_sseu_get_subslices(const struct sseu_dev_info *sseu, + const u8 *subslice_mask, u8 slice) { int i, offset = slice * sseu->ss_stride; u32 mask = 0; @@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) GEM_BUG_ON(slice >= sseu->max_slices); for (i = 0; i < sseu->ss_stride; i++) - mask |= (u32)sseu->subslice_mask[offset + i] << - i * BITS_PER_BYTE; + mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE; return mask; } +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice); +} + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) +{ + return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); +} + void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 60882a74741e..8a79cd8eaab4 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask); -- cgit From ff6b19d3a0f939465b1e40040c4c4869154bf516 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Mar 2022 15:15:48 -0800 Subject: drm/i915/xehp: Add compute workarounds Additional workarounds are required once we start exposing CCS engines. Note that we have a number of workarounds that update registers in the shared render/compute reset domain. Historically we've just added such registers to the RCS engine's workaround list. But going forward we should be more careful to place such workarounds on a wa_list for an engine that definitely exists and is not fused off (e.g., a platform with no RCS would never apply the RCS wa_list). We'll keep rcs_engine_wa_init() focused on RCS-specific workarounds that only need to be applied if the RCS engine is present. A separate general_render_compute_wa_init() function will be used to define workarounds that touch registers in the shared render/compute reset domain and that we need to apply regardless of what render and/or compute engines actually exist. Any workarounds defined in this new function will internally be added to the first present RCS or CCS engine's workaround list to ensure they get applied (and only get applied once rather than being needlessly re-applied several times). 
Co-author: Srinivasan Shanmugam Signed-off-by: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-13-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 8 +++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 47 +++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index e629443e07ae..19cd34f24263 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1060,6 +1060,7 @@ #define FLOW_CONTROL_ENABLE REG_BIT(15) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) +#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10) #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8) #define STALL_DOP_GATING_DISABLE REG_BIT(5) #define THROTTLE_12_5 REG_GENMASK(4, 2) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d333400d29fe..07bef7128fdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1217,6 +1217,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_timestamp_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); + /* Wa_16013000631:dg2 */ + if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(ce->engine->i915)) + if (ce->engine->class == COMPUTE_CLASS) + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, + 0); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0471d475e680..0b9435d62808 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1921,6 +1921,11 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4); break; + case COMPUTE_CLASS: + /* Wa_16011157294:dg2_g10 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); + break; default: break; } @@ -2581,6 +2586,40 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } +/* + * The workarounds in this function apply to shared registers in + * the general render reset domain that aren't tied to a + * specific engine. Since all render+compute engines get reset + * together, and the contents of these registers are lost during + * the shared render domain reset, we'll define such workarounds + * here and then add them to just a single RCS or CCS engine's + * workaround list (whichever engine has the XXXX flag). 
+ */ +static void +general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_XEHPSDV(i915)) { + /* Wa_1409954639 */ + wa_masked_en(wal, + GEN8_ROW_CHICKEN, + SYSTOLIC_DOP_CLOCK_GATING_DIS); + + /* Wa_1607196519 */ + wa_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); + + /* Wa_14010670810:xehpsdv */ + wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + + /* Wa_14010449647:xehpsdv */ + wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + } +} + static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2589,6 +2628,14 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal engine_fake_wa_init(engine, wal); + /* + * These are common workarounds that just need to applied + * to a single RCS/CCS engine's workaround list since + * they're reset as part of the general render domain reset. + */ + if (engine->class == RENDER_CLASS) + general_render_compute_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else -- cgit From b2006061ae28fe7e84af6c9757ee89c4e505e92b Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 1 Mar 2022 15:15:49 -0800 Subject: drm/i915/xehpsdv: Move render/compute engine reset domains related workarounds Registers that exist in the shared render/compute reset domain need to be placed on an engine workaround list to ensure that they are properly re-applied whenever an RCS or CCS engine is reset. We have a number of workarounds (updating registers MLTICTXCTL, L3SQCREG1_CCS0, GEN12_MERT_MOD_CTRL, and GEN12_GAMCNTRL_CTRL) that are incorrectly implemented on the 'gt' workaround list and need to be moved accordingly. 
Cc: Matt Roper Signed-off-by: Srinivasan Shanmugam Signed-off-by: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220301231549.1817978-14-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0b9435d62808..c014b40d2e9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1343,12 +1343,6 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409757795:xehpsdv */ wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); - /* Wa_18011725039:xehpsdv */ - if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { - wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); - wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); - } - /* Wa_16011155590:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1385,19 +1379,12 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMTLBVEBOX0_CLKGATE_DIS); } - /* Wa_14012362059:xehpsdv */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); - /* Wa_16012725990:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS); /* Wa_14011060649:xehpsdv */ wa_14011060649(gt, wal); - - /* Wa_14014368820:xehpsdv */ - wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | - GLOBAL_INVALIDATION_MODE); } static void @@ -2617,6 +2604,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_14010449647:xehpsdv */ wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* Wa_18011725039:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { + wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + } + + /* Wa_14012362059:xehpsdv */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_14014368820:xehpsdv */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); } } -- cgit From 804f468853179b9b58af05c153c411931aa5b310 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Fri, 25 Feb 2022 09:02:28 +0200 Subject: drm/i915/psr: Set "SF Partial Frame Enable" also on full update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are observing occasional screen flickering when PSR2 selective fetch is enabled. More specifically glitch seems to happen on full frame update when cursor moves to coords x = -1 or y = -1. According to Bspec SF Single full frame should not be set if SF Partial Frame Enable is not set. This happened to be true for ADLP as PSR2_MAN_TRK_CTL_ENABLE is always set and for ADL_P it's actually "SF Partial Frame Enable" (Bit 31). Setting "SF Partial Frame Enable" bit also on full update seems to fix screen flickering. Also make code more clear by setting PSR2_MAN_TRK_CTL_ENABLE only if not on ADL_P. Bit 31 has different meaning in ADL_P. 
Bspec: 49274 v2: Fix Mihai Harpau email address v3: Modify commit message and remove unnecessary comment Tested-by: Lyude Paul Fixes: 7f6002e58025 ("drm/i915/display: Enable PSR2 selective fetch by default") Reported-by: Lyude Paul Cc: Mihai Harpau Cc: José Roberto de Souza Cc: Ville Syrjälä Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/5077 Signed-off-by: Jouni Högander Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220225070228.855138-1-jouni.hogander@intel.com (cherry picked from commit 8d5516d18b323cf7274d1cf5fe76f4a691f879c6) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_psr.c | 16 ++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index a1a663f362e7..00279e8c2775 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1406,6 +1406,13 @@ static inline u32 man_trk_ctl_single_full_frame_bit_get(struct drm_i915_private PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; } +static inline u32 man_trk_ctl_partial_frame_bit_get(struct drm_i915_private *dev_priv) +{ + return IS_ALDERLAKE_P(dev_priv) ? + ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE : + PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; +} + static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -1510,7 +1517,13 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val = PSR2_MAN_TRK_CTL_ENABLE; + u32 val = 0; + + if (!IS_ALDERLAKE_P(dev_priv)) + val = PSR2_MAN_TRK_CTL_ENABLE; + + /* SF partial frame enable has to be set even on full update */ + val |= man_trk_ctl_partial_frame_bit_get(dev_priv); if (full_update) { /* @@ -1530,7 +1543,6 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, } else { drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4); - val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1); val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c2bb33febb68..902e4c802a12 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4829,6 +4829,7 @@ enum { #define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) #define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(12, 0) #define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) +#define ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(31) #define ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(14) #define ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(13) -- cgit From 43d26c4fc6c446d766253d546f0083d78023d34a Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 22 Feb 2022 10:05:32 -0500 Subject: drm/i915/gvt: add the missing mdev attribute "name" The mdev attribute "name" is required by some middle software, e.g. 
KubeVirt, an open source SW that manages VM on Kubernetes cluster uses the mdev sysfs directory/file structure to discover mediated device in nodes in the cluster. v2: - Fix the missing defination in gvt_type_attrs. (Zhenyu) Cc: Zhenyu Wang Cc: Hui Chun Ong Cc: Terrence Xu Cc: Zhi Wang Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220222150532.9090-1-zhi.a.wang@intel.com Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e8d6c76e9234..057ec4490104 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -188,14 +188,29 @@ static ssize_t description_show(struct mdev_type *mtype, type->weight); } +static ssize_t name_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, char *buf) +{ + struct intel_vgpu_type *type; + struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; + + type = &gvt->types[mtype_get_type_group_id(mtype)]; + if (!type) + return 0; + + return sprintf(buf, "%s\n", type->name); +} + static MDEV_TYPE_ATTR_RO(available_instances); static MDEV_TYPE_ATTR_RO(device_api); static MDEV_TYPE_ATTR_RO(description); +static MDEV_TYPE_ATTR_RO(name); static struct attribute *gvt_type_attrs[] = { &mdev_type_attr_available_instances.attr, &mdev_type_attr_device_api.attr, &mdev_type_attr_description.attr, + &mdev_type_attr_name.attr, NULL, }; -- cgit From 4fe4ed07c815044755075eaad5fe1815436a060f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 23 Feb 2022 15:13:03 +0200 Subject: drm/i915: Avoid negative shift due to bigjoiner_pipes==0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bigjoiner_pipes==0 leads bigjoiner_master_pipe() to do BIT(ffs(0)-1) which is undefined behaviour. The code should actually still work fine since the only place we provoke that is intel_crtc_bigjoiner_slave_pipes() and it'll bitwise AND the result with 0, so doesn't really matter what we get out of bigjoiner_master_pipe(). But best not provoke undefined behaviour anyway. 
Reported-by: kernel test robot Fixes: a6e7a006f5d5 ("drm/i915: Change bigjoiner state tracking to use the pipe bitmask") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220223131315.18016-2-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare (cherry picked from commit cccc71b552a1040ad3d738d7ec95570801fb0bf6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 80b19c304c43..f3f5f11a5abf 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -346,7 +346,10 @@ static enum pipe bigjoiner_master_pipe(const struct intel_crtc_state *crtc_state u8 intel_crtc_bigjoiner_slave_pipes(const struct intel_crtc_state *crtc_state) { - return crtc_state->bigjoiner_pipes & ~BIT(bigjoiner_master_pipe(crtc_state)); + if (crtc_state->bigjoiner_pipes) + return crtc_state->bigjoiner_pipes & ~BIT(bigjoiner_master_pipe(crtc_state)); + else + return 0; } bool intel_crtc_is_bigjoiner_slave(const struct intel_crtc_state *crtc_state) -- cgit From 117f5bb31c8cdb1e8f0b443f2a0ac761bf54694c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 14 Feb 2022 12:55:29 +0200 Subject: drm/i915: Don't skip ddb allocation if data_rate==0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit data_rate==0 no longer means a plane is disabled, it could also mean we want to use the minimum ddb allocation for it. Hence we can't bail out early during ddb allocation or else we'll simply forget to allocate any ddb for such planes. Cc: Stanislav Lisovskiy Fixes: 6a4d8cc6bbbf ("drm/i915: Don't allocate extra ddb during async flip for DG2") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220214105532.13049-2-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit 6475e106821babc0dc478a9cb3fc3973739c43fb) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_pm.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5af16ca4dabd..71f7fba2c9e2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5145,12 +5145,15 @@ skl_allocate_plane_ddb(struct skl_plane_ddb_iter *iter, const struct skl_wm_level *wm, u64 data_rate) { - u16 extra; + u16 extra = 0; - extra = min_t(u16, iter->size, - DIV64_U64_ROUND_UP(iter->size * data_rate, iter->data_rate)); - iter->size -= extra; - iter->data_rate -= data_rate; + if (data_rate) { + extra = min_t(u16, iter->size, + DIV64_U64_ROUND_UP(iter->size * data_rate, + iter->data_rate)); + iter->size -= extra; + iter->data_rate -= data_rate; + } return wm->min_ddb_alloc + extra; } @@ -5193,9 +5196,6 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, skl_ddb_entry_init(&crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR], alloc->end - iter.total[PLANE_CURSOR], alloc->end); - if (iter.data_rate == 0) - return 0; - /* * Find the highest watermark level for which we can satisfy the block * requirement of active planes. 
@@ -5234,6 +5234,10 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, return -EINVAL; } + /* avoid the WARN later when we don't allocate any extra DDB */ + if (iter.data_rate == 0) + iter.size = 0; + /* * Grant each plane the blocks it requires at the highest achievable * watermark level, plus an extra share of the leftover blocks @@ -5246,20 +5250,10 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, if (plane_id == PLANE_CURSOR) continue; - /* - * We've accounted for all active planes; remaining planes are - * all disabled. - */ - if (iter.data_rate == 0) - break; - iter.total[plane_id] = skl_allocate_plane_ddb(&iter, &wm->wm[level], crtc_state->plane_data_rate[plane_id]); - if (iter.data_rate == 0) - break; - iter.uv_total[plane_id] = skl_allocate_plane_ddb(&iter, &wm->uv_wm[level], crtc_state->uv_plane_data_rate[plane_id]); -- cgit From 176c0b55d9bfe6e2a7c8ccf3edaec7c92d856b2e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 14 Feb 2022 12:55:30 +0200 Subject: drm/i915: Check async flip capability early on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the async flip state check is done very late, and can thus potentially see all the planes in the state (due to the wm/ddb optimization), we need to move the "can the requested plane do async flips at all?" check much earlier. For this purpose we introduce intel_async_flip_check_uapi() that gets called early during the atomic check. And for good measure we'll throw in a couple of basic checks: - is the crtc active? - was a modeset flagged? - is+was the plane enabled? Though atm all of those should be guaranteed by the fact that the async flip can only be requested through the legacy page flip ioctl. Cc: Stanislav Lisovskiy Fixes: c3639f3be480 ("drm/i915: Use wm0 only during async flips for DG2") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220214105532.13049-3-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit b0b2bed2a1305c8f977c6b7d5fa162773693a212) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display.c | 79 +++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f3f5f11a5abf..7099c8aaf4fb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7401,7 +7401,7 @@ static void kill_bigjoiner_slave(struct intel_atomic_state *state, * Correspondingly, support is currently added for primary plane only. * * Async flip can only change the plane surface address, so anything else - * changing is rejected from the intel_atomic_check_async() function. + * changing is rejected from the intel_async_flip_check_hw() function. * Once this check is cleared, flip done interrupt is enabled using * the intel_crtc_enable_flip_done() function. * @@ -7411,7 +7411,65 @@ static void kill_bigjoiner_slave(struct intel_atomic_state *state, * correspond to the last vblank and have no relation to the actual time when * the flip done event was sent.
*/ -static int intel_atomic_check_async(struct intel_atomic_state *state, struct intel_crtc *crtc) +static int intel_async_flip_check_uapi(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_plane_state *old_plane_state; + struct intel_plane_state *new_plane_state; + struct intel_plane *plane; + int i; + + if (!new_crtc_state->uapi.async_flip) + return 0; + + if (!new_crtc_state->uapi.active) { + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s] not active\n", + crtc->base.base.id, crtc->base.name); + return -EINVAL; + } + + if (intel_crtc_needs_modeset(new_crtc_state)) { + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s] modeset required\n", + crtc->base.base.id, crtc->base.name); + return -EINVAL; + } + + for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, + new_plane_state, i) { + if (plane->pipe != crtc->pipe) + continue; + + /* + * TODO: Async flip is only supported through the page flip IOCTL + * as of now. So support currently added for primary plane only. + * Support for other planes on platforms on which supports + * this(vlv/chv and icl+) should be added when async flip is + * enabled in the atomic IOCTL path. + */ + if (!plane->async_flip) { + drm_dbg_kms(&i915->drm, + "[PLANE:%d:%s] async flip not supported\n", + plane->base.base.id, plane->base.name); + return -EINVAL; + } + + if (!old_plane_state->uapi.fb || !new_plane_state->uapi.fb) { + drm_dbg_kms(&i915->drm, + "[PLANE:%d:%s] no old or new framebuffer\n", + plane->base.base.id, plane->base.name); + return -EINVAL; + } + } + + return 0; +} + +static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct drm_i915_private *i915 = to_i915(state->base.dev); const struct intel_crtc_state *old_crtc_state, *new_crtc_state; @@ -7422,6 +7480,9 @@ static int intel_atomic_check_async(struct intel_atomic_state *state, struct int old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + if (!new_crtc_state->uapi.async_flip) + return 0; + if (intel_crtc_needs_modeset(new_crtc_state)) { drm_dbg_kms(&i915->drm, "Modeset Required. Async flip not supported\n"); return -EINVAL; @@ -7616,6 +7677,12 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + ret = intel_async_flip_check_uapi(state, crtc); + if (ret) + return ret; + } + ret = intel_bigjoiner_add_affected_crtcs(state); if (ret) goto fail; @@ -7772,11 +7839,9 @@ static int intel_atomic_check(struct drm_device *dev, for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (new_crtc_state->uapi.async_flip) { - ret = intel_atomic_check_async(state, crtc); - if (ret) - goto fail; - } + ret = intel_async_flip_check_hw(state, crtc); + if (ret) + goto fail; if (!intel_crtc_needs_modeset(new_crtc_state) && !new_crtc_state->update_pipe) -- cgit From 5c8107dc9a9f33a88a380aea79be564597d00663 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 14 Feb 2022 12:55:31 +0200 Subject: drm/i915: Fix the async flip wm0/ddb optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of the async flip wm0/ddb optimization does not work at all. 
The biggest problem is that we skip the whole intel_pipe_update_{start,end}() dance and thus never actually complete the commit that is trying to do the wm/ddb change. To fix this we need to move the do_async_flip flag to the crtc state since we handle commits per-pipe, not per-plane. Also since all planes can now be included in the first/last "async flip" (which gets converted to a sync flip to do the wm/ddb mangling) we need to be more careful when checking if the plane state is async flip compatible. Only planes doing the async flip should be checked and other planes are perfectly fine not adhering to any async flip related limitations. However for subsequent commits which are actually going to do the async flip in hardware we want to make sure no other planes are in the state. That should never happen assuming we did our job correctly, so we'll toss in a WARN to make sure we catch any bugs here. Cc: Stanislav Lisovskiy Fixes: c3639f3be480 ("drm/i915: Use wm0 only during async flips for DG2") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220214105532.13049-4-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit 2e08437160d1e8f2cd3f0d56d59e74423602116e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_atomic.c | 1 + drivers/gpu/drm/i915/display/intel_atomic_plane.c | 7 ++-- drivers/gpu/drm/i915/display/intel_crtc.c | 4 +-- drivers/gpu/drm/i915/display/intel_display.c | 38 +++++++++++++--------- drivers/gpu/drm/i915/display/intel_display_types.h | 6 ++-- 5 files changed, 31 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index e0667d163266..40da7910f845 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -262,6 +262,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->preload_luts = false; crtc_state->inherited = false; crtc_state->wm.need_postvbl_update = false; + crtc_state->do_async_flip = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; crtc_state->dsb = NULL; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index c53aa6a4c7a0..5712688232fb 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -110,7 +110,6 @@ intel_plane_duplicate_state(struct drm_plane *plane) intel_state->ggtt_vma = NULL; intel_state->dpt_vma = NULL; intel_state->flags = 0; - intel_state->do_async_flip = false; /* add reference to fb */ if (intel_state->hw.fb) @@ -506,7 +505,7 @@ static int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_cr new_crtc_state->disable_lp_wm = true; if (intel_plane_do_async_flip(plane, old_crtc_state, new_crtc_state)) - new_plane_state->do_async_flip = true; + new_crtc_state->do_async_flip = true; return 0; } @@ -678,7 +677,7 @@ void intel_plane_update_arm(struct intel_plane *plane, trace_intel_plane_update_arm(&plane->base, crtc); - if (plane_state->do_async_flip) + if (crtc_state->do_async_flip && plane->async_flip) plane->async_flip(plane, crtc_state, plane_state, true); else plane->update_arm(plane, crtc_state, plane_state); @@ -703,7 +702,7 @@ void intel_crtc_planes_update_noarm(struct intel_atomic_state *state, struct intel_plane *plane; int i; - if (new_crtc_state->uapi.async_flip) + if (new_crtc_state->do_async_flip) return; /* diff
--git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 08ee3e17ee5c..65827481c1b1 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -485,7 +485,7 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI); DEFINE_WAIT(wait); - if (new_crtc_state->uapi.async_flip) + if (new_crtc_state->do_async_flip) return; if (intel_crtc_needs_vblank_work(new_crtc_state)) @@ -630,7 +630,7 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) ktime_t end_vbl_time = ktime_get(); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (new_crtc_state->uapi.async_flip) + if (new_crtc_state->do_async_flip) return; trace_intel_pipe_update_end(crtc, end_vbl_count, scanline_end); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7099c8aaf4fb..7dfeb458aa65 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1263,10 +1263,8 @@ static void intel_crtc_enable_flip_done(struct intel_atomic_state *state, int i; for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - if (plane->enable_flip_done && - plane->pipe == crtc->pipe && - update_planes & BIT(plane->id) && - plane_state->do_async_flip) + if (plane->pipe == crtc->pipe && + update_planes & BIT(plane->id)) plane->enable_flip_done(plane); } } @@ -1282,10 +1280,8 @@ static void intel_crtc_disable_flip_done(struct intel_atomic_state *state, int i; for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - if (plane->disable_flip_done && - plane->pipe == crtc->pipe && - update_planes & BIT(plane->id) && - plane_state->do_async_flip) + if (plane->pipe == crtc->pipe && + update_planes & BIT(plane->id)) plane->disable_flip_done(plane); } } @@ -7504,15 +7500,25 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in continue; /* - * TODO: Async flip is only supported through the page flip IOCTL - * as of now. So support currently added for primary plane only. - * Support for other planes on platforms on which supports - * this(vlv/chv and icl+) should be added when async flip is - * enabled in the atomic IOCTL path. + * Only async flip capable planes should be in the state + * if we're really about to ask the hardware to perform + * an async flip. We should never get this far otherwise. */ - if (!plane->async_flip) + if (drm_WARN_ON(&i915->drm, + new_crtc_state->do_async_flip && !plane->async_flip)) return -EINVAL; + /* + * Only check async flip capable planes other planes + * may be involved in the initial commit due to + * the wm0/ddb optimization. + * + * TODO maybe should track which planes actually + * were requested to do the async flip... + */ + if (!plane->async_flip) + continue; + /* * FIXME: This check is kept generic for all platforms. 
* Need to verify this for all gen9 platforms to enable @@ -8463,7 +8469,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_dbuf_pre_plane_update(state); for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { - if (new_crtc_state->uapi.async_flip) + if (new_crtc_state->do_async_flip) intel_crtc_enable_flip_done(state, crtc); } @@ -8489,7 +8495,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) drm_atomic_helper_wait_for_flip_done(dev, &state->base); for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { - if (new_crtc_state->uapi.async_flip) + if (new_crtc_state->do_async_flip) intel_crtc_disable_flip_done(state, crtc); } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index b50d0e6efe21..776b3e6662f2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -613,9 +613,6 @@ struct intel_plane_state { struct intel_fb_view view; - /* Indicates if async flip is required */ - bool do_async_flip; - /* Plane pxp decryption state */ bool decrypt; @@ -951,6 +948,9 @@ struct intel_crtc_state { bool preload_luts; bool inherited; /* state inherited from BIOS? */ + /* Ask the hardware to actually async flip? */ + bool do_async_flip; + /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, * and get clipped at the edges. */ -- cgit From 5e7f44b5c2c035fe2e5458193c2bbee56db6a090 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 3 Mar 2022 10:02:29 +0000 Subject: drm/i915/gtt: reduce overzealous alignment constraints for GGTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently this will enforce both 2M alignment and padding for any LMEM pages inserted into the GGTT. However, this was only meant to be applied to the compact-pt layout with the ppGTT. For the GGTT we can reduce the alignment and padding to 64K. 
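As a rough standalone sketch of what the minimum-alignment table means for placement (align_up() and the size constants are invented stand-ins for the kernel's helpers, not code from this patch):

	#include <stdint.h>
	#include <stdio.h>

	#define SZ_64K	(64ULL << 10)
	#define SZ_2M	(2ULL << 20)

	/* Round offset up to the next multiple of align (a power of two). */
	static uint64_t align_up(uint64_t offset, uint64_t align)
	{
		return (offset + align - 1) & ~(align - 1);
	}

	int main(void)
	{
		uint64_t offset = 0x12345;

		/* ppGTT keeps the compact-pt 2M constraint; GGTT needs 64K. */
		printf("GGTT  min 64K: %#llx\n",
		       (unsigned long long)align_up(offset, SZ_64K));
		printf("ppGTT min 2M : %#llx\n",
		       (unsigned long long)align_up(offset, SZ_2M));

		return 0;
	}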
Bspec: 45015 Fixes: 87bd701ee268 ("drm/i915: enforce min GTT alignment for discrete cards") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Robert Beckett Cc: Ramalingam C Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220303100229.839282-1-matthew.auld@intel.com (cherry picked from commit c64fa77dd4609cb8cd53fbb73b02434ae8212c7a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 4bcdfcab3642..a5f5b2dda332 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -234,7 +234,8 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, ARRAY_SIZE(vm->min_alignment)); - if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915)) { + if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) && + subclass == VM_CLASS_PPGTT) { vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M; vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M; } else if (HAS_64K_PAGES(vm->i915)) { -- cgit From 1344794a59db2bd44b4919d2d75300fd3b1c2cd7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 8 Mar 2022 22:56:12 +0100 Subject: Kbuild: add -Wno-shift-negative-value where -Wextra is used As a preparation for moving to -std=gnu11, turn off the -Wshift-negative-value option. This warning is enabled by gcc when building with -Wextra for c99 or higher, but not for c89. Since the kernel already relies on well-defined overflow behavior, the warning is not helpful and can simply be disabled in all locations that use -Wextra. 
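A minimal example of the pattern the warning fires on (hypothetical code, not taken from the kernel):

	#include <stdio.h>

	int main(void)
	{
		/* gcc -Wextra -std=gnu11 warns here with -Wshift-negative-value:
		 * left-shifting a negative value is formally undefined in
		 * C99/C11, although two's-complement machines produce the
		 * expected mask, which is what the kernel relies on. */
		int mask = -1 << 4;

		printf("mask = %#x\n", (unsigned int)mask);

		return 0;
	}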
Signed-off-by: Arnd Bergmann Acked-by: Jani Nikula Reviewed-by: Nathan Chancellor Tested-by: Sedat Dilek # LLVM/Clang v13.0.0 (x86-64) Signed-off-by: Masahiro Yamada --- drivers/gpu/drm/i915/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1b62b9f65196..1618a6e0af4e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,6 +17,7 @@ subdir-ccflags-y += -Wno-unused-parameter subdir-ccflags-y += -Wno-type-limits subdir-ccflags-y += -Wno-missing-field-initializers subdir-ccflags-y += -Wno-sign-compare +subdir-ccflags-y += -Wno-shift-negative-value subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror -- cgit From 3886a86e7e6cc6ce2ce93c440fecd8f42aed0ce7 Mon Sep 17 00:00:00 2001 From: Mastan Katragadda Date: Thu, 3 Mar 2022 11:34:28 +0530 Subject: drm/i915/gem: add missing boundary check in vm_access A missing bounds check in vm_access() can lead to an out-of-bounds read or write in the adjacent memory area, since the len attribute is not validated before the memcpy later in the function, potentially hitting: [ 183.637831] BUG: unable to handle page fault for address: ffffc90000c86000 [ 183.637934] #PF: supervisor read access in kernel mode [ 183.637997] #PF: error_code(0x0000) - not-present page [ 183.638059] PGD 100000067 P4D 100000067 PUD 100258067 PMD 106341067 PTE 0 [ 183.638144] Oops: 0000 [#2] PREEMPT SMP NOPTI [ 183.638201] CPU: 3 PID: 1790 Comm: poc Tainted: G D 5.17.0-rc6-ci-drm-11296+ #1 [ 183.638298] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake H DDR4 RVP, BIOS CNLSFWR1.R00.X208.B00.1905301319 05/30/2019 [ 183.638430] RIP: 0010:memcpy_erms+0x6/0x10 [ 183.640213] RSP: 0018:ffffc90001763d48 EFLAGS: 00010246 [ 183.641117] RAX: ffff888109c14000 RBX: ffff888111bece40 RCX: 0000000000000ffc [ 183.642029] RDX: 0000000000001000 RSI: ffffc90000c86000 RDI: ffff888109c14004 [ 183.642946] RBP: 0000000000000ffc R08: 800000000000016b R09: 0000000000000000 [ 183.643848] R10: ffffc90000c85000 R11: 0000000000000048 R12: 0000000000001000 [ 183.644742] R13: ffff888111bed190 R14: ffff888109c14000 R15: 0000000000001000 [ 183.645653] FS: 00007fe5ef807540(0000) GS:ffff88845b380000(0000) knlGS:0000000000000000 [ 183.646570] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 183.647481] CR2: ffffc90000c86000 CR3: 000000010ff02006 CR4: 00000000003706e0 [ 183.648384] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 183.649271] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 183.650142] Call Trace: [ 183.650988] [ 183.651793] vm_access+0x1f0/0x2a0 [i915] [ 183.652726] __access_remote_vm+0x224/0x380 [ 183.653561] mem_rw.isra.0+0xf9/0x190 [ 183.654402] vfs_read+0x9d/0x1b0 [ 183.655238] ksys_read+0x63/0xe0 [ 183.656065] do_syscall_64+0x38/0xc0 [ 183.656882] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 183.657663] RIP: 0033:0x7fe5ef725142 [ 183.659351] RSP: 002b:00007ffe1e81c7e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 183.660227] RAX: ffffffffffffffda RBX: 0000557055dfb780 RCX: 00007fe5ef725142 [ 183.661104] RDX: 0000000000001000 RSI: 00007ffe1e81d880 RDI: 0000000000000005 [ 183.661972] RBP: 00007ffe1e81e890 R08: 0000000000000030 R09: 0000000000000046 [ 183.662832] R10: 0000557055dfc2e0 R11: 0000000000000246 R12: 0000557055dfb1c0 [ 183.663691] R13: 00007ffe1e81e980 R14: 
0000000000000000 R15: 0000000000000000 Changes since v1: - Updated if condition with range_overflows_t [Chris Wilson] Fixes: 9f909e215fea ("drm/i915: Implement vm_ops->access for gdb access into mmaps") Signed-off-by: Mastan Katragadda Suggested-by: Adam Zabrocki Reported-by: Jackson Cody Cc: Chris Wilson Cc: Jon Bloomfield Cc: Sudeep Dutt Cc: # v5.8+ Reviewed-by: Matthew Auld [mauld: tidy up the commit message and add Cc: stable] Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220303060428.1668844-1-mastanx.katragadda@intel.com (cherry picked from commit 661412e301e2ca86799aa4f400d1cf0bd38c57c6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index efe69d6b86f4..c3ea243d414d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -455,7 +455,7 @@ vm_access(struct vm_area_struct *area, unsigned long addr, return -EACCES; addr -= area->vm_start; - if (addr >= obj->base.size) + if (range_overflows_t(u64, addr, len, obj->base.size)) return -EINVAL; i915_gem_ww_ctx_init(&ww, true); -- cgit From 3a84fd1ed53582b31e843a152ee3219e9e4ccb8c Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 11 Mar 2022 10:51:48 -0800 Subject: drm/i915/display: Fix HPD short pulse handling for eDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 13ea6db2cf24 ("drm/i915/edp: Ignore short pulse when panel powered off") completely broke short pulse handling for eDP, as such pulses are usually generated by the sink while it is displaying an image and there is some error or status that the source needs to handle. When panel power is enabled, that alone is enough to power aux transactions and the VDD override is disabled, so intel_pps_have_power() always returns false, causing short pulses to be ignored. So rename this function to reflect that it checks whether the aux lines are powered, and fix the check to match that intent, avoiding the endless cycle mentioned in the commit being fixed.
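A standalone truth-table sketch (not the patch itself) of the state that broke: panel power on, VDD override off; the old AND reports no power, the new OR correctly reports the aux lines as powered:

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		/* Typical state while the panel is displaying an image. */
		bool have_panel_power = true;
		bool have_panel_vdd = false;	/* VDD override disabled */

		/* 0: short pulse dropped */
		printf("old (&&): %d\n", have_panel_power && have_panel_vdd);
		/* 1: short pulse handled */
		printf("new (||): %d\n", have_panel_power || have_panel_vdd);

		return 0;
	}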
v2: - renamed to intel_pps_have_panel_power_or_vdd() - fixed indentation Fixes: 13ea6db2cf24 ("drm/i915/edp: Ignore short pulse when panel powered off") Cc: Anshuman Gupta Cc: Jani Nikula Cc: Uma Shankar Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220311185149.110527-1-jose.souza@intel.com (cherry picked from commit 8f0c1c0949b609acfad62b8d5f742a3b5e7b05ab) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_pps.c | 6 +++--- drivers/gpu/drm/i915/display/intel_pps.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1046e7fe310a..d667657e3606 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4863,7 +4863,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) struct intel_dp *intel_dp = &dig_port->dp; if (dig_port->base.type == INTEL_OUTPUT_EDP && - (long_hpd || !intel_pps_have_power(intel_dp))) { + (long_hpd || !intel_pps_have_panel_power_or_vdd(intel_dp))) { /* * vdd off can generate a long/short pulse on eDP which * would require vdd on to handle it, and thus we diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 9c986e8932f8..64bd4ca0edd4 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -1075,14 +1075,14 @@ static void intel_pps_vdd_sanitize(struct intel_dp *intel_dp) edp_panel_vdd_schedule_off(intel_dp); } -bool intel_pps_have_power(struct intel_dp *intel_dp) +bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp) { intel_wakeref_t wakeref; bool have_power = false; with_intel_pps_lock(intel_dp, wakeref) { - have_power = edp_have_panel_power(intel_dp) && - edp_have_panel_vdd(intel_dp); + have_power = edp_have_panel_power(intel_dp) || + edp_have_panel_vdd(intel_dp); } return have_power; diff --git a/drivers/gpu/drm/i915/display/intel_pps.h b/drivers/gpu/drm/i915/display/intel_pps.h index fbb47f6f453e..e64144659d31 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.h +++ b/drivers/gpu/drm/i915/display/intel_pps.h @@ -37,7 +37,7 @@ void intel_pps_vdd_on(struct intel_dp *intel_dp); void intel_pps_on(struct intel_dp *intel_dp); void intel_pps_off(struct intel_dp *intel_dp); void intel_pps_vdd_off_sync(struct intel_dp *intel_dp); -bool intel_pps_have_power(struct intel_dp *intel_dp); +bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp); void intel_pps_wait_power_cycle(struct intel_dp *intel_dp); void intel_pps_init(struct intel_dp *intel_dp); -- cgit From 278da06c03655c2bb9bc36ebdf45b90a079b3bfd Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 11 Mar 2022 10:51:49 -0800 Subject: drm/i915/display: Do not re-enable PSR after it was marked as not reliable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an error happens and sink_not_reliable is set, PSR should be disabled for good but that is not happening. It would be disabled by the function handling the PSR error but then on the next fastset it would be enabled again in _intel_psr_post_plane_update(). It would only be disabled for good in the next modeset where has_psr will be set false.
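In stripped-down form (a simplified model with locking and the CRC/HW-tracking details elided; only sink_not_reliable and enabled are modelled), the fix is an early-out so a fastset can never re-enable PSR:

	#include <stdbool.h>
	#include <stdio.h>

	struct psr_state {
		bool enabled;
		bool sink_not_reliable;
	};

	static void post_plane_update(struct psr_state *psr, bool active_planes)
	{
		/* The fix: once the sink is marked unreliable, stay disabled. */
		if (psr->sink_not_reliable)
			return;

		/* Only enable if there are active planes. */
		psr->enabled = active_planes;
	}

	int main(void)
	{
		struct psr_state psr = { .enabled = false, .sink_not_reliable = true };

		post_plane_update(&psr, true);
		printf("PSR enabled after fastset: %s\n", psr.enabled ? "yes" : "no");

		return 0;
	}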
v2: - release psr lock before continue Fixes: 9ce5884e5139 ("drm/i915/display: Only keep PSR enabled if there is active planes") Reported-by: Khaled Almahallawy Reported-by: Charlton Lin Cc: Jouni Högander Signed-off-by: José Roberto de Souza Reviewed-by: Jouni Högander Link: https://patchwork.freedesktop.org/patch/msgid/20220311185149.110527-2-jose.souza@intel.com (cherry picked from commit 15f26bdc81f7f03561aaea5a10d87bd6638e1459) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 2e0b092f4b6b..5895b89df03a 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1837,6 +1837,9 @@ static void _intel_psr_post_plane_update(const struct intel_atomic_state *state, mutex_lock(&psr->lock); + if (psr->sink_not_reliable) + goto exit; + drm_WARN_ON(&dev_priv->drm, psr->enabled && !crtc_state->active_planes); /* Only enable if there is active planes */ @@ -1847,6 +1850,7 @@ static void _intel_psr_post_plane_update(const struct intel_atomic_state *state, if (crtc_state->crc_enabled && psr->enabled) psr_force_hw_tracking_exit(intel_dp); +exit: mutex_unlock(&psr->lock); } } -- cgit From 9cddf03b2af07443bebdc73cba21acb360c079e8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 11 Mar 2022 23:28:45 +0200 Subject: drm/i915: Reject unsupported TMDS rates on ICL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ICL+ PLLs can't generate certain frequencies. Running the PLL algorithms through for all frequencies 25-594 MHz we see a gap just above 500 MHz. Specifically 500-532.8 MHz for TC PLLs, and 500-533.2 MHz for combo PHY PLLs. Reject those frequencies in hdmi_port_clock_valid() so that we properly filter out unsupported modes and/or color depths for HDMI. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5247 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220311212845.32358-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola (cherry picked from commit e5086cb3f3d3f94091be29eec38cf13f8a75a778) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_hdmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 1aa5bdc7b0dc..6512f014cad4 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1836,6 +1836,7 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, bool has_hdmi_sink) { struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi); + enum phy phy = intel_port_to_phy(dev_priv, hdmi_to_dig_port(hdmi)->base.port); if (clock < 25000) return MODE_CLOCK_LOW; @@ -1856,6 +1857,14 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; + /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */ + if (intel_phy_is_combo(dev_priv, phy) && clock > 500000 && clock < 533200) + return MODE_CLOCK_RANGE; + + /* ICL+ TC PHY PLL can't generate 500-532.8 MHz */ + if (intel_phy_is_tc(dev_priv, phy) && clock > 500000 && clock < 532800) + return MODE_CLOCK_RANGE; + /* * SNPS PHYs' MPLLB table-based programming can only handle a fixed * set of link rates.
-- cgit From 1937f3feb0e84089ae4065e09c871b8ab4676f01 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 9 Mar 2022 18:49:41 +0200 Subject: drm/i915: Treat SAGV block time 0 as SAGV disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For modern platforms the spec explicitly states that a SAGV block time of zero means that SAGV is not supported. Let's extend that to all platforms. Supposedly there should be no systems where this isn't true, and it'll allow us to: - use the same code regardless of older vs. newer platform - wm latencies already treat 0 as disabled, so this fits well with other related code - make it a bit more clear when SAGV is used vs. not - avoid overflows from adding U32_MAX with a u16 wm0 latency value which could cause us to miscalculate the SAGV watermarks on tgl+ Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220309164948.10671-2-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit d8f5855b31c0523ea3b171db8dfb998830e8735d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_pm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 71f7fba2c9e2..9333f732cda8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3698,8 +3698,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) MISSING_CASE(DISPLAY_VER(dev_priv)); } - /* Default to an unusable block time */ - dev_priv->sagv_block_time_us = -1; + dev_priv->sagv_block_time_us = 0; } /* @@ -5645,7 +5644,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; result->enable = true; - if (DISPLAY_VER(dev_priv) < 12) + if (DISPLAY_VER(dev_priv) < 12 && dev_priv->sagv_block_time_us) result->can_sagv = latency >= dev_priv->sagv_block_time_us; } @@ -5678,7 +5677,10 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); struct skl_wm_level *sagv_wm = &plane_wm->sagv.wm0; struct skl_wm_level *levels = plane_wm->wm; - unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + unsigned int latency = 0; + + if (dev_priv->sagv_block_time_us) + latency = dev_priv->sagv_block_time_us + dev_priv->wm.skl_latency[0]; skl_compute_plane_wm(crtc_state, plane, 0, latency, wm_params, &levels[0], -- cgit From 3ef8b5e19ead5a79600ea55f9549658281415893 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 9 Mar 2022 18:49:46 +0200 Subject: drm/i915: Fix PSF GV point mask when SAGV is not possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't just mask off all the PSF GV points when SAGV gets disabled. This should in fact cause the Pcode to reject the request since at least one PSF point must remain enabled at all times. 
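A standalone bitmask sketch of the change (the mask values are invented; PSF_PT_MASK stands in for ADLS_PSF_PT_MASK): instead of collapsing the whole mask to the single QGV point, keep the PSF points and restrict only the QGV side:

	#include <stdio.h>

	#define BIT(n)		(1u << (n))
	#define PSF_PT_MASK	0xf0u	/* made-up stand-in for ADLS_PSF_PT_MASK */

	int main(void)
	{
		unsigned int allowed_points = 0xffu;	/* all QGV + PSF points */
		unsigned int max_bw_point = 3;

		/* Old, broken: allowed_points = BIT(max_bw_point);
		 * drops every PSF point, which Pcode should reject. */
		allowed_points &= PSF_PT_MASK;		/* keep the PSF GV points */
		allowed_points |= BIT(max_bw_point);	/* single max-bandwidth QGV point */

		printf("allowed mask: %#x\n", allowed_points);	/* 0xf8 */

		return 0;
	}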
Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: 192fbfb76744 ("drm/i915: Implement PSF GV point support") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220309164948.10671-7-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit 0fed4ddd18f064d2359b430c6e83ee60dd1f49b1) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_bw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index ad1564ca7269..adf58c58513b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -992,7 +992,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) * cause. */ if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { - allowed_points = BIT(max_bw_point); + allowed_points &= ADLS_PSF_PT_MASK; + allowed_points |= BIT(max_bw_point); drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", max_bw_point); } -- cgit From 00f4150d27d2c01eaeffe1091fc311a7c0872c69 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Mar 2022 16:45:37 -0700 Subject: drm/i915: Fix renamed struct field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier versions of commit a5b7ef27da60 ("drm/i915: Add struct to hold IP version") named "ver" as "arch", and when it was renamed the rename was missed in MEDIA_VER_FULL() since it's currently not used. Fixes: a5b7ef27da60 ("drm/i915: Add struct to hold IP version") Cc: José Roberto de Souza Cc: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220316234538.434357-1-lucas.demarchi@intel.com (cherry picked from commit b4ac33b973233dc08a56c8ef9d3c2edeab7a4370) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d134838b3458..fa14da84362e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -947,7 +947,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver) -#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \ +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.ver, \ INTEL_INFO(i915)->media.rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) -- cgit
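For reference, a small sketch of how IP_VER() packs a major.release pair (the macro body here is believed to mirror the kernel's definition, but treat it as illustrative); the fix simply reads media.ver instead of the removed media.arch field:

	#include <stdio.h>

	#define IP_VER(ver, rel)	((ver) << 8 | (rel))

	int main(void)
	{
		unsigned int media = IP_VER(12, 50);	/* e.g. media IP 12.50 */

		/* Major version lives in the high bits, release in the low byte. */
		printf("full %#x -> ver %u, rel %u\n",
		       media, media >> 8, media & 0xff);

		return 0;
	}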