Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_gtt.c')
-rw-r--r--	drivers/gpu/drm/i915/gt/intel_gtt.c	231
1 file changed, 192 insertions, 39 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index a94be0306464..afbc5c769308 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -10,11 +10,34 @@
 
 #include <drm/drm_cache.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
+#include "i915_utils.h"
 #include "intel_gt.h"
+#include "intel_gt_mcr.h"
+#include "intel_gt_print.h"
+#include "intel_gt_regs.h"
 #include "intel_gtt.h"
 
+bool i915_ggtt_require_binder(struct drm_i915_private *i915)
+{
+	/* Wa_13010847436 & Wa_14019519902 */
+	return !i915_direct_stolen_access(i915) &&
+		MEDIA_VER_FULL(i915) == IP_VER(13, 0);
+}
+
+static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
+{
+	return IS_BROXTON(i915) && i915_vtd_active(i915);
+}
+
+bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
+{
+	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
+}
+
 struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
 {
 	struct drm_i915_gem_object *obj;
@@ -41,6 +64,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -62,6 +88,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -72,7 +101,17 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 	enum i915_map_type type;
 	void *vaddr;
 
-	type = i915_coherent_map_type(vm->i915, obj, true);
+	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -86,7 +125,17 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 	enum i915_map_type type;
 	void *vaddr;
 
-	type = i915_coherent_map_type(vm->i915, obj, true);
+	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -95,27 +144,51 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-void __i915_vm_close(struct i915_address_space *vm)
+static void clear_vm_list(struct list_head *list)
 {
 	struct i915_vma *vma, *vn;
 
-	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
-		return;
-
-	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+	list_for_each_entry_safe(vma, vn, list, vm_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
-		if (!kref_get_unless_zero(&obj->base.refcount))
-			continue;
+		if (!i915_gem_object_get_rcu(obj)) {
+			/*
+			 * Object is dying, but has not yet cleared its
+			 * vma list.
+			 * Unbind the dying vma to ensure our list
+			 * is completely drained. We leave the destruction to
+			 * the object destructor to avoid the vma
+			 * disappearing under it.
+			 */
+			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+			WARN_ON(__i915_vma_unbind(vma));
+
+			/* Remove from the unbound list */
+			list_del_init(&vma->vm_link);
+
+			/*
+			 * Delay the vm and vm mutex freeing until the
+			 * object is done with destruction.
+			 */
+			i915_vm_resv_get(vma->vm);
+			vma->vm_ddestroy = true;
+		} else {
+			i915_vma_destroy_locked(vma);
+			i915_gem_object_put(obj);
+		}
+	}
+}
 
-		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
-		WARN_ON(__i915_vma_unbind(vma));
-		__i915_vma_put(vma);
+static void __i915_vm_close(struct i915_address_space *vm)
+{
+	mutex_lock(&vm->mutex);
 
-		i915_gem_object_put(obj);
-	}
+	clear_vm_list(&vm->bound_list);
+	clear_vm_list(&vm->unbound_list);
+
+	/* Check for must-fix unanticipated side-effects */
 	GEM_BUG_ON(!list_empty(&vm->bound_list));
+	GEM_BUG_ON(!list_empty(&vm->unbound_list));
 
 	mutex_unlock(&vm->mutex);
 }
@@ -137,7 +210,6 @@ int i915_vm_lock_objects(struct i915_address_space *vm,
 void i915_address_space_fini(struct i915_address_space *vm)
 {
 	drm_mm_takedown(&vm->mm);
-	mutex_destroy(&vm->mutex);
 }
 
 /**
@@ -145,7 +217,8 @@ void i915_address_space_fini(struct i915_address_space *vm)
  * @kref: Pointer to the &i915_address_space.resv_ref member.
  *
  * This function is called when the last lock sharer no longer shares the
- * &i915_address_space._resv lock.
+ * &i915_address_space._resv lock, and also if we raced when
+ * destroying a vma by the vma destruction
  */
 void i915_vm_resv_release(struct kref *kref)
 {
@@ -153,6 +226,8 @@ void i915_vm_resv_release(struct kref *kref)
 		container_of(kref, typeof(*vm), resv_ref);
 
 	dma_resv_fini(&vm->_resv);
+	mutex_destroy(&vm->mutex);
+
 	kfree(vm);
 }
 
@@ -161,6 +236,11 @@ static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, release_work);
 
+	__i915_vm_close(vm);
+
+	/* Synchronize async unbinds. */
+	i915_vma_resource_bind_dep_sync_all(vm);
+
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -189,8 +269,8 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	if (!kref_read(&vm->resv_ref))
 		kref_init(&vm->resv_ref);
 
+	vm->pending_unbind = RB_ROOT_CACHED;
 	INIT_WORK(&vm->release_work, __i915_vm_release);
-	atomic_set(&vm->open, 1);
 
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -219,9 +299,19 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	GEM_BUG_ON(!vm->total);
 	drm_mm_init(&vm->mm, 0, vm->total);
+
+	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
+		 ARRAY_SIZE(vm->min_alignment));
+
+	if (HAS_64K_PAGES(vm->i915)) {
+		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+	}
+
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
 	INIT_LIST_HEAD(&vm->bound_list);
+	INIT_LIST_HEAD(&vm->unbound_list);
 }
 
 void *__px_vaddr(struct drm_i915_gem_object *p)
@@ -250,7 +340,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
 	void *vaddr = __px_vaddr(p);
 
 	memset64(vaddr, val, count);
-	clflush_cache_range(vaddr, PAGE_SIZE);
+	drm_clflush_virt_range(vaddr, PAGE_SIZE);
 }
 
 static void poison_scratch_page(struct drm_i915_gem_object *scratch)
@@ -283,7 +373,8 @@ int setup_scratch_page(struct i915_address_space *vm)
 	 */
 	size = I915_GTT_PAGE_SIZE_4K;
 	if (i915_vm_is_4lvl(vm) &&
-	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
+	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
+	    !HAS_64K_PAGES(vm->i915))
 		size = I915_GTT_PAGE_SIZE_64K;
 
 	do {
@@ -325,18 +416,6 @@ skip:
 		if (size == I915_GTT_PAGE_SIZE_4K)
 			return -ENOMEM;
 
-		/*
-		 * If we need 64K minimum GTT pages for device local-memory,
-		 * like on XEHPSDV, then we need to fail the allocation here,
-		 * otherwise we can't safely support the insertion of
-		 * local-memory pages for this vm, since the HW expects the
-		 * correct physical alignment and size when the page-table is
-		 * operating in 64K GTT mode, which includes any scratch PTEs,
-		 * since userspace can still touch them.
-		 */
-		if (HAS_64K_PAGES(vm->i915))
-			return -ENOMEM;
-
 		size = I915_GTT_PAGE_SIZE_4K;
 	} while (1);
 }
@@ -345,6 +424,9 @@ void free_scratch(struct i915_address_space *vm)
 {
 	int i;
 
+	if (!vm->scratch[0])
+		return;
+
 	for (i = 0; i <= vm->top; i++)
 		i915_gem_object_put(vm->scratch[i]);
 }
@@ -411,12 +493,50 @@ void gtt_write_workarounds(struct intel_gt *gt)
 		intel_uncore_write(uncore,
 				   HSW_GTT_CACHE_EN,
 				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
-		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
-				 intel_uncore_read(uncore,
-						   HSW_GTT_CACHE_EN) == 0);
+		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
+				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
 	}
 }
 
+static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
+			   MTL_PPAT_L4_0_WB);
+	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
+			   MTL_PPAT_L4_1_WT);
+	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
+			   MTL_PPAT_L4_3_UC);
+	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
+			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
+			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+	/*
+	 * Remaining PAT entries are left at the hardware-default
+	 * fully-cached setting
+	 */
+}
+
+static void xelpg_setup_private_ppat(struct intel_gt *gt)
+{
+	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
+				     MTL_PPAT_L4_0_WB);
+	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
+				     MTL_PPAT_L4_1_WT);
+	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
+				     MTL_PPAT_L4_3_UC);
+	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
+				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
+				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+	/*
+	 * Remaining PAT entries are left at the hardware-default
+	 * fully-cached setting
+	 */
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	/* TGL doesn't support LLC or AGE settings */
@@ -430,6 +550,29 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
 }
 
+static void xehp_setup_private_ppat(struct intel_gt *gt)
+{
+	enum forcewake_domains fw;
+	unsigned long flags;
+
+	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
+					    FW_REG_WRITE);
+	intel_uncore_forcewake_get(gt->uncore, fw);
+
+	intel_gt_mcr_lock(gt, &flags);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
+	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+	intel_gt_mcr_unlock(gt, flags);
+
+	intel_uncore_forcewake_put(gt->uncore, fw);
+}
+
 static void icl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	intel_uncore_write(uncore,
@@ -522,13 +665,23 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore)
 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
 }
 
-void setup_private_pat(struct intel_uncore *uncore)
+void setup_private_pat(struct intel_gt *gt)
 {
-	struct drm_i915_private *i915 = uncore->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct drm_i915_private *i915 = gt->i915;
 
 	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-	if (GRAPHICS_VER(i915) >= 12)
+	if (gt->type == GT_MEDIA) {
+		xelpmp_setup_private_ppat(gt->uncore);
+		return;
+	}
+
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		xelpg_setup_private_ppat(gt);
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
+		xehp_setup_private_ppat(gt);
+	else if (GRAPHICS_VER(i915) >= 12)
 		tgl_setup_private_ppat(uncore);
 	else if (GRAPHICS_VER(i915) >= 11)
 		icl_setup_private_ppat(uncore);
@@ -548,7 +701,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
