Diffstat (limited to 'drivers/gpu/drm/i915/gt/gen8_ppgtt.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 289 |
1 file changed, 190 insertions, 99 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 2128b7a72a25..398d60a66410 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -5,6 +5,7 @@
 
 #include <linux/log2.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 
 #include "gen8_ppgtt.h"
@@ -29,7 +30,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr,
 }
 
 static u64 gen8_pte_encode(dma_addr_t addr,
-			   enum i915_cache_level level,
+			   unsigned int pat_index,
 			   u32 flags)
 {
 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
@@ -37,10 +38,12 @@ static u64 gen8_pte_encode(dma_addr_t addr,
 	if (unlikely(flags & PTE_READ_ONLY))
 		pte &= ~GEN8_PAGE_RW;
 
-	if (flags & PTE_LM)
-		pte |= GEN12_PPGTT_PTE_LM;
-
-	switch (level) {
+	/*
+	 * For pre-gen12 platforms pat_index is the same as enum
+	 * i915_cache_level, so the switch-case here is still valid.
+	 * See translation table defined by LEGACY_CACHELEVEL.
+	 */
+	switch (pat_index) {
 	case I915_CACHE_NONE:
 		pte |= PPAT_UNCACHED;
 		break;
@@ -55,6 +58,33 @@ static u64 gen8_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static u64 gen12_pte_encode(dma_addr_t addr,
+			    unsigned int pat_index,
+			    u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_PPGTT_PTE_LM;
+
+	if (pat_index & BIT(0))
+		pte |= GEN12_PPGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= GEN12_PPGTT_PTE_PAT1;
+
+	if (pat_index & BIT(2))
+		pte |= GEN12_PPGTT_PTE_PAT2;
+
+	if (pat_index & BIT(3))
+		pte |= MTL_PPGTT_PTE_PAT3;
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -193,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
+	if (vm->rsvd.obj)
+		i915_gem_object_put(vm->rsvd.obj);
+
 	if (intel_vgpu_active(vm->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
@@ -213,9 +246,9 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
 
 	len = gen8_pd_range(start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
-	    __func__, vm, lvl + 1, start, end,
-	    idx, len, atomic_read(px_used(pd)));
+	GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+		  __func__, vm, lvl + 1, start, end,
+		  idx, len, atomic_read(px_used(pd)));
 	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
 
 	do {
@@ -223,8 +256,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 
 		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
 		    gen8_pd_contains(start, end, lvl)) {
-			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
-			    __func__, vm, lvl + 1, idx, start, end);
+			GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+				  __func__, vm, lvl + 1, idx, start, end);
 			clear_pd_entry(pd, idx, scratch);
 			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
 			start += (u64)I915_PDES << gen8_pd_shift(lvl);
@@ -241,10 +274,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 			u64 *vaddr;
 
 			count = gen8_pt_count(start, end);
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
-			    __func__, vm, lvl, start, end,
-			    gen8_pd_index(start, 0), count,
-			    atomic_read(&pt->used));
+			GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+				  __func__, vm, lvl, start, end,
+				  gen8_pd_index(start, 0), count,
+				  atomic_read(&pt->used));
 			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
 
 			num_ptes = count;
@@ -296,9 +329,9 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
 	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
 
 	len = gen8_pd_range(*start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
-	    __func__, vm, lvl + 1, *start, end,
-	    idx, len, atomic_read(px_used(pd)));
+	GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+		  __func__, vm, lvl + 1, *start, end,
+		  idx, len, atomic_read(px_used(pd)));
 	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
 
 	spin_lock(&pd->lock);
@@ -309,8 +342,8 @@
 		if (!pt) {
 			spin_unlock(&pd->lock);
 
-			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
-			    __func__, vm, lvl + 1, idx);
+			GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+				  __func__, vm, lvl + 1, idx);
 
 			pt = stash->pt[!!lvl];
 			__i915_gem_object_pin_pages(pt->base);
@@ -340,10 +373,10 @@
 		} else {
 			unsigned int count = gen8_pt_count(*start, end);
 
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
-			    __func__, vm, lvl, *start, end,
-			    gen8_pd_index(*start, 0), count,
-			    atomic_read(&pt->used));
+			GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+				  __func__, vm, lvl, *start, end,
+				  gen8_pd_index(*start, 0), count,
+				  atomic_read(&pt->used));
 
 			atomic_add(count, &pt->used);
 			/* All other pdes may be simultaneously removed */
@@ -423,11 +456,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 		      struct i915_page_directory *pdp,
 		      struct sgt_dma *iter,
 		      u64 idx,
-		      enum i915_cache_level cache_level,
+		      unsigned int pat_index,
 		      u32 flags)
 {
 	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
 	gen8_pte_t *vaddr;
 
 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -467,15 +500,16 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 }
 
 static void
-xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
-			  struct i915_vma_resource *vma_res,
-			  struct sgt_dma *iter,
-			  enum i915_cache_level cache_level,
-			  u32 flags)
+xehp_ppgtt_insert_huge(struct i915_address_space *vm,
+		       struct i915_vma_resource *vma_res,
+		       struct sgt_dma *iter,
+		       unsigned int pat_index,
+		       u32 flags)
 {
-	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma_res->start;
+	u64 end = start + vma_res->vma_size;
 
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
@@ -489,9 +523,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 		gen8_pte_t encode = pte_encode;
 		unsigned int page_size;
 		gen8_pte_t *vaddr;
-		u16 index, max;
+		u16 index, max, nent, i;
 
 		max = I915_PDES;
+		nent = 1;
 
 		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
@@ -503,25 +538,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 			vaddr = px_vaddr(pd);
 		} else {
-			if (encode & GEN12_PPGTT_PTE_LM) {
-				GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
-				GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
-				GEM_BUG_ON(!IS_ALIGNED(iter->dma,
-						       I915_GTT_PAGE_SIZE_64K));
-
-				index = __gen8_pte_index(start, 0) / 16;
-				page_size = I915_GTT_PAGE_SIZE_64K;
-
-				max /= 16;
-
-				vaddr = px_vaddr(pd);
-				vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+			index = __gen8_pte_index(start, 0);
+			page_size = I915_GTT_PAGE_SIZE;
 
-				pt->is_compact = true;
-			} else {
-				GEM_BUG_ON(pt->is_compact);
-				index = __gen8_pte_index(start, 0);
-				page_size = I915_GTT_PAGE_SIZE;
+			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+				/*
+				 * Device local-memory on these platforms should
+				 * always use 64K pages or larger (including GTT
+				 * alignment), therefore if we know the whole
+				 * page-table needs to be filled we can always
+				 * safely use the compact-layout. Otherwise fall
+				 * back to the TLB hint with PS64. If this is
+				 * system memory we only bother with PS64.
+				 */
+				if ((encode & GEN12_PPGTT_PTE_LM) &&
+				    end - start >= SZ_2M && !index) {
+					index = __gen8_pte_index(start, 0) / 16;
+					page_size = I915_GTT_PAGE_SIZE_64K;
+
+					max /= 16;
+
+					vaddr = px_vaddr(pd);
+					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+					pt->is_compact = true;
+				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+					   rem >= I915_GTT_PAGE_SIZE_64K &&
+					   !(index % 16)) {
+					encode |= GEN12_PTE_PS64;
+					page_size = I915_GTT_PAGE_SIZE_64K;
+					nent = 16;
+				}
 			}
 
 			vaddr = px_vaddr(pt);
@@ -529,7 +576,12 @@
 
 		do {
 			GEM_BUG_ON(rem < page_size);
-			vaddr[index++] = encode | iter->dma;
+
+			for (i = 0; i < nent; i++) {
+				vaddr[index++] =
+					encode | (iter->dma + i *
+						  I915_GTT_PAGE_SIZE);
+			}
 
 			start += page_size;
 			iter->dma += page_size;
@@ -551,6 +603,7 @@
 			}
 		} while (rem >= page_size && index < max);
 
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
 		vma_res->page_sizes_gtt |= page_size;
 	} while (iter->sg && sg_dma_len(iter->sg));
 }
@@ -558,10 +611,10 @@
 static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 				   struct i915_vma_resource *vma_res,
 				   struct sgt_dma *iter,
-				   enum i915_cache_level cache_level,
+				   unsigned int pat_index,
 				   u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma_res->start;
 
@@ -681,17 +734,17 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 
 static void gen8_ppgtt_insert(struct i915_address_space *vm,
 			      struct i915_vma_resource *vma_res,
-			      enum i915_cache_level cache_level,
+			      unsigned int pat_index,
 			      u32 flags)
 {
 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma_res);
 
 	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		if (HAS_64K_PAGES(vm->i915))
-			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+		if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 55))
+			xehp_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
 		else
-			gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+			gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
 	} else {
 		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
 
@@ -700,7 +753,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
 				gen8_pdp_for_page_index(vm, idx);
 
 			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
-						    cache_level, flags);
+						    pat_index, flags);
 		} while (idx);
 
 		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
@@ -710,7 +763,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 				    dma_addr_t addr,
 				    u64 offset,
-				    enum i915_cache_level level,
+				    unsigned int pat_index,
 				    u32 flags)
 {
 	u64 idx = offset >> GEN8_PTE_SHIFT;
@@ -724,15 +777,15 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 	GEM_BUG_ON(pt->is_compact);
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
 	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
-static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
-					    dma_addr_t addr,
-					    u64 offset,
-					    enum i915_cache_level level,
-					    u32 flags)
+static void xehp_ppgtt_insert_entry_lm(struct i915_address_space *vm,
+				       dma_addr_t addr,
+				       u64 offset,
+				       unsigned int pat_index,
+				       u32 flags)
 {
 	u64 idx = offset >> GEN8_PTE_SHIFT;
 	struct i915_page_directory * const pdp =
@@ -745,6 +798,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
 	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
 	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
 
+	/* XXX: we don't strictly need to use this layout */
+
 	if (!pt->is_compact) {
 		vaddr = px_vaddr(pd);
 		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
@@ -752,20 +807,20 @@
 	}
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
 }
 
-static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
-				       dma_addr_t addr,
-				       u64 offset,
-				       enum i915_cache_level level,
-				       u32 flags)
+static void xehp_ppgtt_insert_entry(struct i915_address_space *vm,
+				    dma_addr_t addr,
+				    u64 offset,
+				    unsigned int pat_index,
+				    u32 flags)
 {
 	if (flags & PTE_LM)
-		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
-						       level, flags);
+		return xehp_ppgtt_insert_entry_lm(vm, addr, offset,
+						  pat_index, flags);
 
-	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
+	return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
 }
 
 static int gen8_init_scratch(struct i915_address_space *vm)
@@ -799,8 +854,10 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 		pte_flags |= PTE_LM;
 
 	vm->scratch[0]->encode =
-		gen8_pte_encode(px_dma(vm->scratch[0]),
-				I915_CACHE_NONE, pte_flags);
+		vm->pte_encode(px_dma(vm->scratch[0]),
+			       i915_gem_get_pat_index(vm->i915,
+						      I915_CACHE_NONE),
+			       pte_flags);
 
 	for (i = 1; i <= vm->top; i++) {
 		struct drm_i915_gem_object *obj;
@@ -897,6 +954,44 @@
 err_pd:
 	return ERR_PTR(err);
 }
 
+static int gen8_init_rsvd(struct i915_address_space *vm)
+{
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	if (!intel_gt_needs_wa_16018031267(vm->gt))
+		return 0;
+
+	/* The memory will be used only by GPU. */
+	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
+					  I915_BO_ALLOC_VOLATILE |
+					  I915_BO_ALLOC_GPU_ONLY);
+	if (IS_ERR(obj))
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (ret)
+		goto unref;
+
+	vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
+	vm->rsvd.obj = obj;
+	vm->total -= vma->node.size;
+	return 0;
+unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -929,36 +1024,28 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
 
-	if (HAS_LMEM(gt->i915)) {
+	if (HAS_LMEM(gt->i915))
 		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
-
-		/*
-		 * On some platforms the hw has dropped support for 4K GTT pages
-		 * when dealing with LMEM, and due to the design of 64K GTT
-		 * pages in the hw, we can only mark the *entire* page-table as
-		 * operating in 64K GTT mode, since the enable bit is still on
-		 * the pde, and not the pte. And since we still need to allow
-		 * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
-		 * page-table with scratch pointing to LMEM, since that's
-		 * undefined from the hw pov. The simplest solution is to just
-		 * move the 64K scratch page to SMEM on such platforms and call
-		 * it a day, since that should work for all configurations.
-		 */
-		if (HAS_64K_PAGES(gt->i915))
-			ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
-		else
-			ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
-	} else {
+	else
 		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
-		ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
-	}
 
-	ppgtt->vm.pte_encode = gen8_pte_encode;
+	/*
+	 * Using SMEM here instead of LMEM has the advantage of not reserving
+	 * high performance memory for a "never" used filler page. It also
+	 * removes the device access that would be required to initialise the
+	 * scratch page, reducing pressure on an even scarcer resource.
+	 */
+	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+	if (GRAPHICS_VER(gt->i915) >= 12)
+		ppgtt->vm.pte_encode = gen12_pte_encode;
+	else
+		ppgtt->vm.pte_encode = gen8_pte_encode;
 
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	if (HAS_64K_PAGES(gt->i915))
-		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
+		ppgtt->vm.insert_page = xehp_ppgtt_insert_entry;
 	else
 		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
@@ -986,6 +1073,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	if (intel_vgpu_active(gt->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
+	err = gen8_init_rsvd(&ppgtt->vm);
+	if (err)
+		goto err_put;
+
 	return ppgtt;
 
 err_put:
