summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_ppgtt.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2020-07-29 17:42:19 +0100
committerJoonas Lahtinen <joonas.lahtinen@linux.intel.com>2020-09-07 14:24:23 +0300
commit82adf901138cc0564656dc8dc3a47862a546cb2a (patch)
treea91ebb7a3198d28b64c2ef17b78cdfe8e5a6c248 /drivers/gpu/drm/i915/gt/intel_ppgtt.c
parent89351925a477441ae5fdd0136aec80b49ea1c53c (diff)
drm/i915/gt: Shrink i915_page_directory's slab bucket
kmalloc uses power-of-two slab buckets for small allocations (up to a few pages). Since i915_page_directory is a page of pointers, plus a couple more, this is rounded up to 8K, and we waste nearly 50% of that allocation. Long terms this leads to poor memory utilisation, bloating the kernel footprint, but the problem is exacerbated by our conservative preallocation scheme for binding VMA. As we are required to allocate all levels for each vma just in case we need to insert them upon binding, this leads to a large multiplication factor for a single page vma. By halving the allocation we need for the page directory structure, we halve the impact of that factor, bringing workloads that once fitted into memory, hopefully back to fitting into memory. We maintain the split between i915_page_directory and i915_page_table as we only need half the allocation for the lowest, most populous, level. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200729164219.5737-3-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_ppgtt.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c31
1 files changed, 24 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index ede6369a9092..46d9aceda64c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -28,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return pt;
}
-struct i915_page_directory *__alloc_pd(size_t sz)
+struct i915_page_directory *__alloc_pd(int count)
{
struct i915_page_directory *pd;
- pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+ pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd))
return NULL;
+ pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pd->entry)) {
+ kfree(pd);
+ return NULL;
+ }
+
spin_lock_init(&pd->lock);
return pd;
}
@@ -44,12 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
- pd = __alloc_pd(sizeof(*pd));
+ pd = __alloc_pd(I915_PDES);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(pd->pt.base)) {
+ kfree(pd->entry);
kfree(pd);
return ERR_PTR(-ENOMEM);
}
@@ -57,9 +64,19 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return pd;
}
-void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
+void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
{
- i915_gem_object_put(pt->base);
+ BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));
+
+ if (lvl) {
+ struct i915_page_directory *pd =
+ container_of(pt, typeof(*pd), pt);
+ kfree(pd->entry);
+ }
+
+ if (pt->base)
+ i915_gem_object_put(pt->base);
+
kfree(pt);
}
@@ -82,7 +99,7 @@ __set_pd_entry(struct i915_page_directory * const pd,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
/* Each thread pre-pins the pd, and we may have a thread per pde. */
- GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+ GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);
atomic_inc(px_used(pd));
pd->entry[idx] = to;
@@ -263,7 +280,7 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm,
for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
while ((pt = stash->pt[n])) {
stash->pt[n] = pt->stash;
- free_px(vm, pt);
+ free_px(vm, pt, n);
}
}
}