Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  823
1 file changed, 423 insertions, 400 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0cbcb9f54e7d..4c82c9544b93 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -25,7 +25,6 @@
*
*/
-#include <drm/drm_vma_manager.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
@@ -37,13 +36,16 @@
#include <linux/dma-buf.h>
#include <linux/mman.h>
-#include "display/intel_display.h"
-#include "display/intel_frontbuffer.h"
+#include <drm/drm_cache.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vma_manager.h>
#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_object_frontbuffer.h"
+#include "gem/i915_gem_pm.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
@@ -51,10 +53,10 @@
#include "gt/intel_workarounds.h"
#include "i915_drv.h"
+#include "i915_file_private.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
-
-#include "intel_pm.h"
+#include "intel_clock_gating.h"
static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
@@ -88,7 +90,8 @@ int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
u64 pinned;
@@ -113,11 +116,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags)
{
struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
+ bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
LIST_HEAD(still_in_list);
intel_wakeref_t wakeref;
struct i915_vma *vma;
int ret;
+ assert_object_held(obj);
+
if (list_empty(&obj->vma.list))
return 0;
@@ -135,8 +141,6 @@ try_again:
while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
struct i915_vma,
obj_link))) {
- struct i915_address_space *vm = vma->vm;
-
list_move_tail(&vma->obj_link, &still_in_list);
if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
continue;
@@ -146,24 +150,44 @@ try_again:
break;
}
+ /*
+ * Requiring the vm destructor to take the object lock
+ * before destroying a vma would help us eliminate the
+ * i915_vm_tryget() here, AND thus also the barrier stuff
+ * at the end. That's an easy fix, but sleeping locks in
+ * a kthread should generally be avoided.
+ */
ret = -EAGAIN;
- if (!i915_vm_tryopen(vm))
+ if (!i915_vm_tryget(vma->vm))
break;
- /* Prevent vma being freed by i915_vma_parked as we unbind */
- vma = __i915_vma_get(vma);
spin_unlock(&obj->vma.lock);
- if (vma) {
- ret = -EBUSY;
- if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
- !i915_vma_is_active(vma))
- ret = i915_vma_unbind(vma);
+ /*
+ * Since i915_vma_parked() takes the object lock
+ * before vma destruction, it won't race us here
+ * and destroy the vma from under us.
+ */
- __i915_vma_put(vma);
+ ret = -EBUSY;
+ if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
+ assert_object_held(vma->obj);
+ ret = i915_vma_unbind_async(vma, vm_trylock);
+ }
+
+ if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
+ !i915_vma_is_active(vma))) {
+ if (vm_trylock) {
+ if (mutex_trylock(&vma->vm->mutex)) {
+ ret = __i915_vma_unbind(vma);
+ mutex_unlock(&vma->vm->mutex);
+ }
+ } else {
+ ret = i915_vma_unbind(vma);
+ }
}
- i915_vm_close(vm);
+ i915_vm_put(vma->vm);
spin_lock(&obj->vma.lock);
}
list_splice_init(&still_in_list, &obj->vma.list);
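
A caller-visible consequence of this hunk: i915_gem_object_unbind() now asserts that the object lock is already held, and callers that must not sleep on vm->mutex can pass I915_GEM_OBJECT_UNBIND_VM_TRYLOCK. A minimal caller-side sketch, illustrative only (example_unbind() and the flag choice are not taken from this patch):

static int example_unbind(struct drm_i915_gem_object *obj)
{
	int err;

	/* The object lock must now be held across the unbind. */
	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		return err;

	/*
	 * With VM_TRYLOCK, vm->mutex is only trylocked, so the call may
	 * return -EBUSY instead of sleeping.
	 */
	err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);

	i915_gem_object_unlock(obj);
	return err;
}
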
@@ -180,132 +204,6 @@ try_again:
}
static int
-i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
-{
- void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
- char __user *user_data = u64_to_user_ptr(args->data_ptr);
-
- /*
- * We manually control the domain here and pretend that it
- * remains coherent i.e. in the GTT domain, like shmem_pwrite.
- */
- i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
-
- if (copy_from_user(vaddr, user_data, args->size))
- return -EFAULT;
-
- drm_clflush_virt_range(vaddr, args->size);
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
-
- i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
- return 0;
-}
-
-static int
-i915_gem_create(struct drm_file *file,
- struct intel_memory_region *mr,
- u64 *size_p,
- u32 *handle_p)
-{
- struct drm_i915_gem_object *obj;
- u32 handle;
- u64 size;
- int ret;
-
- GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
- size = round_up(*size_p, mr->min_page_size);
- if (size == 0)
- return -EINVAL;
-
- /* For most of the ABI (e.g. mmap) we think in system pages */
- GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
-
- /* Allocate the new object */
- obj = i915_gem_object_create_region(mr, size, 0);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- ret = drm_gem_handle_create(file, &obj->base, &handle);
- /* drop reference from allocate - handle holds it now */
- i915_gem_object_put(obj);
- if (ret)
- return ret;
-
- *handle_p = handle;
- *size_p = size;
- return 0;
-}
-
-int
-i915_gem_dumb_create(struct drm_file *file,
- struct drm_device *dev,
- struct drm_mode_create_dumb *args)
-{
- enum intel_memory_type mem_type;
- int cpp = DIV_ROUND_UP(args->bpp, 8);
- u32 format;
-
- switch (cpp) {
- case 1:
- format = DRM_FORMAT_C8;
- break;
- case 2:
- format = DRM_FORMAT_RGB565;
- break;
- case 4:
- format = DRM_FORMAT_XRGB8888;
- break;
- default:
- return -EINVAL;
- }
-
- /* have to work out size/pitch and return them */
- args->pitch = ALIGN(args->width * cpp, 64);
-
- /* align stride to page size so that we can remap */
- if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
- DRM_FORMAT_MOD_LINEAR))
- args->pitch = ALIGN(args->pitch, 4096);
-
- if (args->pitch < args->width)
- return -EINVAL;
-
- args->size = mul_u32_u32(args->pitch, args->height);
-
- mem_type = INTEL_MEMORY_SYSTEM;
- if (HAS_LMEM(to_i915(dev)))
- mem_type = INTEL_MEMORY_LOCAL;
-
- return i915_gem_create(file,
- intel_memory_region_by_type(to_i915(dev),
- mem_type),
- &args->size, &args->handle);
-}
-
-/**
- * Creates a new mm object and returns a handle to it.
- * @dev: drm device pointer
- * @data: ioctl data blob
- * @file: drm file pointer
- */
-int
-i915_gem_create_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
- struct drm_i915_private *i915 = to_i915(dev);
- struct drm_i915_gem_create *args = data;
-
- i915_gem_flush_free_objects(i915);
-
- return i915_gem_create(file,
- intel_memory_region_by_type(i915,
- INTEL_MEMORY_SYSTEM),
- &args->size, &args->handle);
-}
-
-static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
bool needs_clflush)
{
@@ -329,20 +227,26 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pread *args)
{
unsigned int needs_clflush;
- unsigned int idx, offset;
- struct dma_fence *fence;
char __user *user_data;
+ unsigned long offset;
+ pgoff_t idx;
u64 remain;
int ret;
- ret = i915_gem_object_prepare_read(obj, &needs_clflush);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
- fence = i915_gem_object_lock_fence(obj);
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err_unlock;
+
+ ret = i915_gem_object_prepare_read(obj, &needs_clflush);
+ if (ret)
+ goto err_unpin;
+
i915_gem_object_finish_access(obj);
- if (!fence)
- return -ENOMEM;
+ i915_gem_object_unlock(obj);
remain = args->size;
user_data = u64_to_user_ptr(args->data_ptr);
@@ -361,7 +265,13 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
offset = 0;
}
- i915_gem_object_unlock_fence(obj, fence);
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+err_unlock:
+ i915_gem_object_unlock(obj);
return ret;
}
@@ -389,52 +299,106 @@ gtt_user_read(struct io_mapping *mapping,
return unwritten;
}
-static int
-i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
- const struct drm_i915_gem_pread *args)
+static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj,
+ struct drm_mm_node *node,
+ bool write)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
- intel_wakeref_t wakeref;
- struct drm_mm_node node;
- struct dma_fence *fence;
- void __user *user_data;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct i915_vma *vma;
- u64 remain, offset;
+ struct i915_gem_ww_ctx ww;
int ret;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
vma = ERR_PTR(-ENODEV);
+ ret = i915_gem_object_lock(obj, &ww);
+ if (ret)
+ goto err_ww;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret)
+ goto err_ww;
+
if (!i915_gem_object_is_tiled(obj))
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
- if (!IS_ERR(vma)) {
- node.start = i915_ggtt_offset(vma);
- node.flags = 0;
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (vma == ERR_PTR(-EDEADLK)) {
+ ret = -EDEADLK;
+ goto err_ww;
+ } else if (!IS_ERR(vma)) {
+ node->start = i915_ggtt_offset(vma);
+ node->flags = 0;
} else {
- ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
+ ret = insert_mappable_node(ggtt, node, PAGE_SIZE);
if (ret)
- goto out_rpm;
- GEM_BUG_ON(!drm_mm_node_allocated(&node));
+ goto err_ww;
+ GEM_BUG_ON(!drm_mm_node_allocated(node));
+ vma = NULL;
}
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- goto out_unpin;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, false);
+ ret = i915_gem_object_pin_pages(obj);
if (ret) {
- i915_gem_object_unlock(obj);
- goto out_unpin;
+ if (drm_mm_node_allocated(node)) {
+ ggtt->vm.clear_range(&ggtt->vm, node->start, node->size);
+ remove_mappable_node(ggtt, node);
+ } else {
+ i915_vma_unpin(vma);
+ }
}
- fence = i915_gem_object_lock_fence(obj);
- i915_gem_object_unlock(obj);
- if (!fence) {
- ret = -ENOMEM;
- goto out_unpin;
+err_ww:
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ return ret ? ERR_PTR(ret) : vma;
+}
+
+static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj,
+ struct drm_mm_node *node,
+ struct i915_vma *vma)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+ i915_gem_object_unpin_pages(obj);
+ if (drm_mm_node_allocated(node)) {
+ ggtt->vm.clear_range(&ggtt->vm, node->start, node->size);
+ remove_mappable_node(ggtt, node);
+ } else {
+ i915_vma_unpin(vma);
+ }
+}
+
+static int
+i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *args)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+ unsigned long remain, offset;
+ intel_wakeref_t wakeref;
+ struct drm_mm_node node;
+ void __user *user_data;
+ struct i915_vma *vma;
+ int ret = 0;
+
+ if (overflows_type(args->size, remain) ||
+ overflows_type(args->offset, offset))
+ return -EINVAL;
+
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ vma = i915_gem_gtt_prepare(obj, &node, false);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out_rpm;
}
user_data = u64_to_user_ptr(args->data_ptr);
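
The two new helpers factor out the setup and teardown that pread and pwrite previously open-coded: i915_gem_gtt_prepare() either pins a mappable GGTT vma or, failing that, reserves a single page-sized node in the mappable aperture via insert_mappable_node(), while i915_gem_gtt_cleanup() undoes whichever path was taken. A rough sketch of how the pair is meant to be used on the read side (shape only, not code from this patch):

	struct drm_mm_node node;
	struct i915_vma *vma;

	vma = i915_gem_gtt_prepare(obj, &node, false /* write */);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/*
	 * ... copy PAGE_SIZE at a time; when vma is NULL the fallback node
	 * is remapped for each page with ggtt->vm.insert_page() ...
	 */

	i915_gem_gtt_cleanup(obj, &node, vma);
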
@@ -454,8 +418,11 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
page_length = remain < page_length ? remain : page_length;
if (drm_mm_node_allocated(&node)) {
ggtt->vm.insert_page(&ggtt->vm,
- i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
- node.start, I915_CACHE_NONE, 0);
+ i915_gem_object_get_dma_address(obj,
+ offset >> PAGE_SHIFT),
+ node.start,
+ i915_gem_get_pat_index(i915,
+ I915_CACHE_NONE), 0);
} else {
page_base += offset & PAGE_MASK;
}
@@ -471,21 +438,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
offset += page_length;
}
- i915_gem_object_unlock_fence(obj, fence);
-out_unpin:
- if (drm_mm_node_allocated(&node)) {
- ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
- remove_mappable_node(ggtt, &node);
- } else {
- i915_vma_unpin(vma);
- }
+ i915_gem_gtt_cleanup(obj, &node, vma);
out_rpm:
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
return ret;
}
/**
- * Reads data from the object referenced by handle.
+ * i915_gem_pread_ioctl - Reads data from the object referenced by handle.
* @dev: drm device pointer
* @data: ioctl data blob
* @file: drm file pointer
@@ -496,10 +456,17 @@ int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_pread *args = data;
struct drm_i915_gem_object *obj;
int ret;
+ /* PREAD is disallowed for all platforms after TGL-LP. This also
+ * covers all platforms with local memory.
+ */
+ if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915))
+ return -EOPNOTSUPP;
+
if (args->size == 0)
return 0;
@@ -518,6 +485,11 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
}
trace_i915_gem_object_pread(obj, args->offset, args->size);
+ ret = -ENODEV;
+ if (obj->ops->pread)
+ ret = obj->ops->pread(obj, args);
+ if (ret != -ENODEV)
+ goto out;
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
@@ -525,15 +497,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
if (ret)
goto out;
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- goto out;
-
ret = i915_gem_shmem_pread(obj, args);
if (ret == -EFAULT || ret == -ENODEV)
ret = i915_gem_gtt_pread(obj, args);
- i915_gem_object_unpin_pages(obj);
out:
i915_gem_object_put(obj);
return ret;
@@ -567,7 +534,7 @@ ggtt_write(struct io_mapping *mapping,
}
/**
- * This is the fast pwrite path, where we copy the data directly from the
+ * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached.
* @obj: i915 GEM object
* @args: pwrite arguments structure
@@ -577,15 +544,18 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *args)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_runtime_pm *rpm = &i915->runtime_pm;
+ unsigned long remain, offset;
intel_wakeref_t wakeref;
struct drm_mm_node node;
- struct dma_fence *fence;
struct i915_vma *vma;
- u64 remain, offset;
void __user *user_data;
- int ret;
+ int ret = 0;
+
+ if (overflows_type(args->size, remain) ||
+ overflows_type(args->offset, offset))
+ return -EINVAL;
if (i915_gem_object_has_struct_page(obj)) {
/*
@@ -603,37 +573,10 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
wakeref = intel_runtime_pm_get(rpm);
}
- vma = ERR_PTR(-ENODEV);
- if (!i915_gem_object_is_tiled(obj))
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
- if (!IS_ERR(vma)) {
- node.start = i915_ggtt_offset(vma);
- node.flags = 0;
- } else {
- ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
- if (ret)
- goto out_rpm;
- GEM_BUG_ON(!drm_mm_node_allocated(&node));
- }
-
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- goto out_unpin;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret) {
- i915_gem_object_unlock(obj);
- goto out_unpin;
- }
-
- fence = i915_gem_object_lock_fence(obj);
- i915_gem_object_unlock(obj);
- if (!fence) {
- ret = -ENOMEM;
- goto out_unpin;
+ vma = i915_gem_gtt_prepare(obj, &node, true);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out_rpm;
}
i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
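
Both GTT paths now also reject sizes and offsets that do not fit in unsigned long before doing any work: the uAPI fields are u64, while the copy loops track remain and offset as unsigned long, which is only 32 bits on a 32-bit kernel. overflows_type() (from include/linux/overflow.h) catches the truncation up front. A small worked illustration with made-up values:

	u64 size = 5ULL << 30;	/* e.g. a 5 GiB args->size on a 32-bit kernel */
	unsigned long remain;

	if (overflows_type(size, remain))	/* true: 5 GiB > ULONG_MAX */
		return -EINVAL;

	remain = size;	/* safe only after the check above */
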
@@ -656,8 +599,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
/* flush the write before we modify the GGTT */
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
ggtt->vm.insert_page(&ggtt->vm,
- i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
- node.start, I915_CACHE_NONE, 0);
+ i915_gem_object_get_dma_address(obj,
+ offset >> PAGE_SHIFT),
+ node.start,
+ i915_gem_get_pat_index(i915,
+ I915_CACHE_NONE), 0);
wmb(); /* flush modifications to the GGTT (insert_page) */
} else {
page_base += offset & PAGE_MASK;
@@ -682,14 +628,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
- i915_gem_object_unlock_fence(obj, fence);
-out_unpin:
- if (drm_mm_node_allocated(&node)) {
- ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
- remove_mappable_node(ggtt, &node);
- } else {
- i915_vma_unpin(vma);
- }
+ i915_gem_gtt_cleanup(obj, &node, vma);
out_rpm:
intel_runtime_pm_put(rpm, wakeref);
return ret;
@@ -728,20 +667,26 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
{
unsigned int partial_cacheline_write;
unsigned int needs_clflush;
- unsigned int offset, idx;
- struct dma_fence *fence;
void __user *user_data;
+ unsigned long offset;
+ pgoff_t idx;
u64 remain;
int ret;
- ret = i915_gem_object_prepare_write(obj, &needs_clflush);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
- fence = i915_gem_object_lock_fence(obj);
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err_unlock;
+
+ ret = i915_gem_object_prepare_write(obj, &needs_clflush);
+ if (ret)
+ goto err_unpin;
+
i915_gem_object_finish_access(obj);
- if (!fence)
- return -ENOMEM;
+ i915_gem_object_unlock(obj);
/* If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't
@@ -770,13 +715,19 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
}
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
- i915_gem_object_unlock_fence(obj, fence);
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+err_unlock:
+ i915_gem_object_unlock(obj);
return ret;
}
/**
- * Writes data to the object referenced by handle.
+ * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle.
* @dev: drm device
* @data: ioctl data blob
* @file: drm file
@@ -787,10 +738,17 @@ int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_pwrite *args = data;
struct drm_i915_gem_object *obj;
int ret;
+ /* PWRITE is disallowed for all platforms after TGL-LP. This also
+ * covers all platforms with local memory.
+ */
+ if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915))
+ return -EOPNOTSUPP;
+
if (args->size == 0)
return 0;
@@ -828,10 +786,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret)
goto err;
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- goto err;
-
ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
@@ -840,7 +794,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* perspective, requiring manual detiling by the client.
*/
if (!i915_gem_object_has_struct_page(obj) ||
- cpu_write_needs_clflush(obj))
+ i915_gem_cpu_write_needs_clflush(obj))
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case.
@@ -850,18 +804,15 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret == -EFAULT || ret == -ENOSPC) {
if (i915_gem_object_has_struct_page(obj))
ret = i915_gem_shmem_pwrite(obj, args);
- else
- ret = i915_gem_phys_pwrite(obj, args, file);
}
- i915_gem_object_unpin_pages(obj);
err:
i915_gem_object_put(obj);
return ret;
}
/**
- * Called when user space has done writes to this buffer
+ * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer
* @dev: drm device
* @data: ioctl data blob
* @file: drm file
@@ -897,21 +848,24 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
/*
* Only called during RPM suspend. All users of the userfault_list
* must be holding an RPM wakeref to ensure that this can not
- * run concurrently with themselves (and use the struct_mutex for
- * protection between themselves).
+ * run concurrently with themselves.
*/
list_for_each_entry_safe(obj, on,
- &i915->ggtt.userfault_list, userfault_link)
+ &to_gt(i915)->ggtt->userfault_list, userfault_link)
__i915_gem_object_release_mmap_gtt(obj);
+ list_for_each_entry_safe(obj, on,
+ &i915->runtime_pm.lmem_userfault_list, userfault_link)
+ i915_gem_object_runtime_pm_release_mmap_offset(obj);
+
/*
* The fence will be lost when the device powers down. If any were
* in use by hardware (i.e. they are pinned), we should not be powering
* down! All other fences will be reacquired by the user upon waking.
*/
- for (i = 0; i < i915->ggtt.num_fences; i++) {
- struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
+ for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) {
+ struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i];
/*
* Ideally we want to assert that the fence register is not
@@ -933,20 +887,33 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
}
}
+static void discard_ggtt_vma(struct i915_vma *vma)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ spin_lock(&obj->vma.lock);
+ if (!RB_EMPTY_NODE(&vma->obj_node)) {
+ rb_erase(&vma->obj_node, &obj->vma.tree);
+ RB_CLEAR_NODE(&vma->obj_node);
+ }
+ spin_unlock(&obj->vma.lock);
+}
+
struct i915_vma *
-i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view,
- u64 size,
- u64 alignment,
- u64 flags)
+i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ const struct i915_gtt_view *view,
+ u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct i915_vma *vma;
int ret;
+ GEM_WARN_ON(!ww);
+
if (flags & PIN_MAPPABLE &&
- (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
+ (!view || view->type == I915_GTT_VIEW_NORMAL)) {
/*
* If the required space is larger than the available
* aperture, we will not able to find a slot for the
@@ -979,6 +946,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(-ENOSPC);
}
+new_vma:
vma = i915_vma_instance(obj, &ggtt->vm, view);
if (IS_ERR(vma))
return vma;
@@ -988,17 +956,34 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
return ERR_PTR(-ENOSPC);
+ /*
+ * If this misplaced vma is too big (i.e., at least
+ * half the size of the aperture) or hasn't been pinned
+ * mappable before, we ignore the misplacement when
+ * PIN_NONBLOCK is set in order to avoid the ping-pong
+ * issue described above. In other words, we try to
+ * avoid the costly operation of unbinding this vma
+ * from the GGTT and rebinding it back because there
+ * may not be enough space for this vma in the aperture.
+ */
if (flags & PIN_MAPPABLE &&
- vma->fence_size > ggtt->mappable_end / 2)
+ (vma->fence_size > ggtt->mappable_end / 2 ||
+ !i915_vma_is_map_and_fenceable(vma)))
return ERR_PTR(-ENOSPC);
}
+ if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) {
+ discard_ggtt_vma(vma);
+ goto new_vma;
+ }
+
ret = i915_vma_unbind(vma);
if (ret)
return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
+
if (ret)
return ERR_PTR(ret);
@@ -1017,6 +1002,29 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return vma;
}
+struct i915_vma * __must_check
+i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
+ const struct i915_gtt_view *view,
+ u64 size, u64 alignment, u64 flags)
+{
+ struct i915_gem_ww_ctx ww;
+ struct i915_vma *ret;
+ int err;
+
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ ret = i915_gem_object_ggtt_pin_ww(obj, &ww, view, size,
+ alignment, flags);
+ if (IS_ERR(ret))
+ err = PTR_ERR(ret);
+ }
+
+ return err ? ERR_PTR(err) : ret;
+}
+
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
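
The split above leaves two entry points: i915_gem_object_ggtt_pin_ww() for callers that are already inside a ww transaction and can hand over their context, and the i915_gem_object_ggtt_pin() wrapper, which opens a transaction of its own via for_i915_gem_ww(). A hedged sketch of how a caller might pick between them (example_pin() is illustrative, not from this patch):

static struct i915_vma *
example_pin(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww)
{
	/*
	 * Reuse the caller's ww context when there is one, so -EDEADLK
	 * backoff is handled by the outer transaction.
	 */
	if (ww)
		return i915_gem_object_ggtt_pin_ww(obj, ww, NULL, 0, 0,
						   PIN_MAPPABLE);

	return i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
}
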
@@ -1038,35 +1046,38 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
if (!obj)
return -ENOENT;
- err = mutex_lock_interruptible(&obj->mm.lock);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
if (i915_gem_object_has_pages(obj) &&
i915_gem_object_is_tiled(obj) &&
- i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+ i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES) {
if (obj->mm.madv == I915_MADV_WILLNEED) {
- GEM_BUG_ON(!obj->mm.quirked);
- __i915_gem_object_unpin_pages(obj);
- obj->mm.quirked = false;
+ GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj));
+ i915_gem_object_clear_tiling_quirk(obj);
+ i915_gem_object_make_shrinkable(obj);
}
if (args->madv == I915_MADV_WILLNEED) {
- GEM_BUG_ON(obj->mm.quirked);
- __i915_gem_object_pin_pages(obj);
- obj->mm.quirked = true;
+ GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
+ i915_gem_object_make_unshrinkable(obj);
+ i915_gem_object_set_tiling_quirk(obj);
}
}
- if (obj->mm.madv != __I915_MADV_PURGED)
+ if (obj->mm.madv != __I915_MADV_PURGED) {
obj->mm.madv = args->madv;
+ if (obj->ops->adjust_lru)
+ obj->ops->adjust_lru(obj);
+ }
- if (i915_gem_object_has_pages(obj)) {
- struct list_head *list;
-
- if (i915_gem_object_is_shrinkable(obj)) {
- unsigned long flags;
+ if (i915_gem_object_has_pages(obj) ||
+ i915_gem_object_has_self_managed_shrink_list(obj)) {
+ unsigned long flags;
- spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ if (!list_empty(&obj->mm.link)) {
+ struct list_head *list;
if (obj->mm.madv != I915_MADV_WILLNEED)
list = &i915->mm.purge_list;
@@ -1074,8 +1085,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
list = &i915->mm.shrink_list;
list_move_tail(&obj->mm.link, list);
- spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
/* if the object is no longer attached, discard its backing storage */
@@ -1084,28 +1095,79 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
i915_gem_object_truncate(obj);
args->retained = obj->mm.madv != __I915_MADV_PURGED;
- mutex_unlock(&obj->mm.lock);
+ i915_gem_object_unlock(obj);
out:
i915_gem_object_put(obj);
return err;
}
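
Nothing changes for userspace in the madvise flow: the object is still marked via the same ioctl, and the retained flag still reports whether a DONTNEED object kept its backing storage. A hedged userspace-side sketch, assuming the struct and ioctl names from include/uapi/drm/i915_drm.h and libdrm's drmIoctl():

	struct drm_i915_gem_madvise madv = {
		.handle = handle,		/* GEM handle, assumed valid */
		.madv = I915_MADV_DONTNEED,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0 &&
	    !madv.retained) {
		/* Backing store was purged; contents must be regenerated. */
	}
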
+/*
+ * A single pass should suffice to release all the freed objects (along most
+ * call paths), but be a little more paranoid in that freeing the objects does
+ * take a small amount of time, during which the RCU callbacks could have added
+ * new objects into the freed list, and armed the work again.
+ */
+void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
+{
+ while (atomic_read(&i915->mm.free_count)) {
+ flush_work(&i915->mm.free_work);
+ drain_workqueue(i915->bdev.wq);
+ rcu_barrier();
+ }
+}
+
+/*
+ * Similar to objects above (see i915_gem_drain_freed_objects()), in general we
+ * have workers that are armed by RCU and then rearm themselves in their
+ * callbacks. To be paranoid, we need to drain the workqueue a second time after
+ * waiting for the RCU grace period so that we catch work queued via RCU from
+ * the first pass. As neither drain_workqueue() nor flush_workqueue() report a
+ * result, we assume that no more than 3 passes are required
+ * to catch all _recursive_ RCU delayed work.
+ */
+void i915_gem_drain_workqueue(struct drm_i915_private *i915)
+{
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ flush_workqueue(i915->wq);
+ rcu_barrier();
+ i915_gem_drain_freed_objects(i915);
+ }
+
+ drain_workqueue(i915->wq);
+}
+
int i915_gem_init(struct drm_i915_private *dev_priv)
{
+ struct intel_gt *gt;
+ unsigned int i;
int ret;
+ /*
+ * In the process of replacing cache_level with pat_index a tricky
+ * dependency is created on the definition of the enum i915_cache_level.
+ * If this enum is changed, the PTE encoding would break.
+ * Add a build-time check here, and remove it once we have completely
+ * stopped using this enum.
+ */
+ BUILD_BUG_ON(I915_CACHE_NONE != 0 ||
+ I915_CACHE_LLC != 1 ||
+ I915_CACHE_L3_LLC != 2 ||
+ I915_CACHE_WT != 3 ||
+ I915_MAX_CACHE_LEVEL != 4);
+
/* We need to fallback to 4K pages if host doesn't support huge gtt. */
if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
- mkwrite_device_info(dev_priv)->page_sizes =
- I915_GTT_PAGE_SIZE_4K;
+ RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K;
- ret = i915_gem_init_userptr(dev_priv);
- if (ret)
- return ret;
-
- intel_uc_fetch_firmwares(&dev_priv->gt.uc);
- intel_wopcm_init(&dev_priv->wopcm);
+ for_each_gt(gt, dev_priv, i) {
+ intel_uc_fetch_firmwares(&gt->uc);
+ intel_wopcm_init(&gt->wopcm);
+ if (GRAPHICS_VER(dev_priv) >= 8)
+ setup_private_pat(gt);
+ }
ret = i915_init_ggtt(dev_priv);
if (ret) {
@@ -1114,7 +1176,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
}
/*
- * Despite its name intel_init_clock_gating applies both display
+ * Despite its name intel_clock_gating_init applies both display
* clock gating workarounds; GT mmio workarounds and the occasional
* GT power context workaround. Worse, sometimes it includes a context
* register workaround which we need to apply before we record the
@@ -1122,11 +1184,20 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
*
* FIXME: break up the workarounds and apply them at the right time!
*/
- intel_init_clock_gating(dev_priv);
+ intel_clock_gating_init(dev_priv);
- ret = intel_gt_init(&dev_priv->gt);
- if (ret)
- goto err_unlock;
+ for_each_gt(gt, dev_priv, i) {
+ ret = intel_gt_init(gt);
+ if (ret)
+ goto err_unlock;
+ }
+
+ /*
+ * Register engines early to ensure the engine list is in its final
+ * rb-tree form, lowering the amount of code that has to deal with
+ * the intermediate llist state.
+ */
+ intel_engines_driver_register(dev_priv);
return 0;
@@ -1140,8 +1211,11 @@ err_unlock:
i915_gem_drain_workqueue(dev_priv);
if (ret != -EIO) {
- intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
- i915_gem_cleanup_userptr(dev_priv);
+ for_each_gt(gt, dev_priv, i) {
+ intel_gt_driver_remove(gt);
+ intel_gt_driver_release(gt);
+ intel_uc_cleanup_firmwares(&gt->uc);
+ }
}
if (ret == -EIO) {
@@ -1150,27 +1224,28 @@ err_unlock:
* as wedged. But we only want to do this when the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
- if (!intel_gt_is_wedged(&dev_priv->gt)) {
- i915_probe_error(dev_priv,
- "Failed to initialize GPU, declaring it wedged!\n");
- intel_gt_set_wedged(&dev_priv->gt);
+ for_each_gt(gt, dev_priv, i) {
+ if (!intel_gt_is_wedged(gt)) {
+ i915_probe_error(dev_priv,
+ "Failed to initialize GPU, declaring it wedged!\n");
+ intel_gt_set_wedged(gt);
+ }
}
/* Minimal basic recovery for KMS */
ret = i915_ggtt_enable_hw(dev_priv);
- i915_ggtt_resume(&dev_priv->ggtt);
- intel_init_clock_gating(dev_priv);
+ i915_ggtt_resume(to_gt(dev_priv)->ggtt);
+ intel_clock_gating_init(dev_priv);
}
i915_gem_drain_freed_objects(dev_priv);
+
return ret;
}
void i915_gem_driver_register(struct drm_i915_private *i915)
{
i915_gem_driver_register__shrinker(i915);
-
- intel_engines_driver_register(i915);
}
void i915_gem_driver_unregister(struct drm_i915_private *i915)
@@ -1180,30 +1255,30 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
- intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
+ struct intel_gt *gt;
+ unsigned int i;
i915_gem_suspend_late(dev_priv);
- intel_gt_driver_remove(&dev_priv->gt);
+ for_each_gt(gt, dev_priv, i)
+ intel_gt_driver_remove(gt);
dev_priv->uabi_engines = RB_ROOT;
/* Flush any outstanding unpin_work. */
i915_gem_drain_workqueue(dev_priv);
-
- i915_gem_drain_freed_objects(dev_priv);
}
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
- i915_gem_driver_release__contexts(dev_priv);
-
- intel_gt_driver_release(&dev_priv->gt);
+ struct intel_gt *gt;
+ unsigned int i;
- intel_wa_list_free(&dev_priv->gt_wa_list);
-
- intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
- i915_gem_cleanup_userptr(dev_priv);
+ for_each_gt(gt, dev_priv, i) {
+ intel_gt_driver_release(gt);
+ intel_uc_cleanup_firmwares(&gt->uc);
+ }
- i915_gem_drain_freed_objects(dev_priv);
+ /* Flush any outstanding work, including i915_gem_context.release_work. */
+ i915_gem_drain_workqueue(dev_priv);
drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
}
@@ -1225,104 +1300,52 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv)
i915_gem_init__mm(dev_priv);
i915_gem_init__contexts(dev_priv);
- spin_lock_init(&dev_priv->fb_tracking.lock);
+ spin_lock_init(&dev_priv->frontbuffer_lock);
}
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
- i915_gem_drain_freed_objects(dev_priv);
+ i915_gem_drain_workqueue(dev_priv);
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
}
-int i915_gem_freeze(struct drm_i915_private *dev_priv)
-{
- /* Discard all purgeable objects, let userspace recover those as
- * required after resuming.
- */
- i915_gem_shrink_all(dev_priv);
-
- return 0;
-}
-
-int i915_gem_freeze_late(struct drm_i915_private *i915)
-{
- struct drm_i915_gem_object *obj;
- intel_wakeref_t wakeref;
-
- /*
- * Called just before we write the hibernation image.
- *
- * We need to update the domain tracking to reflect that the CPU
- * will be accessing all the pages to create and restore from the
- * hibernation, and so upon restoration those pages will be in the
- * CPU domain.
- *
- * To make sure the hibernation image contains the latest state,
- * we update that state just before writing out the image.
- *
- * To try and reduce the hibernation image, we manually shrink
- * the objects as well, see i915_gem_freeze()
- */
-
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- i915_gem_shrink(i915, -1UL, NULL, ~0);
- i915_gem_drain_freed_objects(i915);
-
- list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
- i915_gem_object_lock(obj);
- drm_WARN_ON(&i915->drm,
- i915_gem_object_set_to_cpu_domain(obj, true));
- i915_gem_object_unlock(obj);
- }
-
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-
- return 0;
-}
-
-void i915_gem_release(struct drm_device *dev, struct drm_file *file)
-{
- struct drm_i915_file_private *file_priv = file->driver_priv;
- struct i915_request *request;
-
- /* Clean up our request list when the client is going away, so that
- * later retire_requests won't dereference our soon-to-be-gone
- * file_priv.
- */
- spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_link)
- request->file_priv = NULL;
- spin_unlock(&file_priv->mm.lock);
-}
-
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
struct drm_i915_file_private *file_priv;
- int ret;
+ struct i915_drm_client *client;
+ int ret = -ENOMEM;
- DRM_DEBUG("\n");
+ drm_dbg(&i915->drm, "\n");
file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
if (!file_priv)
- return -ENOMEM;
+ goto err_alloc;
+
+ client = i915_drm_client_alloc();
+ if (!client)
+ goto err_client;
file->driver_priv = file_priv;
- file_priv->dev_priv = i915;
+ file_priv->i915 = i915;
file_priv->file = file;
-
- spin_lock_init(&file_priv->mm.lock);
- INIT_LIST_HEAD(&file_priv->mm.request_list);
+ file_priv->client = client;
file_priv->bsd_engine = -1;
file_priv->hang_timestamp = jiffies;
ret = i915_gem_context_open(i915, file);
if (ret)
- kfree(file_priv);
+ goto err_context;
+
+ return 0;
+err_context:
+ i915_drm_client_put(client);
+err_client:
+ kfree(file_priv);
+err_alloc:
return ret;
}