Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--   drivers/gpu/drm/i915/i915_gem.c | 5058
1 file changed, 896 insertions(+), 4162 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4200c32407ec..4c82c9544b93 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1,5 +1,5 @@ /* - * Copyright © 2008 Intel Corporation + * Copyright © 2008-2015 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,481 +25,430 @@ * */ -#include <drm/drmP.h> -#include <drm/i915_drm.h> -#include "i915_drv.h" -#include "i915_trace.h" -#include "intel_drv.h" +#include <linux/dma-fence-array.h> +#include <linux/kthread.h> +#include <linux/dma-resv.h> #include <linux/shmem_fs.h> #include <linux/slab.h> +#include <linux/stop_machine.h> #include <linux/swap.h> #include <linux/pci.h> #include <linux/dma-buf.h> +#include <linux/mman.h> + +#include <drm/drm_cache.h> +#include <drm/drm_print.h> +#include <drm/drm_vma_manager.h> + +#include "gem/i915_gem_clflush.h" +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_ioctls.h" +#include "gem/i915_gem_mman.h" +#include "gem/i915_gem_object_frontbuffer.h" +#include "gem/i915_gem_pm.h" +#include "gem/i915_gem_region.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_workarounds.h" -static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, - unsigned alignment, - bool map_and_fenceable, - bool nonblocking); -static int i915_gem_phys_pwrite(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file); - -static void i915_gem_write_fence(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj); -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable); - -static int i915_gem_inactive_shrink(struct shrinker *shrinker, - struct shrink_control *sc); -static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); -static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); -static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); - -static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) +#include "i915_drv.h" +#include "i915_file_private.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "intel_clock_gating.h" + +static int +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { - if (obj->tiling_mode) - i915_gem_release_mmap(obj); + int err; - /* As we do not have an associated fence register, we will force - * a tiling change if we ever need to acquire one. 
- */ - obj->fence_dirty = false; - obj->fence_reg = I915_FENCE_REG_NONE; -} + err = mutex_lock_interruptible(&ggtt->vm.mutex); + if (err) + return err; -/* some bookkeeping */ -static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, - size_t size) -{ - dev_priv->mm.object_count++; - dev_priv->mm.object_memory += size; + memset(node, 0, sizeof(*node)); + err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, + size, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + + mutex_unlock(&ggtt->vm.mutex); + + return err; } -static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, - size_t size) +static void +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) { - dev_priv->mm.object_count--; - dev_priv->mm.object_memory -= size; + mutex_lock(&ggtt->vm.mutex); + drm_mm_remove_node(node); + mutex_unlock(&ggtt->vm.mutex); } -static int -i915_gem_wait_for_error(struct i915_gpu_error *error) +int +i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { - int ret; + struct drm_i915_private *i915 = to_i915(dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_i915_gem_get_aperture *args = data; + struct i915_vma *vma; + u64 pinned; -#define EXIT_COND (!i915_reset_in_progress(error) || \ - i915_terminally_wedged(error)) - if (EXIT_COND) - return 0; + if (mutex_lock_interruptible(&ggtt->vm.mutex)) + return -EINTR; - /* - * Only wait 10 seconds for the gpu reset to complete to avoid hanging - * userspace. If it takes that long something really bad is going on and - * we should simply try to bail out and fail as gracefully as possible. - */ - ret = wait_event_interruptible_timeout(error->reset_queue, - EXIT_COND, - 10*HZ); - if (ret == 0) { - DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); - return -EIO; - } else if (ret < 0) { - return ret; - } -#undef EXIT_COND + pinned = ggtt->vm.reserved; + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) + if (i915_vma_is_pinned(vma)) + pinned += vma->node.size; + + mutex_unlock(&ggtt->vm.mutex); + + args->aper_size = ggtt->vm.total; + args->aper_available_size = args->aper_size - pinned; return 0; } -int i915_mutex_lock_interruptible(struct drm_device *dev) +int i915_gem_object_unbind(struct drm_i915_gem_object *obj, + unsigned long flags) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm; + bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK); + LIST_HEAD(still_in_list); + intel_wakeref_t wakeref; + struct i915_vma *vma; int ret; - ret = i915_gem_wait_for_error(&dev_priv->gpu_error); - if (ret) - return ret; + assert_object_held(obj); - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - WARN_ON(i915_verify_lists(dev)); - return 0; -} - -static inline bool -i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) -{ - return obj->gtt_space && !obj->active; -} + if (list_empty(&obj->vma.list)) + return 0; -int -i915_gem_init_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_init *args = data; + /* + * As some machines use ACPI to handle runtime-resume callbacks, and + * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex + * as they are required by the shrinker. Ergo, we wake the device up + * first just in case. 
+ */ + wakeref = intel_runtime_pm_get(rpm); - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; +try_again: + ret = 0; + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + list_move_tail(&vma->obj_link, &still_in_list); + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) + continue; - if (args->gtt_start >= args->gtt_end || - (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) - return -EINVAL; + if (flags & I915_GEM_OBJECT_UNBIND_TEST) { + ret = -EBUSY; + break; + } - /* GEM with user mode setting was never supported on ilk and later. */ - if (INTEL_INFO(dev)->gen >= 5) - return -ENODEV; + /* + * Requiring the vm destructor to take the object lock + * before destroying a vma would help us eliminate the + * i915_vm_tryget() here, AND thus also the barrier stuff + * at the end. That's an easy fix, but sleeping locks in + * a kthread should generally be avoided. + */ + ret = -EAGAIN; + if (!i915_vm_tryget(vma->vm)) + break; - mutex_lock(&dev->struct_mutex); - i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, - args->gtt_end); - dev_priv->gtt.mappable_end = args->gtt_end; - mutex_unlock(&dev->struct_mutex); + spin_unlock(&obj->vma.lock); - return 0; -} + /* + * Since i915_vma_parked() takes the object lock + * before vma destruction, it won't race us here, + * and destroy the vma from under us. + */ -int -i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_get_aperture *args = data; - struct drm_i915_gem_object *obj; - size_t pinned; + ret = -EBUSY; + if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) { + assert_object_held(vma->obj); + ret = i915_vma_unbind_async(vma, vm_trylock); + } - pinned = 0; - mutex_lock(&dev->struct_mutex); - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (obj->pin_count) - pinned += obj->gtt_space->size; - mutex_unlock(&dev->struct_mutex); + if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma))) { + if (vm_trylock) { + if (mutex_trylock(&vma->vm->mutex)) { + ret = __i915_vma_unbind(vma); + mutex_unlock(&vma->vm->mutex); + } + } else { + ret = i915_vma_unbind(vma); + } + } - args->aper_size = dev_priv->gtt.total; - args->aper_available_size = args->aper_size - pinned; + i915_vm_put(vma->vm); + spin_lock(&obj->vma.lock); + } + list_splice_init(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); - return 0; -} + if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) { + rcu_barrier(); /* flush the i915_vm_release() */ + goto try_again; + } -void *i915_gem_object_alloc(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO); -} + intel_runtime_pm_put(rpm, wakeref); -void i915_gem_object_free(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - kmem_cache_free(dev_priv->slab, obj); + return ret; } static int -i915_gem_create(struct drm_file *file, - struct drm_device *dev, - uint64_t size, - uint32_t *handle_p) +shmem_pread(struct page *page, int offset, int len, char __user *user_data, + bool needs_clflush) { - struct drm_i915_gem_object *obj; + char *vaddr; int ret; - u32 handle; - - size = roundup(size, PAGE_SIZE); - if (size == 0) - return -EINVAL; - /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); - if (obj == 
NULL) - return -ENOMEM; + vaddr = kmap(page); - ret = drm_gem_handle_create(file, &obj->base, &handle); - if (ret) { - drm_gem_object_release(&obj->base); - i915_gem_info_remove_obj(dev->dev_private, obj->base.size); - i915_gem_object_free(obj); - return ret; - } + if (needs_clflush) + drm_clflush_virt_range(vaddr + offset, len); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference(&obj->base); - trace_i915_gem_object_create(obj); + ret = __copy_to_user(user_data, vaddr + offset, len); - *handle_p = handle; - return 0; -} + kunmap(page); -int -i915_gem_dumb_create(struct drm_file *file, - struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - /* have to work out size/pitch and return them */ - args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); - args->size = args->pitch * args->height; - return i915_gem_create(file, dev, - args->size, &args->handle); + return ret ? -EFAULT : 0; } -int i915_gem_dumb_destroy(struct drm_file *file, - struct drm_device *dev, - uint32_t handle) +static int +i915_gem_shmem_pread(struct drm_i915_gem_object *obj, + struct drm_i915_gem_pread *args) { - return drm_gem_handle_delete(file, handle); -} + unsigned int needs_clflush; + char __user *user_data; + unsigned long offset; + pgoff_t idx; + u64 remain; + int ret; -/** - * Creates a new mm object and returns a handle to it. - */ -int -i915_gem_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_create *args = data; + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + return ret; - return i915_gem_create(file, dev, - args->size, &args->handle); -} + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_unlock; -static inline int -__copy_to_user_swizzled(char __user *cpu_vaddr, - const char *gpu_vaddr, int gpu_offset, - int length) -{ - int ret, cpu_offset = 0; + ret = i915_gem_object_prepare_read(obj, &needs_clflush); + if (ret) + goto err_unpin; - while (length > 0) { - int cacheline_end = ALIGN(gpu_offset + 1, 64); - int this_length = min(cacheline_end - gpu_offset, length); - int swizzled_gpu_offset = gpu_offset ^ 64; + i915_gem_object_finish_access(obj); + i915_gem_object_unlock(obj); - ret = __copy_to_user(cpu_vaddr + cpu_offset, - gpu_vaddr + swizzled_gpu_offset, - this_length); + remain = args->size; + user_data = u64_to_user_ptr(args->data_ptr); + offset = offset_in_page(args->offset); + for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { + struct page *page = i915_gem_object_get_page(obj, idx); + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); + + ret = shmem_pread(page, offset, length, user_data, + needs_clflush); if (ret) - return ret + length; + break; - cpu_offset += this_length; - gpu_offset += this_length; - length -= this_length; + remain -= length; + user_data += length; + offset = 0; } - return 0; + i915_gem_object_unpin_pages(obj); + return ret; + +err_unpin: + i915_gem_object_unpin_pages(obj); +err_unlock: + i915_gem_object_unlock(obj); + return ret; } -static inline int -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, - const char __user *cpu_vaddr, - int length) +static inline bool +gtt_user_read(struct io_mapping *mapping, + loff_t base, int offset, + char __user *user_data, int length) { - int ret, cpu_offset = 0; - - while (length > 0) { - int cacheline_end = ALIGN(gpu_offset + 1, 64); - int this_length = min(cacheline_end - gpu_offset, length); - int swizzled_gpu_offset = gpu_offset ^ 64; - - ret = __copy_from_user(gpu_vaddr + 
swizzled_gpu_offset, - cpu_vaddr + cpu_offset, - this_length); - if (ret) - return ret + length; + void __iomem *vaddr; + unsigned long unwritten; - cpu_offset += this_length; - gpu_offset += this_length; - length -= this_length; + /* We can use the cpu mem copy function because this is X86. */ + vaddr = io_mapping_map_atomic_wc(mapping, base); + unwritten = __copy_to_user_inatomic(user_data, + (void __force *)vaddr + offset, + length); + io_mapping_unmap_atomic(vaddr); + if (unwritten) { + vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); + unwritten = copy_to_user(user_data, + (void __force *)vaddr + offset, + length); + io_mapping_unmap(vaddr); } - - return 0; + return unwritten; } -/* Per-page copy function for the shmem pread fastpath. - * Flushes invalid cachelines before reading the target if - * needs_clflush is set. */ -static int -shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, - char __user *user_data, - bool page_do_bit17_swizzling, bool needs_clflush) +static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj, + struct drm_mm_node *node, + bool write) { - char *vaddr; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct i915_vma *vma; + struct i915_gem_ww_ctx ww; int ret; - if (unlikely(page_do_bit17_swizzling)) - return -EINVAL; + i915_gem_ww_ctx_init(&ww, true); +retry: + vma = ERR_PTR(-ENODEV); + ret = i915_gem_object_lock(obj, &ww); + if (ret) + goto err_ww; - vaddr = kmap_atomic(page); - if (needs_clflush) - drm_clflush_virt_range(vaddr + shmem_page_offset, - page_length); - ret = __copy_to_user_inatomic(user_data, - vaddr + shmem_page_offset, - page_length); - kunmap_atomic(vaddr); + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto err_ww; + + if (!i915_gem_object_is_tiled(obj)) + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (vma == ERR_PTR(-EDEADLK)) { + ret = -EDEADLK; + goto err_ww; + } else if (!IS_ERR(vma)) { + node->start = i915_ggtt_offset(vma); + node->flags = 0; + } else { + ret = insert_mappable_node(ggtt, node, PAGE_SIZE); + if (ret) + goto err_ww; + GEM_BUG_ON(!drm_mm_node_allocated(node)); + vma = NULL; + } - return ret ? -EFAULT : 0; -} + ret = i915_gem_object_pin_pages(obj); + if (ret) { + if (drm_mm_node_allocated(node)) { + ggtt->vm.clear_range(&ggtt->vm, node->start, node->size); + remove_mappable_node(ggtt, node); + } else { + i915_vma_unpin(vma); + } + } -static void -shmem_clflush_swizzled_range(char *addr, unsigned long length, - bool swizzled) -{ - if (unlikely(swizzled)) { - unsigned long start = (unsigned long) addr; - unsigned long end = (unsigned long) addr + length; - - /* For swizzling simply ensure that we always flush both - * channels. Lame, but simple and it works. Swizzled - * pwrite/pread is far from a hotpath - current userspace - * doesn't use it at all. */ - start = round_down(start, 128); - end = round_up(end, 128); - - drm_clflush_virt_range((void *)start, end - start); - } else { - drm_clflush_virt_range(addr, length); +err_ww: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; } + i915_gem_ww_ctx_fini(&ww); + return ret ? ERR_PTR(ret) : vma; } -/* Only difference to the fast-path function is that this can handle bit17 - * and uses non-atomic copy and kmap functions. 
*/ -static int -shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, - char __user *user_data, - bool page_do_bit17_swizzling, bool needs_clflush) +static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj, + struct drm_mm_node *node, + struct i915_vma *vma) { - char *vaddr; - int ret; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; - vaddr = kmap(page); - if (needs_clflush) - shmem_clflush_swizzled_range(vaddr + shmem_page_offset, - page_length, - page_do_bit17_swizzling); - - if (page_do_bit17_swizzling) - ret = __copy_to_user_swizzled(user_data, - vaddr, shmem_page_offset, - page_length); - else - ret = __copy_to_user(user_data, - vaddr + shmem_page_offset, - page_length); - kunmap(page); - - return ret ? - EFAULT : 0; + i915_gem_object_unpin_pages(obj); + if (drm_mm_node_allocated(node)) { + ggtt->vm.clear_range(&ggtt->vm, node->start, node->size); + remove_mappable_node(ggtt, node); + } else { + i915_vma_unpin(vma); + } } static int -i915_gem_shmem_pread(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pread *args, - struct drm_file *file) -{ - char __user *user_data; - ssize_t remain; - loff_t offset; - int shmem_page_offset, page_length, ret = 0; - int obj_do_bit17_swizzling, page_do_bit17_swizzling; - int prefaulted = 0; - int needs_clflush = 0; - struct sg_page_iter sg_iter; - - user_data = to_user_ptr(args->data_ptr); - remain = args->size; +i915_gem_gtt_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + unsigned long remain, offset; + intel_wakeref_t wakeref; + struct drm_mm_node node; + void __user *user_data; + struct i915_vma *vma; + int ret = 0; - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { - /* If we're not in the cpu read domain, set ourself into the gtt - * read domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will dirty the data - * anyway again before the next pread happens. 
*/ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush = 1; - if (obj->gtt_space) { - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - return ret; - } - } + if (overflows_type(args->size, remain) || + overflows_type(args->offset, offset)) + return -EINVAL; - ret = i915_gem_object_get_pages(obj); - if (ret) - return ret; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - i915_gem_object_pin_pages(obj); + vma = i915_gem_gtt_prepare(obj, &node, false); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out_rpm; + } + user_data = u64_to_user_ptr(args->data_ptr); + remain = args->size; offset = args->offset; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, - offset >> PAGE_SHIFT) { - struct page *page = sg_page_iter_page(&sg_iter); - - if (remain <= 0) - break; - + while (remain > 0) { /* Operation in this page * - * shmem_page_offset = offset within page in shmem file + * page_base = page offset within aperture + * page_offset = offset within page * page_length = bytes to copy for this page */ - shmem_page_offset = offset_in_page(offset); - page_length = remain; - if ((shmem_page_offset + page_length) > PAGE_SIZE) - page_length = PAGE_SIZE - shmem_page_offset; - - page_do_bit17_swizzling = obj_do_bit17_swizzling && - (page_to_phys(page) & (1 << 17)) != 0; - - ret = shmem_pread_fast(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - needs_clflush); - if (ret == 0) - goto next_page; - - mutex_unlock(&dev->struct_mutex); - - if (!prefaulted) { - ret = fault_in_multipages_writeable(user_data, remain); - /* Userspace is tricking us, but we've already clobbered - * its pages with the prefault and promised to write the - * data up to the first fault. Hence ignore any errors - * and just continue. */ - (void)ret; - prefaulted = 1; + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); + unsigned page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? remain : page_length; + if (drm_mm_node_allocated(&node)) { + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, + offset >> PAGE_SHIFT), + node.start, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), 0); + } else { + page_base += offset & PAGE_MASK; } - ret = shmem_pread_slow(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - needs_clflush); - - mutex_lock(&dev->struct_mutex); - -next_page: - mark_page_accessed(page); - - if (ret) - goto out; + if (gtt_user_read(&ggtt->iomap, page_base, page_offset, + user_data, page_length)) { + ret = -EFAULT; + break; + } remain -= page_length; user_data += page_length; offset += page_length; } -out: - i915_gem_object_unpin_pages(obj); - + i915_gem_gtt_cleanup(obj, &node, vma); +out_rpm: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); return ret; } /** - * Reads data from the object referenced by handle. + * i915_gem_pread_ioctl - Reads data from the object referenced by handle. + * @dev: drm device pointer + * @data: ioctl data blob + * @file: drm file pointer * * On error, the contents of *data are undefined. */ @@ -507,51 +456,53 @@ int i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_pread *args = data; struct drm_i915_gem_object *obj; - int ret = 0; + int ret; + + /* PREAD is disallowed for all platforms after TGL-LP. This also + * covers all platforms with local memory. 
+ */ + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915)) + return -EOPNOTSUPP; if (args->size == 0) return 0; - if (!access_ok(VERIFY_WRITE, - to_user_ptr(args->data_ptr), + if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) return -EFAULT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check source. */ - if (args->offset > obj->base.size || - args->size > obj->base.size - args->offset) { + if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { ret = -EINVAL; goto out; } - /* prime objects have no backing filp to GEM pread/pwrite - * pages from. - */ - if (!obj->base.filp) { - ret = -EINVAL; + trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pread) + ret = obj->ops->pread(obj, args); + if (ret != -ENODEV) goto out; - } - trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (ret) + goto out; - ret = i915_gem_shmem_pread(dev, obj, args, file); + ret = i915_gem_shmem_pread(obj, args); + if (ret == -EFAULT || ret == -ENODEV) + ret = i915_gem_gtt_pread(obj, args); out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put(obj); return ret; } @@ -559,79 +510,114 @@ unlock: * page faults in the source data */ -static inline int -fast_user_write(struct io_mapping *mapping, - loff_t page_base, int page_offset, - char __user *user_data, - int length) +static inline bool +ggtt_write(struct io_mapping *mapping, + loff_t base, int offset, + char __user *user_data, int length) { - void __iomem *vaddr_atomic; - void *vaddr; + void __iomem *vaddr; unsigned long unwritten; - vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); /* We can use the cpu mem copy function because this is X86. */ - vaddr = (void __force*)vaddr_atomic + page_offset; - unwritten = __copy_from_user_inatomic_nocache(vaddr, + vaddr = io_mapping_map_atomic_wc(mapping, base); + unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, user_data, length); - io_mapping_unmap_atomic(vaddr_atomic); + io_mapping_unmap_atomic(vaddr); + if (unwritten) { + vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); + unwritten = copy_from_user((void __force *)vaddr + offset, + user_data, length); + io_mapping_unmap(vaddr); + } + return unwritten; } /** - * This is the fast pwrite path, where we copy the data directly from the + * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. 
+ * @obj: i915 GEM object + * @args: pwrite arguments structure */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - ssize_t remain; - loff_t offset, page_base; - char __user *user_data; - int page_offset, page_length, ret; - - ret = i915_gem_object_pin(obj, 0, true, true); - if (ret) - goto out; +i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct intel_runtime_pm *rpm = &i915->runtime_pm; + unsigned long remain, offset; + intel_wakeref_t wakeref; + struct drm_mm_node node; + struct i915_vma *vma; + void __user *user_data; + int ret = 0; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - goto out_unpin; + if (overflows_type(args->size, remain) || + overflows_type(args->offset, offset)) + return -EINVAL; - ret = i915_gem_object_put_fence(obj); - if (ret) - goto out_unpin; + if (i915_gem_object_has_struct_page(obj)) { + /* + * Avoid waking the device up if we can fallback, as + * waking/resuming is very slow (worst-case 10-100 ms + * depending on PCI sleeps and our own resume time). + * This easily dwarfs any performance advantage from + * using the cache bypass of indirect GGTT access. + */ + wakeref = intel_runtime_pm_get_if_in_use(rpm); + if (!wakeref) + return -EFAULT; + } else { + /* No backing pages, no fallback, we must force GGTT access */ + wakeref = intel_runtime_pm_get(rpm); + } - user_data = to_user_ptr(args->data_ptr); - remain = args->size; + vma = i915_gem_gtt_prepare(obj, &node, true); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out_rpm; + } - offset = obj->gtt_offset + args->offset; + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); - while (remain > 0) { + user_data = u64_to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned int page_offset = offset_in_page(offset); + unsigned int page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? remain : page_length; + if (drm_mm_node_allocated(&node)) { + /* flush the write before we modify the GGTT */ + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, + offset >> PAGE_SHIFT), + node.start, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + } else { + page_base += offset & PAGE_MASK; + } /* If we get a fault while copying data, then (presumably) our * source page isn't available. Return the error and we'll * retry in the slow path. + * If the object is non-shmem backed, we retry again with the + * path that handles page fault. 
*/ - if (fast_user_write(dev_priv->gtt.mappable, page_base, - page_offset, user_data, page_length)) { + if (ggtt_write(&ggtt->iomap, page_base, page_offset, + user_data, page_length)) { ret = -EFAULT; - goto out_unpin; + break; } remain -= page_length; @@ -639,208 +625,112 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, offset += page_length; } -out_unpin: - i915_gem_object_unpin(obj); -out: + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + + i915_gem_gtt_cleanup(obj, &node, vma); +out_rpm: + intel_runtime_pm_put(rpm, wakeref); return ret; } /* Per-page copy function for the shmem pwrite fastpath. * Flushes invalid cachelines before writing to the target if * needs_clflush_before is set and flushes out any written cachelines after - * writing if needs_clflush is set. */ + * writing if needs_clflush is set. + */ static int -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, - char __user *user_data, - bool page_do_bit17_swizzling, - bool needs_clflush_before, - bool needs_clflush_after) +shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, + bool needs_clflush_before, + bool needs_clflush_after) { char *vaddr; int ret; - if (unlikely(page_do_bit17_swizzling)) - return -EINVAL; + vaddr = kmap(page); - vaddr = kmap_atomic(page); if (needs_clflush_before) - drm_clflush_virt_range(vaddr + shmem_page_offset, - page_length); - ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, - user_data, - page_length); - if (needs_clflush_after) - drm_clflush_virt_range(vaddr + shmem_page_offset, - page_length); - kunmap_atomic(vaddr); + drm_clflush_virt_range(vaddr + offset, len); - return ret ? -EFAULT : 0; -} + ret = __copy_from_user(vaddr + offset, user_data, len); + if (!ret && needs_clflush_after) + drm_clflush_virt_range(vaddr + offset, len); -/* Only difference to the fast-path function is that this can handle bit17 - * and uses non-atomic copy and kmap functions. */ -static int -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, - char __user *user_data, - bool page_do_bit17_swizzling, - bool needs_clflush_before, - bool needs_clflush_after) -{ - char *vaddr; - int ret; - - vaddr = kmap(page); - if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) - shmem_clflush_swizzled_range(vaddr + shmem_page_offset, - page_length, - page_do_bit17_swizzling); - if (page_do_bit17_swizzling) - ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, - user_data, - page_length); - else - ret = __copy_from_user(vaddr + shmem_page_offset, - user_data, - page_length); - if (needs_clflush_after) - shmem_clflush_swizzled_range(vaddr + shmem_page_offset, - page_length, - page_do_bit17_swizzling); kunmap(page); return ret ? 
-EFAULT : 0; } static int -i915_gem_shmem_pwrite(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) -{ - ssize_t remain; - loff_t offset; - char __user *user_data; - int shmem_page_offset, page_length, ret = 0; - int obj_do_bit17_swizzling, page_do_bit17_swizzling; - int hit_slowpath = 0; - int needs_clflush_after = 0; - int needs_clflush_before = 0; - struct sg_page_iter sg_iter; - - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - /* If we're not in the cpu write domain, set ourself into the gtt - * write domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will use the data - * right away and we therefore have to clflush anyway. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; - if (obj->gtt_space) { - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - } - } - /* Same trick applies for invalidate partially written cachelines before - * writing. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) - && obj->cache_level == I915_CACHE_NONE) - needs_clflush_before = 1; +i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + unsigned int partial_cacheline_write; + unsigned int needs_clflush; + void __user *user_data; + unsigned long offset; + pgoff_t idx; + u64 remain; + int ret; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; - i915_gem_object_pin_pages(obj); - - offset = args->offset; - obj->dirty = 1; + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_unlock; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, - offset >> PAGE_SHIFT) { - struct page *page = sg_page_iter_page(&sg_iter); - int partial_cacheline_write; + ret = i915_gem_object_prepare_write(obj, &needs_clflush); + if (ret) + goto err_unpin; - if (remain <= 0) - break; + i915_gem_object_finish_access(obj); + i915_gem_object_unlock(obj); - /* Operation in this page - * - * shmem_page_offset = offset within page in shmem file - * page_length = bytes to copy for this page - */ - shmem_page_offset = offset_in_page(offset); - - page_length = remain; - if ((shmem_page_offset + page_length) > PAGE_SIZE) - page_length = PAGE_SIZE - shmem_page_offset; - - /* If we don't overwrite a cacheline completely we need to be - * careful to have up-to-date data by first clflushing. Don't - * overcomplicate things and flush the entire patch. */ - partial_cacheline_write = needs_clflush_before && - ((shmem_page_offset | page_length) - & (boot_cpu_data.x86_clflush_size - 1)); - - page_do_bit17_swizzling = obj_do_bit17_swizzling && - (page_to_phys(page) & (1 << 17)) != 0; - - ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - partial_cacheline_write, - needs_clflush_after); - if (ret == 0) - goto next_page; - - hit_slowpath = 1; - mutex_unlock(&dev->struct_mutex); - ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - partial_cacheline_write, - needs_clflush_after); - - mutex_lock(&dev->struct_mutex); - -next_page: - set_page_dirty(page); - mark_page_accessed(page); + /* If we don't overwrite a cacheline completely we need to be + * careful to have up-to-date data by first clflushing. 
Don't + * overcomplicate things and flush the entire patch. + */ + partial_cacheline_write = 0; + if (needs_clflush & CLFLUSH_BEFORE) + partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; + user_data = u64_to_user_ptr(args->data_ptr); + remain = args->size; + offset = offset_in_page(args->offset); + for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { + struct page *page = i915_gem_object_get_page(obj, idx); + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); + + ret = shmem_pwrite(page, offset, length, user_data, + (offset | length) & partial_cacheline_write, + needs_clflush & CLFLUSH_AFTER); if (ret) - goto out; + break; - remain -= page_length; - user_data += page_length; - offset += page_length; + remain -= length; + user_data += length; + offset = 0; } -out: - i915_gem_object_unpin_pages(obj); - - if (hit_slowpath) { - /* - * Fixup: Flush cpu caches in case we didn't flush the dirty - * cachelines in-line while writing and the object moved - * out of the cpu write domain while we've dropped the lock. - */ - if (!needs_clflush_after && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); - i915_gem_chipset_flush(dev); - } - } + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - if (needs_clflush_after) - i915_gem_chipset_flush(dev); + i915_gem_object_unpin_pages(obj); + return ret; +err_unpin: + i915_gem_object_unpin_pages(obj); +err_unlock: + i915_gem_object_unlock(obj); return ret; } /** - * Writes data to the object referenced by handle. + * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle. + * @dev: drm device + * @data: ioctl data blob + * @file: drm file * * On error, the contents of the buffer that were to be modified are undefined. */ @@ -848,50 +738,54 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; + /* PWRITE is disallowed for all platforms after TGL-LP. This also + * covers all platforms with local memory. + */ + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915)) + return -EOPNOTSUPP; + if (args->size == 0) return 0; - if (!access_ok(VERIFY_READ, - to_user_ptr(args->data_ptr), - args->size)) - return -EFAULT; - - ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), - args->size); - if (ret) + if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) return -EFAULT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check destination. */ - if (args->offset > obj->base.size || - args->size > obj->base.size - args->offset) { + if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { ret = -EINVAL; - goto out; + goto err; } - /* prime objects have no backing filp to GEM pread/pwrite - * pages from. 
- */ - if (!obj->base.filp) { + /* Writes not allowed into this read-only object */ + if (i915_gem_object_is_readonly(obj)) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pwrite) + ret = obj->ops->pwrite(obj, args); + if (ret != -ENODEV) + goto err; + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (ret) + goto err; + ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get @@ -899,342 +793,29 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * pread/pwrite currently are reading and writing from the CPU * perspective, requiring manual detiling by the client. */ - if (obj->phys_obj) { - ret = i915_gem_phys_pwrite(dev, obj, args, file); - goto out; - } - - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); + if (!i915_gem_object_has_struct_page(obj) || + i915_gem_cpu_write_needs_clflush(obj)) /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between - * textures). Fallback to the shmem path in that case. */ - } - - if (ret == -EFAULT || ret == -ENOSPC) - ret = i915_gem_shmem_pwrite(dev, obj, args, file); - -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -int -i915_gem_check_wedge(struct i915_gpu_error *error, - bool interruptible) -{ - if (i915_reset_in_progress(error)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - - /* Recovery complete, but the reset failed ... */ - if (i915_terminally_wedged(error)) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - -/* - * Compare seqno against outstanding lazy request. Emit a request if they are - * equal. - */ -static int -i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) -{ - int ret; - - BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); - - ret = 0; - if (seqno == ring->outstanding_lazy_request) - ret = i915_add_request(ring, NULL); - - return ret; -} - -/** - * __wait_seqno - wait until execution of seqno has finished - * @ring: the ring expected to report seqno - * @seqno: duh! - * @reset_counter: reset sequence associated with the given seqno - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. - * - * Returns 0 if the seqno was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. 
- */ -static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, - unsigned reset_counter, - bool interruptible, struct timespec *timeout) -{ - drm_i915_private_t *dev_priv = ring->dev->dev_private; - struct timespec before, now, wait_time={1,0}; - unsigned long timeout_jiffies; - long end; - bool wait_forever = true; - int ret; - - if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) - return 0; - - trace_i915_gem_request_wait_begin(ring, seqno); - - if (timeout != NULL) { - wait_time = *timeout; - wait_forever = false; - } - - timeout_jiffies = timespec_to_jiffies_timeout(&wait_time); - - if (WARN_ON(!ring->irq_get(ring))) - return -ENODEV; - - /* Record current time in case interrupted by signal, or wedged * */ - getrawmonotonic(&before); - -#define EXIT_COND \ - (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ - i915_reset_in_progress(&dev_priv->gpu_error) || \ - reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) - do { - if (interruptible) - end = wait_event_interruptible_timeout(ring->irq_queue, - EXIT_COND, - timeout_jiffies); - else - end = wait_event_timeout(ring->irq_queue, EXIT_COND, - timeout_jiffies); - - /* We need to check whether any gpu reset happened in between - * the caller grabbing the seqno and now ... */ - if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) - end = -EAGAIN; - - /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely - * gone. */ - ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); - if (ret) - end = ret; - } while (end == 0 && wait_forever); - - getrawmonotonic(&now); - - ring->irq_put(ring); - trace_i915_gem_request_wait_end(ring, seqno); -#undef EXIT_COND - - if (timeout) { - struct timespec sleep_time = timespec_sub(now, before); - *timeout = timespec_sub(*timeout, sleep_time); - if (!timespec_valid(timeout)) /* i.e. negative time remains */ - set_normalized_timespec(timeout, 0, 0); - } - - switch (end) { - case -EIO: - case -EAGAIN: /* Wedged */ - case -ERESTARTSYS: /* Signal */ - return (int)end; - case 0: /* Timeout */ - return -ETIME; - default: /* Completed */ - WARN_ON(end < 0); /* We're not aware of other errors */ - return 0; - } -} - -/** - * Waits for a sequence number to be signaled, and cleans up the - * request and object lists appropriately for that event. - */ -int -i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) -{ - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - bool interruptible = dev_priv->mm.interruptible; - int ret; - - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(seqno == 0); - - ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); - if (ret) - return ret; - - ret = i915_gem_check_olr(ring, seqno); - if (ret) - return ret; - - return __wait_seqno(ring, seqno, - atomic_read(&dev_priv->gpu_error.reset_counter), - interruptible, NULL); -} - -static int -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring) -{ - i915_gem_retire_requests_ring(ring); - - /* Manually manage the write flush as we may have not yet - * retired the buffer. - * - * Note that the last_write_seqno is always the earlier of - * the two (read/write) seqno, so if we haved successfully waited, - * we know we have passed the last write. 
- */ - obj->last_write_seqno = 0; - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; - - return 0; -} - -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. - */ -static __must_check int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) -{ - struct intel_ring_buffer *ring = obj->ring; - u32 seqno; - int ret; - - seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; - if (seqno == 0) - return 0; - - ret = i915_wait_seqno(ring, seqno); - if (ret) - return ret; - - return i915_gem_object_wait_rendering__tail(obj, ring); -} - -/* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. - */ -static __must_check int -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - bool readonly) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring = obj->ring; - unsigned reset_counter; - u32 seqno; - int ret; - - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!dev_priv->mm.interruptible); - - seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; - if (seqno == 0) - return 0; - - ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); - if (ret) - return ret; - - ret = i915_gem_check_olr(ring, seqno); - if (ret) - return ret; - - reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); - mutex_unlock(&dev->struct_mutex); - ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); - mutex_lock(&dev->struct_mutex); - if (ret) - return ret; - - return i915_gem_object_wait_rendering__tail(obj, ring); -} - -/** - * Called when user space prepares to use an object with the CPU, either - * through the mmap ioctl's mapping or a GTT mapping. - */ -int -i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_set_domain *args = data; - struct drm_i915_gem_object *obj; - uint32_t read_domains = args->read_domains; - uint32_t write_domain = args->write_domain; - int ret; - - /* Only handle setting domains to types used by the CPU. */ - if (write_domain & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - if (read_domains & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - /* Having something in the write domain implies it's in the read - * domain, and only that read domain. Enforce that in the request. - */ - if (write_domain != 0 && read_domains != write_domain) - return -EINVAL; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - /* Try to flush the object off the GPU without holding the lock. - * We will repeat the flush holding the lock in the normal manner - * to catch cases where we are gazumped. - */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); - if (ret) - goto unref; - - if (read_domains & I915_GEM_DOMAIN_GTT) { - ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); - - /* Silently promote "you're not bound, there was nothing to do" - * to success, since the client was just asking us to - * make sure everything was done. + * textures). Fallback to the shmem path in that case. 
*/ - if (ret == -EINVAL) - ret = 0; - } else { - ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + ret = i915_gem_gtt_pwrite_fast(obj, args); + + if (ret == -EFAULT || ret == -ENOSPC) { + if (i915_gem_object_has_struct_page(obj)) + ret = i915_gem_shmem_pwrite(obj, args); } -unref: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); +err: + i915_gem_object_put(obj); return ret; } /** - * Called when user space has done writes to this buffer + * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer + * @dev: drm device + * @data: ioctl data blob + * @file: drm file */ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, @@ -1242,2582 +823,216 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int ret = 0; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); - - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} -/** - * Maps the contents of an object, returning the address it is mapped - * into. - * - * While the mapping holds a reference on the contents of the object, it doesn't - * imply a ref on the object itself. - */ -int -i915_gem_mmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_mmap *args = data; - struct drm_gem_object *obj; - unsigned long addr; - - obj = drm_gem_object_lookup(dev, file, args->handle); - if (obj == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; - /* prime objects have no backing filp to GEM mmap - * pages from. - */ - if (!obj->filp) { - drm_gem_object_unreference_unlocked(obj); - return -EINVAL; - } - - addr = vm_mmap(obj->filp, 0, args->size, - PROT_READ | PROT_WRITE, MAP_SHARED, - args->offset); - drm_gem_object_unreference_unlocked(obj); - if (IS_ERR((void *)addr)) - return addr; - - args->addr_ptr = (uint64_t) addr; - - return 0; -} - -/** - * i915_gem_fault - fault a page into the GTT - * vma: VMA in question - * vmf: fault info - * - * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped - * from userspace. The fault handler takes care of binding the object to - * the GTT (if needed), allocating and programming a fence register (again, - * only if needed based on whether the old reg is still valid or the object - * is tiled) and inserting a new PTE into the faulting process. - * - * Note that the faulting process may involve evicting existing objects - * from the GTT and/or fence registers to make room. So performance may - * suffer if the GTT working set is large or there are few fence registers - * left. 
- */ -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; - pgoff_t page_offset; - unsigned long pfn; - int ret = 0; - bool write = !!(vmf->flags & FAULT_FLAG_WRITE); - - /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> - PAGE_SHIFT; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto out; - - trace_i915_gem_object_fault(obj, page_offset, true, write); - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { - ret = -EINVAL; - goto unlock; - } - - /* Now bind it into the GTT if needed */ - ret = i915_gem_object_pin(obj, 0, true, false); - if (ret) - goto unlock; - - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) - goto unpin; - - ret = i915_gem_object_get_fence(obj); - if (ret) - goto unpin; - - obj->fault_mappable = true; - - pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) + - page_offset; - - /* Finally, remap it using the new GTT offset */ - ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); -unpin: - i915_gem_object_unpin(obj); -unlock: - mutex_unlock(&dev->struct_mutex); -out: - switch (ret) { - case -EIO: - /* If this -EIO is due to a gpu hang, give the reset code a - * chance to clean up the mess. Otherwise return the proper - * SIGBUS. */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) - return VM_FAULT_SIGBUS; - case -EAGAIN: - /* Give the error handler a chance to run and move the - * objects off the GPU active list. Next time we service the - * fault, we should be able to transition the page into the - * GTT without touching the GPU (and so avoid further - * EIO/EGAIN). If the GPU is wedged, then there is no issue - * with coherency, just lost writes. - */ - set_need_resched(); - case 0: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - /* - * EBUSY is ok: this just means that another thread - * already did the job. - */ - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); - return VM_FAULT_SIGBUS; - } -} - -/** - * i915_gem_release_mmap - remove physical page mappings - * @obj: obj in question - * - * Preserve the reservation of the mmapping with the DRM core code, but - * relinquish ownership of the pages back to the system. - * - * It is vital that we remove the page mapping if we have mapped a tiled - * object through the GTT and then lose the fence register due to - * resource pressure. Similarly if the object has been moved out of the - * aperture, than pages mapped into userspace must be revoked. Removing the - * mapping will then trigger a page fault on the next user access, allowing - * fixup by i915_gem_fault(). 
- */ -void -i915_gem_release_mmap(struct drm_i915_gem_object *obj) -{ - if (!obj->fault_mappable) - return; - - if (obj->base.dev->dev_mapping) - unmap_mapping_range(obj->base.dev->dev_mapping, - (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, - obj->base.size, 1); - - obj->fault_mappable = false; -} - -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) -{ - uint32_t gtt_size; - - if (INTEL_INFO(dev)->gen >= 4 || - tiling_mode == I915_TILING_NONE) - return size; - - /* Previous chips need a power-of-two fence region when tiling */ - if (INTEL_INFO(dev)->gen == 3) - gtt_size = 1024*1024; - else - gtt_size = 512*1024; - - while (gtt_size < size) - gtt_size <<= 1; - - return gtt_size; -} - -/** - * i915_gem_get_gtt_alignment - return required GTT alignment for an object - * @obj: object to check - * - * Return the required GTT alignment for an object, taking into account - * potential fence register mapping. - */ -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced) -{ /* - * Minimum alignment is 4k (GTT page size), but might be greater - * if a fence register is needed for the object. - */ - if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || - tiling_mode == I915_TILING_NONE) - return 4096; - - /* - * Previous chips need to be aligned to the size of the smallest - * fence register that can contain the object. - */ - return i915_gem_get_gtt_size(dev, size, tiling_mode); -} - -static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - int ret; - - if (obj->base.map_list.map) - return 0; - - dev_priv->mm.shrinker_no_lock_stealing = true; - - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; - - /* Badly fragmented mmap space? The only way we can recover - * space is by destroying unwanted objects. We can't randomly release - * mmap_offsets as userspace expects them to be persistent for the - * lifetime of the objects. The closest we can is to release the - * offsets on purgeable objects by truncating it and marking it purged, - * which prevents userspace from ever using that object again. 
- */ - i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; - - i915_gem_shrink_all(dev_priv); - ret = drm_gem_create_mmap_offset(&obj->base); -out: - dev_priv->mm.shrinker_no_lock_stealing = false; - - return ret; -} - -static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) -{ - if (!obj->base.map_list.map) - return; - - drm_gem_free_mmap_offset(&obj->base); -} - -int -i915_gem_mmap_gtt(struct drm_file *file, - struct drm_device *dev, - uint32_t handle, - uint64_t *offset) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *obj; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - if (obj->base.size > dev_priv->gtt.mappable_end) { - ret = -E2BIG; - goto out; - } - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_ERROR("Attempting to mmap a purgeable buffer\n"); - ret = -EINVAL; - goto out; - } - - ret = i915_gem_object_create_mmap_offset(obj); - if (ret) - goto out; - - *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; - -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -/** - * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing - * @dev: DRM device - * @data: GTT mapping ioctl data - * @file: GEM object info - * - * Simply returns the fake offset to userspace so it can mmap it. - * The mmap call will end up in drm_gem_mmap(), which will set things - * up so we can get faults in the handler above. - * - * The fault handler will take care of binding the object into the GTT - * (since it may have been evicted to make room for something), allocating - * a fence register, and mapping the appropriate aperture address into - * userspace. - */ -int -i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_mmap_gtt *args = data; - - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); -} - -/* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_i915_gem_object *obj) -{ - struct inode *inode; - - i915_gem_object_free_mmap_offset(obj); - - if (obj->base.filp == NULL) - return; - - /* Our goal here is to return as much of the memory as - * is possible back to the system as we are called from OOM. - * To do this we must instruct the shmfs to drop all of its - * backing pages, *now*. + * Proxy objects are barred from CPU access, so there is no + * need to ban sw_finish as it is a nop. */ - inode = file_inode(obj->base.filp); - shmem_truncate_range(inode, 0, (loff_t)-1); - - obj->madv = __I915_MADV_PURGED; -} - -static inline int -i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) -{ - return obj->madv == I915_MADV_DONTNEED; -} - -static void -i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) -{ - struct sg_page_iter sg_iter; - int ret; - - BUG_ON(obj->madv == __I915_MADV_PURGED); - - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) { - /* In the event of a disaster, abandon all caches and - * hope for the best. 
- */ - WARN_ON(ret != -EIO); - i915_gem_clflush_object(obj); - obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } - - if (i915_gem_object_needs_bit17_swizzle(obj)) - i915_gem_object_save_bit_17_swizzle(obj); - - if (obj->madv == I915_MADV_DONTNEED) - obj->dirty = 0; - - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { - struct page *page = sg_page_iter_page(&sg_iter); - - if (obj->dirty) - set_page_dirty(page); - - if (obj->madv == I915_MADV_WILLNEED) - mark_page_accessed(page); - - page_cache_release(page); - } - obj->dirty = 0; - - sg_free_table(obj->pages); - kfree(obj->pages); -} - -int -i915_gem_object_put_pages(struct drm_i915_gem_object *obj) -{ - const struct drm_i915_gem_object_ops *ops = obj->ops; - - if (obj->pages == NULL) - return 0; - - BUG_ON(obj->gtt_space); - - if (obj->pages_pin_count) - return -EBUSY; - - /* ->put_pages might need to allocate memory for the bit17 swizzle - * array, hence protect them from being reaped by removing them from gtt - * lists early. */ - list_del(&obj->global_list); - - ops->put_pages(obj); - obj->pages = NULL; - - if (i915_gem_object_is_purgeable(obj)) - i915_gem_object_truncate(obj); - - return 0; -} - -static long -__i915_gem_shrink(struct drm_i915_private *dev_priv, long target, - bool purgeable_only) -{ - struct drm_i915_gem_object *obj, *next; - long count = 0; - - list_for_each_entry_safe(obj, next, - &dev_priv->mm.unbound_list, - global_list) { - if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && - i915_gem_object_put_pages(obj) == 0) { - count += obj->base.size >> PAGE_SHIFT; - if (count >= target) - return count; - } - } - - list_for_each_entry_safe(obj, next, - &dev_priv->mm.inactive_list, - mm_list) { - if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && - i915_gem_object_unbind(obj) == 0 && - i915_gem_object_put_pages(obj) == 0) { - count += obj->base.size >> PAGE_SHIFT; - if (count >= target) - return count; - } - } - - return count; -} - -static long -i915_gem_purge(struct drm_i915_private *dev_priv, long target) -{ - return __i915_gem_shrink(dev_priv, target, true); -} - -static void -i915_gem_shrink_all(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *obj, *next; - - i915_gem_evict_everything(dev_priv->dev); - - list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, - global_list) - i915_gem_object_put_pages(obj); -} -static int -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - int page_count, i; - struct address_space *mapping; - struct sg_table *st; - struct scatterlist *sg; - struct sg_page_iter sg_iter; - struct page *page; - unsigned long last_pfn = 0; /* suppress gcc warning */ - gfp_t gfp; - - /* Assert that the object is not currently in any GPU domain. As it - * wasn't in the GTT, there shouldn't be any way it could have been in - * a GPU cache - */ - BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); - BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (st == NULL) - return -ENOMEM; - - page_count = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, page_count, GFP_KERNEL)) { - sg_free_table(st); - kfree(st); - return -ENOMEM; - } - - /* Get the list of pages out of our struct file. They'll be pinned - * at this point until we release them. 
- * - * Fail silently without starting the shrinker - */ - mapping = file_inode(obj->base.filp)->i_mapping; - gfp = mapping_gfp_mask(mapping); - gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; - gfp &= ~(__GFP_IO | __GFP_WAIT); - sg = st->sgl; - st->nents = 0; - for (i = 0; i < page_count; i++) { - page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (IS_ERR(page)) { - i915_gem_purge(dev_priv, page_count); - page = shmem_read_mapping_page_gfp(mapping, i, gfp); - } - if (IS_ERR(page)) { - /* We've tried hard to allocate the memory by reaping - * our own buffer, now let the real VM do its job and - * go down in flames if truly OOM. - */ - gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); - gfp |= __GFP_IO | __GFP_WAIT; - - i915_gem_shrink_all(dev_priv); - page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (IS_ERR(page)) - goto err_pages; - - gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; - gfp &= ~(__GFP_IO | __GFP_WAIT); - } -#ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) { - st->nents++; - sg_set_page(sg, page, PAGE_SIZE, 0); - sg = sg_next(sg); - continue; - } -#endif - if (!i || page_to_pfn(page) != last_pfn + 1) { - if (i) - sg = sg_next(sg); - st->nents++; - sg_set_page(sg, page, PAGE_SIZE, 0); - } else { - sg->length += PAGE_SIZE; - } - last_pfn = page_to_pfn(page); - } -#ifdef CONFIG_SWIOTLB - if (!swiotlb_nr_tbl()) -#endif - sg_mark_end(sg); - obj->pages = st; - - if (i915_gem_object_needs_bit17_swizzle(obj)) - i915_gem_object_do_bit_17_swizzle(obj); - - return 0; - -err_pages: - sg_mark_end(sg); - for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) - page_cache_release(sg_page_iter_page(&sg_iter)); - sg_free_table(st); - kfree(st); - return PTR_ERR(page); -} - -/* Ensure that the associated pages are gathered from the backing storage - * and pinned into our object. i915_gem_object_get_pages() may be called - * multiple times before they are released by a single call to - * i915_gem_object_put_pages() - once the pages are no longer referenced - * either as a result of memory pressure (reaping pages under the shrinker) - * or as the object is itself released. - */ -int -i915_gem_object_get_pages(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - const struct drm_i915_gem_object_ops *ops = obj->ops; - int ret; - - if (obj->pages) - return 0; - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_ERROR("Attempting to obtain a purgeable object\n"); - return -EINVAL; - } - - BUG_ON(obj->pages_pin_count); - - ret = ops->get_pages(obj); - if (ret) - return ret; - - list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); - return 0; -} - -void -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - u32 seqno = intel_ring_get_seqno(ring); - - BUG_ON(ring == NULL); - obj->ring = ring; - - /* Add a reference if we're newly entering the active list. */ - if (!obj->active) { - drm_gem_object_reference(&obj->base); - obj->active = 1; - } - - /* Move from whatever list we were on to the tail of execution. 
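A quick illustration of the scatterlist coalescing in the page-gathering loop above: runs of physically contiguous pages (consecutive PFNs) collapse into a single, longer segment, so the number of populated entries can be much smaller than the page count. A toy, userspace-compilable model of just that decision, not the driver code:

#include <stdint.h>
#include <stdio.h>

/*
 * Toy model of the coalescing in i915_gem_object_get_pages_gtt(): start a
 * new segment only when the next page is not physically adjacent to the
 * previous one; otherwise the previous segment simply grows by PAGE_SIZE.
 */
int main(void)
{
        uint64_t pfns[] = { 100, 101, 102, 200, 300, 301 };
        size_t npages = sizeof(pfns) / sizeof(pfns[0]);
        unsigned int i, nents = 0;

        for (i = 0; i < npages; i++) {
                if (i == 0 || pfns[i] != pfns[i - 1] + 1)
                        nents++;        /* start a new segment */
                /* else: previous segment grows by PAGE_SIZE */
        }

        printf("%u segments for %zu pages\n", nents, npages);
        return 0;
}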
*/ - list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); - list_move_tail(&obj->ring_list, &ring->active_list); - - obj->last_read_seqno = seqno; - - if (obj->fenced_gpu_access) { - obj->last_fenced_seqno = seqno; - - /* Bump MRU to take account of the delayed flush */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_fence_reg *reg; - - reg = &dev_priv->fence_regs[obj->fence_reg]; - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); - } - } -} - -static void -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); - BUG_ON(!obj->active); - - list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - - list_del_init(&obj->ring_list); - obj->ring = NULL; - - obj->last_read_seqno = 0; - obj->last_write_seqno = 0; - obj->base.write_domain = 0; - - obj->last_fenced_seqno = 0; - obj->fenced_gpu_access = false; - - obj->active = 0; - drm_gem_object_unreference(&obj->base); - - WARN_ON(i915_verify_lists(dev)); -} - -static int -i915_gem_init_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring; - int ret, i, j; - - /* Carefully retire all requests without writing to the rings */ - for_each_ring(ring, dev_priv, i) { - ret = intel_ring_idle(ring); - if (ret) - return ret; - } - i915_gem_retire_requests(dev); - - /* Finally reset hw state */ - for_each_ring(ring, dev_priv, i) { - intel_ring_init_seqno(ring, seqno); - - for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) - ring->sync_seqno[j] = 0; - } - - return 0; -} - -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int ret; - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev, seqno - 1); - if (ret) - return ret; - - /* Carefully set the last_seqno value so that wrap - * detection still works - */ - dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - - return 0; -} - -int -i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* reserve 0 for non-seqno */ - if (dev_priv->next_seqno == 0) { - int ret = i915_gem_init_seqno(dev, 0); - if (ret) - return ret; - - dev_priv->next_seqno = 1; - } - - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; - return 0; -} - -int __i915_add_request(struct intel_ring_buffer *ring, - struct drm_file *file, - struct drm_i915_gem_object *obj, - u32 *out_seqno) -{ - drm_i915_private_t *dev_priv = ring->dev->dev_private; - struct drm_i915_gem_request *request; - u32 request_ring_position, request_start; - int was_empty; - int ret; - - request_start = intel_ring_get_tail(ring); - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. 
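The seqno bookkeeping above (seeding last_seqno with seqno - 1 and never handing out 0) only holds together because every comparison goes through a wrap-safe helper. The driver's i915_seqno_passed() is assumed to have roughly the shape below; the point is the signed-difference trick, which keeps working across 32-bit wraparound as long as the two values are within 2^31 of each other:

#include <stdint.h>
#include <stdbool.h>

/*
 * Wrap-safe "has seq1 passed seq2?" check: treat the unsigned difference
 * as signed, so the comparison survives the 32-bit seqno wrapping.
 */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;
}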
- */ - ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; - - request = kmalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) - return -ENOMEM; - - - /* Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - request_ring_position = intel_ring_get_tail(ring); - - ret = ring->add_request(ring); - if (ret) { - kfree(request); - return ret; - } - - request->seqno = intel_ring_get_seqno(ring); - request->ring = ring; - request->head = request_start; - request->tail = request_ring_position; - request->ctx = ring->last_context; - request->batch_obj = obj; - - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - - if (request->ctx) - i915_gem_context_reference(request->ctx); - - request->emitted_jiffies = jiffies; - was_empty = list_empty(&ring->request_list); - list_add_tail(&request->list, &ring->request_list); - request->file_priv = NULL; - - if (file) { - struct drm_i915_file_private *file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - request->file_priv = file_priv; - list_add_tail(&request->client_list, - &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - } - - trace_i915_gem_request_add(ring, request->seqno); - ring->outstanding_lazy_request = 0; - - if (!dev_priv->mm.suspended) { - if (i915_enable_hangcheck) { - mod_timer(&dev_priv->gpu_error.hangcheck_timer, - round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); - } - if (was_empty) { - queue_delayed_work(dev_priv->wq, - &dev_priv->mm.retire_work, - round_jiffies_up_relative(HZ)); - intel_mark_busy(dev_priv->dev); - } - } + /* Pinned buffers may be scanout, so flush the cache */ + i915_gem_object_flush_if_display(obj); + i915_gem_object_put(obj); - if (out_seqno) - *out_seqno = request->seqno; return 0; } -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - if (request->file_priv) { - list_del(&request->client_list); - request->file_priv = NULL; - } - spin_unlock(&file_priv->mm.lock); -} - -static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj) -{ - if (acthd >= obj->gtt_offset && - acthd < obj->gtt_offset + obj->base.size) - return true; - - return false; -} - -static bool i915_head_inside_request(const u32 acthd_unmasked, - const u32 request_start, - const u32 request_end) -{ - const u32 acthd = acthd_unmasked & HEAD_ADDR; - - if (request_start < request_end) { - if (acthd >= request_start && acthd < request_end) - return true; - } else if (request_start > request_end) { - if (acthd >= request_start || acthd < request_end) - return true; - } - - return false; -} - -static bool i915_request_guilty(struct drm_i915_gem_request *request, - const u32 acthd, bool *inside) -{ - /* There is a possibility that unmasked head address - * pointing inside the ring, matches the batch_obj address range. - * However this is extremely unlikely. 
- */ - - if (request->batch_obj) { - if (i915_head_inside_object(acthd, request->batch_obj)) { - *inside = true; - return true; - } - } - - if (i915_head_inside_request(acthd, request->head, request->tail)) { - *inside = false; - return true; - } - - return false; -} - -static void i915_set_reset_status(struct intel_ring_buffer *ring, - struct drm_i915_gem_request *request, - u32 acthd) -{ - struct i915_ctx_hang_stats *hs = NULL; - bool inside, guilty; - - /* Innocent until proven guilty */ - guilty = false; - - if (ring->hangcheck.action != wait && - i915_request_guilty(request, acthd, &inside)) { - DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n", - ring->name, - inside ? "inside" : "flushing", - request->batch_obj ? - request->batch_obj->gtt_offset : 0, - request->ctx ? request->ctx->id : 0, - acthd); - - guilty = true; - } - - /* If contexts are disabled or this is the default context, use - * file_priv->reset_state - */ - if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) - hs = &request->ctx->hang_stats; - else if (request->file_priv) - hs = &request->file_priv->hang_stats; - - if (hs) { - if (guilty) - hs->batch_active++; - else - hs->batch_pending++; - } -} - -static void i915_gem_free_request(struct drm_i915_gem_request *request) -{ - list_del(&request->list); - i915_gem_request_remove_from_client(request); - - if (request->ctx) - i915_gem_context_unreference(request->ctx); - - kfree(request); -} - -static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, - struct intel_ring_buffer *ring) -{ - u32 completed_seqno; - u32 acthd; - - acthd = intel_ring_get_active_head(ring); - completed_seqno = ring->get_seqno(ring, false); - - while (!list_empty(&ring->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&ring->request_list, - struct drm_i915_gem_request, - list); - - if (request->seqno > completed_seqno) - i915_set_reset_status(ring, request, acthd); - - i915_gem_free_request(request); - } - - while (!list_empty(&ring->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&ring->active_list, - struct drm_i915_gem_object, - ring_list); - - i915_gem_object_move_to_inactive(obj); - } -} - -void i915_gem_restore_fences(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - i915_gem_write_fence(dev, i, reg->obj); - } -} - -void i915_gem_reset(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *obj; - struct intel_ring_buffer *ring; - int i; - - for_each_ring(ring, dev_priv, i) - i915_gem_reset_ring_lists(dev_priv, ring); - - /* Move everything out of the GPU domains to ensure we do any - * necessary invalidation upon reuse. - */ - list_for_each_entry(obj, - &dev_priv->mm.inactive_list, - mm_list) - { - obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; - } - - i915_gem_restore_fences(dev); -} - -/** - * This function clears the request list as sequence numbers are passed. 
- */ -void -i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) -{ - uint32_t seqno; - - if (list_empty(&ring->request_list)) - return; - - WARN_ON(i915_verify_lists(ring->dev)); - - seqno = ring->get_seqno(ring, true); - - while (!list_empty(&ring->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&ring->request_list, - struct drm_i915_gem_request, - list); - - if (!i915_seqno_passed(seqno, request->seqno)) - break; - - trace_i915_gem_request_retire(ring, request->seqno); - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - */ - ring->last_retired_head = request->tail; - - i915_gem_free_request(request); - } - - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate. - */ - while (!list_empty(&ring->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&ring->active_list, - struct drm_i915_gem_object, - ring_list); - - if (!i915_seqno_passed(seqno, obj->last_read_seqno)) - break; - - i915_gem_object_move_to_inactive(obj); - } - - if (unlikely(ring->trace_irq_seqno && - i915_seqno_passed(seqno, ring->trace_irq_seqno))) { - ring->irq_put(ring); - ring->trace_irq_seqno = 0; - } - - WARN_ON(i915_verify_lists(ring->dev)); -} - -void -i915_gem_retire_requests(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring; - int i; - - for_each_ring(ring, dev_priv, i) - i915_gem_retire_requests_ring(ring); -} - -static void -i915_gem_retire_work_handler(struct work_struct *work) +void i915_gem_runtime_suspend(struct drm_i915_private *i915) { - drm_i915_private_t *dev_priv; - struct drm_device *dev; - struct intel_ring_buffer *ring; - bool idle; + struct drm_i915_gem_object *obj, *on; int i; - dev_priv = container_of(work, drm_i915_private_t, - mm.retire_work.work); - dev = dev_priv->dev; - - /* Come back later if the device is busy... */ - if (!mutex_trylock(&dev->struct_mutex)) { - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, - round_jiffies_up_relative(HZ)); - return; - } - - i915_gem_retire_requests(dev); - - /* Send a periodic flush down the ring so we don't hold onto GEM - * objects indefinitely. + /* + * Only called during RPM suspend. All users of the userfault_list + * must be holding an RPM wakeref to ensure that this can not + * run concurrently with themselves. */ - idle = true; - for_each_ring(ring, dev_priv, i) { - if (ring->gpu_caches_dirty) - i915_add_request(ring, NULL); - - idle &= list_empty(&ring->request_list); - } - - if (!dev_priv->mm.suspended && !idle) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, - round_jiffies_up_relative(HZ)); - if (idle) - intel_mark_idle(dev); - - mutex_unlock(&dev->struct_mutex); -} - -/** - * Ensures that an object will eventually get non-busy by flushing any required - * write domains, emitting any outstanding lazy request and retiring and - * completed requests. 
- */ -static int -i915_gem_object_flush_active(struct drm_i915_gem_object *obj) -{ - int ret; - - if (obj->active) { - ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); - if (ret) - return ret; - - i915_gem_retire_requests_ring(obj->ring); - } - - return 0; -} - -/** - * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT - * @DRM_IOCTL_ARGS: standard ioctl arguments - * - * Returns 0 if successful, else an error is returned with the remaining time in - * the timeout parameter. - * -ETIME: object is still busy after timeout - * -ERESTARTSYS: signal interrupted the wait - * -ENONENT: object doesn't exist - * Also possible, but rare: - * -EAGAIN: GPU wedged - * -ENOMEM: damn - * -ENODEV: Internal IRQ fail - * -E?: The add request failed - * - * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any - * non-zero timeout parameter the wait ioctl will wait for the given number of - * nanoseconds on an object becoming unbusy. Since the wait itself does so - * without holding struct_mutex the object may become re-busied before this - * function completes. A similar but shorter * race condition exists in the busy - * ioctl - */ -int -i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_wait *args = data; - struct drm_i915_gem_object *obj; - struct intel_ring_buffer *ring = NULL; - struct timespec timeout_stack, *timeout = NULL; - unsigned reset_counter; - u32 seqno = 0; - int ret = 0; - - if (args->timeout_ns >= 0) { - timeout_stack = ns_to_timespec(args->timeout_ns); - timeout = &timeout_stack; - } - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); - if (&obj->base == NULL) { - mutex_unlock(&dev->struct_mutex); - return -ENOENT; - } - /* Need to make sure the object gets inactive eventually. */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto out; + list_for_each_entry_safe(obj, on, + &to_gt(i915)->ggtt->userfault_list, userfault_link) + __i915_gem_object_release_mmap_gtt(obj); - if (obj->active) { - seqno = obj->last_read_seqno; - ring = obj->ring; - } - - if (seqno == 0) - goto out; + list_for_each_entry_safe(obj, on, + &i915->runtime_pm.lmem_userfault_list, userfault_link) + i915_gem_object_runtime_pm_release_mmap_offset(obj); - /* Do this after OLR check to make sure we make forward progress polling - * on this IOCTL with a 0 timeout (like busy ioctl) + /* + * The fence will be lost when the device powers down. If any were + * in use by hardware (i.e. they are pinned), we should not be powering + * down! All other fences will be reacquired by the user upon waking. */ - if (!args->timeout_ns) { - ret = -ETIME; - goto out; - } - - drm_gem_object_unreference(&obj->base); - reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); - mutex_unlock(&dev->struct_mutex); + for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) { + struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i]; - ret = __wait_seqno(ring, seqno, reset_counter, true, timeout); - if (timeout) - args->timeout_ns = timespec_to_ns(timeout); - return ret; - -out: - drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return ret; -} - -/** - * i915_gem_object_sync - sync an object to a ring. - * - * @obj: object which may be in use on another ring. - * @to: ring we wish to use the object on. May be NULL. 
- * - * This code is meant to abstract object synchronization with the GPU. - * Calling with NULL implies synchronizing the object with the CPU - * rather than a particular GPU ring. - * - * Returns 0 if successful, else propagates up the lower layer error. - */ -int -i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *to) -{ - struct intel_ring_buffer *from = obj->ring; - u32 seqno; - int ret, idx; - - if (from == NULL || to == from) - return 0; - - if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) - return i915_gem_object_wait_rendering(obj, false); - - idx = intel_ring_sync_index(from, to); - - seqno = obj->last_read_seqno; - if (seqno <= from->sync_seqno[idx]) - return 0; - - ret = i915_gem_check_olr(obj->ring, seqno); - if (ret) - return ret; - - ret = to->sync_to(to, from, seqno); - if (!ret) - /* We use last_read_seqno because sync_to() - * might have just caused seqno wrap under - * the radar. + /* + * Ideally we want to assert that the fence register is not + * live at this point (i.e. that no piece of code will be + * trying to write through fence + GTT, as that both violates + * our tracking of activity and associated locking/barriers, + * but also is illegal given that the hw is powered down). + * + * Previously we used reg->pin_count as a "liveness" indicator. + * That is not sufficient, and we need a more fine-grained + * tool if we want to have a sanity check here. */ - from->sync_seqno[idx] = obj->last_read_seqno; - - return ret; -} - -static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) -{ - u32 old_write_domain, old_read_domains; - - /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); - - if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) - return; - - /* Wait for any direct GTT access to complete */ - mb(); - - old_read_domains = obj->base.read_domains; - old_write_domain = obj->base.write_domain; - - obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; - obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); -} - -/** - * Unbinds an object from the GTT aperture. - */ -int -i915_gem_object_unbind(struct drm_i915_gem_object *obj) -{ - drm_i915_private_t *dev_priv = obj->base.dev->dev_private; - int ret; - - if (obj->gtt_space == NULL) - return 0; - - if (obj->pin_count) - return -EBUSY; - - BUG_ON(obj->pages == NULL); - - ret = i915_gem_object_finish_gpu(obj); - if (ret) - return ret; - /* Continue on if we fail due to EIO, the GPU is hung so we - * should be safe and we need to cleanup or else we might - * cause memory corruption through use-after-free. - */ - - i915_gem_object_finish_gtt(obj); - - /* release the fence reg _after_ flushing */ - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; - - trace_i915_gem_object_unbind(obj); - - if (obj->has_global_gtt_mapping) - i915_gem_gtt_unbind_object(obj); - if (obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); - obj->has_aliasing_ppgtt_mapping = 0; - } - i915_gem_gtt_finish_object(obj); - i915_gem_object_unpin_pages(obj); - - list_del(&obj->mm_list); - list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); - /* Avoid an unnecessary call to unbind on rebind. 
*/ - obj->map_and_fenceable = true; - - drm_mm_put_block(obj->gtt_space); - obj->gtt_space = NULL; - obj->gtt_offset = 0; - - return 0; -} - -int i915_gpu_idle(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring; - int ret, i; - - /* Flush everything onto the inactive list. */ - for_each_ring(ring, dev_priv, i) { - ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); - if (ret) - return ret; - - ret = intel_ring_idle(ring); - if (ret) - return ret; - } - - return 0; -} - -static void i965_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - int fence_reg; - int fence_pitch_shift; - uint64_t val; - - if (INTEL_INFO(dev)->gen >= 6) { - fence_reg = FENCE_REG_SANDYBRIDGE_0; - fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; - } else { - fence_reg = FENCE_REG_965_0; - fence_pitch_shift = I965_FENCE_PITCH_SHIFT; - } - - if (obj) { - u32 size = obj->gtt_space->size; - - val = (uint64_t)((obj->gtt_offset + size - 4096) & - 0xfffff000) << 32; - val |= obj->gtt_offset & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I965_FENCE_TILING_Y_SHIFT; - val |= I965_FENCE_REG_VALID; - } else - val = 0; - - fence_reg += reg * 8; - I915_WRITE64(fence_reg, val); - POSTING_READ(fence_reg); -} - -static void i915_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - u32 val; - - if (obj) { - u32 size = obj->gtt_space->size; - int pitch_val; - int tile_width; - - WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || - (size & -size) != size || - (obj->gtt_offset & (size - 1)), - "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", - obj->gtt_offset, obj->map_and_fenceable, size); - - if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) - tile_width = 128; - else - tile_width = 512; - - /* Note: pitch better be a power of two tile widths */ - pitch_val = obj->stride / tile_width; - pitch_val = ffs(pitch_val) - 1; - - val = obj->gtt_offset; - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I915_FENCE_SIZE_BITS(size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } else - val = 0; - - if (reg < 8) - reg = FENCE_REG_830_0 + reg * 4; - else - reg = FENCE_REG_945_8 + (reg - 8) * 4; - - I915_WRITE(reg, val); - POSTING_READ(reg); -} - -static void i830_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - uint32_t val; - - if (obj) { - u32 size = obj->gtt_space->size; - uint32_t pitch_val; - - WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || - (size & -size) != size || - (obj->gtt_offset & (size - 1)), - "object 0x%08x not 512K or pot-size 0x%08x aligned\n", - obj->gtt_offset, size); - - pitch_val = obj->stride / 128; - pitch_val = ffs(pitch_val) - 1; - - val = obj->gtt_offset; - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I830_FENCE_SIZE_BITS(size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } else - val = 0; - - I915_WRITE(FENCE_REG_830_0 + reg * 4, val); - POSTING_READ(FENCE_REG_830_0 + reg * 4); -} - -inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) -{ - return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; -} - -static void i915_gem_write_fence(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. - */ - if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) - mb(); - - switch (INTEL_INFO(dev)->gen) { - case 7: - case 6: - case 5: - case 4: i965_write_fence_reg(dev, reg, obj); break; - case 3: i915_write_fence_reg(dev, reg, obj); break; - case 2: i830_write_fence_reg(dev, reg, obj); break; - default: BUG(); - } - - /* And similarly be paranoid that no direct access to this region - * is reordered to before the fence is installed. - */ - if (i915_gem_object_needs_mb(obj)) - mb(); -} - -static inline int fence_number(struct drm_i915_private *dev_priv, - struct drm_i915_fence_reg *fence) -{ - return fence - dev_priv->fence_regs; -} - -struct write_fence { - struct drm_device *dev; - struct drm_i915_gem_object *obj; - int fence; -}; - -static void i915_gem_write_fence__ipi(void *data) -{ - struct write_fence *args = data; - - /* Required for SNB+ with LLC */ - wbinvd(); - - /* Required for VLV */ - i915_gem_write_fence(args->dev, args->fence, args->obj); -} - -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct write_fence args = { - .dev = obj->base.dev, - .fence = fence_number(dev_priv, fence), - .obj = enable ? obj : NULL, - }; - - /* In order to fully serialize access to the fenced region and - * the update to the fence register we need to take extreme - * measures on SNB+. 
In theory, the write to the fence register - * flushes all memory transactions before, and coupled with the - * mb() placed around the register write we serialise all memory - * operations with respect to the changes in the tiler. Yet, on - * SNB+ we need to take a step further and emit an explicit wbinvd() - * on each processor in order to manually flush all memory - * transactions before updating the fence register. - * - * However, Valleyview complicates matter. There the wbinvd is - * insufficient and unlike SNB/IVB requires the serialising - * register write. (Note that that register write by itself is - * conversely not sufficient for SNB+.) To compromise, we do both. - */ - if (INTEL_INFO(args.dev)->gen >= 6) - on_each_cpu(i915_gem_write_fence__ipi, &args, 1); - else - i915_gem_write_fence(args.dev, args.fence, args.obj); - - if (enable) { - obj->fence_reg = args.fence; - fence->obj = obj; - list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); - } else { - obj->fence_reg = I915_FENCE_REG_NONE; - fence->obj = NULL; - list_del_init(&fence->lru_list); - } -} - -static int -i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) -{ - if (obj->last_fenced_seqno) { - int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); - if (ret) - return ret; - - obj->last_fenced_seqno = 0; - } - - obj->fenced_gpu_access = false; - return 0; -} - -int -i915_gem_object_put_fence(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct drm_i915_fence_reg *fence; - int ret; - - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - if (obj->fence_reg == I915_FENCE_REG_NONE) - return 0; - - fence = &dev_priv->fence_regs[obj->fence_reg]; - - i915_gem_object_fence_lost(obj); - i915_gem_object_update_fence(obj, fence, false); - - return 0; -} - -static struct drm_i915_fence_reg * -i915_find_fence_reg(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_fence_reg *reg, *avail; - int i; - - /* First try to find a free reg */ - avail = NULL; - for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { - reg = &dev_priv->fence_regs[i]; - if (!reg->obj) - return reg; - - if (!reg->pin_count) - avail = reg; - } - - if (avail == NULL) - return NULL; - - /* None available, try to steal one or wait for a user to finish */ - list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->pin_count) - continue; - - return reg; - } - - return NULL; -} - -/** - * i915_gem_object_get_fence - set up fencing for an object - * @obj: object to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. - * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - */ -int -i915_gem_object_get_fence(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - bool enable = obj->tiling_mode != I915_TILING_NONE; - struct drm_i915_fence_reg *reg; - int ret; - - /* Have we updated the tiling parameters upon the object and so - * will need to serialise the write to the associated fence register? 
- */ - if (obj->fence_dirty) { - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - } - - /* Just update our place in the LRU if our fence is getting reused. */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - reg = &dev_priv->fence_regs[obj->fence_reg]; - if (!obj->fence_dirty) { - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); - return 0; - } - } else if (enable) { - reg = i915_find_fence_reg(dev); - if (reg == NULL) - return -EDEADLK; - - if (reg->obj) { - struct drm_i915_gem_object *old = reg->obj; - - ret = i915_gem_object_wait_fence(old); - if (ret) - return ret; - - i915_gem_object_fence_lost(old); - } - } else - return 0; - - i915_gem_object_update_fence(obj, reg, enable); - obj->fence_dirty = false; - - return 0; -} - -static bool i915_gem_valid_gtt_space(struct drm_device *dev, - struct drm_mm_node *gtt_space, - unsigned long cache_level) -{ - struct drm_mm_node *other; - - /* On non-LLC machines we have to be careful when putting differing - * types of snoopable memory together to avoid the prefetcher - * crossing memory domains and dying. - */ - if (HAS_LLC(dev)) - return true; - - if (gtt_space == NULL) - return true; - - if (list_empty(>t_space->node_list)) - return true; - - other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); - if (other->allocated && !other->hole_follows && other->color != cache_level) - return false; - - other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); - if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) - return false; - - return true; -} - -static void i915_gem_verify_gtt(struct drm_device *dev) -{ -#if WATCH_GTT - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *obj; - int err = 0; - - list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) { - if (obj->gtt_space == NULL) { - printk(KERN_ERR "object found on GTT list with no space reserved\n"); - err++; + if (!reg->vma) continue; - } - - if (obj->cache_level != obj->gtt_space->color) { - printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", - obj->gtt_space->start, - obj->gtt_space->start + obj->gtt_space->size, - obj->cache_level, - obj->gtt_space->color); - err++; - continue; - } - - if (!i915_gem_valid_gtt_space(dev, - obj->gtt_space, - obj->cache_level)) { - printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", - obj->gtt_space->start, - obj->gtt_space->start + obj->gtt_space->size, - obj->cache_level); - err++; - continue; - } - } - - WARN_ON(err); -#endif -} -/** - * Finds free space in the GTT aperture and binds the object there. - */ -static int -i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, - unsigned alignment, - bool map_and_fenceable, - bool nonblocking) -{ - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_mm_node *node; - u32 size, fence_size, fence_alignment, unfenced_alignment; - bool mappable, fenceable; - size_t gtt_max = map_and_fenceable ? - dev_priv->gtt.mappable_end : dev_priv->gtt.total; - int ret; - - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, true); - unfenced_alignment = - i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, false); - - if (alignment == 0) - alignment = map_and_fenceable ? 
fence_alignment : - unfenced_alignment; - if (map_and_fenceable && alignment & (fence_alignment - 1)) { - DRM_ERROR("Invalid object alignment requested %u\n", alignment); - return -EINVAL; + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); + reg->dirty = true; } - - size = map_and_fenceable ? fence_size : obj->base.size; - - /* If the object is bigger than the entire aperture, reject it early - * before evicting everything in a vain attempt to find space. - */ - if (obj->base.size > gtt_max) { - DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", - obj->base.size, - map_and_fenceable ? "mappable" : "total", - gtt_max); - return -E2BIG; - } - - ret = i915_gem_object_get_pages(obj); - if (ret) - return ret; - - i915_gem_object_pin_pages(obj); - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (node == NULL) { - i915_gem_object_unpin_pages(obj); - return -ENOMEM; - } - -search_free: - ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, - size, alignment, - obj->cache_level, 0, gtt_max); - if (ret) { - ret = i915_gem_evict_something(dev, size, alignment, - obj->cache_level, - map_and_fenceable, - nonblocking); - if (ret == 0) - goto search_free; - - i915_gem_object_unpin_pages(obj); - kfree(node); - return ret; - } - if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { - i915_gem_object_unpin_pages(obj); - drm_mm_put_block(node); - return -EINVAL; - } - - ret = i915_gem_gtt_prepare_object(obj); - if (ret) { - i915_gem_object_unpin_pages(obj); - drm_mm_put_block(node); - return ret; - } - - list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - - obj->gtt_space = node; - obj->gtt_offset = node->start; - - fenceable = - node->size == fence_size && - (node->start & (fence_alignment - 1)) == 0; - - mappable = - obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end; - - obj->map_and_fenceable = mappable && fenceable; - - trace_i915_gem_object_bind(obj, map_and_fenceable); - i915_gem_verify_gtt(dev); - return 0; } -void -i915_gem_clflush_object(struct drm_i915_gem_object *obj) +static void discard_ggtt_vma(struct i915_vma *vma) { - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (obj->pages == NULL) - return; - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - */ - if (obj->stolen) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (obj->cache_level != I915_CACHE_NONE) - return; - - trace_i915_gem_object_clflush(obj); - - drm_clflush_sg(obj->pages); -} - -/** Flushes the GTT write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) -{ - uint32_t old_write_domain; - - if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it immediately go to main memory as far as we know, so there's - * no chipset flush. 
It also doesn't land in render cache. - * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - */ - wmb(); + struct drm_i915_gem_object *obj = vma->obj; - old_write_domain = obj->base.write_domain; - obj->base.write_domain = 0; - - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - old_write_domain); -} - -/** Flushes the CPU write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) -{ - uint32_t old_write_domain; - - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) - return; - - i915_gem_clflush_object(obj); - i915_gem_chipset_flush(obj->base.dev); - old_write_domain = obj->base.write_domain; - obj->base.write_domain = 0; - - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - old_write_domain); -} - -/** - * Moves a single object to the GTT read, and possibly write domain. - * - * This function returns when the move is complete, including waiting on - * flushes to occur. - */ -int -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) -{ - drm_i915_private_t *dev_priv = obj->base.dev->dev_private; - uint32_t old_write_domain, old_read_domains; - int ret; - - /* Not valid to be called on unbound objects. */ - if (obj->gtt_space == NULL) - return -EINVAL; - - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) - return 0; - - ret = i915_gem_object_wait_rendering(obj, !write); - if (ret) - return ret; - - i915_gem_object_flush_cpu_write_domain(obj); - - /* Serialise direct access to this object with the barriers for - * coherent writes from the GPU, by effectively invalidating the - * GTT domain upon first access. - */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) - mb(); - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - - /* It should now be out of any other write domains, and we can update - * the domain values for our changes. 
- */ - BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); - obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - if (write) { - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->base.write_domain = I915_GEM_DOMAIN_GTT; - obj->dirty = 1; + spin_lock(&obj->vma.lock); + if (!RB_EMPTY_NODE(&vma->obj_node)) { + rb_erase(&vma->obj_node, &obj->vma.tree); + RB_CLEAR_NODE(&vma->obj_node); } - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - - /* And bump the LRU for this access */ - if (i915_gem_object_is_inactive(obj)) - list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - - return 0; + spin_unlock(&obj->vma.lock); } -int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level) +struct i915_vma * +i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + const struct i915_gtt_view *view, + u64 size, u64 alignment, u64 flags) { - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct i915_vma *vma; int ret; - if (obj->cache_level == cache_level) - return 0; - - if (obj->pin_count) { - DRM_DEBUG("can not change the cache level of pinned objects\n"); - return -EBUSY; - } - - if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { - ret = i915_gem_object_unbind(obj); - if (ret) - return ret; - } + GEM_WARN_ON(!ww); - if (obj->gtt_space) { - ret = i915_gem_object_finish_gpu(obj); - if (ret) - return ret; - - i915_gem_object_finish_gtt(obj); - - /* Before SandyBridge, you could not use tiling or fence - * registers with snooped memory, so relinquish any fences - * currently pointing to our region in the aperture. + if (flags & PIN_MAPPABLE && + (!view || view->type == I915_GTT_VIEW_NORMAL)) { + /* + * If the required space is larger than the available + * aperture, we will not able to find a slot for the + * object and unbinding the object now will be in + * vain. Worse, doing so may cause us to ping-pong + * the object in and out of the Global GTT and + * waste a lot of cycles under the mutex. */ - if (INTEL_INFO(dev)->gen < 6) { - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; - } - - if (obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(obj, cache_level); - if (obj->has_aliasing_ppgtt_mapping) - i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, - obj, cache_level); - - obj->gtt_space->color = cache_level; - } - - if (cache_level == I915_CACHE_NONE) { - u32 old_read_domains, old_write_domain; + if (obj->base.size > ggtt->mappable_end) + return ERR_PTR(-E2BIG); - /* If we're coming from LLC cached, then we haven't - * actually been tracking whether the data is in the - * CPU cache or not, since we only allow one bit set - * in obj->write_domain and have been skipping the clflushes. - * Just set it to the CPU cache for now. + /* + * If NONBLOCK is set the caller is optimistically + * trying to cache the full object within the mappable + * aperture, and *must* have a fallback in place for + * situations where we cannot bind the object. We + * can be a little more lax here and use the fallback + * more often to avoid costly migrations of ourselves + * and other objects within the aperture. + * + * Half-the-aperture is used as a simple heuristic. + * More interesting would to do search for a free + * block prior to making the commitment to unbind. 
+ * That caters for the self-harm case, and with a + * little more heuristics (e.g. NOFAULT, NOEVICT) + * we could try to minimise harm to others. */ - WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); - WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); - - old_read_domains = obj->base.read_domains; - old_write_domain = obj->base.write_domain; - - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - } - - obj->cache_level = cache_level; - i915_gem_verify_gtt(dev); - return 0; -} - -int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_caching *args = data; - struct drm_i915_gem_object *obj; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - args->caching = obj->cache_level != I915_CACHE_NONE; - - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_caching *args = data; - struct drm_i915_gem_object *obj; - enum i915_cache_level level; - int ret; - - switch (args->caching) { - case I915_CACHING_NONE: - level = I915_CACHE_NONE; - break; - case I915_CACHING_CACHED: - level = I915_CACHE_LLC; - break; - default: - return -EINVAL; - } - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - ret = i915_gem_object_set_cache_level(obj, level); - - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -/* - * Prepare buffer for display plane (scanout, cursors, etc). - * Can be called from an uninterruptible phase (modesetting) and allows - * any flushes to be pipelined (for pageflips). - */ -int -i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, - u32 alignment, - struct intel_ring_buffer *pipelined) -{ - u32 old_read_domains, old_write_domain; - int ret; - - if (pipelined != obj->ring) { - ret = i915_gem_object_sync(obj, pipelined); - if (ret) - return ret; - } - - /* The display engine is not coherent with the LLC cache on gen6. As - * a result, we make sure that the pinning that is about to occur is - * done with uncached PTEs. This is lowest common denominator for all - * chipsets. - * - * However for gen6+, we could do better by using the GFDT bit instead - * of uncaching, which would allow us to flush all the LLC-cached data - * with that bit in the PTE to main memory with just one PIPE_CONTROL. - */ - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); - if (ret) - return ret; - - /* As the user may map the buffer once pinned in the display plane - * (e.g. libkms for the bootup splash), we have to ensure that we - * always use map_and_fenceable for all scanout buffers. - */ - ret = i915_gem_object_pin(obj, alignment, true, false); - if (ret) - return ret; - - i915_gem_object_flush_cpu_write_domain(obj); - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - - /* It should now be out of any other write domains, and we can update - * the domain values for our changes. 
- */ - obj->base.write_domain = 0; - obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - - return 0; -} - -int -i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) -{ - int ret; - - if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - - /* Ensure that we invalidate the GPU's caches and TLBs. */ - obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; - return 0; -} - -/** - * Moves a single object to the CPU read, and possibly write domain. - * - * This function returns when the move is complete, including waiting on - * flushes to occur. - */ -int -i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) -{ - uint32_t old_write_domain, old_read_domains; - int ret; - - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - - ret = i915_gem_object_wait_rendering(obj, !write); - if (ret) - return ret; - - i915_gem_object_flush_gtt_write_domain(obj); - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - - /* Flush the CPU cache if it's still invalid. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj); - - obj->base.read_domains |= I915_GEM_DOMAIN_CPU; - } - - /* It should now be out of any other write domains, and we can update - * the domain values for our changes. - */ - BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); - - /* If we're writing through the CPU, then the GPU read domains will - * need to be invalidated at next use. - */ - if (write) { - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - - return 0; -} - -/* Throttle our rendering by waiting until the ring has completed our requests - * emitted over 20 msec ago. - * - * Note that if we were to use the current jiffies each time around the loop, - * we wouldn't escape the function with any frames outstanding if the time to - * render a frame was over 20ms. - * - * This should get us reasonable parallelism between CPU and GPU but also - * relatively low latency when blocking on a particular request to finish. 
- */ -static int -i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_file_private *file_priv = file->driver_priv; - unsigned long recent_enough = jiffies - msecs_to_jiffies(20); - struct drm_i915_gem_request *request; - struct intel_ring_buffer *ring = NULL; - unsigned reset_counter; - u32 seqno = 0; - int ret; - - ret = i915_gem_wait_for_error(&dev_priv->gpu_error); - if (ret) - return ret; - - ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); - if (ret) - return ret; - - spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) { - if (time_after_eq(request->emitted_jiffies, recent_enough)) - break; - - ring = request->ring; - seqno = request->seqno; - } - reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); - spin_unlock(&file_priv->mm.lock); - - if (seqno == 0) - return 0; - - ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); - if (ret == 0) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); - - return ret; -} - -int -i915_gem_object_pin(struct drm_i915_gem_object *obj, - uint32_t alignment, - bool map_and_fenceable, - bool nonblocking) -{ - int ret; - - if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) - return -EBUSY; - - if (obj->gtt_space != NULL) { - if ((alignment && obj->gtt_offset & (alignment - 1)) || - (map_and_fenceable && !obj->map_and_fenceable)) { - WARN(obj->pin_count, - "bo is already pinned with incorrect alignment:" - " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," - " obj->map_and_fenceable=%d\n", - obj->gtt_offset, alignment, - map_and_fenceable, - obj->map_and_fenceable); - ret = i915_gem_object_unbind(obj); - if (ret) - return ret; + if (flags & PIN_NONBLOCK && + obj->base.size > ggtt->mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + +new_vma: + vma = i915_vma_instance(obj, &ggtt->vm, view); + if (IS_ERR(vma)) + return vma; + + if (i915_vma_misplaced(vma, size, alignment, flags)) { + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); + + /* + * If this misplaced vma is too big (i.e, at-least + * half the size of aperture) or hasn't been pinned + * mappable before, we ignore the misplacement when + * PIN_NONBLOCK is set in order to avoid the ping-pong + * issue described above. In other words, we try to + * avoid the costly operation of unbinding this vma + * from the GGTT and rebinding it back because there + * may not be enough space for this vma in the aperture. 
+ */ + if (flags & PIN_MAPPABLE && + (vma->fence_size > ggtt->mappable_end / 2 || + !i915_vma_is_map_and_fenceable(vma))) + return ERR_PTR(-ENOSPC); } - } - - if (obj->gtt_space == NULL) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - - ret = i915_gem_object_bind_to_gtt(obj, alignment, - map_and_fenceable, - nonblocking); - if (ret) - return ret; - - if (!dev_priv->mm.aliasing_ppgtt) - i915_gem_gtt_bind_object(obj, obj->cache_level); - } - - if (!obj->has_global_gtt_mapping && map_and_fenceable) - i915_gem_gtt_bind_object(obj, obj->cache_level); - - obj->pin_count++; - obj->pin_mappable |= map_and_fenceable; - - return 0; -} - -void -i915_gem_object_unpin(struct drm_i915_gem_object *obj) -{ - BUG_ON(obj->pin_count == 0); - BUG_ON(obj->gtt_space == NULL); - - if (--obj->pin_count == 0) - obj->pin_mappable = false; -} - -int -i915_gem_pin_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_pin *args = data; - struct drm_i915_gem_object *obj; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_ERROR("Attempting to pin a purgeable buffer\n"); - ret = -EINVAL; - goto out; - } - if (obj->pin_filp != NULL && obj->pin_filp != file) { - DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", - args->handle); - ret = -EINVAL; - goto out; - } + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) { + discard_ggtt_vma(vma); + goto new_vma; + } - if (obj->user_pin_count == 0) { - ret = i915_gem_object_pin(obj, args->alignment, true, false); + ret = i915_vma_unbind(vma); if (ret) - goto out; + return ERR_PTR(ret); } - obj->user_pin_count++; - obj->pin_filp = file; + ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); - /* XXX - flush the CPU caches for pinned objects - * as the X server doesn't manage domains yet - */ - i915_gem_object_flush_cpu_write_domain(obj); - args->offset = obj->gtt_offset; -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -int -i915_gem_unpin_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_pin *args = data; - struct drm_i915_gem_object *obj; - int ret; - - ret = i915_mutex_lock_interruptible(dev); if (ret) - return ret; + return ERR_PTR(ret); - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; + if (vma->fence && !i915_gem_object_is_tiled(obj)) { + mutex_lock(&ggtt->vm.mutex); + i915_vma_revoke_fence(vma); + mutex_unlock(&ggtt->vm.mutex); } - if (obj->pin_filp != file) { - DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", - args->handle); - ret = -EINVAL; - goto out; - } - obj->user_pin_count--; - if (obj->user_pin_count == 0) { - obj->pin_filp = NULL; - i915_gem_object_unpin(obj); + ret = i915_vma_wait_for_bind(vma); + if (ret) { + i915_vma_unpin(vma); + return ERR_PTR(ret); } -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + return vma; } -int -i915_gem_busy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +struct i915_vma * __must_check +i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, + const struct i915_gtt_view *view, + u64 size, u64 alignment, u64 flags) { - struct drm_i915_gem_busy *args = data; - 
struct drm_i915_gem_object *obj; - int ret; + struct i915_gem_ww_ctx ww; + struct i915_vma *ret; + int err; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - /* Count all active objects as busy, even if they are currently not used - * by the gpu. Users of this interface expect objects to eventually - * become non-busy without any further actions, therefore emit any - * necessary flushes here. - */ - ret = i915_gem_object_flush_active(obj); + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; - args->busy = obj->active; - if (obj->ring) { - BUILD_BUG_ON(I915_NUM_RINGS > 16); - args->busy |= intel_ring_flag(obj->ring) << 16; + ret = i915_gem_object_ggtt_pin_ww(obj, &ww, view, size, + alignment, flags); + if (IS_ERR(ret)) + err = PTR_ERR(ret); } - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -int -i915_gem_throttle_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return i915_gem_ring_throttle(dev, file_priv); + return err ? ERR_PTR(err) : ret; } int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_madvise *args = data; struct drm_i915_gem_object *obj; - int ret; + int err; switch (args->madv) { case I915_MADV_DONTNEED: @@ -3827,795 +1042,314 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file_priv, args->handle); + if (!obj) + return -ENOENT; - if (obj->pin_count) { - ret = -EINVAL; + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) goto out; - } - - if (obj->madv != __I915_MADV_PURGED) - obj->madv = args->madv; - - /* if the object is no longer attached, discard its backing storage */ - if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) - i915_gem_object_truncate(obj); - - args->retained = obj->madv != __I915_MADV_PURGED; - -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; -} - -void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops) -{ - INIT_LIST_HEAD(&obj->mm_list); - INIT_LIST_HEAD(&obj->global_list); - INIT_LIST_HEAD(&obj->ring_list); - INIT_LIST_HEAD(&obj->exec_list); - - obj->ops = ops; - obj->fence_reg = I915_FENCE_REG_NONE; - obj->madv = I915_MADV_WILLNEED; - /* Avoid an unnecessary call to unbind on the first bind. 
*/ - obj->map_and_fenceable = true; - - i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); -} - -static const struct drm_i915_gem_object_ops i915_gem_object_ops = { - .get_pages = i915_gem_object_get_pages_gtt, - .put_pages = i915_gem_object_put_pages_gtt, -}; - -struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, - size_t size) -{ - struct drm_i915_gem_object *obj; - struct address_space *mapping; - gfp_t mask; - - obj = i915_gem_object_alloc(dev); - if (obj == NULL) - return NULL; - - if (drm_gem_object_init(dev, &obj->base, size) != 0) { - i915_gem_object_free(obj); - return NULL; + if (i915_gem_object_has_pages(obj) && + i915_gem_object_is_tiled(obj) && + i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES) { + if (obj->mm.madv == I915_MADV_WILLNEED) { + GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_clear_tiling_quirk(obj); + i915_gem_object_make_shrinkable(obj); + } + if (args->madv == I915_MADV_WILLNEED) { + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_make_unshrinkable(obj); + i915_gem_object_set_tiling_quirk(obj); + } } - mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; - if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { - /* 965gm cannot relocate objects above 4GiB. */ - mask &= ~__GFP_HIGHMEM; - mask |= __GFP_DMA32; + if (obj->mm.madv != __I915_MADV_PURGED) { + obj->mm.madv = args->madv; + if (obj->ops->adjust_lru) + obj->ops->adjust_lru(obj); } - mapping = file_inode(obj->base.filp)->i_mapping; - mapping_set_gfp_mask(mapping, mask); - - i915_gem_object_init(obj, &i915_gem_object_ops); - - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - - if (HAS_LLC(dev)) { - /* On some devices, we can have the GPU use the LLC (the CPU - * cache) for about a 10% performance improvement - * compared to uncached. Graphics requests other than - * display scanout are coherent with the CPU in - * accessing this cache. This means in this mode we - * don't need to clflush on the CPU side, and on the - * GPU side we only need to flush internal caches to - * get data visible to the CPU. - * - * However, we maintain the display planes as UC, and so - * need to rebind when first used as such. - */ - obj->cache_level = I915_CACHE_LLC; - } else - obj->cache_level = I915_CACHE_NONE; - - return obj; -} - -int i915_gem_init_object(struct drm_gem_object *obj) -{ - BUG(); - - return 0; -} - -void i915_gem_free_object(struct drm_gem_object *gem_obj) -{ - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; - - trace_i915_gem_object_destroy(obj); + if (i915_gem_object_has_pages(obj) || + i915_gem_object_has_self_managed_shrink_list(obj)) { + unsigned long flags; - if (obj->phys_obj) - i915_gem_detach_phys_object(dev, obj); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!list_empty(&obj->mm.link)) { + struct list_head *list; - obj->pin_count = 0; - if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { - bool was_interruptible; + if (obj->mm.madv != I915_MADV_WILLNEED) + list = &i915->mm.purge_list; + else + list = &i915->mm.shrink_list; + list_move_tail(&obj->mm.link, list); - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - - WARN_ON(i915_gem_object_unbind(obj)); - - dev_priv->mm.interruptible = was_interruptible; + } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } - /* Stolen objects don't hold a ref, but do hold pin count. 
Fix that up - * before progressing. */ - if (obj->stolen) - i915_gem_object_unpin_pages(obj); - - if (WARN_ON(obj->pages_pin_count)) - obj->pages_pin_count = 0; - i915_gem_object_put_pages(obj); - i915_gem_object_free_mmap_offset(obj); - i915_gem_object_release_stolen(obj); - - BUG_ON(obj->pages); - - if (obj->base.import_attach) - drm_prime_gem_destroy(&obj->base, NULL); + /* if the object is no longer attached, discard its backing storage */ + if (obj->mm.madv == I915_MADV_DONTNEED && + !i915_gem_object_has_pages(obj)) + i915_gem_object_truncate(obj); - drm_gem_object_release(&obj->base); - i915_gem_info_remove_obj(dev_priv, obj->base.size); + args->retained = obj->mm.madv != __I915_MADV_PURGED; - kfree(obj->bit_17); - i915_gem_object_free(obj); + i915_gem_object_unlock(obj); +out: + i915_gem_object_put(obj); + return err; } -int -i915_gem_idle(struct drm_device *dev) +/* + * A single pass should suffice to release all the freed objects (along most + * call paths), but be a little more paranoid in that freeing the objects does + * take a little amount of time, during which the rcu callbacks could have added + * new objects into the freed list, and armed the work again. + */ +void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { - drm_i915_private_t *dev_priv = dev->dev_private; - int ret; - - mutex_lock(&dev->struct_mutex); - - if (dev_priv->mm.suspended) { - mutex_unlock(&dev->struct_mutex); - return 0; - } - - ret = i915_gpu_idle(dev); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return ret; + while (atomic_read(&i915->mm.free_count)) { + flush_work(&i915->mm.free_work); + drain_workqueue(i915->bdev.wq); + rcu_barrier(); } - i915_gem_retire_requests(dev); - - /* Under UMS, be paranoid and evict. */ - if (!drm_core_check_feature(dev, DRIVER_MODESET)) - i915_gem_evict_everything(dev); - - /* Hack! Don't let anybody do execbuf while we don't control the chip. - * We need to replace this with a semaphore, or something. - * And not confound mm.suspended! - */ - dev_priv->mm.suspended = 1; - del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); - - i915_kernel_lost_context(dev); - i915_gem_cleanup_ringbuffer(dev); - - mutex_unlock(&dev->struct_mutex); - - /* Cancel the retire work handler, which should be idle now. */ - cancel_delayed_work_sync(&dev_priv->mm.retire_work); - - return 0; } -void i915_gem_l3_remap(struct drm_device *dev) +/* + * Similar to objects above (see i915_gem_drain_freed-objects), in general we + * have workers that are armed by RCU and then rearm themselves in their + * callbacks. To be paranoid, we need to drain the workqueue a second time after + * waiting for the RCU grace period so that we catch work queued via RCU from + * the first pass. As neither drain_workqueue() nor flush_workqueue() report a + * result, we make an assumption that we only don't require more than 3 passes + * to catch all _recursive_ RCU delayed work. 
+ */ +void i915_gem_drain_workqueue(struct drm_i915_private *i915) { - drm_i915_private_t *dev_priv = dev->dev_private; - u32 misccpctl; int i; - if (!HAS_L3_GPU_CACHE(dev)) - return; - - if (!dev_priv->l3_parity.remap_info) - return; - - misccpctl = I915_READ(GEN7_MISCCPCTL); - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - POSTING_READ(GEN7_MISCCPCTL); - - for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) - DRM_DEBUG("0x%x was already programmed to %x\n", - GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->l3_parity.remap_info[i/4]) - DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); + for (i = 0; i < 3; i++) { + flush_workqueue(i915->wq); + rcu_barrier(); + i915_gem_drain_freed_objects(i915); } - /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(GEN7_L3LOG_BASE); - - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + drain_workqueue(i915->wq); } -void i915_gem_init_swizzling(struct drm_device *dev) +int i915_gem_init(struct drm_i915_private *dev_priv) { - drm_i915_private_t *dev_priv = dev->dev_private; - - if (INTEL_INFO(dev)->gen < 5 || - dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) - return; + struct intel_gt *gt; + unsigned int i; + int ret; - I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | - DISP_TILE_SURFACE_SWIZZLING); + /* + * In the process of replacing cache_level with pat_index a tricky + * dependency is created on the definition of the enum i915_cache_level. + * In case this enum is changed, PTE encode would be broken. + * Add a WARNING here. And remove when we completely quit using this + * enum. + */ + BUILD_BUG_ON(I915_CACHE_NONE != 0 || + I915_CACHE_LLC != 1 || + I915_CACHE_L3_LLC != 2 || + I915_CACHE_WT != 3 || + I915_MAX_CACHE_LEVEL != 4); - if (IS_GEN5(dev)) - return; + /* We need to fallback to 4K pages if host doesn't support huge gtt. 
*/ + if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) + RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K; - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); - if (IS_GEN6(dev)) - I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); - else if (IS_GEN7(dev)) - I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); - else - BUG(); -} - -static bool -intel_enable_blt(struct drm_device *dev) -{ - if (!HAS_BLT(dev)) - return false; - - /* The blitter was dysfunctional on early prototypes */ - if (IS_GEN6(dev) && dev->pdev->revision < 8) { - DRM_INFO("BLT not supported on this pre-production hardware;" - " graphics performance will be degraded.\n"); - return false; + for_each_gt(gt, dev_priv, i) { + intel_uc_fetch_firmwares(>->uc); + intel_wopcm_init(>->wopcm); + if (GRAPHICS_VER(dev_priv) >= 8) + setup_private_pat(gt); } - return true; -} - -static int i915_gem_init_rings(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int ret; - - ret = intel_init_render_ring_buffer(dev); - if (ret) - return ret; - - if (HAS_BSD(dev)) { - ret = intel_init_bsd_ring_buffer(dev); - if (ret) - goto cleanup_render_ring; + ret = i915_init_ggtt(dev_priv); + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_unlock; } - if (intel_enable_blt(dev)) { - ret = intel_init_blt_ring_buffer(dev); - if (ret) - goto cleanup_bsd_ring; - } + /* + * Despite its name intel_clock_gating_init applies both display + * clock gating workarounds; GT mmio workarounds and the occasional + * GT power context workaround. Worse, sometimes it includes a context + * register workaround which we need to apply before we record the + * default HW state for all contexts. + * + * FIXME: break up the workarounds and apply them at the right time! + */ + intel_clock_gating_init(dev_priv); - if (HAS_VEBOX(dev)) { - ret = intel_init_vebox_ring_buffer(dev); + for_each_gt(gt, dev_priv, i) { + ret = intel_gt_init(gt); if (ret) - goto cleanup_blt_ring; + goto err_unlock; } - - ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); - if (ret) - goto cleanup_vebox_ring; + /* + * Register engines early to ensure the engine list is in its final + * rb-tree form, lowering the amount of code that has to deal with + * the intermediate llist state. + */ + intel_engines_driver_register(dev_priv); return 0; -cleanup_vebox_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); -cleanup_blt_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); -cleanup_bsd_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); -cleanup_render_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); - - return ret; -} - -int -i915_gem_init_hw(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - int ret; - - if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) - return -EIO; - - if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) - I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); - - if (HAS_PCH_NOP(dev)) { - u32 temp = I915_READ(GEN7_MSG_CTL); - temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); - I915_WRITE(GEN7_MSG_CTL, temp); - } - - i915_gem_l3_remap(dev); - - i915_gem_init_swizzling(dev); - - ret = i915_gem_init_rings(dev); - if (ret) - return ret; - /* - * XXX: There was some w/a described somewhere suggesting loading - * contexts before PPGTT. + * Unwinding is complicated by that we want to handle -EIO to mean + * disable GPU submission but keep KMS alive. 
We want to mark the + * HW as irrevisibly wedged, but keep enough state around that the + * driver doesn't explode during runtime. */ - i915_gem_context_init(dev); - if (dev_priv->mm.aliasing_ppgtt) { - ret = dev_priv->mm.aliasing_ppgtt->enable(dev); - if (ret) { - i915_gem_cleanup_aliasing_ppgtt(dev); - DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); +err_unlock: + i915_gem_drain_workqueue(dev_priv); + + if (ret != -EIO) { + for_each_gt(gt, dev_priv, i) { + intel_gt_driver_remove(gt); + intel_gt_driver_release(gt); + intel_uc_cleanup_firmwares(>->uc); } } - return 0; -} - -int i915_gem_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int ret; - - mutex_lock(&dev->struct_mutex); - - if (IS_VALLEYVIEW(dev)) { - /* VLVA0 (potential hack), BIOS isn't actually waking us */ - I915_WRITE(VLV_GTLC_WAKE_CTRL, 1); - if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10)) - DRM_DEBUG_DRIVER("allow wake ack timed out\n"); - } - - i915_gem_init_global_gtt(dev); - - ret = i915_gem_init_hw(dev); - mutex_unlock(&dev->struct_mutex); - if (ret) { - i915_gem_cleanup_aliasing_ppgtt(dev); - return ret; - } - - /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ - if (!drm_core_check_feature(dev, DRIVER_MODESET)) - dev_priv->dri1.allow_batchbuffer = 1; - return 0; -} - -void -i915_gem_cleanup_ringbuffer(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring; - int i; - - for_each_ring(ring, dev_priv, i) - intel_cleanup_ring_buffer(ring); -} - -int -i915_gem_entervt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - int ret; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return 0; - - if (i915_reset_in_progress(&dev_priv->gpu_error)) { - DRM_ERROR("Reenabling wedged hardware, good luck\n"); - atomic_set(&dev_priv->gpu_error.reset_counter, 0); - } - - mutex_lock(&dev->struct_mutex); - dev_priv->mm.suspended = 0; + if (ret == -EIO) { + /* + * Allow engines or uC initialisation to fail by marking the GPU + * as wedged. But we only want to do this when the GPU is angry, + * for all other failure, such as an allocation failure, bail. 
+ */ + for_each_gt(gt, dev_priv, i) { + if (!intel_gt_is_wedged(gt)) { + i915_probe_error(dev_priv, + "Failed to initialize GPU, declaring it wedged!\n"); + intel_gt_set_wedged(gt); + } + } - ret = i915_gem_init_hw(dev); - if (ret != 0) { - mutex_unlock(&dev->struct_mutex); - return ret; + /* Minimal basic recovery for KMS */ + ret = i915_ggtt_enable_hw(dev_priv); + i915_ggtt_resume(to_gt(dev_priv)->ggtt); + intel_clock_gating_init(dev_priv); } - BUG_ON(!list_empty(&dev_priv->mm.active_list)); - mutex_unlock(&dev->struct_mutex); - - ret = drm_irq_install(dev); - if (ret) - goto cleanup_ringbuffer; - - return 0; - -cleanup_ringbuffer: - mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); - dev_priv->mm.suspended = 1; - mutex_unlock(&dev->struct_mutex); + i915_gem_drain_freed_objects(dev_priv); return ret; } -int -i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) +void i915_gem_driver_register(struct drm_i915_private *i915) { - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return 0; - - drm_irq_uninstall(dev); - return i915_gem_idle(dev); + i915_gem_driver_register__shrinker(i915); } -void -i915_gem_lastclose(struct drm_device *dev) +void i915_gem_driver_unregister(struct drm_i915_private *i915) { - int ret; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return; - - ret = i915_gem_idle(dev); - if (ret) - DRM_ERROR("failed to idle hardware: %d\n", ret); + i915_gem_driver_unregister__shrinker(i915); } -static void -init_ring_lists(struct intel_ring_buffer *ring) +void i915_gem_driver_remove(struct drm_i915_private *dev_priv) { - INIT_LIST_HEAD(&ring->active_list); - INIT_LIST_HEAD(&ring->request_list); -} + struct intel_gt *gt; + unsigned int i; -void -i915_gem_load(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - int i; - - dev_priv->slab = - kmem_cache_create("i915_gem_object", - sizeof(struct drm_i915_gem_object), 0, - SLAB_HWCACHE_ALIGN, - NULL); - - INIT_LIST_HEAD(&dev_priv->mm.active_list); - INIT_LIST_HEAD(&dev_priv->mm.inactive_list); - INIT_LIST_HEAD(&dev_priv->mm.unbound_list); - INIT_LIST_HEAD(&dev_priv->mm.bound_list); - INIT_LIST_HEAD(&dev_priv->mm.fence_list); - for (i = 0; i < I915_NUM_RINGS; i++) - init_ring_lists(&dev_priv->ring[i]); - for (i = 0; i < I915_MAX_NUM_FENCES; i++) - INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); - INIT_DELAYED_WORK(&dev_priv->mm.retire_work, - i915_gem_retire_work_handler); - init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - - /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ - if (IS_GEN3(dev)) { - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); - } - - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - - /* Old X drivers will take 0-2 for front, back, depth buffers */ - if (!drm_core_check_feature(dev, DRIVER_MODESET)) - dev_priv->fence_reg_start = 3; - - if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) - dev_priv->num_fence_regs = 32; - else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) - dev_priv->num_fence_regs = 16; - else - dev_priv->num_fence_regs = 8; - - /* Initialize fence registers to zero */ - INIT_LIST_HEAD(&dev_priv->mm.fence_list); - i915_gem_restore_fences(dev); + i915_gem_suspend_late(dev_priv); + for_each_gt(gt, dev_priv, i) + intel_gt_driver_remove(gt); + dev_priv->uabi_engines = RB_ROOT; - i915_gem_detect_bit_6_swizzle(dev); - init_waitqueue_head(&dev_priv->pending_flip_queue); - - dev_priv->mm.interruptible = true; 
- - dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; - dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&dev_priv->mm.inactive_shrinker); + /* Flush any outstanding unpin_work. */ + i915_gem_drain_workqueue(dev_priv); } -/* - * Create a physically contiguous memory object for this object - * e.g. for cursor + overlay regs - */ -static int i915_gem_init_phys_object(struct drm_device *dev, - int id, int size, int align) +void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_phys_object *phys_obj; - int ret; + struct intel_gt *gt; + unsigned int i; - if (dev_priv->mm.phys_objs[id - 1] || !size) - return 0; - - phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); - if (!phys_obj) - return -ENOMEM; - - phys_obj->id = id; - - phys_obj->handle = drm_pci_alloc(dev, size, align); - if (!phys_obj->handle) { - ret = -ENOMEM; - goto kfree_obj; + for_each_gt(gt, dev_priv, i) { + intel_gt_driver_release(gt); + intel_uc_cleanup_firmwares(>->uc); } -#ifdef CONFIG_X86 - set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); -#endif - dev_priv->mm.phys_objs[id - 1] = phys_obj; + /* Flush any outstanding work, including i915_gem_context.release_work. */ + i915_gem_drain_workqueue(dev_priv); - return 0; -kfree_obj: - kfree(phys_obj); - return ret; + drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list)); } -static void i915_gem_free_phys_object(struct drm_device *dev, int id) +static void i915_gem_init__mm(struct drm_i915_private *i915) { - drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_phys_object *phys_obj; + spin_lock_init(&i915->mm.obj_lock); - if (!dev_priv->mm.phys_objs[id - 1]) - return; + init_llist_head(&i915->mm.free_list); - phys_obj = dev_priv->mm.phys_objs[id - 1]; - if (phys_obj->cur_obj) { - i915_gem_detach_phys_object(dev, phys_obj->cur_obj); - } + INIT_LIST_HEAD(&i915->mm.purge_list); + INIT_LIST_HEAD(&i915->mm.shrink_list); -#ifdef CONFIG_X86 - set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); -#endif - drm_pci_free(dev, phys_obj->handle); - kfree(phys_obj); - dev_priv->mm.phys_objs[id - 1] = NULL; + i915_gem_init__objects(i915); } -void i915_gem_free_all_phys_object(struct drm_device *dev) +void i915_gem_init_early(struct drm_i915_private *dev_priv) { - int i; + i915_gem_init__mm(dev_priv); + i915_gem_init__contexts(dev_priv); - for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) - i915_gem_free_phys_object(dev, i); + spin_lock_init(&dev_priv->frontbuffer_lock); } -void i915_gem_detach_phys_object(struct drm_device *dev, - struct drm_i915_gem_object *obj) +void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) { - struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; - char *vaddr; - int i; - int page_count; - - if (!obj->phys_obj) - return; - vaddr = obj->phys_obj->handle->vaddr; - - page_count = obj->base.size / PAGE_SIZE; - for (i = 0; i < page_count; i++) { - struct page *page = shmem_read_mapping_page(mapping, i); - if (!IS_ERR(page)) { - char *dst = kmap_atomic(page); - memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); - kunmap_atomic(dst); - - drm_clflush_pages(&page, 1); - - set_page_dirty(page); - mark_page_accessed(page); - page_cache_release(page); - } - } - i915_gem_chipset_flush(dev); - - obj->phys_obj->cur_obj = NULL; - obj->phys_obj = NULL; + i915_gem_drain_workqueue(dev_priv); + 
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); + GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); + drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count); } -int -i915_gem_attach_phys_object(struct drm_device *dev, - struct drm_i915_gem_object *obj, - int id, - int align) +int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) { - struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; - drm_i915_private_t *dev_priv = dev->dev_private; - int ret = 0; - int page_count; - int i; - - if (id > I915_MAX_PHYS_OBJECT) - return -EINVAL; + struct drm_i915_file_private *file_priv; + struct i915_drm_client *client; + int ret = -ENOMEM; - if (obj->phys_obj) { - if (obj->phys_obj->id == id) - return 0; - i915_gem_detach_phys_object(dev, obj); - } + drm_dbg(&i915->drm, "\n"); - /* create a new object */ - if (!dev_priv->mm.phys_objs[id - 1]) { - ret = i915_gem_init_phys_object(dev, id, - obj->base.size, align); - if (ret) { - DRM_ERROR("failed to init phys object %d size: %zu\n", - id, obj->base.size); - return ret; - } - } - - /* bind to the object */ - obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; - obj->phys_obj->cur_obj = obj; - - page_count = obj->base.size / PAGE_SIZE; - - for (i = 0; i < page_count; i++) { - struct page *page; - char *dst, *src; - - page = shmem_read_mapping_page(mapping, i); - if (IS_ERR(page)) - return PTR_ERR(page); - - src = kmap_atomic(page); - dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); - memcpy(dst, src, PAGE_SIZE); - kunmap_atomic(src); - - mark_page_accessed(page); - page_cache_release(page); - } + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + if (!file_priv) + goto err_alloc; - return 0; -} + client = i915_drm_client_alloc(); + if (!client) + goto err_client; -static int -i915_gem_phys_pwrite(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file_priv) -{ - void *vaddr = obj->phys_obj->handle->vaddr + args->offset; - char __user *user_data = to_user_ptr(args->data_ptr); + file->driver_priv = file_priv; + file_priv->i915 = i915; + file_priv->file = file; + file_priv->client = client; - if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { - unsigned long unwritten; + file_priv->bsd_engine = -1; + file_priv->hang_timestamp = jiffies; - /* The physical object once assigned is fixed for the lifetime - * of the obj, so we can safely drop the lock and continue - * to access vaddr. - */ - mutex_unlock(&dev->struct_mutex); - unwritten = copy_from_user(vaddr, user_data, args->size); - mutex_lock(&dev->struct_mutex); - if (unwritten) - return -EFAULT; - } + ret = i915_gem_context_open(i915, file); + if (ret) + goto err_context; - i915_gem_chipset_flush(dev); return 0; -} -void i915_gem_release(struct drm_device *dev, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - /* Clean up our request list when the client is going away, so that - * later retire_requests won't dereference our soon-to-be-gone - * file_priv. 
- */ - spin_lock(&file_priv->mm.lock); - while (!list_empty(&file_priv->mm.request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&file_priv->mm.request_list, - struct drm_i915_gem_request, - client_list); - list_del(&request->client_list); - request->file_priv = NULL; - } - spin_unlock(&file_priv->mm.lock); +err_context: + i915_drm_client_put(client); +err_client: + kfree(file_priv); +err_alloc: + return ret; } -static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) -{ - if (!mutex_is_locked(mutex)) - return false; - -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) - return mutex->owner == task; -#else - /* Since UP may be pre-empted, we cannot assume that we own the lock */ - return false; +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gem_device.c" +#include "selftests/i915_gem.c" #endif -} - -static int -i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) -{ - struct drm_i915_private *dev_priv = - container_of(shrinker, - struct drm_i915_private, - mm.inactive_shrinker); - struct drm_device *dev = dev_priv->dev; - struct drm_i915_gem_object *obj; - int nr_to_scan = sc->nr_to_scan; - bool unlock = true; - int cnt; - - if (!mutex_trylock(&dev->struct_mutex)) { - if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return 0; - - if (dev_priv->mm.shrinker_no_lock_stealing) - return 0; - - unlock = false; - } - - if (nr_to_scan) { - nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); - if (nr_to_scan > 0) - nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, - false); - if (nr_to_scan > 0) - i915_gem_shrink_all(dev_priv); - } - - cnt = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) - if (obj->pages_pin_count == 0) - cnt += obj->base.size >> PAGE_SHIFT; - list_for_each_entry(obj, &dev_priv->mm.inactive_list, global_list) - if (obj->pin_count == 0 && obj->pages_pin_count == 0) - cnt += obj->base.size >> PAGE_SHIFT; - - if (unlock) - mutex_unlock(&dev->struct_mutex); - return cnt; -} |
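The comment above i915_gem_drain_workqueue() explains why a single flush is not enough once RCU callbacks can re-arm deferred work. What follows is only a rough user-space sketch of that idea, not i915 or kernel workqueue code; the queue_work()/flush_queue() helpers, the re-arm limit and the pass count are all illustrative assumptions.

/*
 * Minimal stand-alone sketch (plain C, no kernel APIs): a "callback"
 * may queue further work while being drained, so one flush pass can
 * leave the queue non-empty and several passes are needed.
 */
#include <stdio.h>

#define MAX_REARM 2	/* pretend each callback may re-arm itself twice */

static int pending;	/* items currently queued */
static int rearms;	/* how many re-arms have happened so far */

static void queue_work(void)
{
	pending++;
}

/* Run everything queued right now; callbacks may queue more work. */
static void flush_queue(void)
{
	int batch = pending;

	pending = 0;
	while (batch--) {
		if (rearms < MAX_REARM) {	/* callback re-arms itself */
			rearms++;
			queue_work();
		}
	}
}

int main(void)
{
	int pass;

	queue_work();

	/* A single pass would leave re-armed work behind. */
	for (pass = 0; pass < 3 && pending; pass++)
		flush_queue();

	printf("pending after draining: %d\n", pending);
	return 0;
}

Under these assumptions the queue only reaches empty on the third pass, which mirrors the driver comment's reasoning that no more than three passes of recursive RCU-delayed work need to be caught.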