/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/mman.h>
#include <linux/sizes.h>

#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_trace.h"
#include "i915_vma.h"

static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
	      unsigned long addr, unsigned long size)
{
	if (vma->vm_file != filp)
		return false;

	return vma->vm_start == addr &&
	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		addr = -ENXIO;
		goto err;
	}

	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
		addr = -EINVAL;
		goto err;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (IS_ERR_VALUE(addr))
		goto err;

	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			addr = -EINTR;
			goto err;
		}
		vma = find_vma(mm, addr);
		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);
		if (IS_ERR_VALUE(addr))
			goto err;
	}
	i915_gem_object_put(obj);

	args->addr_ptr = (u64)addr;
	return 0;

err:
	i915_gem_object_put(obj);
	return addr;
}
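/*
 * Illustrative sketch (not part of the driver, only a rough guess at typical
 * usage): how userspace exercises the legacy CPU mmap ioctl above. "fd" is an
 * open DRM device fd, "handle" a GEM handle and "obj_size" its size; all three
 * are assumed here, and drmIoctl() comes from libdrm.
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = obj_size,
 *		.flags = I915_MMAP_WC,	// optional write-combining mapping
 *	};
 *	void *ptr;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
 *		return -errno;
 *	ptr = (void *)(uintptr_t)arg.addr_ptr;
 *
 * Contrast this with the mmap-offset path further below, which is the form
 * new drivers are expected to use.
 */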
static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
 *     pagefault; swapin remains transparent.
 *
 * Restrictions:
 *
 * * snoopable objects cannot be accessed via the GTT. It can cause machine
 *   hangs on some architectures, corruption on others. An attempt to service
 *   a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 * * the object must be able to fit into RAM (physical memory, though not
 *   limited to the mappable aperture).
 *
 * Caveats:
 *
 * * a new GTT page fault will synchronize rendering from the GPU and flush
 *   all data to system memory. Subsequent access will not be synchronized.
 *
 * * all mappings are revoked on runtime device suspend.
 *
 * * there are only 8, 16 or 32 fence registers to share between all users
 *   (older machines require fence register for display and blitter access
 *   as well). Contention of the fence registers will cause the previous users
 *   to be unmapped and any new access will generate new page faults.
 *
 * * running out of memory while servicing a fault may generate a SIGBUS,
 *   rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 3;
}
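/*
 * Illustrative sketch (not part of the driver): userspace can discover the
 * value returned above via I915_PARAM_MMAP_GTT_VERSION. "fd" is an assumed
 * open DRM device fd, and drmIoctl() comes from libdrm.
 *
 *	int gtt_version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &gtt_version,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && gtt_version >= 1)
 *		;	// partial views are available for oversized objects
 */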
static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}
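/*
 * Worked example (illustrative only, assuming 4KiB pages): the fault handler
 * below passes a 1MiB chunk, MIN_CHUNK_PAGES = SZ_1M >> PAGE_SHIFT = 256
 * pages. A fault at page 1000 of an untiled 16MiB (4096 page) object yields
 *
 *	view.partial.offset = rounddown(1000, 256)  = 768
 *	view.partial.size   = min(256, 4096 - 768)  = 256
 *
 * i.e. a 1MiB window starting at page 768. For a tiled object the chunk is
 * first rounded up to a whole number of tile rows so the view stays suitable
 * for fenced access.
 */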
/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
vm_fault_t i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *i915 = to_i915(dev);
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	struct i915_ggtt *ggtt = &i915->ggtt;
	bool write = area->vm_flags & VM_WRITE;
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	pgoff_t page_offset;
	int srcu;
	int ret;

	/* Sanity check that we allow writing into this object */
	if (i915_gem_object_is_readonly(obj) && write)
		return VM_FAULT_SIGBUS;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	wakeref = intel_runtime_pm_get(rpm);

	ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
	if (ret)
		goto err_rpm;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err_reset;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
		ret = -EFAULT;
		goto err_unlock;
	}

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONBLOCK /* NOWARN */ |
				       PIN_NOEVICT);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
		unsigned int flags;

		flags = PIN_MAPPABLE | PIN_NOSEARCH;
		if (view.type == I915_GGTT_VIEW_NORMAL)
			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */

		/*
		 * Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		if (IS_ERR(vma)) {
			flags = PIN_MAPPABLE;
			view.type = I915_GGTT_VIEW_PARTIAL;
			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		}
	}
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_vma_pin_fence(vma);
	if (ret)
		goto err_unpin;

	/* Finally, remap it using the new GTT offset */
	ret = remap_io_mapping(area,
			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
			       min_t(u64, vma->size, area->vm_end - area->vm_start),
			       &ggtt->iomap);
	if (ret)
		goto err_fence;

	assert_rpm_wakelock_held(rpm);

	/* Mark as being mmapped into userspace for later revocation */
	mutex_lock(&i915->ggtt.vm.mutex);
	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));

	if (write) {
		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
		i915_vma_set_ggtt_write(vma);
		obj->mm.dirty = true;
	}

err_fence:
	i915_vma_unpin_fence(vma);
err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_reset:
	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
err_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!intel_gt_is_wedged(ggtt->vm.gt))
			return VM_FAULT_SIGBUS;
		/* else, fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
	case -EFAULT:
	case -ENODEV: /* bad object, how did you get here! */
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
		return VM_FAULT_SIGBUS;
	}
}

void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	GEM_BUG_ON(!obj->userfault_count);

	obj->userfault_count = 0;
	list_del(&obj->userfault_link);
	drm_vma_node_unmap(&obj->base.vma_node,
			   obj->base.dev->anon_inode->i_mapping);

	for_each_ggtt_vma(vma, obj)
		i915_vma_unset_userfault(vma);
}

/**
 * i915_gem_object_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	intel_wakeref_t wakeref;

	/* Serialisation between user GTT access and our code depends upon
	 * revoking the CPU's PTE whilst the mutex is held. The next user
	 * pagefault then has to wait until we release the mutex.
	 *
	 * Note that RPM complicates somewhat by adding an additional
	 * requirement that operations to the GGTT be made holding the RPM
	 * wakeref.
	 */
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	mutex_lock(&i915->ggtt.vm.mutex);

	if (!obj->userfault_count)
		goto out;

	__i915_gem_object_release_mmap(obj);

	/* Ensure that the CPU's PTE are revoked and there are no outstanding
	 * memory transactions from userspace before we return. The TLB
	 * flushing implied by changing the PTE above *should* be
	 * sufficient, an extra barrier here just provides us with a bit
	 * of paranoid documentation about our requirement to serialise
	 * memory writes before touching registers / GSM.
	 */
	wmb();

out:
	mutex_unlock(&i915->ggtt.vm.mutex);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static int create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int err;

	err = drm_gem_create_mmap_offset(&obj->base);
	if (likely(!err))
		return 0;

	/* Attempt to reap some mmap space from dead objects */
	do {
		err = i915_gem_wait_for_idle(i915,
					     I915_WAIT_INTERRUPTIBLE,
					     MAX_SCHEDULE_TIMEOUT);
		if (err)
			break;

		i915_gem_drain_freed_objects(i915);
		err = drm_gem_create_mmap_offset(&obj->base);
		if (!err)
			break;

	} while (flush_delayed_work(&i915->gem.retire_work));

	return err;
}

int i915_gem_mmap_gtt(struct drm_file *file,
		      struct drm_device *dev,
		      u32 handle,
		      u64 *offset)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_object_lookup(file, handle);
	if (!obj)
		return -ENOENT;

	if (i915_gem_object_never_bind_ggtt(obj)) {
		ret = -ENODEV;
		goto out;
	}

	ret = create_mmap_offset(obj);
	if (ret == 0)
		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);

out:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}
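/*
 * Illustrative sketch (not part of the driver): the GTT mmap flow from the
 * userspace side, following the mmap-offset pattern recommended at the top of
 * this file. "fd" is an open DRM device fd, "handle" a GEM handle and
 * "obj_size" its size; all are assumed here, and drmIoctl() comes from libdrm.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *		return -errno;
 *
 *	// mmap the fake offset on the DRM fd itself; the first access
 *	// faults into i915_gem_fault() above.
 *	ptr = mmap(NULL, obj_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *	if (ptr == MAP_FAILED)
 *		return -errno;
 */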
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_mman.c"
#endif