Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.c')
 drivers/gpu/drm/i915/i915_gem_gtt.c | 1110
 1 file changed, 262 insertions(+), 848 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5101ab6869b4..25295eb626dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1,882 +1,296 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright © 2010 Daniel Vetter
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2020 Intel Corporation
*/
-#include <drm/drmP.h>
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-
-/* PPGTT stuff */
-#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
-
-#define GEN6_PDE_VALID (1 << 0)
-/* gen6+ has bit 11-4 for physical addr bit 39-32 */
-#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+#include <linux/slab.h> /* fault-inject.h is not standalone! */
-#define GEN6_PTE_VALID (1 << 0)
-#define GEN6_PTE_UNCACHED (1 << 1)
-#define HSW_PTE_UNCACHED (0)
-#define GEN6_PTE_CACHE_LLC (2 << 1)
-#define GEN6_PTE_CACHE_LLC_MLC (3 << 1)
-#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+#include <linux/fault-inject.h>
+#include <linux/log2.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/stop_machine.h>
-static gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev,
- dma_addr_t addr,
- enum i915_cache_level level)
-{
- gen6_gtt_pte_t pte = GEN6_PTE_VALID;
- pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
- switch (level) {
- case I915_CACHE_LLC_MLC:
- pte |= GEN6_PTE_CACHE_LLC_MLC;
- break;
- case I915_CACHE_LLC:
- pte |= GEN6_PTE_CACHE_LLC;
- break;
- case I915_CACHE_NONE:
- pte |= GEN6_PTE_UNCACHED;
- break;
- default:
- BUG();
- }
-
- return pte;
-}
+#include <asm/set_memory.h>
+#include <asm/smp.h>
-#define BYT_PTE_WRITEABLE (1 << 1)
-#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)
-
-static gen6_gtt_pte_t byt_pte_encode(struct drm_device *dev,
- dma_addr_t addr,
- enum i915_cache_level level)
-{
- gen6_gtt_pte_t pte = GEN6_PTE_VALID;
- pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
- /* Mark the page as writeable. Other platforms don't have a
- * setting for read-only/writable, so this matches that behavior.
- */
- pte |= BYT_PTE_WRITEABLE;
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
- if (level != I915_CACHE_NONE)
- pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
-
- return pte;
-}
-
-static gen6_gtt_pte_t hsw_pte_encode(struct drm_device *dev,
- dma_addr_t addr,
- enum i915_cache_level level)
-{
- gen6_gtt_pte_t pte = GEN6_PTE_VALID;
- pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
- if (level != I915_CACHE_NONE)
- pte |= GEN6_PTE_CACHE_LLC;
+#include "i915_drv.h"
+#include "i915_gem_evict.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+
+int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ do {
+ if (dma_map_sg_attrs(obj->base.dev->dev,
+ pages->sgl, pages->nents,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_NO_KERNEL_MAPPING |
+ DMA_ATTR_NO_WARN))
+ return 0;
+
+ /*
+ * If the DMA remap fails, one cause can be that we have
+ * too many objects pinned in a small remapping table,
+ * such as swiotlb. Incrementally purge all other objects and
+ * try again - if there are no more pages to remove from
+ * the DMA remapper, i915_gem_shrink will return 0.
+ */
+ GEM_BUG_ON(obj->mm.pages == pages);
+ } while (i915_gem_shrink(NULL, to_i915(obj->base.dev),
+ obj->base.size >> PAGE_SHIFT, NULL,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND));
+
+ return -ENOSPC;
+}
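
[Editorial sketch, not driver code: the function above is an instance of a
map-or-reclaim retry idiom — attempt the mapping, and on failure shrink other
users and try again until the reclaimer reports nothing left to free. Here is
the bare shape, with hypothetical try_map() and reclaim_some() standing in for
dma_map_sg_attrs() and i915_gem_shrink().]

    #include <errno.h>
    #include <stdbool.h>

    extern bool try_map(void *res);          /* hypothetical: attempt the mapping */
    extern unsigned long reclaim_some(void); /* hypothetical: pages freed, 0 if none */

    static int map_with_reclaim(void *res)
    {
            do {
                    if (try_map(res))
                            return 0;
                    /* Mapping failed (e.g. a full swiotlb table): purge
                     * other users and retry until nothing can be freed. */
            } while (reclaim_some());

            return -ENOSPC;
    }
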
+
+void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+ /* XXX This does not prevent more requests being submitted! */
+ if (unlikely(ggtt->do_idle_maps))
+ /* Wait a bit, in the hope it avoids the hang */
+ usleep_range(100, 250);
+
+ dma_unmap_sg(i915->drm.dev, pages->sgl, pages->nents,
+ DMA_BIDIRECTIONAL);
+}
+
+/**
+ * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
+ * @vm: the &struct i915_address_space
+ * @ww: An optional struct i915_gem_ww_ctx.
+ * @node: the &struct drm_mm_node (typically i915_vma.node)
+ * @size: how much space to allocate inside the GTT,
+ * must be #I915_GTT_PAGE_SIZE aligned
+ * @offset: where to insert inside the GTT,
+ * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
+ * (@offset + @size) must fit within the address space
+ * @color: color to apply to node, if this node is not from a VMA,
+ * color must be #I915_COLOR_UNEVICTABLE
+ * @flags: control search and eviction behaviour
+ *
+ * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
+ * the address space (using @size and @color). If the @node does not fit, it
+ * tries to evict any overlapping nodes from the GTT, including any
+ * neighbouring nodes if the colors do not match (to ensure guard pages between
+ * differing domains). See i915_gem_evict_for_node() for the gory details
+ * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
+ * evicting active overlapping objects, and any overlapping node that is pinned
+ * or marked as unevictable will also result in failure.
+ *
+ * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
+ * asked to wait for eviction and interrupted.
+ */
+int i915_gem_gtt_reserve(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
+ struct drm_mm_node *node,
+ u64 size, u64 offset, unsigned long color,
+ unsigned int flags)
+{
+ int err;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
+ GEM_BUG_ON(range_overflows(offset, size, vm->total));
+ GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm);
+ GEM_BUG_ON(drm_mm_node_allocated(node));
+
+ node->size = size;
+ node->start = offset;
+ node->color = color;
+
+ err = drm_mm_reserve_node(&vm->mm, node);
+ if (err != -ENOSPC)
+ return err;
+
+ if (flags & PIN_NOEVICT)
+ return -ENOSPC;
- return pte;
-}
+ err = i915_gem_evict_for_node(vm, ww, node, flags);
+ if (err == 0)
+ err = drm_mm_reserve_node(&vm->mm, node);
-static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
-{
- struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
- gen6_gtt_pte_t __iomem *pd_addr;
- uint32_t pd_entry;
- int i;
-
- WARN_ON(ppgtt->pd_offset & 0x3f);
- pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
- ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- dma_addr_t pt_addr;
-
- pt_addr = ppgtt->pt_dma_addr[i];
- pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
- pd_entry |= GEN6_PDE_VALID;
-
- writel(pd_entry, pd_addr + i);
- }
- readl(pd_addr);
+ return err;
}
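
[Usage sketch, not taken from the driver: reserving a single page at a fixed
offset and failing with -ENOSPC rather than evicting. The offset (SZ_1M here)
and the caller's context — node storage, any locking the address space
requires — are assumptions for illustration only.]

    static int example_reserve_fixed(struct i915_address_space *vm,
                                     struct drm_mm_node *node)
    {
            /* One page at a fixed, illustrative offset; never evict. */
            return i915_gem_gtt_reserve(vm, NULL, node,
                                        I915_GTT_PAGE_SIZE, /* size */
                                        SZ_1M,              /* offset */
                                        I915_COLOR_UNEVICTABLE,
                                        PIN_NOEVICT);
    }
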
-static int gen6_ppgtt_enable(struct drm_device *dev)
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
- uint32_t pd_offset;
- struct intel_ring_buffer *ring;
- struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
- int i;
-
- BUG_ON(ppgtt->pd_offset & 0x3f);
+ u64 range, addr;
- gen6_write_pdes(ppgtt);
+ GEM_BUG_ON(range_overflows(start, len, end));
+ GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
- pd_offset = ppgtt->pd_offset;
- pd_offset /= 64; /* in cachelines, */
- pd_offset <<= 16;
-
- if (INTEL_INFO(dev)->gen == 6) {
- uint32_t ecochk, gab_ctl, ecobits;
-
- ecobits = I915_READ(GAC_ECO_BITS);
- I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
- ECOBITS_PPGTT_CACHE64B);
-
- gab_ctl = I915_READ(GAB_CTL);
- I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
-
- ecochk = I915_READ(GAM_ECOCHK);
- I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
- ECOCHK_PPGTT_CACHE64B);
- I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
- } else if (INTEL_INFO(dev)->gen >= 7) {
- uint32_t ecochk, ecobits;
-
- ecobits = I915_READ(GAC_ECO_BITS);
- I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
-
- ecochk = I915_READ(GAM_ECOCHK);
- if (IS_HASWELL(dev)) {
- ecochk |= ECOCHK_PPGTT_WB_HSW;
+ range = round_down(end - len, align) - round_up(start, align);
+ if (range) {
+ if (sizeof(unsigned long) == sizeof(u64)) {
+ addr = get_random_u64();
} else {
- ecochk |= ECOCHK_PPGTT_LLC_IVB;
- ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
- }
- I915_WRITE(GAM_ECOCHK, ecochk);
- /* GFX_MODE is per-ring on gen7+ */
- }
-
- for_each_ring(ring, dev_priv, i) {
- if (INTEL_INFO(dev)->gen >= 7)
- I915_WRITE(RING_MODE_GEN7(ring),
- _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
- I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
- I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
- }
- return 0;
-}
-
-/* PPGTT support for Sandybdrige/Gen6 and later */
-static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
- unsigned first_entry,
- unsigned num_entries)
-{
- gen6_gtt_pte_t *pt_vaddr, scratch_pte;
- unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
- unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
- unsigned last_pte, i;
-
- scratch_pte = ppgtt->pte_encode(ppgtt->dev,
- ppgtt->scratch_page_dma_addr,
- I915_CACHE_LLC);
-
- while (num_entries) {
- last_pte = first_pte + num_entries;
- if (last_pte > I915_PPGTT_PT_ENTRIES)
- last_pte = I915_PPGTT_PT_ENTRIES;
-
- pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
-
- for (i = first_pte; i < last_pte; i++)
- pt_vaddr[i] = scratch_pte;
-
- kunmap_atomic(pt_vaddr);
-
- num_entries -= last_pte - first_pte;
- first_pte = 0;
- act_pt++;
- }
-}
-
-static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
- struct sg_table *pages,
- unsigned first_entry,
- enum i915_cache_level cache_level)
-{
- gen6_gtt_pte_t *pt_vaddr;
- unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
- unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
- struct sg_page_iter sg_iter;
-
- pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
- for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
- dma_addr_t page_addr;
-
- page_addr = sg_page_iter_dma_address(&sg_iter);
- pt_vaddr[act_pte] = ppgtt->pte_encode(ppgtt->dev, page_addr,
- cache_level);
- if (++act_pte == I915_PPGTT_PT_ENTRIES) {
- kunmap_atomic(pt_vaddr);
- act_pt++;
- pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
- act_pte = 0;
-
+ addr = get_random_u32();
+ if (range > U32_MAX) {
+ addr <<= 32;
+ addr |= get_random_u32();
+ }
}
- }
- kunmap_atomic(pt_vaddr);
-}
-
-static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
-{
- int i;
-
- if (ppgtt->pt_dma_addr) {
- for (i = 0; i < ppgtt->num_pd_entries; i++)
- pci_unmap_page(ppgtt->dev->pdev,
- ppgtt->pt_dma_addr[i],
- 4096, PCI_DMA_BIDIRECTIONAL);
+ div64_u64_rem(addr, range, &addr);
+ start += addr;
}
- kfree(ppgtt->pt_dma_addr);
- for (i = 0; i < ppgtt->num_pd_entries; i++)
- __free_page(ppgtt->pt_pages[i]);
- kfree(ppgtt->pt_pages);
- kfree(ppgtt);
-}
-
-static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
-{
- struct drm_device *dev = ppgtt->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
- unsigned first_pd_entry_in_global_pt;
- int i;
- int ret = -ENOMEM;
-
- /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
- * entries. For aliasing ppgtt support we just steal them at the end for
- * now. */
- first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
-
- if (IS_HASWELL(dev)) {
- ppgtt->pte_encode = hsw_pte_encode;
- } else if (IS_VALLEYVIEW(dev)) {
- ppgtt->pte_encode = byt_pte_encode;
- } else {
- ppgtt->pte_encode = gen6_pte_encode;
- }
- ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
- ppgtt->enable = gen6_ppgtt_enable;
- ppgtt->clear_range = gen6_ppgtt_clear_range;
- ppgtt->insert_entries = gen6_ppgtt_insert_entries;
- ppgtt->cleanup = gen6_ppgtt_cleanup;
- ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
- GFP_KERNEL);
- if (!ppgtt->pt_pages)
- return -ENOMEM;
-
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
- if (!ppgtt->pt_pages[i])
- goto err_pt_alloc;
- }
-
- ppgtt->pt_dma_addr = kzalloc(sizeof(dma_addr_t) *ppgtt->num_pd_entries,
- GFP_KERNEL);
- if (!ppgtt->pt_dma_addr)
- goto err_pt_alloc;
-
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- dma_addr_t pt_addr;
-
- pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
- PCI_DMA_BIDIRECTIONAL);
-
- if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
- ret = -EIO;
- goto err_pd_pin;
-
- }
- ppgtt->pt_dma_addr[i] = pt_addr;
- }
-
- ppgtt->clear_range(ppgtt, 0,
- ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
-
- ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
-
- return 0;
-
-err_pd_pin:
- if (ppgtt->pt_dma_addr) {
- for (i--; i >= 0; i--)
- pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
- 4096, PCI_DMA_BIDIRECTIONAL);
- }
-err_pt_alloc:
- kfree(ppgtt->pt_dma_addr);
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- if (ppgtt->pt_pages[i])
- __free_page(ppgtt->pt_pages[i]);
- }
- kfree(ppgtt->pt_pages);
-
- return ret;
-}
-
-static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct i915_hw_ppgtt *ppgtt;
- int ret;
-
- ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
- if (!ppgtt)
- return -ENOMEM;
-
- ppgtt->dev = dev;
- ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
-
- if (INTEL_INFO(dev)->gen < 8)
- ret = gen6_ppgtt_init(ppgtt);
- else
- BUG();
-
- if (ret)
- kfree(ppgtt);
- else
- dev_priv->mm.aliasing_ppgtt = ppgtt;
-
- return ret;
-}
-
-void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-
- if (!ppgtt)
- return;
-
- ppgtt->cleanup(ppgtt);
- dev_priv->mm.aliasing_ppgtt = NULL;
-}
-
-void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
- struct drm_i915_gem_object *obj,
- enum i915_cache_level cache_level)
-{
- ppgtt->insert_entries(ppgtt, obj->pages,
- obj->gtt_space->start >> PAGE_SHIFT,
- cache_level);
-}
-
-void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
- struct drm_i915_gem_object *obj)
-{
- ppgtt->clear_range(ppgtt,
- obj->gtt_space->start >> PAGE_SHIFT,
- obj->base.size >> PAGE_SHIFT);
-}
-
-extern int intel_iommu_gfx_mapped;
-/* Certain Gen5 chipsets require require idling the GPU before
- * unmapping anything from the GTT when VT-d is enabled.
+ return round_up(start, align);
+}
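
[Since random_offset() is pure arithmetic, it can be checked outside the
kernel. The standalone program below mirrors it, substituting libc rand() for
get_random_u64() and a plain modulo for div64_u64_rem(); as with the kernel
callers, a power-of-two align is assumed.]

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static uint64_t rup(uint64_t x, uint64_t a)   { return (x + a - 1) & ~(a - 1); }
    static uint64_t rdown(uint64_t x, uint64_t a) { return x & ~(a - 1); }

    /* Pick an aligned addr such that [addr, addr + len) fits in [start, end). */
    static uint64_t random_offset_demo(uint64_t start, uint64_t end,
                                       uint64_t len, uint64_t align)
    {
            uint64_t range = rdown(end - len, align) - rup(start, align);

            if (range)
                    start += (uint64_t)rand() % range; /* kernel: get_random_u64() */

            return rup(start, align);
    }

    int main(void)
    {
            /* A 64 KiB object placed in a 1 MiB window at 4 KiB alignment. */
            printf("0x%llx\n", (unsigned long long)
                   random_offset_demo(0, 0x100000, 0x10000, 0x1000));
            return 0;
    }
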
+
+/**
+ * i915_gem_gtt_insert - insert a node into an address_space (GTT)
+ * @vm: the &struct i915_address_space
+ * @ww: An optional struct i915_gem_ww_ctx.
+ * @node: the &struct drm_mm_node (typically i915_vma.node)
+ * @size: how much space to allocate inside the GTT,
+ * must be #I915_GTT_PAGE_SIZE aligned
+ * @alignment: required alignment of starting offset, may be 0 but
+ * if specified, this must be a power-of-two and at least
+ * #I915_GTT_MIN_ALIGNMENT
+ * @color: color to apply to node
+ * @start: start of any range restriction inside GTT (0 for all),
+ * must be #I915_GTT_PAGE_SIZE aligned
+ * @end: end of any range restriction inside GTT (U64_MAX for all),
+ * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
+ * @flags: control search and eviction behaviour
+ *
+ * i915_gem_gtt_insert() first searches for an available hole into which
+ * it can insert the node. The hole address is aligned to @alignment and
+ * its @size must then fit entirely within the [@start, @end] bounds. The
+ * nodes on either side of the hole must match @color, or else a guard page
+ * will be inserted between the two nodes (or the node evicted). If no
+ * suitable hole is found, first a victim is randomly selected and tested
+ * for eviction; failing that, the LRU list of objects within the GTT is
+ * scanned to find the first set of replacement nodes to create the hole.
+ * Those old overlapping nodes are evicted from the GTT (and so must be
+ * rebound before any future use). Any node that is currently pinned cannot
+ * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is
+ * currently active and #PIN_NONBLOCK is specified, that node is skipped when
+ * searching for an eviction candidate. See i915_gem_evict_something() for
+ * the gory details on the eviction algorithm.
+ *
+ * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
+ * asked to wait for eviction and interrupted.
*/
-static inline bool needs_idle_maps(struct drm_device *dev)
-{
-#ifdef CONFIG_INTEL_IOMMU
- /* Query intel_iommu to see if we need the workaround. Presumably that
- * was loaded first.
- */
- if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
- return true;
-#endif
- return false;
-}
-
-static bool do_idling(struct drm_i915_private *dev_priv)
-{
- bool ret = dev_priv->mm.interruptible;
-
- if (unlikely(dev_priv->gtt.do_idle_maps)) {
- dev_priv->mm.interruptible = false;
- if (i915_gpu_idle(dev_priv->dev)) {
- DRM_ERROR("Couldn't idle GPU\n");
- /* Wait a bit, in hopes it avoids the hang */
- udelay(10);
- }
- }
-
- return ret;
-}
-
-static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
-{
- if (unlikely(dev_priv->gtt.do_idle_maps))
- dev_priv->mm.interruptible = interruptible;
-}
-
-void i915_gem_restore_gtt_mappings(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct drm_i915_gem_object *obj;
-
- /* First fill our portion of the GTT with scratch pages */
- dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
- dev_priv->gtt.total / PAGE_SIZE);
-
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- i915_gem_clflush_object(obj);
- i915_gem_gtt_bind_object(obj, obj->cache_level);
- }
-
- i915_gem_chipset_flush(dev);
-}
-
-int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
-{
- if (obj->has_dma_mapping)
- return 0;
-
- if (!dma_map_sg(&obj->base.dev->pdev->dev,
- obj->pages->sgl, obj->pages->nents,
- PCI_DMA_BIDIRECTIONAL))
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
+ struct drm_mm_node *node,
+ u64 size, u64 alignment, unsigned long color,
+ u64 start, u64 end, unsigned int flags)
+{
+ enum drm_mm_insert_mode mode;
+ u64 offset;
+ int err;
+
+ lockdep_assert_held(&vm->mutex);
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(alignment && !is_power_of_2(alignment));
+ GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
+ GEM_BUG_ON(start >= end);
+ GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm);
+ GEM_BUG_ON(drm_mm_node_allocated(node));
+
+ if (unlikely(range_overflows(start, size, end)))
return -ENOSPC;
- return 0;
-}
-
-/*
- * Binds an object into the global gtt with the specified cache level. The object
- * will be accessible to the GPU via commands whose operands reference offsets
- * within the global GTT as well as accessible by the GPU through the GMADR
- * mapped BAR (dev_priv->mm.gtt->gtt).
- */
-static void gen6_ggtt_insert_entries(struct drm_device *dev,
- struct sg_table *st,
- unsigned int first_entry,
- enum i915_cache_level level)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- gen6_gtt_pte_t __iomem *gtt_entries =
- (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
- int i = 0;
- struct sg_page_iter sg_iter;
- dma_addr_t addr;
-
- for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
- addr = sg_page_iter_dma_address(&sg_iter);
- iowrite32(dev_priv->gtt.pte_encode(dev, addr, level),
- &gtt_entries[i]);
- i++;
- }
-
- /* XXX: This serves as a posting read to make sure that the PTE has
- * actually been updated. There is some concern that even though
- * registers and PTEs are within the same BAR that they are potentially
- * of NUMA access patterns. Therefore, even with the way we assume
- * hardware should work, we must keep this posting read for paranoia.
- */
- if (i != 0)
- WARN_ON(readl(&gtt_entries[i-1])
- != dev_priv->gtt.pte_encode(dev, addr, level));
+ if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
+ return -ENOSPC;
- /* This next bit makes the above posting read even more important. We
- * want to flush the TLBs only after we're certain all the PTE updates
- * have finished.
+ mode = DRM_MM_INSERT_BEST;
+ if (flags & PIN_HIGH)
+ mode = DRM_MM_INSERT_HIGHEST;
+ if (flags & PIN_MAPPABLE)
+ mode = DRM_MM_INSERT_LOW;
+
+ /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+ * so we know that we always have a minimum alignment of 4096.
+ * The drm_mm range manager is optimised to return results
+ * with zero alignment, so where possible use the optimal
+ * path.
*/
- I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
- POSTING_READ(GFX_FLSH_CNTL_GEN6);
-}
-
-static void gen6_ggtt_clear_range(struct drm_device *dev,
- unsigned int first_entry,
- unsigned int num_entries)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
- (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
- const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
- int i;
-
- if (WARN(num_entries > max_entries,
- "First entry = %d; Num entries = %d (max=%d)\n",
- first_entry, num_entries, max_entries))
- num_entries = max_entries;
-
- scratch_pte = dev_priv->gtt.pte_encode(dev,
- dev_priv->gtt.scratch_page_dma,
- I915_CACHE_LLC);
- for (i = 0; i < num_entries; i++)
- iowrite32(scratch_pte, &gtt_base[i]);
- readl(gtt_base);
-}
-
-
-static void i915_ggtt_insert_entries(struct drm_device *dev,
- struct sg_table *st,
- unsigned int pg_start,
- enum i915_cache_level cache_level)
-{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
- AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
- intel_gtt_insert_sg_entries(st, pg_start, flags);
-
-}
-
-static void i915_ggtt_clear_range(struct drm_device *dev,
- unsigned int first_entry,
- unsigned int num_entries)
-{
- intel_gtt_clear_range(first_entry, num_entries);
-}
-
-
-void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
- enum i915_cache_level cache_level)
-{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
-
- dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
- obj->gtt_space->start >> PAGE_SHIFT,
- cache_level);
-
- obj->has_global_gtt_mapping = 1;
-}
-
-void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
-{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
-
- dev_priv->gtt.gtt_clear_range(obj->base.dev,
- obj->gtt_space->start >> PAGE_SHIFT,
- obj->base.size >> PAGE_SHIFT);
-
- obj->has_global_gtt_mapping = 0;
-}
-
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
-{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
- bool interruptible;
-
- interruptible = do_idling(dev_priv);
-
- if (!obj->has_dma_mapping)
- dma_unmap_sg(&dev->pdev->dev,
- obj->pages->sgl, obj->pages->nents,
- PCI_DMA_BIDIRECTIONAL);
+ BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
+ if (alignment <= I915_GTT_MIN_ALIGNMENT)
+ alignment = 0;
+
+ err = drm_mm_insert_node_in_range(&vm->mm, node,
+ size, alignment, color,
+ start, end, mode);
+ if (err != -ENOSPC)
+ return err;
+
+ if (mode & DRM_MM_INSERT_ONCE) {
+ err = drm_mm_insert_node_in_range(&vm->mm, node,
+ size, alignment, color,
+ start, end,
+ DRM_MM_INSERT_BEST);
+ if (err != -ENOSPC)
+ return err;
+ }
- undo_idling(dev_priv, interruptible);
-}
+ if (flags & PIN_NOEVICT)
+ return -ENOSPC;
-static void i915_gtt_color_adjust(struct drm_mm_node *node,
- unsigned long color,
- unsigned long *start,
- unsigned long *end)
-{
- if (node->color != color)
- *start += 4096;
-
- if (!list_empty(&node->node_list)) {
- node = list_entry(node->node_list.next,
- struct drm_mm_node,
- node_list);
- if (node->allocated && node->color != color)
- *end -= 4096;
- }
-}
-void i915_gem_setup_global_gtt(struct drm_device *dev,
- unsigned long start,
- unsigned long mappable_end,
- unsigned long end)
-{
- /* Let GEM Manage all of the aperture.
+ /*
+ * No free space, pick a slot at random.
*
- * However, leave one page at the end still bound to the scratch page.
- * There are a number of places where the hardware apparently prefetches
- * past the end of the object, and we've seen multiple hangs with the
- * GPU head pointer stuck in a batchbuffer bound at the last page of the
- * aperture. One page should be enough to keep any prefetching inside
- * of the aperture.
+ * There is a pathological case here using a GTT shared between
+ * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
+ *
+ * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
+ * (64k objects) (448k objects)
+ *
+ * Now imagine that the eviction LRU is ordered top-down (just because
+ * pathology meets real life), and that we need to evict an object to
+ * make room inside the aperture. The eviction scan then has to walk
+ * the 448k list before it finds one within range. And now imagine that
+ * it has to search for a new hole between every byte inside the memcpy,
+ * for several simultaneous clients.
+ *
+ * On a full-ppgtt system, if we have run out of available space, there
+ * will be lots and lots of objects in the eviction list! Again,
+ * searching that LRU list may be slow if we are also applying any
+ * range restrictions (e.g. restriction to low 4GiB) and so, for
+ * simplicity and similarity between the different GTTs, try the single
+ * random replacement first.
*/
- drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_mm_node *entry;
- struct drm_i915_gem_object *obj;
- unsigned long hole_start, hole_end;
-
- BUG_ON(mappable_end > end);
-
- /* Subtract the guard page ... */
- drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
- if (!HAS_LLC(dev))
- dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
-
- /* Mark any preallocated objects as occupied */
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
- obj->gtt_offset, obj->base.size);
-
- BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
- obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
- obj->gtt_offset,
- obj->base.size,
- false);
- obj->has_global_gtt_mapping = 1;
- }
-
- dev_priv->gtt.start = start;
- dev_priv->gtt.total = end - start;
-
- /* Clear any non-preallocated blocks */
- drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space,
- hole_start, hole_end) {
- DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
- hole_start, hole_end);
- dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
- (hole_end-hole_start) / PAGE_SIZE);
- }
-
- /* And finally clear the reserved guard page */
- dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
-}
-
-static bool
-intel_enable_ppgtt(struct drm_device *dev)
-{
- if (i915_enable_ppgtt >= 0)
- return i915_enable_ppgtt;
+ offset = random_offset(start, end,
+ size, alignment ?: I915_GTT_MIN_ALIGNMENT);
+ err = i915_gem_gtt_reserve(vm, ww, node, size, offset, color, flags);
+ if (err != -ENOSPC)
+ return err;
-#ifdef CONFIG_INTEL_IOMMU
- /* Disable ppgtt on SNB if VT-d is on. */
- if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
- return false;
-#endif
-
- return true;
-}
-
-void i915_gem_init_global_gtt(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- unsigned long gtt_size, mappable_size;
-
- gtt_size = dev_priv->gtt.total;
- mappable_size = dev_priv->gtt.mappable_end;
-
- if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
- int ret;
-
- if (INTEL_INFO(dev)->gen <= 7) {
- /* PPGTT pdes are stolen from global gtt ptes, so shrink the
- * aperture accordingly when using aliasing ppgtt. */
- gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
- }
-
- i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
+ if (flags & PIN_NOSEARCH)
+ return -ENOSPC;
- ret = i915_gem_init_aliasing_ppgtt(dev);
- if (!ret)
- return;
+ /* Randomly selected placement is pinned, do a search */
+ err = i915_gem_evict_something(vm, ww, size, alignment, color,
+ start, end, flags);
+ if (err)
+ return err;
- DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
- drm_mm_takedown(&dev_priv->mm.gtt_space);
- gtt_size += I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
- }
- i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
+ return drm_mm_insert_node_in_range(&vm->mm, node,
+ size, alignment, color,
+ start, end, DRM_MM_INSERT_EVICT);
}
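
[Finally, a hedged usage sketch of the search-then-evict path above: allocate
size bytes anywhere in the address space, preferring the top (PIN_HIGH). The
caller is assumed to hold vm->mutex and to pass a size aligned to
I915_GTT_PAGE_SIZE, per the GEM_BUG_ONs in the function.]

    static int example_insert_high(struct i915_address_space *vm,
                                   struct drm_mm_node *node, u64 size)
    {
            return i915_gem_gtt_insert(vm, NULL, node,
                                       size, 0, /* no extra alignment */
                                       I915_COLOR_UNEVICTABLE,
                                       0, U64_MAX, /* whole address space */
                                       PIN_HIGH);
    }
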
-static int setup_scratch_page(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct page *page;
- dma_addr_t dma_addr;
-
- page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
- if (page == NULL)
- return -ENOMEM;
- get_page(page);
- set_pages_uc(page, 1);
-
-#ifdef CONFIG_INTEL_IOMMU
- dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev, dma_addr))
- return -EINVAL;
-#else
- dma_addr = page_to_phys(page);
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_gtt.c"
#endif
- dev_priv->gtt.scratch_page = page;
- dev_priv->gtt.scratch_page_dma = dma_addr;
-
- return 0;
-}
-
-static void teardown_scratch_page(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- set_pages_wb(dev_priv->gtt.scratch_page, 1);
- pci_unmap_page(dev->pdev, dev_priv->gtt.scratch_page_dma,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- put_page(dev_priv->gtt.scratch_page);
- __free_page(dev_priv->gtt.scratch_page);
-}
-
-static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
-{
- snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
- snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
- return snb_gmch_ctl << 20;
-}
-
-static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
-{
- snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
- snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
- return snb_gmch_ctl << 25; /* 32 MB units */
-}
-
-static int gen6_gmch_probe(struct drm_device *dev,
- size_t *gtt_total,
- size_t *stolen,
- phys_addr_t *mappable_base,
- unsigned long *mappable_end)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- phys_addr_t gtt_bus_addr;
- unsigned int gtt_size;
- u16 snb_gmch_ctl;
- int ret;
-
- *mappable_base = pci_resource_start(dev->pdev, 2);
- *mappable_end = pci_resource_len(dev->pdev, 2);
-
- /* 64/512MB is the current min/max we actually know of, but this is just
- * a coarse sanity check.
- */
- if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
- DRM_ERROR("Unknown GMADR size (%lx)\n",
- dev_priv->gtt.mappable_end);
- return -ENXIO;
- }
-
- if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
- pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
- pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
- gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
-
- *stolen = gen6_get_stolen_size(snb_gmch_ctl);
- *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
-
- /* For Modern GENs the PTEs and register space are split in the BAR */
- gtt_bus_addr = pci_resource_start(dev->pdev, 0) +
- (pci_resource_len(dev->pdev, 0) / 2);
-
- dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
- if (!dev_priv->gtt.gsm) {
- DRM_ERROR("Failed to map the gtt page table\n");
- return -ENOMEM;
- }
-
- ret = setup_scratch_page(dev);
- if (ret)
- DRM_ERROR("Scratch setup failed\n");
-
- dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
- dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
-
- return ret;
-}
-
-static void gen6_gmch_remove(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- iounmap(dev_priv->gtt.gsm);
- teardown_scratch_page(dev_priv->dev);
-}
-
-static int i915_gmch_probe(struct drm_device *dev,
- size_t *gtt_total,
- size_t *stolen,
- phys_addr_t *mappable_base,
- unsigned long *mappable_end)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- int ret;
-
- ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
- if (!ret) {
- DRM_ERROR("failed to set up gmch\n");
- return -EIO;
- }
-
- intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
-
- dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
- dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
- dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
-
- return 0;
-}
-
-static void i915_gmch_remove(struct drm_device *dev)
-{
- intel_gmch_remove();
-}
-
-int i915_gem_gtt_init(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct i915_gtt *gtt = &dev_priv->gtt;
- int ret;
-
- if (INTEL_INFO(dev)->gen <= 5) {
- dev_priv->gtt.gtt_probe = i915_gmch_probe;
- dev_priv->gtt.gtt_remove = i915_gmch_remove;
- } else {
- dev_priv->gtt.gtt_probe = gen6_gmch_probe;
- dev_priv->gtt.gtt_remove = gen6_gmch_remove;
- if (IS_HASWELL(dev)) {
- dev_priv->gtt.pte_encode = hsw_pte_encode;
- } else if (IS_VALLEYVIEW(dev)) {
- dev_priv->gtt.pte_encode = byt_pte_encode;
- } else {
- dev_priv->gtt.pte_encode = gen6_pte_encode;
- }
- }
-
- ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
- &dev_priv->gtt.stolen_size,
- &gtt->mappable_base,
- &gtt->mappable_end);
- if (ret)
- return ret;
-
- /* GMADR is the PCI mmio aperture into the global GTT. */
- DRM_INFO("Memory usable by graphics device = %zdM\n",
- dev_priv->gtt.total >> 20);
- DRM_DEBUG_DRIVER("GMADR size = %ldM\n",
- dev_priv->gtt.mappable_end >> 20);
- DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n",
- dev_priv->gtt.stolen_size >> 20);
-
- return 0;
-}