From a39a68054f63da0ea3b4806e1bfad79670a93d9f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 15 Nov 2012 15:40:05 +0100 Subject: drm/i915: simplify shmem pwrite/pread slowpath handling The shmem paths for pwrite/pread used a clever trick to hold onto a single page when dropping the big dev->struct_mutex for the slowpath. But this ran the risk of reinstating (or not completely purging) the backing storage when dropping purgeable objects. Hence the code needed to keep track of whether it ever dropped the lock, and if it did, manually check whether it needs to re-purge the backing storage. But thanks to the pages pin count introduced in commit a5570178c059cec59e9835be20bc8546377fa7b5 Author: Chris Wilson Date: Tue Sep 4 21:02:54 2012 +0100 drm/i915: Pin backing pages whilst exporting through a dmabuf vmap which allowed us to pin the backing storage and remove that page reference trick from shmem_pwrite/read in commit f60d7f0c1d55a935475ab394955cafddefaa6533 Author: Chris Wilson Date: Tue Sep 4 21:02:56 2012 +0100 drm/i915: Pin backing pages for pread and commit 755d22184f1e5015b040acee794542d9cf8a16c5 Author: Chris Wilson Date: Tue Sep 4 21:02:55 2012 +0100 drm/i915: Pin backing pages for pwrite we can now abolish this check. The slowpath cleanup completely disappears from pread, and for pwrite we're only left with the domain fixup in case someone moved the object out of the cpu domain from under us. A follow-on patch will optimize that a notch more. Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4ef8b5714337..36f629a79d88 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -407,7 +407,6 @@ i915_gem_shmem_pread(struct drm_device *dev, loff_t offset; int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; - int hit_slowpath = 0; int prefaulted = 0; int needs_clflush = 0; struct scatterlist *sg; @@ -469,7 +468,6 @@ i915_gem_shmem_pread(struct drm_device *dev, if (ret == 0) goto next_page; - hit_slowpath = 1; mutex_unlock(&dev->struct_mutex); if (!prefaulted) { @@ -502,12 +500,6 @@ next_page: out: i915_gem_object_unpin_pages(obj); - if (hit_slowpath) { - /* Fixup: Kill any reinstated backing storage pages */ - if (obj->madv == __I915_MADV_PURGED) - i915_gem_object_truncate(obj); - } - return ret; } @@ -838,11 +830,8 @@ out: i915_gem_object_unpin_pages(obj); if (hit_slowpath) { - /* Fixup: Kill any reinstated backing storage pages */ - if (obj->madv == __I915_MADV_PURGED) - i915_gem_object_truncate(obj); - /* and flush dirty cachelines in case the object isn't in the cpu write - * domain anymore. */ + /* Fixup: Flush dirty cachelines in case the object isn't in the + * cpu write domain anymore. */ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj); i915_gem_chipset_flush(dev); -- cgit From 8dcf015eb967c718962c0690330d9a94d56f2c5d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 15 Nov 2012 16:53:58 +0100 Subject: drm/i915: optimize the shmem_pwrite slowpath handling Since we drop dev->struct_mutex when going through the slowpath, the object might have been moved out of the cpu domain. Hence we need to clflush the entire object to ensure that after the ioctl returns, everything is coherent again (interwoven writes are ill-defined anyway). But we only need to do this if we start in the cpu domain and the object requires flushing for coherency. So don't do the flushing if the object is coherent anyway or if we've done in-line clfushing already. v2: i915_gem_clflush_object already checks whether the object is coherent and if so, drops the flushing. Hence we don't need to check that ourselves, simplifying the condition. v3: Reorder the checks for better clarity (and adjust the comment accordingly), suggested by Chris Wilson. Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 36f629a79d88..5c8df572cd17 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -830,9 +830,13 @@ out: i915_gem_object_unpin_pages(obj); if (hit_slowpath) { - /* Fixup: Flush dirty cachelines in case the object isn't in the - * cpu write domain anymore. */ - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + /* + * Fixup: Flush cpu caches in case we didn't flush the dirty + * cachelines in-line while writing and the object moved + * out of the cpu write domain while we've dropped the lock. + */ + if (!needs_clflush_after && + obj->base.write_domain != I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj); i915_gem_chipset_flush(dev); } -- cgit From 0104fdbb84d7adb0e377ed05bf75eba97b007544 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Nov 2012 11:32:26 +0000 Subject: drm/i915: Introduce i915_gem_object_create_stolen() Allow for the creation of GEM objects backed by stolen memory. As these are not backed by ordinary pages, we create a fake dma mapping and store the address in the scatterlist rather than obj->pages. v2: Mark _i915_gem_object_create_stolen() as static, as noticed by Jesse Barnes. Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5c8df572cd17..3de62b0127a5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3772,6 +3772,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) obj->pages_pin_count = 0; i915_gem_object_put_pages(obj); i915_gem_object_free_mmap_offset(obj); + i915_gem_object_release_stolen(obj); BUG_ON(obj->pages); -- cgit From 42dcedd4f2e715dc0313e359c8288e6397843fff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Nov 2012 11:32:30 +0000 Subject: drm/i915: Use a slab for object allocation The primary purpose of this was to debug some use-after-free memory corruption that was causing an OOPS inside drm/i915. As it turned out the corruption was being caused elsewhere and i915.ko as a major user of many objects was being hit hardest. Indeed as we do frequent the generic kmalloc caches, dedicating one to ourselves (or at least naming one for us depending upon the core) aids debugging our own slab usage. Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3de62b0127a5..dfe7174a7c03 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -192,6 +192,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } +void *i915_gem_object_alloc(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO); +} + +void i915_gem_object_free(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + kmem_cache_free(dev_priv->slab, obj); +} + static int i915_gem_create(struct drm_file *file, struct drm_device *dev, @@ -215,7 +227,7 @@ i915_gem_create(struct drm_file *file, if (ret) { drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev->dev_private, obj->base.size); - kfree(obj); + i915_gem_object_free(obj); return ret; } @@ -3695,12 +3707,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct address_space *mapping; u32 mask; - obj = kzalloc(sizeof(*obj), GFP_KERNEL); + obj = i915_gem_object_alloc(dev); if (obj == NULL) return NULL; if (drm_gem_object_init(dev, &obj->base, size) != 0) { - kfree(obj); + i915_gem_object_free(obj); return NULL; } @@ -3783,7 +3795,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) i915_gem_info_remove_obj(dev_priv, obj->base.size); kfree(obj->bit_17); - kfree(obj); + i915_gem_object_free(obj); } int @@ -4101,8 +4113,14 @@ init_ring_lists(struct intel_ring_buffer *ring) void i915_gem_load(struct drm_device *dev) { - int i; drm_i915_private_t *dev_priv = dev->dev_private; + int i; + + dev_priv->slab = + kmem_cache_create("i915_gem_object", + sizeof(struct drm_i915_gem_object), 0, + SLAB_HWCACHE_ALIGN, + NULL); INIT_LIST_HEAD(&dev_priv->mm.active_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); -- cgit From 4239ca779dbe47a310a3106d9f4cd5458014bdb6 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Dec 2012 16:26:16 +0000 Subject: drm/i915: Fix dieing -> dying typo Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dfe7174a7c03..b68bc02745db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2818,7 +2818,7 @@ static bool i915_gem_valid_gtt_space(struct drm_device *dev, /* On non-LLC machines we have to be careful when putting differing * types of snoopable memory together to avoid the prefetcher - * crossing memory domains and dieing. + * crossing memory domains and dying. */ if (HAS_LLC(dev)) return true; -- cgit From 1a240d4de2ccf40de5796a4d1dbb3a0236051fc9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 29 Nov 2012 22:18:51 +0100 Subject: drm/i915: fixup sparse warnings - __iomem where there is none (I love how we mix these things up). - Use gfp_t instead of an other plain type. - Unconfuse one place about enum pipe vs enum transcoder - for the pch transcoder we actually use the pipe enum. Fixup the other cases where we assign the pipe to the cpu transcoder with explicit casts. - Declare the mch_lock properly in a header. There is still a decent mess in intel_bios.c about __iomem, but heck, this is x86 and we're allowed to do that. Makes-sparse-happy: Chris Wilson [danvet: Use a space after the cast consistently and fix up the newly-added cast in i915_irq.c to properly use __iomem.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b68bc02745db..cb941448c2bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3705,7 +3705,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, { struct drm_i915_gem_object *obj; struct address_space *mapping; - u32 mask; + gfp_t mask; obj = i915_gem_object_alloc(dev); if (obj == NULL) -- cgit From 498d2ac15ce0fc08edb005a7faf9ed6b5aa028d8 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 4 Dec 2012 15:12:04 +0200 Subject: drm/i915: Add intel_ring_handle_seqno wrap If there are pre-wrap values in semaphore-mbox registers after wrap, syncing against some after-wrap request will complete immediately. Fix this by emitting ring commands to set mbox registers to zero when the wrap happens. v2: Use __intel_ring_begin to emit ring commands, from Chris Wilson. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson [danvet: Add a small comment to handle_seqno_wrap.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cb941448c2bc..02d315164aa9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1952,6 +1952,10 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev) i915_gem_retire_requests(dev); for_each_ring(ring, dev_priv, i) { + ret = intel_ring_handle_seqno_wrap(ring); + if (ret) + return ret; + for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) ring->sync_seqno[j] = 0; } -- cgit From e9b73c67390a5d4faec1d22cbdf24cd6fcca53f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Dec 2012 21:03:14 +0000 Subject: drm/i915: Reduce memory pressure during shrinker by preallocating swizzle pages On a machine with bit17 swizzling, we need to store the bit17 of the physical page address in put-pages. This requires a memory allocation, on average less than a page, which may be difficult to satisfy is the request to put-pages is on behalf of the shrinker. We could allow that allocation to pull from the reserved memory pools, but it seems much safer to preallocate the array for tiled objects on affected machines. v2: Export i915_gem_object_needs_bit17_swizzle() for reuse. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 02d315164aa9..e4b233df576f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -271,14 +271,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, args->size, &args->handle); } -static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) -{ - drm_i915_private_t *dev_priv = obj->base.dev->dev_private; - - return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && - obj->tiling_mode != I915_TILING_NONE; -} - static inline int __copy_to_user_swizzled(char __user *cpu_vaddr, const char *gpu_vaddr, int gpu_offset, -- cgit From f72b3435c1a75406d82d6e252bb78f009efd4bd9 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 10 Dec 2012 15:41:48 +0200 Subject: drm/i915: Don't emit semaphore wait if wrap happened If wrap just happened we need to prevent emitting waits for pre wrap values. Detect this and emit no-ops instead. v2: Use olr > seqno to detect wrap instead of *seqno == 0 as suggested by Chris Wilson. v3: Use last used seqno to detect the wraparound. From Chris Wilson v4: Fixed unnecessary last_seqno assigment References: https://bugs.freedesktop.org/show_bug.cgi?id=57967 Signed-off-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e4b233df576f..a81b78a59bd9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1969,7 +1969,7 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) dev_priv->next_seqno = 1; } - *seqno = dev_priv->next_seqno++; + *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; return 0; } -- cgit From 107f27a5df8ad33a351d6e82fe95ff92b428f72e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Dec 2012 13:56:17 +0200 Subject: drm/i915: Open-code i915_gpu_idle() for handling seqno wrapping The complication is that during seqno wrapping we must be extremely careful not to write to any ring as that will require a new seqno, and so would recurse back into the seqno wrap handler. So we cannot call i915_gpu_idle() as that does additional work beyond simply retiring the current set of requests, and instead must do the minimal work ourselves during seqno wrapping. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a81b78a59bd9..bc73f98c3631 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1938,11 +1938,15 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev) if (ret == 0) return ret; - ret = i915_gpu_idle(dev); - if (ret) - return ret; - + /* Carefully retire all requests without writing to the rings */ + for_each_ring(ring, dev_priv, i) { + ret = intel_ring_idle(ring); + if (ret) + return ret; + } i915_gem_retire_requests(dev); + + /* Finally reset hw state */ for_each_ring(ring, dev_priv, i) { ret = intel_ring_handle_seqno_wrap(ring); if (ret) -- cgit From 9e8e36879f268f1652e11b1c8560bbb67cf4f08e Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 4 Dec 2012 15:12:05 +0200 Subject: drm/i915: Set initial seqno value close to wrap boundary To gain confidence in the wrap handling, make it happen quite soon after the boot. Signed-off-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bc73f98c3631..6380c6083cb2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3943,7 +3943,7 @@ i915_gem_init_hw(struct drm_device *dev) goto cleanup_bsd_ring; } - dev_priv->next_seqno = 1; + dev_priv->next_seqno = (u32)-1 - 0x1000; /* * XXX: There was some w/a described somewhere suggesting loading -- cgit From eb119bd612906519cacef2536a9a524c2da5f7fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 16 Dec 2012 12:43:36 +0000 Subject: drm/i915: Access to snooped system memory through the GTT is incoherent We ignore all the user requests to handle flushing to the GTT domain if the user requests such on a snoopable bo, and as such access through the GTT to such pages remains incoherent. The specs even warn that such behaviour is undefined - a strong reason never to do so. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6380c6083cb2..d15c86279d02 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1341,6 +1341,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) trace_i915_gem_object_fault(obj, page_offset, true, write); + /* Access to snoopable pages through the GTT is incoherent. */ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { + ret = -EINVAL; + goto unlock; + } + /* Now bind it into the GTT if needed */ ret = i915_gem_object_pin(obj, 0, true, false); if (ret) -- cgit From 7dbf9d6e0fd8d42398955de119ccba6c35813c88 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 18 Dec 2012 10:31:22 -0800 Subject: drm/i915: BUG() if fences are used on unsupported platform Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d15c86279d02..ad03dea210bd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2649,7 +2649,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, case 4: i965_write_fence_reg(dev, reg, obj); break; case 3: i915_write_fence_reg(dev, reg, obj); break; case 2: i830_write_fence_reg(dev, reg, obj); break; - default: break; + default: BUG(); } } -- cgit From 8782e26c0cf3444a0d11400277bb44de710a0142 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 18 Dec 2012 10:31:23 -0800 Subject: drm/i915: Bug on unsupported swizzled platforms Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad03dea210bd..6685861cd176 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3897,8 +3897,10 @@ void i915_gem_init_swizzling(struct drm_device *dev) I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); if (IS_GEN6(dev)) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); - else + else if (IS_GEN7(dev)) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); + else + BUG(); } static bool -- cgit From f7e98ad4d4a8afa043126a6f24d0a154a684e081 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 19 Dec 2012 11:13:06 +0200 Subject: drm/i915: Initialize hardware semaphore state on ring init Hardware status page needs to have proper seqno set as our initial seqno can be arbitrary. If initial seqno is close to wrap boundary on init and i915_seqno_passed() (31bit space) refers to hw status page which contains zero, errorneous result will be returned. v2: clear mboxes and set hws page directly instead of going through rings. Suggested by Chris Wilson. v3: hws needs to be updated for all gens. Noticed by Chris Wilson. References: https://bugs.freedesktop.org/show_bug.cgi?id=58230 Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6685861cd176..51282b2c4943 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1954,9 +1954,7 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev) /* Finally reset hw state */ for_each_ring(ring, dev_priv, i) { - ret = intel_ring_handle_seqno_wrap(ring); - if (ret) - return ret; + intel_ring_init_seqno(ring, 0); for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) ring->sync_seqno[j] = 0; @@ -3935,6 +3933,8 @@ i915_gem_init_hw(struct drm_device *dev) i915_gem_init_swizzling(dev); + dev_priv->next_seqno = dev_priv->last_seqno = (u32)~0 - 0x1000; + ret = intel_init_render_ring_buffer(dev); if (ret) return ret; @@ -3951,8 +3951,6 @@ i915_gem_init_hw(struct drm_device *dev) goto cleanup_bsd_ring; } - dev_priv->next_seqno = (u32)-1 - 0x1000; - /* * XXX: There was some w/a described somewhere suggesting loading * contexts before PPGTT. -- cgit From ba1a7067c00f4cbdd99b00a570ff072639ae59f2 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 19 Dec 2012 11:13:07 +0200 Subject: drm/i915: Always clear semaphore mboxes on seqno wrap In preparation for setting the seqno to arbitrary value on init or through debugfs. We need to always clear the semaphores and set the hws page seqno index by calling intel_ring_init_seqno(). v2: rewrote the commit message as suggested by Chris Wilson. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 51282b2c4943..bb38bca02559 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1932,18 +1932,6 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev) struct intel_ring_buffer *ring; int ret, i, j; - /* The hardware uses various monotonic 32-bit counters, if we - * detect that they will wraparound we need to idle the GPU - * and reset those counters. - */ - ret = 0; - for_each_ring(ring, dev_priv, i) { - for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) - ret |= ring->sync_seqno[j] != 0; - } - if (ret == 0) - return ret; - /* Carefully retire all requests without writing to the rings */ for_each_ring(ring, dev_priv, i) { ret = intel_ring_idle(ring); -- cgit From fca26bb45375266d9d9b8b4b57fee905ac38fe3c Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 19 Dec 2012 11:13:08 +0200 Subject: drm/i915: Introduce i915_gem_set_seqno() This function can be used to set the driver's next_seqno to arbitrary value. i915_gem_set_seqno() will idle the gpu, retire outstanding requests, clear the semaphore mailboxes and set the hardware status page's seqno index. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bb38bca02559..23b883a135db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1926,7 +1926,7 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) } static int -i915_gem_handle_seqno_wrap(struct drm_device *dev) +i915_gem_init_seqno(struct drm_device *dev, u32 seqno) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; @@ -1942,7 +1942,7 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev) /* Finally reset hw state */ for_each_ring(ring, dev_priv, i) { - intel_ring_init_seqno(ring, 0); + intel_ring_init_seqno(ring, seqno); for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) ring->sync_seqno[j] = 0; @@ -1951,6 +1951,32 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev) return 0; } +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + ret = i915_gem_init_seqno(dev, seqno - 1); + if (ret) + return ret; + + /* Carefully set the last_seqno value so that wrap + * detection still works + */ + dev_priv->next_seqno = seqno; + dev_priv->last_seqno = seqno - 1; + if (dev_priv->last_seqno == 0) + dev_priv->last_seqno--; + + return 0; +} + int i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) { @@ -1958,7 +1984,7 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) /* reserve 0 for non-seqno */ if (dev_priv->next_seqno == 0) { - int ret = i915_gem_handle_seqno_wrap(dev); + int ret = i915_gem_init_seqno(dev, 0); if (ret) return ret; -- cgit From d7e5008f7c2077d856e40a3af746f1a47028b5f2 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 18 Dec 2012 10:31:25 -0800 Subject: drm/i915: Move even more gtt code to i915_gem_gtt This really should have been part of the kill agp series. Signed-off-by: Ben Widawsky Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 51 +++-------------------------------------- 1 file changed, 3 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 23b883a135db..ad98db5d22ea 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -163,8 +163,8 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data, return -ENODEV; mutex_lock(&dev->struct_mutex); - i915_gem_init_global_gtt(dev, args->gtt_start, - args->gtt_end, args->gtt_end); + i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, + args->gtt_end); mutex_unlock(&dev->struct_mutex); return 0; @@ -3981,58 +3981,13 @@ cleanup_render_ring: return ret; } -static bool -intel_enable_ppgtt(struct drm_device *dev) -{ - if (i915_enable_ppgtt >= 0) - return i915_enable_ppgtt; - -#ifdef CONFIG_INTEL_IOMMU - /* Disable ppgtt on SNB if VT-d is on. */ - if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) - return false; -#endif - - return true; -} - int i915_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long gtt_size, mappable_size; int ret; - gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; - mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; - mutex_lock(&dev->struct_mutex); - if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { - /* PPGTT pdes are stolen from global gtt ptes, so shrink the - * aperture accordingly when using aliasing ppgtt. */ - gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; - - i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); - - ret = i915_gem_init_aliasing_ppgtt(dev); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return ret; - } - } else { - /* Let GEM Manage all of the aperture. - * - * However, leave one page at the end still bound to the scratch - * page. There are a number of places where the hardware - * apparently prefetches past the end of the object, and we've - * seen multiple hangs with the GPU head pointer stuck in a - * batchbuffer bound at the last page of the aperture. One page - * should be enough to keep any prefetching inside of the - * aperture. - */ - i915_gem_init_global_gtt(dev, 0, mappable_size, - gtt_size); - } - + i915_gem_init_global_gtt(dev); ret = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); if (ret) { -- cgit