From a39a68054f63da0ea3b4806e1bfad79670a93d9f Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 15 Nov 2012 15:40:05 +0100
Subject: drm/i915: simplify shmem pwrite/pread slowpath handling

The shmem paths for pwrite/pread used a clever trick to hold onto a
single page when dropping the big dev->struct_mutex for the slowpath.
But this ran the risk of reinstating (or not completely purging) the
backing storage when dropping purgeable objects.

Hence the code needed to keep track of whether it ever dropped the
lock, and if it did, manually check whether it needs to re-purge the
backing storage. But thanks to the pages pin count introduced in

commit a5570178c059cec59e9835be20bc8546377fa7b5
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Sep 4 21:02:54 2012 +0100

    drm/i915: Pin backing pages whilst exporting through a dmabuf vmap

which allowed us to pin the backing storage and remove that page
reference trick from shmem_pwrite/read in

commit f60d7f0c1d55a935475ab394955cafddefaa6533
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Sep 4 21:02:56 2012 +0100

    drm/i915: Pin backing pages for pread

and

commit 755d22184f1e5015b040acee794542d9cf8a16c5
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Sep 4 21:02:55 2012 +0100

    drm/i915: Pin backing pages for pwrite

we can now abolish this check. The slowpath cleanup completely
disappears from pread, and for pwrite we're only left with the domain
fixup in case someone moved the object out of the cpu domain from
under us. A follow-on patch will optimize that a notch more.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4ef8b5714337..36f629a79d88 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -407,7 +407,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	loff_t offset;
 	int shmem_page_offset, page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-	int hit_slowpath = 0;
 	int prefaulted = 0;
 	int needs_clflush = 0;
 	struct scatterlist *sg;
@@ -469,7 +468,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		if (ret == 0)
 			goto next_page;
 
-		hit_slowpath = 1;
 		mutex_unlock(&dev->struct_mutex);
 
 		if (!prefaulted) {
@@ -502,12 +500,6 @@ next_page:
 out:
 	i915_gem_object_unpin_pages(obj);
 
-	if (hit_slowpath) {
-		/* Fixup: Kill any reinstated backing storage pages */
-		if (obj->madv == __I915_MADV_PURGED)
-			i915_gem_object_truncate(obj);
-	}
-
 	return ret;
 }
 
@@ -838,11 +830,8 @@ out:
 	i915_gem_object_unpin_pages(obj);
 
 	if (hit_slowpath) {
-		/* Fixup: Kill any reinstated backing storage pages */
-		if (obj->madv == __I915_MADV_PURGED)
-			i915_gem_object_truncate(obj);
-		/* and flush dirty cachelines in case the object isn't in the cpu write
-		 * domain anymore. */
+		/* Fixup: Flush dirty cachelines in case the object isn't in the
+		 * cpu write domain anymore. */
 		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			i915_gem_clflush_object(obj);
 			i915_gem_chipset_flush(dev);
-- 
cgit 


From 8dcf015eb967c718962c0690330d9a94d56f2c5d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 15 Nov 2012 16:53:58 +0100
Subject: drm/i915: optimize the shmem_pwrite slowpath handling

Since we drop dev->struct_mutex when going through the slowpath, the
object might have been moved out of the cpu domain. Hence we need to
clflush the entire object to ensure that after the ioctl returns,
everything is coherent again (interwoven writes are ill-defined
anyway).

But we only need to do this if we start in the cpu domain and the
object requires flushing for coherency. So don't do the flushing if
the object is coherent anyway or if we've done in-line clfushing
already.

v2: i915_gem_clflush_object already checks whether the object is
coherent and if so, drops the flushing. Hence we don't need to check
that ourselves, simplifying the condition.

v3: Reorder the checks for better clarity (and adjust the comment
accordingly), suggested by Chris Wilson.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 36f629a79d88..5c8df572cd17 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -830,9 +830,13 @@ out:
 	i915_gem_object_unpin_pages(obj);
 
 	if (hit_slowpath) {
-		/* Fixup: Flush dirty cachelines in case the object isn't in the
-		 * cpu write domain anymore. */
-		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+		/*
+		 * Fixup: Flush cpu caches in case we didn't flush the dirty
+		 * cachelines in-line while writing and the object moved
+		 * out of the cpu write domain while we've dropped the lock.
+		 */
+		if (!needs_clflush_after &&
+		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			i915_gem_clflush_object(obj);
 			i915_gem_chipset_flush(dev);
 		}
-- 
cgit 


From 0104fdbb84d7adb0e377ed05bf75eba97b007544 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 15 Nov 2012 11:32:26 +0000
Subject: drm/i915: Introduce i915_gem_object_create_stolen()

Allow for the creation of GEM objects backed by stolen memory. As these
are not backed by ordinary pages, we create a fake dma mapping and store
the address in the scatterlist rather than obj->pages.

v2: Mark _i915_gem_object_create_stolen() as static, as noticed by Jesse
Barnes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5c8df572cd17..3de62b0127a5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3772,6 +3772,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	obj->pages_pin_count = 0;
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
+	i915_gem_object_release_stolen(obj);
 
 	BUG_ON(obj->pages);
 
-- 
cgit 


From 42dcedd4f2e715dc0313e359c8288e6397843fff Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 15 Nov 2012 11:32:30 +0000
Subject: drm/i915: Use a slab for object allocation

The primary purpose of this was to debug some use-after-free memory
corruption that was causing an OOPS inside drm/i915. As it turned out
the corruption was being caused elsewhere and i915.ko as a major user of
many objects was being hit hardest.

Indeed as we do frequent the generic kmalloc caches, dedicating one to
ourselves (or at least naming one for us depending upon the core) aids
debugging our own slab usage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3de62b0127a5..dfe7174a7c03 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -192,6 +192,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	kmem_cache_free(dev_priv->slab, obj);
+}
+
 static int
 i915_gem_create(struct drm_file *file,
 		struct drm_device *dev,
@@ -215,7 +227,7 @@ i915_gem_create(struct drm_file *file,
 	if (ret) {
 		drm_gem_object_release(&obj->base);
 		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return ret;
 	}
 
@@ -3695,12 +3707,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	struct address_space *mapping;
 	u32 mask;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
 	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return NULL;
 	}
 
@@ -3783,7 +3795,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
 	kfree(obj->bit_17);
-	kfree(obj);
+	i915_gem_object_free(obj);
 }
 
 int
@@ -4101,8 +4113,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
 void
 i915_gem_load(struct drm_device *dev)
 {
-	int i;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	dev_priv->slab =
+		kmem_cache_create("i915_gem_object",
+				  sizeof(struct drm_i915_gem_object), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
-- 
cgit 


From 4239ca779dbe47a310a3106d9f4cd5458014bdb6 Mon Sep 17 00:00:00 2001
From: Damien Lespiau <damien.lespiau@intel.com>
Date: Mon, 3 Dec 2012 16:26:16 +0000
Subject: drm/i915: Fix dieing -> dying typo

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dfe7174a7c03..b68bc02745db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2818,7 +2818,7 @@ static bool i915_gem_valid_gtt_space(struct drm_device *dev,
 
 	/* On non-LLC machines we have to be careful when putting differing
 	 * types of snoopable memory together to avoid the prefetcher
-	 * crossing memory domains and dieing.
+	 * crossing memory domains and dying.
 	 */
 	if (HAS_LLC(dev))
 		return true;
-- 
cgit 


From 1a240d4de2ccf40de5796a4d1dbb3a0236051fc9 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Nov 2012 22:18:51 +0100
Subject: drm/i915: fixup sparse warnings

- __iomem where there is none (I love how we mix these things up).
- Use gfp_t instead of an other plain type.
- Unconfuse one place about enum pipe vs enum transcoder - for the pch
  transcoder we actually use the pipe enum. Fixup the other cases
  where we assign the pipe to the cpu transcoder with explicit casts.
- Declare the mch_lock properly in a header.

There is still a decent mess in intel_bios.c about __iomem, but heck,
this is x86 and we're allowed to do that.

Makes-sparse-happy: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: Use a space after the cast consistently and fix up the
newly-added cast in i915_irq.c to properly use __iomem.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b68bc02745db..cb941448c2bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3705,7 +3705,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 {
 	struct drm_i915_gem_object *obj;
 	struct address_space *mapping;
-	u32 mask;
+	gfp_t mask;
 
 	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
-- 
cgit 


From 498d2ac15ce0fc08edb005a7faf9ed6b5aa028d8 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Tue, 4 Dec 2012 15:12:04 +0200
Subject: drm/i915: Add intel_ring_handle_seqno wrap

If there are pre-wrap values in semaphore-mbox registers after wrap,
syncing against some after-wrap request will complete immediately.
Fix this by emitting ring commands to set mbox registers to zero
when the wrap happens.

v2: Use __intel_ring_begin to emit ring commands, from
Chris Wilson.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: Add a small comment to handle_seqno_wrap.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb941448c2bc..02d315164aa9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1952,6 +1952,10 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
 
 	i915_gem_retire_requests(dev);
 	for_each_ring(ring, dev_priv, i) {
+		ret = intel_ring_handle_seqno_wrap(ring);
+		if (ret)
+			return ret;
+
 		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
 			ring->sync_seqno[j] = 0;
 	}
-- 
cgit 


From e9b73c67390a5d4faec1d22cbdf24cd6fcca53f6 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 3 Dec 2012 21:03:14 +0000
Subject: drm/i915: Reduce memory pressure during shrinker by preallocating
 swizzle pages

On a machine with bit17 swizzling, we need to store the bit17 of the
physical page address in put-pages. This requires a memory allocation,
on average less than a page, which may be difficult to satisfy is the
request to put-pages is on behalf of the shrinker. We could allow that
allocation to pull from the reserved memory pools, but it seems much
safer to preallocate the array for tiled objects on affected machines.

v2: Export i915_gem_object_needs_bit17_swizzle() for reuse.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 02d315164aa9..e4b233df576f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -271,14 +271,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       args->size, &args->handle);
 }
 
-static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
-{
-	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
-
-	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
-		obj->tiling_mode != I915_TILING_NONE;
-}
-
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
 			const char *gpu_vaddr, int gpu_offset,
-- 
cgit 


From f72b3435c1a75406d82d6e252bb78f009efd4bd9 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Mon, 10 Dec 2012 15:41:48 +0200
Subject: drm/i915: Don't emit semaphore wait if wrap happened

If wrap just happened we need to prevent emitting waits for
pre wrap values. Detect this and emit no-ops instead.

v2: Use olr > seqno to detect wrap instead of *seqno == 0
as suggested by Chris Wilson.

v3: Use last used seqno to detect the wraparound. From
Chris Wilson

v4: Fixed unnecessary last_seqno assigment

References: https://bugs.freedesktop.org/show_bug.cgi?id=57967
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4b233df576f..a81b78a59bd9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1969,7 +1969,7 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 		dev_priv->next_seqno = 1;
 	}
 
-	*seqno = dev_priv->next_seqno++;
+	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
 	return 0;
 }
 
-- 
cgit 


From 107f27a5df8ad33a351d6e82fe95ff92b428f72e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 10 Dec 2012 13:56:17 +0200
Subject: drm/i915: Open-code i915_gpu_idle() for handling seqno wrapping

The complication is that during seqno wrapping we must be extremely
careful not to write to any ring as that will require a new seqno, and
so would recurse back into the seqno wrap handler. So we cannot call
i915_gpu_idle() as that does additional work beyond simply retiring the
current set of requests, and instead must do the minimal work ourselves
during seqno wrapping.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a81b78a59bd9..bc73f98c3631 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1938,11 +1938,15 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
 	if (ret == 0)
 		return ret;
 
-	ret = i915_gpu_idle(dev);
-	if (ret)
-		return ret;
-
+	/* Carefully retire all requests without writing to the rings */
+	for_each_ring(ring, dev_priv, i) {
+		ret = intel_ring_idle(ring);
+		if (ret)
+			return ret;
+	}
 	i915_gem_retire_requests(dev);
+
+	/* Finally reset hw state */
 	for_each_ring(ring, dev_priv, i) {
 		ret = intel_ring_handle_seqno_wrap(ring);
 		if (ret)
-- 
cgit 


From 9e8e36879f268f1652e11b1c8560bbb67cf4f08e Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Tue, 4 Dec 2012 15:12:05 +0200
Subject: drm/i915: Set initial seqno value close to wrap boundary

To gain confidence in the wrap handling, make it happen quite
soon after the boot.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bc73f98c3631..6380c6083cb2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3943,7 +3943,7 @@ i915_gem_init_hw(struct drm_device *dev)
 			goto cleanup_bsd_ring;
 	}
 
-	dev_priv->next_seqno = 1;
+	dev_priv->next_seqno = (u32)-1 - 0x1000;
 
 	/*
 	 * XXX: There was some w/a described somewhere suggesting loading
-- 
cgit 


From eb119bd612906519cacef2536a9a524c2da5f7fb Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun, 16 Dec 2012 12:43:36 +0000
Subject: drm/i915: Access to snooped system memory through the GTT is
 incoherent

We ignore all the user requests to handle flushing to the GTT domain if
the user requests such on a snoopable bo, and as such access through the
GTT to such pages remains incoherent. The specs even warn that such
behaviour is undefined - a strong reason never to do so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6380c6083cb2..d15c86279d02 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1341,6 +1341,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	/* Now bind it into the GTT if needed */
 	ret = i915_gem_object_pin(obj, 0, true, false);
 	if (ret)
-- 
cgit 


From 7dbf9d6e0fd8d42398955de119ccba6c35813c88 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 18 Dec 2012 10:31:22 -0800
Subject: drm/i915: BUG() if fences are used on unsupported platform

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d15c86279d02..ad03dea210bd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2649,7 +2649,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
 	case 4: i965_write_fence_reg(dev, reg, obj); break;
 	case 3: i915_write_fence_reg(dev, reg, obj); break;
 	case 2: i830_write_fence_reg(dev, reg, obj); break;
-	default: break;
+	default: BUG();
 	}
 }
 
-- 
cgit 


From 8782e26c0cf3444a0d11400277bb44de710a0142 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 18 Dec 2012 10:31:23 -0800
Subject: drm/i915: Bug on unsupported swizzled platforms

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad03dea210bd..6685861cd176 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3897,8 +3897,10 @@ void i915_gem_init_swizzling(struct drm_device *dev)
 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
 	if (IS_GEN6(dev))
 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
-	else
+	else if (IS_GEN7(dev))
 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
+	else
+		BUG();
 }
 
 static bool
-- 
cgit 


From f7e98ad4d4a8afa043126a6f24d0a154a684e081 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed, 19 Dec 2012 11:13:06 +0200
Subject: drm/i915: Initialize hardware semaphore state on ring init

Hardware status page needs to have proper seqno set
as our initial seqno can be arbitrary. If initial seqno is close
to wrap boundary on init and i915_seqno_passed() (31bit space)
refers to hw status page which contains zero, errorneous result
will be returned.

v2: clear mboxes and set hws page directly instead of going
through rings. Suggested by Chris Wilson.

v3: hws needs to be updated for all gens. Noticed by Chris
Wilson.

References: https://bugs.freedesktop.org/show_bug.cgi?id=58230
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6685861cd176..51282b2c4943 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1954,9 +1954,7 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
 
 	/* Finally reset hw state */
 	for_each_ring(ring, dev_priv, i) {
-		ret = intel_ring_handle_seqno_wrap(ring);
-		if (ret)
-			return ret;
+		intel_ring_init_seqno(ring, 0);
 
 		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
 			ring->sync_seqno[j] = 0;
@@ -3935,6 +3933,8 @@ i915_gem_init_hw(struct drm_device *dev)
 
 	i915_gem_init_swizzling(dev);
 
+	dev_priv->next_seqno = dev_priv->last_seqno = (u32)~0 - 0x1000;
+
 	ret = intel_init_render_ring_buffer(dev);
 	if (ret)
 		return ret;
@@ -3951,8 +3951,6 @@ i915_gem_init_hw(struct drm_device *dev)
 			goto cleanup_bsd_ring;
 	}
 
-	dev_priv->next_seqno = (u32)-1 - 0x1000;
-
 	/*
 	 * XXX: There was some w/a described somewhere suggesting loading
 	 * contexts before PPGTT.
-- 
cgit 


From ba1a7067c00f4cbdd99b00a570ff072639ae59f2 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed, 19 Dec 2012 11:13:07 +0200
Subject: drm/i915: Always clear semaphore mboxes on seqno wrap

In preparation for setting the seqno to arbitrary value on init or
through debugfs. We need to always clear the semaphores and set the
hws page seqno index by calling intel_ring_init_seqno().

v2: rewrote the commit message as suggested by Chris Wilson.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51282b2c4943..bb38bca02559 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1932,18 +1932,6 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
 	struct intel_ring_buffer *ring;
 	int ret, i, j;
 
-	/* The hardware uses various monotonic 32-bit counters, if we
-	 * detect that they will wraparound we need to idle the GPU
-	 * and reset those counters.
-	 */
-	ret = 0;
-	for_each_ring(ring, dev_priv, i) {
-		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
-			ret |= ring->sync_seqno[j] != 0;
-	}
-	if (ret == 0)
-		return ret;
-
 	/* Carefully retire all requests without writing to the rings */
 	for_each_ring(ring, dev_priv, i) {
 		ret = intel_ring_idle(ring);
-- 
cgit 


From fca26bb45375266d9d9b8b4b57fee905ac38fe3c Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed, 19 Dec 2012 11:13:08 +0200
Subject: drm/i915: Introduce i915_gem_set_seqno()

This function can be used to set the driver's next_seqno
to arbitrary value.

i915_gem_set_seqno() will idle the gpu, retire outstanding
requests, clear the semaphore mailboxes and set the hardware
status page's seqno index.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bb38bca02559..23b883a135db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1926,7 +1926,7 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 }
 
 static int
-i915_gem_handle_seqno_wrap(struct drm_device *dev)
+i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring;
@@ -1942,7 +1942,7 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
 
 	/* Finally reset hw state */
 	for_each_ring(ring, dev_priv, i) {
-		intel_ring_init_seqno(ring, 0);
+		intel_ring_init_seqno(ring, seqno);
 
 		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
 			ring->sync_seqno[j] = 0;
@@ -1951,6 +1951,32 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
 	return 0;
 }
 
+int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	if (seqno == 0)
+		return -EINVAL;
+
+	/* HWS page needs to be set less than what we
+	 * will inject to ring
+	 */
+	ret = i915_gem_init_seqno(dev, seqno - 1);
+	if (ret)
+		return ret;
+
+	/* Carefully set the last_seqno value so that wrap
+	 * detection still works
+	 */
+	dev_priv->next_seqno = seqno;
+	dev_priv->last_seqno = seqno - 1;
+	if (dev_priv->last_seqno == 0)
+		dev_priv->last_seqno--;
+
+	return 0;
+}
+
 int
 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 {
@@ -1958,7 +1984,7 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 	/* reserve 0 for non-seqno */
 	if (dev_priv->next_seqno == 0) {
-		int ret = i915_gem_handle_seqno_wrap(dev);
+		int ret = i915_gem_init_seqno(dev, 0);
 		if (ret)
 			return ret;
 
-- 
cgit 


From d7e5008f7c2077d856e40a3af746f1a47028b5f2 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 18 Dec 2012 10:31:25 -0800
Subject: drm/i915: Move even more gtt code to i915_gem_gtt

This really should have been part of the kill agp series.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 51 +++--------------------------------------
 1 file changed, 3 insertions(+), 48 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 23b883a135db..ad98db5d22ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -163,8 +163,8 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
 		return -ENODEV;
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_init_global_gtt(dev, args->gtt_start,
-				 args->gtt_end, args->gtt_end);
+	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
+				  args->gtt_end);
 	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
@@ -3981,58 +3981,13 @@ cleanup_render_ring:
 	return ret;
 }
 
-static bool
-intel_enable_ppgtt(struct drm_device *dev)
-{
-	if (i915_enable_ppgtt >= 0)
-		return i915_enable_ppgtt;
-
-#ifdef CONFIG_INTEL_IOMMU
-	/* Disable ppgtt on SNB if VT-d is on. */
-	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
-		return false;
-#endif
-
-	return true;
-}
-
 int i915_gem_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long gtt_size, mappable_size;
 	int ret;
 
-	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
-	mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
-
 	mutex_lock(&dev->struct_mutex);
-	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
-		/* PPGTT pdes are stolen from global gtt ptes, so shrink the
-		 * aperture accordingly when using aliasing ppgtt. */
-		gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
-
-		i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
-
-		ret = i915_gem_init_aliasing_ppgtt(dev);
-		if (ret) {
-			mutex_unlock(&dev->struct_mutex);
-			return ret;
-		}
-	} else {
-		/* Let GEM Manage all of the aperture.
-		 *
-		 * However, leave one page at the end still bound to the scratch
-		 * page.  There are a number of places where the hardware
-		 * apparently prefetches past the end of the object, and we've
-		 * seen multiple hangs with the GPU head pointer stuck in a
-		 * batchbuffer bound at the last page of the aperture.  One page
-		 * should be enough to keep any prefetching inside of the
-		 * aperture.
-		 */
-		i915_gem_init_global_gtt(dev, 0, mappable_size,
-					 gtt_size);
-	}
-
+	i915_gem_init_global_gtt(dev);
 	ret = i915_gem_init_hw(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret) {
-- 
cgit