72 files changed, 3344 insertions, 1386 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2cf04504e494..7b05fb802f4c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
           i915_suspend.o \
+	  i915_syncmap.o \
 	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index bada32b33237..1256fe21850b 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
 			workload->ctx_desc.lrca);
 
-	context_page_num = intel_lr_context_size(
-			gvt->dev_priv->engine[ring_id]);
+	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
 
 	context_page_num = context_page_num >> PAGE_SHIFT;
 
@@ -330,8 +329,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
 			workload->ctx_desc.lrca);
 
-	context_page_num = intel_lr_context_size(
-			gvt->dev_priv->engine[ring_id]);
+	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
 
 	context_page_num = context_page_num >> PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7af100f84410..2a1a3347495a 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 				find_reg(engine, is_master, reg_addr);
 
 			if (!reg) {
-				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
-						 reg_addr, *cmd, engine->exec_id);
+				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
+						 reg_addr, *cmd, engine->name);
 				return false;
 			}
 
@@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 				desc->bits[i].mask;
 
 			if (dword != desc->bits[i].expected) {
-				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n",
+				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
 						 *cmd,
 						 desc->bits[i].mask,
 						 desc->bits[i].expected,
-						 dword, engine->exec_id);
+						 dword, engine->name);
 				return false;
 			}
 		}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e511744e..870c470177b5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2568,8 +2568,7 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
 
 static int i915_guc_log_control_get(void *data, u64 *val)
 {
-	struct drm_device *dev = data;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = data;
 
 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;
@@ -2581,14 +2580,13 @@ static int i915_guc_log_control_get(void *data, u64 *val)
 
 static int i915_guc_log_control_set(void *data, u64 val)
 {
-	struct drm_device *dev = data;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = data;
 	int ret;
 
 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
 	if (ret)
 		return ret;
 
@@ -2596,7 +2594,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
 	ret = i915_guc_log_control(dev_priv, val);
 	intel_runtime_pm_put(dev_priv);
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3036d4835b0f..452c26505018 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
 	case I915_PARAM_HAS_EXEC_FENCE:
+	case I915_PARAM_HAS_EXEC_CAPTURE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -834,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_uc_init_early(dev_priv);
 	i915_memcpy_init_early(dev_priv);
 
-	ret = intel_engines_init_early(dev_priv);
-	if (ret)
-		return ret;
-
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
 		goto err_engines;
@@ -855,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_init_audio_hooks(dev_priv);
 	ret = i915_gem_load_init(dev_priv);
 	if (ret < 0)
-		goto err_workqueues;
+		goto err_irq;
 
 	intel_display_crc_init(dev_priv);
 
@@ -867,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 
 	return 0;
 
-err_workqueues:
+err_irq:
+	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 err_engines:
 	i915_engines_cleanup(dev_priv);
@@ -882,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
 	i915_perf_fini(dev_priv);
 	i915_gem_load_cleanup(dev_priv);
+	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
 }
@@ -947,14 +946,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
 
 	ret = i915_mmio_setup(dev_priv);
 	if (ret < 0)
-		goto put_bridge;
+		goto err_bridge;
 
 	intel_uncore_init(dev_priv);
+
+	ret = intel_engines_init_mmio(dev_priv);
+	if (ret)
+		goto err_uncore;
+
 	i915_gem_init_mmio(dev_priv);
 
 	return 0;
 
-put_bridge:
+err_uncore:
+	intel_uncore_fini(dev_priv);
+err_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 
 	return ret;
@@ -1213,9 +1219,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct drm_i915_private *dev_priv;
 	int ret;
 
-	/* Enable nuclear pageflip on ILK+, except vlv/chv */
-	if (!i915.nuclear_pageflip &&
-	    (match_info->gen < 5 || match_info->has_gmch_display))
+	/* Enable nuclear pageflip on ILK+ */
+	if (!i915.nuclear_pageflip && match_info->gen < 5)
 		driver.driver_features &= ~DRIVER_ATOMIC;
 
 	ret = -ENOMEM;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949f6c1a..b20ed16da0ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -79,8 +79,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20170403"
-#define DRIVER_TIMESTAMP	1491198738
+#define DRIVER_DATE		"20170502"
+#define DRIVER_TIMESTAMP	1493710187
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -822,7 +822,6 @@ struct intel_csr {
 	func(has_gmch_display); \
 	func(has_guc); \
 	func(has_hotplug); \
-	func(has_hw_contexts); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \
@@ -1025,6 +1024,9 @@ struct i915_gpu_state {
 			u32 *pages[0];
 		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
+		struct drm_i915_error_object **user_bo;
+		long user_bo_count;
+
 		struct drm_i915_error_object *wa_ctx;
 
 		struct drm_i915_error_request {
@@ -1511,11 +1513,7 @@ struct i915_gem_mm {
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
 
-	/**
-	 * Are we in a non-interruptible section of code like
-	 * modesetting?
-	 */
-	bool interruptible;
+	u64 unordered_timeline;
 
 	/* the indicator for dispatch video commands on two BSD rings */
 	atomic_t bsd_engine_dispatch_index;
@@ -1566,7 +1564,7 @@ struct i915_gpu_error {
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
 	 *
-	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * Before the reset commences, the I915_RESET_BACKOFF bit is set
 	 * meaning that any waiters holding onto the struct_mutex should
 	 * relinquish the lock immediately in order for the reset to start.
 	 *
@@ -2362,7 +2360,6 @@ struct drm_i915_private {
 	 */
 	struct mutex av_mutex;
 
-	uint32_t hw_context_size;
 	struct list_head context_list;
 
 	u32 fdi_rx_config;
@@ -2870,7 +2867,6 @@ intel_info(const struct drm_i915_private *dev_priv)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)	((dev_priv)->info.hws_needs_physical)
 
-#define HAS_HW_CONTEXTS(dev_priv)	    ((dev_priv)->info.has_hw_contexts)
 #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
 		((dev_priv)->info.has_logical_ring_contexts)
 #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
@@ -3026,7 +3022,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
-int intel_engines_init_early(struct drm_i915_private *dev_priv);
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
 
 /* intel_hotplug.c */
@@ -3063,6 +3059,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 		       const char *fmt, ...);
 
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
+extern void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
 void intel_irq_uninstall(struct drm_i915_private *dev_priv);
 
@@ -3091,14 +3088,26 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
 
 int intel_wait_for_register(struct drm_i915_private *dev_priv,
 			    i915_reg_t reg,
-			    const u32 mask,
-			    const u32 value,
-			    const unsigned long timeout_ms);
+			    u32 mask,
+			    u32 value,
+			    unsigned int timeout_ms);
+int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
+				 i915_reg_t reg,
+				 u32 mask,
+				 u32 value,
+				 unsigned int fast_timeout_us,
+				 unsigned int slow_timeout_ms,
+				 u32 *out_value);
+static inline
 int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg,
-			       const u32 mask,
-			       const u32 value,
-			       const unsigned long timeout_ms);
+			       u32 mask,
+			       u32 value,
+			       unsigned int timeout_ms)
+{
+	return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
+					    2, timeout_ms, NULL);
+}
 
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 {
@@ -3447,8 +3456,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
 
 int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
-				  bool write);
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
 int __must_check
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 struct i915_vma * __must_check
@@ -3711,8 +3721,8 @@ int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
-			    void *eld, int port, int pipe, int tmds_clk_speed,
-			    bool dp_output, int link_rate);
+			    enum pipe pipe, enum port port,
+			    const void *eld, int ls_clock, bool dp_output);
 
 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 532a577ff7a1..f9c6b9b5002c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,8 +46,6 @@
 #include <linux/dma-buf.h>
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
@@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       args->size, &args->handle);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+
+	if (!(obj->base.write_domain & flush_domains))
+		return;
+
+	/* Flush the object's current write domain if it is in flush_domains.
+	 *
+	 * The GTT write domain needs no actual flushing: writes to it
+	 * "immediately" go to main memory as far as we know, so there's no
+	 * chipset flush, and they don't land in the render cache either.
+	 * However, we do have to enforce the order so that all writes through
+	 * the GTT land before any writes to the device, such as updates to
+	 * the GATT itself; hence the wmb(), issued whatever domain we flush.
+	 *
+	 * We also have to wait a bit for the writes to land from the GTT; an
+	 * uncached read (i.e. mmio) seems ideal for the round-trip timing.
+	 * This has only been observed when switching quickly between GTT
+	 * writes and CPU reads inside the kernel on recent hw, and appears to
+	 * only affect discrete GTT blocks (we cannot reproduce it with LLC).
+	 */
+	wmb();
+
+	switch (obj->base.write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+			if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+				spin_lock_irq(&dev_priv->uncore.lock);
+				POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+				spin_unlock_irq(&dev_priv->uncore.lock);
+				intel_runtime_pm_put(dev_priv);
+			}
+		}
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+	}
+
+	obj->base.write_domain = 0;
+}
+
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
 			const char *gpu_vaddr, int gpu_offset,
@@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	i915_gem_object_flush_gtt_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	i915_gem_object_flush_gtt_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -1501,13 +1554,6 @@ err:
 	return ret;
 }
 
-static inline enum fb_op_origin
-write_origin(struct drm_i915_gem_object *obj, unsigned domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915;
@@ -1591,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out_unpin;
 
-	if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
 	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
 
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
@@ -1602,7 +1650,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	mutex_unlock(&dev->struct_mutex);
 
 	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));
 
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -1737,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  *     into userspace. (This view is aligned and sized appropriately for
  *     fenced access.)
  *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
  * Restrictions:
  *
  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1764,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  */
 int i915_gem_mmap_gtt_version(void)
 {
-	return 1;
+	return 2;
 }
 
 static inline struct i915_ggtt_view
@@ -3144,6 +3196,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		intel_engine_disarm_breadcrumbs(engine);
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 	}
+	i915_gem_timelines_mark_idle(dev_priv);
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
@@ -3320,56 +3373,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 	return ret;
 }
 
-/** Flushes the GTT write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
-		return;
-
-	/* No actual flushing is required for the GTT write domain.  Writes
-	 * to it "immediately" go to main memory as far as we know, so there's
-	 * no chipset flush.  It also doesn't land in render cache.
-	 *
-	 * However, we do have to enforce the order so that all writes through
-	 * the GTT land before any writes to the device, such as updates to
-	 * the GATT itself.
-	 *
-	 * We also have to wait a bit for the writes to land from the GTT.
-	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
-	 * timing. This issue has only been observed when switching quickly
-	 * between GTT writes and CPU reads from inside the kernel on recent hw,
-	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
-	 * system agents we cannot reproduce this behaviour).
-	 */
-	wmb();
-	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-		if (intel_runtime_pm_get_if_in_use(dev_priv)) {
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
-	}
-
-	intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-	obj->base.write_domain = 0;
-}
-
-/** Flushes the CPU write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
-{
-	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
-		return;
-
-	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-	obj->base.write_domain = 0;
-}
-
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 {
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
@@ -3390,6 +3393,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 }
 
 /**
+ * i915_gem_object_set_to_wc_domain - move an object to the WC domain
+ * @obj: object to act on
+ * @write: ask for write access as well as read, or read only
+ *
+ * Caller must hold struct_mutex.  Returns 0 once the move is complete,
+ * including waiting on flushes, or the error from the wait or page pin.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
+	if (ret)
+		return ret;
+
+	if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->base.read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->base.read_domains = I915_GEM_DOMAIN_WC;
+		obj->base.write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
  * @write: ask for write access or read only
@@ -3428,7 +3494,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	i915_gem_object_flush_cpu_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3802,7 +3868,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return 0;
 
-	i915_gem_object_flush_gtt_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -3996,7 +4062,7 @@ __busy_set_if_active(const struct dma_fence *fence,
 	if (i915_gem_request_completed(rq))
 		return 0;
 
-	return flag(rq->engine->exec_id);
+	return flag(rq->engine->uabi_id);
 }
 
 static __always_inline unsigned int
@@ -4195,7 +4261,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 	 * catch if we ever need to fix it. In the meantime, if you do spot
 	 * such a local variable, please consider fixing!
 	 */
-	if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
+	if (size >> PAGE_SHIFT > INT_MAX)
 		return ERR_PTR(-E2BIG);
 
 	if (overflows_type(size, obj->base.size))
@@ -4302,6 +4368,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
+	cond_resched();
+
 	llist_for_each_entry_safe(obj, on, freed, freed) {
 		GEM_BUG_ON(obj->bind_count);
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
@@ -4349,8 +4417,11 @@ static void __i915_gem_free_work(struct work_struct *work)
 	 * unbound now.
 	 */
 
-	while ((freed = llist_del_all(&i915->mm.free_list)))
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
 		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			break;
+	}
 }
 
 static void __i915_gem_free_object_rcu(struct rcu_head *head)
@@ -4415,10 +4486,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * try to take over. The only way to remove the earlier state
 	 * is by resetting. However, resetting on earlier gen is tricky as
 	 * it may impact the display and we are uncertain about the stability
-	 * of the reset, so we only reset recent machines with logical
-	 * context support (that must be reset to remove any stray contexts).
+	 * of the reset; for now only gen5+ is reset, earlier gen may follow.
 	 */
-	if (HAS_HW_CONTEXTS(i915)) {
+	if (INTEL_GEN(i915) >= 5) {
 		int reset = intel_gpu_reset(i915, ALL_ENGINES);
 		WARN_ON(reset && reset != -ENODEV);
 	}
@@ -4676,7 +4746,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	i915_gem_clflush_init(dev_priv);
+	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4822,8 +4892,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 
 	init_waitqueue_head(&dev_priv->pending_flip_queue);
 
-	dev_priv->mm.interruptible = true;
-
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
 	spin_lock_init(&dev_priv->fb_tracking.lock);
@@ -4864,9 +4932,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 
 int i915_gem_freeze(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	/* Discard all purgeable objects, let userspace recover those as
+	 * required after resuming.
+	 */
 	i915_gem_shrink_all(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 }
@@ -4891,12 +4960,13 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 	 * we update that state just before writing out the image.
 	 *
 	 * To try and reduce the hibernation image, we manually shrink
-	 * the objects as well.
+	 * the objects as well, see i915_gem_freeze()
 	 */
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
+	i915_gem_drain_freed_objects(dev_priv);
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	for (p = phases; *p; p++) {
 		list_for_each_entry(obj, *p, global_link) {
 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 5a49487368ca..ee54597465b6 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -25,6 +25,8 @@
 #ifndef __I915_GEM_H__
 #define __I915_GEM_H__
 
+#include <linux/bug.h>
+
 #ifdef CONFIG_DRM_I915_DEBUG_GEM
 #define GEM_BUG_ON(expr) BUG_ON(expr)
 #define GEM_WARN_ON(expr) WARN_ON(expr)
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index ffd01e02fe94..ffac7a1f0caf 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -27,7 +27,6 @@
 #include "i915_gem_clflush.h"
 
 static DEFINE_SPINLOCK(clflush_lock);
-static u64 clflush_context;
 
 struct clflush {
 	struct dma_fence dma; /* Must be first for dma_fence_free() */
@@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 		dma_fence_init(&clflush->dma,
 			       &i915_clflush_ops,
 			       &clflush_lock,
-			       clflush_context,
+			       to_i915(obj->base.dev)->mm.unordered_timeline,
 			       0);
 		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
 
@@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
 	}
 }
-
-void i915_gem_clflush_init(struct drm_i915_private *i915)
-{
-	clflush_context = dma_fence_context_alloc(1);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
index b62d61a2d15f..2455a7820937 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -28,7 +28,6 @@
 struct drm_i915_private;
 struct drm_i915_gem_object;
 
-void i915_gem_clflush_init(struct drm_i915_private *i915);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags);
 #define I915_CLFLUSH_FORCE BIT(0)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8bd0c4966913..31a73c39239f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -92,33 +92,6 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-static int get_context_size(struct drm_i915_private *dev_priv)
-{
-	int ret;
-	u32 reg;
-
-	switch (INTEL_GEN(dev_priv)) {
-	case 6:
-		reg = I915_READ(CXT_SIZE);
-		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
-		break;
-	case 7:
-		reg = I915_READ(GEN7_CXT_SIZE);
-		if (IS_HASWELL(dev_priv))
-			ret = HSW_CXT_TOTAL_SIZE;
-		else
-			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
-		break;
-	case 8:
-		ret = GEN8_CXT_TOTAL_SIZE;
-		break;
-	default:
-		BUG();
-	}
-
-	return ret;
-}
-
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -151,45 +124,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	kfree(ctx);
 }
 
-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	obj = i915_gem_object_create(dev_priv, size);
-	if (IS_ERR(obj))
-		return obj;
-
-	/*
-	 * Try to make the context utilize L3 as well as LLC.
-	 *
-	 * On VLV we don't have L3 controls in the PTEs so we
-	 * shouldn't touch the cache level, especially as that
-	 * would make the object snooped which might have a
-	 * negative performance impact.
-	 *
-	 * Snooping is required on non-llc platforms in execlist
-	 * mode, but since all GGTT accesses use PAT entry 0 we
-	 * get snooping anyway regardless of cache_level.
-	 *
-	 * This is only applicable for Ivy Bridge devices since
-	 * later platforms don't have L3 control bits in the PTE.
-	 */
-	if (IS_IVYBRIDGE(dev_priv)) {
-		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
-		/* Failure shouldn't ever happen this early */
-		if (WARN_ON(ret)) {
-			i915_gem_object_put(obj);
-			return ERR_PTR(ret);
-		}
-	}
-
-	return obj;
-}
-
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -266,26 +200,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
 
-	if (dev_priv->hw_context_size) {
-		struct drm_i915_gem_object *obj;
-		struct i915_vma *vma;
-
-		obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
-		if (IS_ERR(obj)) {
-			ret = PTR_ERR(obj);
-			goto err_out;
-		}
-
-		vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-		if (IS_ERR(vma)) {
-			i915_gem_object_put(obj);
-			ret = PTR_ERR(vma);
-			goto err_out;
-		}
-
-		ctx->engine[RCS].state = vma;
-	}
-
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
 	if (file_priv) {
@@ -443,21 +357,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
 	ida_init(&dev_priv->context_hw_ida);
 
-	if (i915.enable_execlists) {
-		/* NB: intentionally left blank. We will allocate our own
-		 * backing objects as we need them, thank you very much */
-		dev_priv->hw_context_size = 0;
-	} else if (HAS_HW_CONTEXTS(dev_priv)) {
-		dev_priv->hw_context_size =
-			round_up(get_context_size(dev_priv),
-				 I915_GTT_PAGE_SIZE);
-		if (dev_priv->hw_context_size > (1<<20)) {
-			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
-					 dev_priv->hw_context_size);
-			dev_priv->hw_context_size = 0;
-		}
-	}
-
 	ctx = i915_gem_create_context(dev_priv, NULL);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context (error %ld)\n",
@@ -477,8 +376,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			i915.enable_execlists ? "LR" :
-			dev_priv->hw_context_size ? "HW" : "fake");
+			 dev_priv->engine[RCS]->context_size ? "logical" :
+			 "fake");
 	return 0;
 }
 
@@ -941,11 +840,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-static bool contexts_enabled(struct drm_device *dev)
-{
-	return i915.enable_execlists || to_i915(dev)->hw_context_size;
-}
-
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
 	return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
@@ -954,12 +848,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_context_create *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 	int ret;
 
-	if (!contexts_enabled(dev))
+	if (!dev_priv->engine[RCS]->context_size)
 		return -ENODEV;
 
 	if (args->pad != 0)
@@ -977,7 +872,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(to_i915(dev), file_priv);
+	ctx = i915_gem_create_context(dev_priv, file_priv);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3e59c8ef27b..af1965774e7b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+			struct i915_gem_capture_list *capture;
+
+			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+			if (unlikely(!capture))
+				return -ENOMEM;
+
+			capture->next = req->capture_list;
+			capture->vma = vma;
+			req->capture_list = capture;
+		}
+
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ddbc9499775..9074303c8888 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
 	if (i915_fence_signaled(fence))
 		return false;
 
-	intel_engine_enable_signaling(to_request(fence));
+	intel_engine_enable_signaling(to_request(fence), true);
 	return true;
 }
 
@@ -214,12 +214,12 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		}
 
 		/* Finally reset hw state */
-		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
+		tl->seqno = seqno;
 
 		list_for_each_entry(timeline, &i915->gt.timelines, link)
-			memset(timeline->engine[id].sync_seqno, 0,
-			       sizeof(timeline->engine[id].sync_seqno));
+			memset(timeline->engine[id].global_sync, 0,
+			       sizeof(timeline->engine[id].global_sync));
 	}
 
 	return 0;
@@ -271,6 +271,48 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
 	/* Space left intentionally blank */
 }
 
+static void advance_ring(struct drm_i915_gem_request *request)
+{
+	unsigned int tail;
+
+	/* Advance ring->head past the retired request. We know the GPU
+	 * must have read the request to have sent us the seqno +
+	 * interrupt, so use the position of the tail of the request to
+	 * update the last known position of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+		/* We may race here with execlists resubmitting this request
+		 * as we retire it. The resubmission will move the ring->tail
+		 * forwards (to request->wa_tail). We either read the
+		 * current value that was written to hw, or the value that
+		 * is just about to be. Either works: if we miss the last two
+		 * noops, they are safe to be replayed on a reset.
+		 */
+		tail = READ_ONCE(request->ring->tail);
+	} else {
+		tail = request->postfix;
+	}
+	list_del(&request->ring_link);
+
+	request->ring->head = tail;
+}
+
+static void free_capture_list(struct drm_i915_gem_request *request)
+{
+	struct i915_gem_capture_list *capture;
+
+	capture = request->capture_list;
+	while (capture) {
+		struct i915_gem_capture_list *next = capture->next;
+
+		kfree(capture);
+		capture = next;
+	}
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -287,16 +329,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->link);
 	spin_unlock_irq(&engine->timeline->lock);
 
-	/* We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	list_del(&request->ring_link);
-	request->ring->head = request->postfix;
 	if (!--request->i915->gt.active_requests) {
 		GEM_BUG_ON(!request->i915->gt.awake);
 		mod_delayed_work(request->i915->wq,
@@ -304,6 +336,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 				 msecs_to_jiffies(100));
 	}
 	unreserve_seqno(request->engine);
+	advance_ring(request);
+
+	free_capture_list(request);
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -402,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_enable_signaling(request);
+		intel_engine_enable_signaling(request, false);
 	spin_unlock(&request->lock);
 
 	engine->emit_breadcrumb(request,
@@ -603,6 +638,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->global_seqno = 0;
 	req->file_priv = NULL;
 	req->batch = NULL;
+	req->capture_list = NULL;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
@@ -623,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
@@ -651,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	int ret;
 
 	GEM_BUG_ON(to == from);
+	GEM_BUG_ON(to->timeline == from->timeline);
 
 	if (i915_gem_request_completed(from))
 		return 0;
@@ -663,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}
 
-	if (to->timeline == from->timeline)
-		return 0;
-
 	if (to->engine == from->engine) {
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
@@ -674,55 +708,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	}
 
 	seqno = i915_gem_request_global_seqno(from);
-	if (!seqno) {
-		ret = i915_sw_fence_await_dma_fence(&to->submit,
-						    &from->fence, 0,
-						    GFP_KERNEL);
-		return ret < 0 ? ret : 0;
-	}
+	if (!seqno)
+		goto await_dma_fence;
 
-	if (seqno <= to->timeline->sync_seqno[from->engine->id])
-		return 0;
+	if (!to->engine->semaphore.sync_to) {
+		if (!__i915_gem_request_started(from, seqno))
+			goto await_dma_fence;
 
-	trace_i915_gem_ring_sync_to(to, from);
-	if (!i915.semaphores) {
-		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
-			ret = i915_sw_fence_await_dma_fence(&to->submit,
-							    &from->fence, 0,
-							    GFP_KERNEL);
-			if (ret < 0)
-				return ret;
-		}
+		if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2))
+			goto await_dma_fence;
 	} else {
+		GEM_BUG_ON(!from->engine->semaphore.signal);
+
+		if (seqno <= to->timeline->global_sync[from->engine->id])
+			return 0;
+
+		trace_i915_gem_ring_sync_to(to, from);
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
 			return ret;
+
+		to->timeline->global_sync[from->engine->id] = seqno;
 	}
 
-	to->timeline->sync_seqno[from->engine->id] = seqno;
 	return 0;
+
+await_dma_fence:
+	ret = i915_sw_fence_await_dma_fence(&to->submit,
+					    &from->fence, 0,
+					    GFP_KERNEL);
+	return ret < 0 ? ret : 0;
 }
 
 int
 i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 				 struct dma_fence *fence)
 {
-	struct dma_fence_array *array;
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
 	int ret;
-	int i;
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return 0;
-
-	if (dma_fence_is_i915(fence))
-		return i915_gem_request_await_request(req, to_request(fence));
-
-	if (!dma_fence_is_array(fence)) {
-		ret = i915_sw_fence_await_dma_fence(&req->submit,
-						    fence, I915_FENCE_TIMEOUT,
-						    GFP_KERNEL);
-		return ret < 0 ? ret : 0;
-	}
 
 	/* Note that if the fence-array was created in signal-on-any mode,
 	 * we should *not* decompose it into its individual fences. However,
@@ -731,21 +755,46 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 	 * amdgpu and we should not see any incoming fence-array from
 	 * sync-file being in signal-on-any mode.
 	 */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * Requests on the same timeline are explicitly ordered, along
+		 * with their dependencies, by i915_add_request() which ensures
+		 * that requests are submitted in-order through each ring.
+		 */
+		if (fence->context == req->fence.context)
+			continue;
 
-	array = to_dma_fence_array(fence);
-	for (i = 0; i < array->num_fences; i++) {
-		struct dma_fence *child = array->fences[i];
+		/* Squash repeated waits to the same timelines */
+		if (fence->context != req->i915->mm.unordered_timeline &&
+		    intel_timeline_sync_is_later(req->timeline, fence))
+			continue;
 
-		if (dma_fence_is_i915(child))
+		if (dma_fence_is_i915(fence))
 			ret = i915_gem_request_await_request(req,
-							     to_request(child));
+							     to_request(fence));
 		else
-			ret = i915_sw_fence_await_dma_fence(&req->submit,
-							    child, I915_FENCE_TIMEOUT,
+			ret = i915_sw_fence_await_dma_fence(&req->submit, fence,
+							    I915_FENCE_TIMEOUT,
 							    GFP_KERNEL);
 		if (ret < 0)
 			return ret;
-	}
+
+		/* Record the latest fence used against each timeline */
+		if (fence->context != req->i915->mm.unordered_timeline)
+			intel_timeline_sync_set(req->timeline, fence);
+	} while (--nchild);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a211c53c813f..4ccab5affd3c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -73,6 +73,11 @@ struct i915_priotree {
 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
 };
 
+struct i915_gem_capture_list {
+	struct i915_gem_capture_list *next;
+	struct i915_vma *vma;
+};
+
 /**
  * Request queue structure.
  *
@@ -167,6 +172,12 @@ struct drm_i915_gem_request {
 	 * error state dump only).
 	 */
 	struct i915_vma *batch;
+	/** Additional buffers requested by userspace to be captured upon
+	 * a GPU hang. The vma/obj on this list are protected by their
+	 * active reference - all objects on this list must also be
+	 * on the active_list (of their final request).
+	 */
+	struct i915_gem_capture_list *capture_list;
 	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 129ed303a6c4..0e7352d82ca4 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -35,9 +35,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 {
-	switch (mutex_trylock_recursive(&dev->struct_mutex)) {
+	switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_FAILED:
 		return false;
 
@@ -53,12 +53,12 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	BUG();
 }
 
-static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
 {
 	if (!unlock)
 		return;
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	/* expedite the RCU grace period to free some request slabs */
 	synchronize_rcu_expedited();
@@ -156,7 +156,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 	unsigned long count = 0;
 	bool unlock;
 
-	if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return 0;
 
 	trace_i915_gem_shrink(dev_priv, target, flags);
@@ -244,7 +244,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 	i915_gem_retire_requests(dev_priv);
 
-	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
+	shrinker_unlock(dev_priv, unlock);
 
 	return count;
 }
@@ -284,12 +284,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_device *dev = &dev_priv->drm;
 	struct drm_i915_gem_object *obj;
 	unsigned long count;
 	bool unlock;
 
-	if (!i915_gem_shrinker_lock(dev, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return 0;
 
 	i915_gem_retire_requests(dev_priv);
@@ -304,7 +303,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
-	i915_gem_shrinker_unlock(dev, unlock);
+	shrinker_unlock(dev_priv, unlock);
 
 	return count;
 }
@@ -314,11 +313,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_device *dev = &dev_priv->drm;
 	unsigned long freed;
 	bool unlock;
 
-	if (!i915_gem_shrinker_lock(dev, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return SHRINK_STOP;
 
 	freed = i915_gem_shrink(dev_priv,
@@ -332,26 +330,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
 
-	i915_gem_shrinker_unlock(dev, unlock);
+	shrinker_unlock(dev_priv, unlock);
 
 	return freed;
 }
 
-struct shrinker_lock_uninterruptible {
-	bool was_interruptible;
-	bool unlock;
-};
-
 static bool
-i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
-				       struct shrinker_lock_uninterruptible *slu,
-				       int timeout_ms)
+shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
+			      int timeout_ms)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
 	do {
 		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
-		    i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
+		    shrinker_lock(dev_priv, unlock))
 			break;
 
 		schedule_timeout_killable(1);
@@ -364,29 +356,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 		}
 	} while (1);
 
-	slu->was_interruptible = dev_priv->mm.interruptible;
-	dev_priv->mm.interruptible = false;
 	return true;
 }
 
-static void
-i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
-					 struct shrinker_lock_uninterruptible *slu)
-{
-	dev_priv->mm.interruptible = slu->was_interruptible;
-	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
-}
-
 static int
 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
-	struct shrinker_lock_uninterruptible slu;
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
+	bool unlock;
 
-	if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
 		return NOTIFY_DONE;
 
 	freed_pages = i915_gem_shrink_all(dev_priv);
@@ -415,7 +397,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 			bound += obj->base.size >> PAGE_SHIFT;
 	}
 
-	i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+	shrinker_unlock(dev_priv, unlock);
 
 	if (freed_pages || unbound || bound)
 		pr_info("Purging GPU memory, %lu pages freed, "
@@ -435,12 +417,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 {
 	struct drm_i915_private *dev_priv =
 		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
-	struct shrinker_lock_uninterruptible slu;
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
+	bool unlock;
 	int ret;
 
-	if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
@@ -465,7 +447,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	}
 
 out:
-	i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+	shrinker_unlock(dev_priv, unlock);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index b596ca7ee058..c597ce277a04 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -23,6 +23,32 @@
  */
 
 #include "i915_drv.h"
+#include "i915_syncmap.h"
+
+static void __intel_timeline_init(struct intel_timeline *tl,
+				  struct i915_gem_timeline *parent,
+				  u64 context,
+				  struct lock_class_key *lockclass,
+				  const char *lockname)
+{
+	tl->fence_context = context;
+	tl->common = parent;
+#ifdef CONFIG_DEBUG_SPINLOCK
+	__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
+#else
+	spin_lock_init(&tl->lock);
+#endif
+	init_request_active(&tl->last_request, NULL);
+	INIT_LIST_HEAD(&tl->requests);
+	i915_syncmap_init(&tl->sync);
+}
+
+static void __intel_timeline_fini(struct intel_timeline *tl)
+{
+	GEM_BUG_ON(!list_empty(&tl->requests));
+
+	i915_syncmap_free(&tl->sync);
+}
 
 static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 				    struct i915_gem_timeline *timeline,
@@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
+	/*
+	 * Ideally we want a set of engines on a single leaf as we expect
+	 * to mostly be tracking synchronisation between engines. It is not
+	 * a huge issue if this is not the case, but we may want to mitigate
+	 * any page crossing penalties if they become an issue.
+	 */
+	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
+
 	timeline->i915 = i915;
 	timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
 	if (!timeline->name)
@@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 
 	/* Called during early_init before we know how many engines there are */
 	fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-		struct intel_timeline *tl = &timeline->engine[i];
-
-		tl->fence_context = fences++;
-		tl->common = timeline;
-#ifdef CONFIG_DEBUG_SPINLOCK
-		__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
-#else
-		spin_lock_init(&tl->lock);
-#endif
-		init_request_active(&tl->last_request, NULL);
-		INIT_LIST_HEAD(&tl->requests);
-	}
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+		__intel_timeline_init(&timeline->engine[i],
+				      timeline, fences++,
+				      lockclass, lockname);
 
 	return 0;
 }
@@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
 					&class, "&global_timeline->lock");
 }
 
+/**
+ * i915_gem_timelines_mark_idle - called when the driver idles
+ * @i915: the drm_i915_private device
+ *
+ * When the driver is completely idle, we know that all of our sync points
+ * have been signaled and our tracking is then entirely redundant. Any request
+ * to wait upon an older sync point will be completed instantly as we know
+ * the fence is signaled and therefore we will not even look them up in the
+ * sync point map.
+ */
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
+{
+	struct i915_gem_timeline *timeline;
+	int i;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	list_for_each_entry(timeline, &i915->gt.timelines, link) {
+		for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
+			struct intel_timeline *tl = &timeline->engine[i];
+
+			/*
+			 * All known fences are completed so we can scrap
+			 * the current sync point tracking and start afresh;
+			 * any attempt to wait upon a previous sync point
+			 * will be skipped as the fence was signaled.
+			 */
+			i915_syncmap_free(&tl->sync);
+		}
+	}
+}
+
 void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
 {
 	int i;
 
 	lockdep_assert_held(&timeline->i915->drm.struct_mutex);
 
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-		struct intel_timeline *tl = &timeline->engine[i];
-
-		GEM_BUG_ON(!list_empty(&tl->requests));
-	}
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+		__intel_timeline_fini(&timeline->engine[i]);
 
 	list_del(&timeline->link);
 	kfree(timeline->name);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_timeline.c"
+#include "selftests/i915_gem_timeline.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 6c53e14cab2a..bfb5eb94c64d 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -27,7 +27,9 @@
 
 #include <linux/list.h>
 
+#include "i915_utils.h"
 #include "i915_gem_request.h"
+#include "i915_syncmap.h"
 
 struct i915_gem_timeline;
 
@@ -55,7 +57,25 @@ struct intel_timeline {
 	 * struct_mutex.
 	 */
 	struct i915_gem_active last_request;
-	u32 sync_seqno[I915_NUM_ENGINES];
+
+	/**
+	 * We track the most recent seqno that we wait on in every context so
+	 * that we only have to emit a new await and dependency on a more
+	 * recent sync point. As the contexts may be executed out-of-order, we
+	 * have to track each individually and can not rely on an absolute
+	 * global_seqno. When we know that all tracked fences are completed
+	 * (i.e. when the driver is idle), we know that the syncmap is
+	 * redundant and we can discard it without loss of generality.
+	 */
+	struct i915_syncmap *sync;
+	/**
+	 * Separately to the inter-context seqno map above, we track the last
+	 * barrier (e.g. semaphore wait) to the global engine timelines. Note
+	 * that this tracks global_seqno rather than the context.seqno, and
+	 * so it is subject to the limitations of hw wraparound and that we
+	 * may need to revoke global_seqno (on pre-emption).
+	 */
+	u32 global_sync[I915_NUM_ENGINES];
 
 	struct i915_gem_timeline *common;
 };
@@ -73,6 +93,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 			   struct i915_gem_timeline *tl,
 			   const char *name);
 int i915_gem_timeline_init__global(struct drm_i915_private *i915);
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
 void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
 
+static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
+					    u64 context, u32 seqno)
+{
+	return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int intel_timeline_sync_set(struct intel_timeline *tl,
+					  const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
+						  u64 context, u32 seqno)
+{
+	return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
+						const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8effc59f5cb5..ec526d92f908 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			print_error_obj(m, dev_priv->engine[i], NULL, obj);
 		}
 
+		for (j = 0; j < ee->user_bo_count; j++)
+			print_error_obj(m, dev_priv->engine[i],
+					"user", ee->user_bo[j]);
+
 		if (ee->num_requests) {
 			err_printf(m, "%s --- %d requests\n",
 				   dev_priv->engine[i]->name,
@@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
 {
 	struct i915_gpu_state *error =
 		container_of(error_ref, typeof(*error), ref);
-	int i;
+	long i, j;
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		struct drm_i915_error_engine *ee = &error->engine[i];
 
+		for (j = 0; j < ee->user_bo_count; j++)
+			i915_error_object_free(ee->user_bo[j]);
+		kfree(ee->user_bo);
+
 		i915_error_object_free(ee->batchbuffer);
 		i915_error_object_free(ee->wa_batchbuffer);
 		i915_error_object_free(ee->ringbuffer);
@@ -1346,6 +1354,35 @@ static void record_context(struct drm_i915_error_context *e,
 	e->active = ctx->active_count;
 }
 
+static void request_record_user_bo(struct drm_i915_gem_request *request,
+				   struct drm_i915_error_engine *ee)
+{
+	struct i915_gem_capture_list *c;
+	struct drm_i915_error_object **bo;
+	long count;
+
+	count = 0;
+	for (c = request->capture_list; c; c = c->next)
+		count++;
+
+	bo = NULL;
+	if (count)
+		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
+	if (!bo)
+		return;
+
+	count = 0;
+	for (c = request->capture_list; c; c = c->next) {
+		bo[count] = i915_error_object_create(request->i915, c->vma);
+		if (!bo[count])
+			break;
+		count++;
+	}
+
+	ee->user_bo = bo;
+	ee->user_bo_count = count;
+}
+
 static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 				  struct i915_gpu_state *error)
 {
@@ -1392,6 +1429,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 				ee->wa_batchbuffer =
 					i915_error_object_create(dev_priv,
 								 engine->scratch);
+			request_record_user_bo(request, ee);
 
 			ee->ctx =
 				i915_error_object_create(dev_priv,
@@ -1560,6 +1598,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 		error->done_reg = I915_READ(DONE_REG);
 	}
 
+	if (INTEL_GEN(dev_priv) >= 5)
+		error->ccid = I915_READ(CCID);
+
 	/* 3: Feature specific registers */
 	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
 		error->gam_ecochk = I915_READ(GAM_ECOCHK);
@@ -1567,9 +1608,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 	}
 
 	/* 4: Everything else */
-	if (HAS_HW_CONTEXTS(dev_priv))
-		error->ccid = I915_READ(CCID);
-
 	if (INTEL_GEN(dev_priv) >= 8) {
 		error->ier = I915_READ(GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..7e85b5ab8ae2 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
@@ -651,7 +649,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 	trace_dma_fence_enable_signal(&rq->fence);
 
 	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
-	intel_engine_enable_signaling(rq);
+	intel_engine_enable_signaling(rq, true);
 	spin_unlock(&rq->lock);
 }
 
@@ -1053,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc)
 		dev_priv->engine[RCS]->status_page.ggtt_offset;
 
 	for_each_engine(engine, dev_priv, id)
-		blob->ads.eng_state_size[engine->guc_id] =
-			intel_lr_context_size(engine);
+		blob->ads.eng_state_size[engine->guc_id] = engine->context_size;
 
 	base = guc_ggtt_offset(vma);
 	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fd97fe00cd0d..c99f51c587c7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1236,7 +1236,7 @@ out:
 static void ivybridge_parity_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, l3_parity.error_work);
+		container_of(work, typeof(*dev_priv), l3_parity.error_work);
 	u32 error_status, row, bank, subbank;
 	char *parity_event[6];
 	uint32_t misccpctl;
@@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 	u32 pipestat_mask;
 	u32 enable_mask;
 	enum pipe pipe;
-	u32 val;
 
 	pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
 			PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	enable_mask = I915_DISPLAY_PORT_INTERRUPT |
 		I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
-		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;
+		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
+		I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT;
+
 	if (IS_CHERRYVIEW(dev_priv))
-		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
+			I915_LPE_PIPE_C_INTERRUPT;
 
 	WARN_ON(dev_priv->irq_mask != ~0);
 
-	val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT |
-		I915_LPE_PIPE_C_INTERRUPT);
-
-	enable_mask |= val;
-
 	dev_priv->irq_mask = ~enable_mask;
 
 	GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
@@ -4233,11 +4230,15 @@ static void i965_irq_uninstall(struct drm_device * dev)
 void intel_irq_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
+	int i;
 
 	intel_hpd_init_work(dev_priv);
 
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
+
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
+	for (i = 0; i < MAX_L3_SLICES; ++i)
+		dev_priv->l3_parity.remap_info[i] = NULL;
 
 	if (HAS_GUC_SCHED(dev_priv))
 		dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -4363,6 +4364,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 }
 
 /**
+ * intel_irq_fini - deinitializes IRQ support
+ * @i915: i915 device instance
+ *
+ * This function deinitializes all the IRQ support.
+ */
+void intel_irq_fini(struct drm_i915_private *i915)
+{
+	int i;
+
+	for (i = 0; i < MAX_L3_SLICES; ++i)
+		kfree(i915->l3_parity.remap_info[i]);
+}
+
+/**
  * intel_irq_install - enables the hardware interrupt
  * @dev_priv: i915 device instance
  *
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index f87b0c4e564d..f80db2ccd92f 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_gmbus_irq = 1, \
-	.has_hw_contexts = 1, \
 	.has_aliasing_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	CURSOR_OFFSETS
@@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = {
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_gmbus_irq = 1, \
-	.has_hw_contexts = 1, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
@@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = {
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
 	.has_gmbus_irq = 1,
-	.has_hw_contexts = 1,
 	.has_gmch_display = 1,
 	.has_hotplug = 1,
 	.has_aliasing_ppgtt = 1,
@@ -340,7 +337,6 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_resource_streamer = 1,
 	.has_rc6 = 1,
 	.has_gmbus_irq = 1,
-	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_gmch_display = 1,
 	.has_aliasing_ppgtt = 1,
@@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = {
 	.has_rc6 = 1, \
 	.has_dp_mst = 1, \
 	.has_gmbus_irq = 1, \
-	.has_hw_contexts = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_guc = 1, \
 	.has_decoupled_mmio = 1, \
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 11b12f412492..ee8170cda93e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VECS_HW		3
 #define VCS2_HW		4
 
+/* Engine class */
+
+#define RENDER_CLASS		0
+#define VIDEO_DECODE_CLASS	1
+#define VIDEO_ENHANCEMENT_CLASS	2
+#define COPY_ENGINE_CLASS	3
+#define OTHER_CLASS		4
+
 /* PCI config space */
 
 #define MCHBAR_I915 0x44
@@ -3362,16 +3370,6 @@ enum skl_disp_power_wells {
 #define GEN7_CXT_VFSTATE_SIZE(ctx_reg)	(((ctx_reg) >> 0) & 0x3f)
 #define GEN7_CXT_TOTAL_SIZE(ctx_reg)	(GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
 					 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
-/* Haswell does have the CXT_SIZE register however it does not appear to be
- * valid. Now, docs explain in dwords what is in the context object. The full
- * size is 70720 bytes, however, the power context and execlist context will
- * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size, including the extra state required for the
- * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
- */
-#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
-/* Same as Haswell, but 72064 bytes now. */
-#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
 
 enum {
 	INTEL_ADVANCED_CONTEXT = 0,
diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c
new file mode 100644
index 000000000000..0087acf731a8
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_syncmap.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "i915_syncmap.h"
+
+#include "i915_gem.h" /* GEM_BUG_ON() */
+#include "i915_selftest.h"
+
+#define SHIFT ilog2(KSYNCMAP)
+#define MASK (KSYNCMAP - 1)
+
+/*
+ * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
+ * context id to the last u32 fence seqno waited upon from that context.
+ * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
+ * the root. This allows us to access the whole tree via a single pointer
+ * to the most recently used layer. We expect fence contexts to be dense
+ * and most reuse to be on the same i915_gem_context but on neighbouring
+ * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
+ * effective lookup cache. If the new lookup is not on the same leaf, we
+ * expect it to be on the neighbouring branch.
+ *
+ * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
+ * allows us to store whether a particular seqno is valid (i.e. allows us
+ * to distinguish unset from 0).
+ *
+ * A branch holds an array of layer pointers, and has height > 0, and always
+ * has at least 2 layers (either branches or leaves) below it.
+ *
+ * For example,
+ *	for x in
+ *	  0 1 2 0x10 0x11 0x200 0x201
+ *	  0x500000 0x500001 0x503000 0x503001
+ *	  0xE<<60:
+ *		i915_syncmap_set(&sync, x, lower_32_bits(x));
+ * will build a tree like:
+ *	0xXXXXXXXXXXXXXXXX
+ *	0-> 0x0000000000XXXXXX
+ *	|   0-> 0x0000000000000XXX
+ *	|   |   0-> 0x00000000000000XX
+ *	|   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
+ *	|   |   |   1-> 0x000000000000001X 0:10, 1:11
+ *	|   |   2-> 0x000000000000020X 0:200, 1:201
+ *	|   5-> 0x000000000050XXXX
+ *	|       0-> 0x000000000050000X 0:500000, 1:500001
+ *	|       3-> 0x000000000050300X 0:503000, 1:503001
+ *	e-> 0xe00000000000000X e:e
+ */
+
+struct i915_syncmap {
+	u64 prefix;
+	unsigned int height;
+	unsigned int bitmap;
+	struct i915_syncmap *parent;
+	/*
+	 * Following this header is an array of either seqno or child pointers:
+	 * union {
+	 *	u32 seqno[KSYNCMAP];
+	 *	struct i915_syncmap *child[KSYNCMAP];
+	 * };
+	 */
+};
+
+/**
+ * i915_syncmap_init -- initialise the #i915_syncmap
+ * @root: pointer to the #i915_syncmap
+ */
+void i915_syncmap_init(struct i915_syncmap **root)
+{
+	BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
+	BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
+	BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap));
+	*root = NULL;
+}
+
+static inline u32 *__sync_seqno(struct i915_syncmap *p)
+{
+	GEM_BUG_ON(p->height);
+	return (u32 *)(p + 1);
+}
+
+static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
+{
+	GEM_BUG_ON(!p->height);
+	return (struct i915_syncmap **)(p + 1);
+}
+
+static inline unsigned int
+__sync_branch_idx(const struct i915_syncmap *p, u64 id)
+{
+	return (id >> p->height) & MASK;
+}
+
+static inline unsigned int
+__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
+{
+	GEM_BUG_ON(p->height);
+	return id & MASK;
+}
+
+static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
+{
+	return id >> p->height >> SHIFT;
+}
+
+static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
+{
+	GEM_BUG_ON(p->height);
+	return id >> SHIFT;
+}
+
+static inline bool seqno_later(u32 a, u32 b)
+{
+	return (s32)(a - b) >= 0;
+}
+
+/**
+ * i915_syncmap_is_later -- compare against the last known sync point
+ * @root: pointer to the #i915_syncmap
+ * @id: the context id (other timeline) we are synchronising to
+ * @seqno: the sequence number along the other timeline
+ *
+ * If we have already synchronised this @root timeline with another (@id) then
+ * we can omit any repeated or earlier synchronisation requests. If the two
+ * timelines are already coupled, we can also omit the dependency between the
+ * two as that is already known via the timeline.
+ *
+ * Returns true if the two timelines are already synchronised wrt @seqno,
+ * false if not and the synchronisation must be emitted.
+ */
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p;
+	unsigned int idx;
+
+	p = *root;
+	if (!p)
+		return false;
+
+	if (likely(__sync_leaf_prefix(p, id) == p->prefix))
+		goto found;
+
+	/* First climb the tree back to a parent branch */
+	do {
+		p = p->parent;
+		if (!p)
+			return false;
+
+		if (__sync_branch_prefix(p, id) == p->prefix)
+			break;
+	} while (1);
+
+	/* And then descend again until we find our leaf */
+	do {
+		if (!p->height)
+			break;
+
+		p = __sync_child(p)[__sync_branch_idx(p, id)];
+		if (!p)
+			return false;
+
+		if (__sync_branch_prefix(p, id) != p->prefix)
+			return false;
+	} while (1);
+
+	*root = p;
+found:
+	idx = __sync_leaf_idx(p, id);
+	if (!(p->bitmap & BIT(idx)))
+		return false;
+
+	return seqno_later(__sync_seqno(p)[idx], seqno);
+}
+
+static struct i915_syncmap *
+__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
+{
+	struct i915_syncmap *p;
+
+	p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
+	if (unlikely(!p))
+		return NULL;
+
+	p->parent = parent;
+	p->height = 0;
+	p->bitmap = 0;
+	p->prefix = __sync_leaf_prefix(p, id);
+	return p;
+}
+
+static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
+{
+	unsigned int idx = __sync_leaf_idx(p, id);
+
+	p->bitmap |= BIT(idx);
+	__sync_seqno(p)[idx] = seqno;
+}
+
+static inline void __sync_set_child(struct i915_syncmap *p,
+				    unsigned int idx,
+				    struct i915_syncmap *child)
+{
+	p->bitmap |= BIT(idx);
+	__sync_child(p)[idx] = child;
+}
+
+static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p = *root;
+	unsigned int idx;
+
+	if (!p) {
+		p = __sync_alloc_leaf(NULL, id);
+		if (unlikely(!p))
+			return -ENOMEM;
+
+		goto found;
+	}
+
+	/* Caller handled the likely cached case */
+	GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
+
+	/* Climb back up the tree until we find a common prefix */
+	do {
+		if (!p->parent)
+			break;
+
+		p = p->parent;
+
+		if (__sync_branch_prefix(p, id) == p->prefix)
+			break;
+	} while (1);
+
+	/*
+	 * No shortcut, we have to descend the tree to find the right layer
+	 * containing this fence.
+	 *
+	 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
+	 * or lower layers. Leaf nodes (height = 0) contain the fences, all
+	 * other nodes (height > 0) are internal layers that point to a lower
+	 * node. Each internal layer has at least 2 descendants.
+	 *
+	 * Starting at the top, we check whether the current prefix matches. If
+	 * it doesn't, we have gone past our target and need to insert a join
+	 * into the tree, and a new leaf node for the target as a descendant
+	 * of the join, as well as the original layer.
+	 *
+	 * The matching prefix means we are still following the right branch
+	 * of the tree. If it has height 0, we have found our leaf and just
+	 * need to replace the fence slot with ourselves. If the height is
+	 * not zero, our slot contains the next layer in the tree (unless
+	 * it is empty, in which case we can add ourselves as a new leaf).
+	 * As we descend the tree, the prefix grows (and height decreases).
+	 */
+	do {
+		struct i915_syncmap *next;
+
+		if (__sync_branch_prefix(p, id) != p->prefix) {
+			unsigned int above;
+
+			/* Insert a join above the current layer */
+			next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
+				       GFP_KERNEL);
+			if (unlikely(!next))
+				return -ENOMEM;
+
+			/* Compute the height at which these two diverge */
+			above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
+			above = round_up(above, SHIFT);
+			next->height = above + p->height;
+			next->prefix = __sync_branch_prefix(next, id);
+
+			/* Insert the join into the parent */
+			if (p->parent) {
+				idx = __sync_branch_idx(p->parent, id);
+				__sync_child(p->parent)[idx] = next;
+				GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
+			}
+			next->parent = p->parent;
+
+			/* Compute the idx of the other branch, not our id! */
+			idx = p->prefix >> (above - SHIFT) & MASK;
+			__sync_set_child(next, idx, p);
+			p->parent = next;
+
+			/* Ascend to the join */
+			p = next;
+		} else {
+			if (!p->height)
+				break;
+		}
+
+		/* Descend into the next layer */
+		GEM_BUG_ON(!p->height);
+		idx = __sync_branch_idx(p, id);
+		next = __sync_child(p)[idx];
+		if (!next) {
+			next = __sync_alloc_leaf(p, id);
+			if (unlikely(!next))
+				return -ENOMEM;
+
+			__sync_set_child(p, idx, next);
+			p = next;
+			break;
+		}
+
+		p = next;
+	} while (1);
+
+found:
+	GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
+	__sync_set_seqno(p, id, seqno);
+	*root = p;
+	return 0;
+}
+
+/**
+ * i915_syncmap_set -- mark the most recent syncpoint between contexts
+ * @root: pointer to the #i915_syncmap
+ * @id: the context id (other timeline) we have synchronised to
+ * @seqno: the sequence number along the other timeline
+ *
+ * When we synchronise this @root timeline with another (@id), we also know
+ * that we have synchronized with all previous seqno along that timeline. If
+ * we then have a request to synchronise with the same seqno or older, we can
+ * omit it, see i915_syncmap_is_later()
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p = *root;
+
+	/*
+	 * We expect to be called in sequence following is_later(id), which
+	 * should have preloaded the root for us.
+	 */
+	if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
+		__sync_set_seqno(p, id, seqno);
+		return 0;
+	}
+
+	return __sync_set(root, id, seqno);
+}
+
+static void __sync_free(struct i915_syncmap *p)
+{
+	if (p->height) {
+		unsigned int i;
+
+		while ((i = ffs(p->bitmap))) {
+			p->bitmap &= ~0u << i;
+			__sync_free(__sync_child(p)[i - 1]);
+		}
+	}
+
+	kfree(p);
+}
+
+/**
+ * i915_syncmap_free -- free all memory associated with the syncmap
+ * @root: pointer to the #i915_syncmap
+ *
+ * Either when the timeline is to be freed and we no longer need the sync
+ * point tracking, or when the fences are all known to be signaled and the
+ * sync point tracking is redundant, we can free the #i915_syncmap to recover
+ * its allocations.
+ *
+ * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
+ * reuse.
+ */
+void i915_syncmap_free(struct i915_syncmap **root)
+{
+	struct i915_syncmap *p;
+
+	p = *root;
+	if (!p)
+		return;
+
+	while (p->parent)
+		p = p->parent;
+
+	__sync_free(p);
+	*root = NULL;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_syncmap.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_syncmap.h b/drivers/gpu/drm/i915/i915_syncmap.h
new file mode 100644
index 000000000000..0653f70bee82
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_syncmap.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_SYNCMAP_H__
+#define __I915_SYNCMAP_H__
+
+#include <linux/types.h>
+
+struct i915_syncmap;
+#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */
+
+void i915_syncmap_init(struct i915_syncmap **root);
+int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno);
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno);
+void i915_syncmap_free(struct i915_syncmap **root);
+
+#endif /* __I915_SYNCMAP_H__ */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index f3fdfda5e558..1eef3fae4db3 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -181,13 +181,10 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gem_context *ctx;
-	u32 *temp = NULL; /* Just here to make handling failures easy */
 	int slice = (int)(uintptr_t)attr->private;
+	u32 **remap_info;
 	int ret;
 
-	if (!HAS_HW_CONTEXTS(dev_priv))
-		return -ENXIO;
-
 	ret = l3_access_valid(dev_priv, offset);
 	if (ret)
 		return ret;
@@ -196,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	if (ret)
 		return ret;
 
-	if (!dev_priv->l3_parity.remap_info[slice]) {
-		temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
-		if (!temp) {
-			mutex_unlock(&dev->struct_mutex);
-			return -ENOMEM;
+	remap_info = &dev_priv->l3_parity.remap_info[slice];
+	if (!*remap_info) {
+		*remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
+		if (!*remap_info) {
+			ret = -ENOMEM;
+			goto out;
 		}
 	}
 
@@ -208,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	 * aren't propagated. Since I cannot find a stable way to reset the GPU
 	 * at this point it is left as a TODO.
 	*/
-	if (temp)
-		dev_priv->l3_parity.remap_info[slice] = temp;
-
-	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
+	memcpy(*remap_info + (offset/4), buf, count);
 
 	/* NB: We defer the remapping until we switch to the context */
 	list_for_each_entry(ctx, &dev_priv->context_list, link)
 		ctx->remap_slice |= (1<<slice);
 
+	ret = count;
+
+out:
 	mutex_unlock(&dev->struct_mutex);
 
-	return count;
+	return ret;
 }
 
 static struct bin_attribute dpf_attrs = {
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 52c207e81f41..d805b6e6fe71 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder,
 						 (int) port, (int) pipe);
 	}
 
-	switch (intel_encoder->type) {
-	case INTEL_OUTPUT_HDMI:
-		intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
-				       crtc_state->port_clock,
-				       false, 0);
-		break;
-	case INTEL_OUTPUT_DP:
-		intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
-				       adjusted_mode->crtc_clock,
-				       true, crtc_state->port_clock);
-		break;
-	default:
-		break;
-	}
+	intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld,
+			       crtc_state->port_clock,
+			       intel_encoder->type == INTEL_OUTPUT_DP);
 }
 
 /**
@@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder)
 						 (int) port, (int) pipe);
 	}
 
-	intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0);
+	intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 9ccbf26124c6..183afcb036aa 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -64,10 +64,12 @@ static unsigned long wait_timeout(void)
 
 static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
 {
-	DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n",
+	DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n",
 			 engine->name, __builtin_return_address(0),
 			 yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-					&engine->irq_posted)));
+					&engine->irq_posted)),
+			 intel_engine_get_seqno(engine),
+			 intel_engine_last_submit(engine));
 
 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
@@ -665,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg)
 	return 0;
 }
 
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+				   bool wakeup)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	struct rb_node *parent, **p;
-	bool first, wakeup;
+	bool first;
 	u32 seqno;
 
 	/* Note that we may be called from an interrupt handler on another
@@ -703,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	 * If we are the oldest waiter, enable the irq (after which we
 	 * must double check that the seqno did not complete).
 	 */
-	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
+	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
 
 	/* Now insert ourselves into the retirement ordered list of signals
 	 * on this engine. We track the oldest seqno as that will be the
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index dd3ad52b7dfe..763010f8ad89 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk)
 
 static int glk_calc_cdclk(int max_pixclk)
 {
-	if (max_pixclk > 2 * 158400)
+	/*
+	 * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk
+	 * as a temporary workaround. Use a higher cdclk instead. (Note that
+	 * intel_compute_max_dotclk() limits the max pixel clock to 99% of max
+	 * cdclk.)
+	 */
+	if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100))
 		return 316800;
-	else if (max_pixclk > 2 * 79200)
+	else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100))
 		return 158400;
 	else
 		return 79200;
@@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
 	int max_cdclk_freq = dev_priv->max_cdclk_freq;
 
 	if (IS_GEMINILAKE(dev_priv))
-		return 2 * max_cdclk_freq;
+		/*
+		 * FIXME: Limiting to 99% as a temporary workaround. See
+		 * glk_calc_cdclk() for details.
+		 */
+		return 2 * max_cdclk_freq * 99 / 100;
 	else if (INTEL_INFO(dev_priv)->gen >= 9 ||
 		 IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		return max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 2797bf37c3ac..84a1f5e85153 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -777,13 +777,6 @@ out:
 	return ret;
 }
 
-static int intel_crt_set_property(struct drm_connector *connector,
-				  struct drm_property *property,
-				  uint64_t value)
-{
-	return 0;
-}
-
 void intel_crt_reset(struct drm_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
@@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = {
 	.late_register = intel_connector_register,
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_crt_destroy,
-	.set_property = intel_crt_set_property,
+	.set_property = drm_atomic_helper_connector_set_property,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_get_property = intel_connector_atomic_get_property,
 };
 
 static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3617927af269..85b9e2f521a0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4861,12 +4861,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
 	if (intel_crtc->overlay) {
 		struct drm_device *dev = intel_crtc->base.dev;
-		struct drm_i915_private *dev_priv = to_i915(dev);
 
 		mutex_lock(&dev->struct_mutex);
-		dev_priv->mm.interruptible = false;
 		(void) intel_overlay_switch_off(intel_crtc->overlay);
-		dev_priv->mm.interruptible = true;
 		mutex_unlock(&dev->struct_mutex);
 	}
 
@@ -9566,6 +9563,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
 	 */
 	if (!crtc) {
 		DRM_DEBUG_KMS("no pipe available for load-detect\n");
+		ret = -ENODEV;
 		goto fail;
 	}
 
@@ -9622,6 +9620,7 @@ found:
 		DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
 	if (IS_ERR(fb)) {
 		DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
+		ret = PTR_ERR(fb);
 		goto fail;
 	}
 
@@ -14934,6 +14933,7 @@ int intel_modeset_init(struct drm_device *dev)
 
 	dev->mode_config.funcs = &intel_mode_funcs;
 
+	init_llist_head(&dev_priv->atomic_helper.free_list);
 	INIT_WORK(&dev_priv->atomic_helper.free_work,
 		  intel_atomic_helper_free_state_worker);
 
@@ -15561,13 +15561,6 @@ void intel_display_resume(struct drm_device *dev)
 	if (state)
 		state->acquire_ctx = &ctx;
 
-	/*
-	 * This is a cludge because with real atomic modeset mode_config.mutex
-	 * won't be taken. Unfortunately some probed state like
-	 * audio_codec_enable is still protected by mode_config.mutex, so lock
-	 * it here for now.
-	 */
-	mutex_lock(&dev->mode_config.mutex);
 	drm_modeset_acquire_init(&ctx, 0);
 
 	while (1) {
@@ -15583,7 +15576,6 @@ void intel_display_resume(struct drm_device *dev)
 
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
-	mutex_unlock(&dev->mode_config.mutex);
 
 	if (ret)
 		DRM_ERROR("Restoring old state failed with %i\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ee77b519835c..08834f74d396 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -133,36 +133,55 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
 				      enum pipe pipe);
 static void intel_dp_unset_edid(struct intel_dp *intel_dp);
 
-static int
-intel_dp_max_link_bw(struct intel_dp  *intel_dp)
+static int intel_dp_num_rates(u8 link_bw_code)
 {
-	int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE];
-
-	switch (max_link_bw) {
+	switch (link_bw_code) {
+	default:
+		WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
+		     link_bw_code);
 	case DP_LINK_BW_1_62:
+		return 1;
 	case DP_LINK_BW_2_7:
+		return 2;
 	case DP_LINK_BW_5_4:
-		break;
-	default:
-		WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
-		     max_link_bw);
-		max_link_bw = DP_LINK_BW_1_62;
-		break;
+		return 3;
 	}
-	return max_link_bw;
 }
 
-static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp)
+/* update sink rates from dpcd */
+static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
 {
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	u8 source_max, sink_max;
+	int i, num_rates;
 
-	source_max = intel_dig_port->max_lanes;
-	sink_max = intel_dp->max_sink_lane_count;
+	num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]);
+
+	for (i = 0; i < num_rates; i++)
+		intel_dp->sink_rates[i] = default_rates[i];
+
+	intel_dp->num_sink_rates = num_rates;
+}
+
+/* Theoretical max between source and sink */
+static int intel_dp_max_common_rate(struct intel_dp *intel_dp)
+{
+	return intel_dp->common_rates[intel_dp->num_common_rates - 1];
+}
+
+/* Theoretical max between source and sink */
+static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp)
+{
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	int source_max = intel_dig_port->max_lanes;
+	int sink_max = drm_dp_max_lane_count(intel_dp->dpcd);
 
 	return min(source_max, sink_max);
 }
 
+int intel_dp_max_lane_count(struct intel_dp *intel_dp)
+{
+	return intel_dp->max_link_lane_count;
+}
+
 int
 intel_dp_link_required(int pixel_clock, int bpp)
 {
@@ -205,34 +224,25 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp)
 	return max_dotclk;
 }
 
-static int
-intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates)
-{
-	if (intel_dp->num_sink_rates) {
-		*sink_rates = intel_dp->sink_rates;
-		return intel_dp->num_sink_rates;
-	}
-
-	*sink_rates = default_rates;
-
-	return (intel_dp->max_sink_link_bw >> 3) + 1;
-}
-
-static int
-intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
+static void
+intel_dp_set_source_rates(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+	const int *source_rates;
 	int size;
 
+	/* This should only be done once */
+	WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates);
+
 	if (IS_GEN9_LP(dev_priv)) {
-		*source_rates = bxt_rates;
+		source_rates = bxt_rates;
 		size = ARRAY_SIZE(bxt_rates);
 	} else if (IS_GEN9_BC(dev_priv)) {
-		*source_rates = skl_rates;
+		source_rates = skl_rates;
 		size = ARRAY_SIZE(skl_rates);
 	} else {
-		*source_rates = default_rates;
+		source_rates = default_rates;
 		size = ARRAY_SIZE(default_rates);
 	}
 
@@ -240,7 +250,8 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
 	if (!intel_dp_source_supports_hbr2(intel_dp))
 		size--;
 
-	return size;
+	intel_dp->source_rates = source_rates;
+	intel_dp->num_source_rates = size;
 }
 
 static int intersect_rates(const int *source_rates, int source_len,
@@ -266,50 +277,83 @@ static int intersect_rates(const int *source_rates, int source_len,
 	return k;
 }
 
-static int intel_dp_common_rates(struct intel_dp *intel_dp,
-				 int *common_rates)
+/* return index of rate in rates array, or -1 if not found */
+static int intel_dp_rate_index(const int *rates, int len, int rate)
 {
-	const int *source_rates, *sink_rates;
-	int source_len, sink_len;
+	int i;
 
-	sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-	source_len = intel_dp_source_rates(intel_dp, &source_rates);
+	for (i = 0; i < len; i++)
+		if (rate == rates[i])
+			return i;
 
-	return intersect_rates(source_rates, source_len,
-			       sink_rates, sink_len,
-			       common_rates);
+	return -1;
 }
 
-static int intel_dp_link_rate_index(struct intel_dp *intel_dp,
-				    int *common_rates, int link_rate)
+static void intel_dp_set_common_rates(struct intel_dp *intel_dp)
 {
-	int common_len;
-	int index;
+	WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates);
+
+	intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates,
+						     intel_dp->num_source_rates,
+						     intel_dp->sink_rates,
+						     intel_dp->num_sink_rates,
+						     intel_dp->common_rates);
 
-	common_len = intel_dp_common_rates(intel_dp, common_rates);
-	for (index = 0; index < common_len; index++) {
-		if (link_rate == common_rates[common_len - index - 1])
-			return common_len - index - 1;
+	/* Paranoia, there should always be something in common. */
+	if (WARN_ON(intel_dp->num_common_rates == 0)) {
+		intel_dp->common_rates[0] = default_rates[0];
+		intel_dp->num_common_rates = 1;
 	}
+}
 
-	return -1;
+/* get length of common rates potentially limited by max_rate */
+static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp,
+					  int max_rate)
+{
+	const int *common_rates = intel_dp->common_rates;
+	int i, common_len = intel_dp->num_common_rates;
+
+	/* Limit results by potentially reduced max rate */
+	for (i = 0; i < common_len; i++) {
+		if (common_rates[common_len - i - 1] <= max_rate)
+			return common_len - i;
+	}
+
+	return 0;
+}
+
+static bool intel_dp_link_params_valid(struct intel_dp *intel_dp)
+{
+	/*
+	 * FIXME: we need to synchronize the current link parameters with
+	 * hardware readout. Currently fast link training doesn't work on
+	 * boot-up.
+	 */
+	if (intel_dp->link_rate == 0 ||
+	    intel_dp->link_rate > intel_dp->max_link_rate)
+		return false;
+
+	if (intel_dp->lane_count == 0 ||
+	    intel_dp->lane_count > intel_dp_max_lane_count(intel_dp))
+		return false;
+
+	return true;
 }
 
 int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
 					    int link_rate, uint8_t lane_count)
 {
-	int common_rates[DP_MAX_SUPPORTED_RATES];
-	int link_rate_index;
+	int index;
 
-	link_rate_index = intel_dp_link_rate_index(intel_dp,
-						   common_rates,
-						   link_rate);
-	if (link_rate_index > 0) {
-		intel_dp->max_sink_link_bw = drm_dp_link_rate_to_bw_code(common_rates[link_rate_index - 1]);
-		intel_dp->max_sink_lane_count = lane_count;
+	index = intel_dp_rate_index(intel_dp->common_rates,
+				    intel_dp->num_common_rates,
+				    link_rate);
+	if (index > 0) {
+		intel_dp->max_link_rate = intel_dp->common_rates[index - 1];
+		intel_dp->max_link_lane_count = lane_count;
 	} else if (lane_count > 1) {
-		intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp);
-		intel_dp->max_sink_lane_count = lane_count >> 1;
+		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+		intel_dp->max_link_lane_count = lane_count >> 1;
 	} else {
 		DRM_ERROR("Link Training Unsuccessful\n");
 		return -1;
@@ -1486,24 +1530,21 @@ static void snprintf_int_array(char *str, size_t len,
 
 static void intel_dp_print_rates(struct intel_dp *intel_dp)
 {
-	const int *source_rates, *sink_rates;
-	int source_len, sink_len, common_len;
-	int common_rates[DP_MAX_SUPPORTED_RATES];
 	char str[128]; /* FIXME: too big for stack? */
 
 	if ((drm_debug & DRM_UT_KMS) == 0)
 		return;
 
-	source_len = intel_dp_source_rates(intel_dp, &source_rates);
-	snprintf_int_array(str, sizeof(str), source_rates, source_len);
+	snprintf_int_array(str, sizeof(str),
+			   intel_dp->source_rates, intel_dp->num_source_rates);
 	DRM_DEBUG_KMS("source rates: %s\n", str);
 
-	sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-	snprintf_int_array(str, sizeof(str), sink_rates, sink_len);
+	snprintf_int_array(str, sizeof(str),
+			   intel_dp->sink_rates, intel_dp->num_sink_rates);
 	DRM_DEBUG_KMS("sink rates: %s\n", str);
 
-	common_len = intel_dp_common_rates(intel_dp, common_rates);
-	snprintf_int_array(str, sizeof(str), common_rates, common_len);
+	snprintf_int_array(str, sizeof(str),
+			   intel_dp->common_rates, intel_dp->num_common_rates);
 	DRM_DEBUG_KMS("common rates: %s\n", str);
 }
 
@@ -1538,39 +1579,34 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp)
 	return true;
 }
 
-static int rate_to_index(int find, const int *rates)
-{
-	int i = 0;
-
-	for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i)
-		if (find == rates[i])
-			break;
-
-	return i;
-}
-
 int
 intel_dp_max_link_rate(struct intel_dp *intel_dp)
 {
-	int rates[DP_MAX_SUPPORTED_RATES] = {};
 	int len;
 
-	len = intel_dp_common_rates(intel_dp, rates);
+	len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate);
 	if (WARN_ON(len <= 0))
 		return 162000;
 
-	return rates[len - 1];
+	return intel_dp->common_rates[len - 1];
 }
 
 int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
 {
-	return rate_to_index(rate, intel_dp->sink_rates);
+	int i = intel_dp_rate_index(intel_dp->sink_rates,
+				    intel_dp->num_sink_rates, rate);
+
+	if (WARN_ON(i < 0))
+		i = 0;
+
+	return i;
 }
 
 void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
 			   uint8_t *link_bw, uint8_t *rate_select)
 {
-	if (intel_dp->num_sink_rates) {
+	/* eDP 1.4 rate select method. */
+	if (intel_dp->use_rate_select) {
 		*link_bw = 0;
 		*rate_select =
 			intel_dp_rate_select(intel_dp, port_clock);
@@ -1618,14 +1654,13 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	/* Conveniently, the link BW constants become indices with a shift...*/
 	int min_clock = 0;
 	int max_clock;
-	int link_rate_index;
 	int bpp, mode_rate;
 	int link_avail, link_clock;
-	int common_rates[DP_MAX_SUPPORTED_RATES] = {};
 	int common_len;
 	uint8_t link_bw, rate_select;
 
-	common_len = intel_dp_common_rates(intel_dp, common_rates);
+	common_len = intel_dp_common_len_rate_limit(intel_dp,
+						    intel_dp->max_link_rate);
 
 	/* No common link rates between source and sink */
 	WARN_ON(common_len <= 0);
@@ -1662,16 +1697,18 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
 	/* Use values requested by Compliance Test Request */
 	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
-		link_rate_index = intel_dp_link_rate_index(intel_dp,
-							   common_rates,
-							   intel_dp->compliance.test_link_rate);
-		if (link_rate_index >= 0)
-			min_clock = max_clock = link_rate_index;
+		int index;
+
+		index = intel_dp_rate_index(intel_dp->common_rates,
+					    intel_dp->num_common_rates,
+					    intel_dp->compliance.test_link_rate);
+		if (index >= 0)
+			min_clock = max_clock = index;
 		min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count;
 	}
 	DRM_DEBUG_KMS("DP link computation with max lane count %i "
 		      "max bw %d pixel clock %iKHz\n",
-		      max_lane_count, common_rates[max_clock],
+		      max_lane_count, intel_dp->common_rates[max_clock],
 		      adjusted_mode->crtc_clock);
 
 	/* Walk through all bpp values. Luckily they're all nicely spaced with 2
@@ -1707,7 +1744,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 				lane_count <= max_lane_count;
 				lane_count <<= 1) {
 
-				link_clock = common_rates[clock];
+				link_clock = intel_dp->common_rates[clock];
 				link_avail = intel_dp_max_data_rate(link_clock,
 								    lane_count);
 
@@ -1739,7 +1776,7 @@ found:
 	pipe_config->lane_count = lane_count;
 
 	pipe_config->pipe_bpp = bpp;
-	pipe_config->port_clock = common_rates[clock];
+	pipe_config->port_clock = intel_dp->common_rates[clock];
 
 	intel_dp_compute_rate(intel_dp, pipe_config->port_clock,
 			      &link_bw, &rate_select);
@@ -3642,9 +3679,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 		uint8_t frame_sync_cap;
 
 		dev_priv->psr.sink_support = true;
-		drm_dp_dpcd_read(&intel_dp->aux,
-				 DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
-				 &frame_sync_cap, 1);
+		drm_dp_dpcd_readb(&intel_dp->aux,
+				  DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
+				  &frame_sync_cap);
 		dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
 		/* PSR2 needs frame sync as well */
 		dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
@@ -3695,6 +3732,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 		intel_dp->num_sink_rates = i;
 	}
 
+	if (intel_dp->num_sink_rates)
+		intel_dp->use_rate_select = true;
+	else
+		intel_dp_set_sink_rates(intel_dp);
+
+	intel_dp_set_common_rates(intel_dp);
+
 	return true;
 }
 
@@ -3702,11 +3746,18 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 static bool
 intel_dp_get_dpcd(struct intel_dp *intel_dp)
 {
+	u8 sink_count;
+
 	if (!intel_dp_read_dpcd(intel_dp))
 		return false;
 
-	if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT,
-			     &intel_dp->sink_count, 1) < 0)
+	/* Don't clobber cached eDP rates. */
+	if (!is_edp(intel_dp)) {
+		intel_dp_set_sink_rates(intel_dp);
+		intel_dp_set_common_rates(intel_dp);
+	}
+
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &sink_count) <= 0)
 		return false;
 
 	/*
@@ -3714,7 +3765,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 	 * a member variable in intel_dp will track any changes
 	 * between short pulse interrupts.
 	 */
-	intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count);
+	intel_dp->sink_count = DP_GET_SINK_COUNT(sink_count);
 
 	/*
 	 * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that
@@ -3743,7 +3794,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 static bool
 intel_dp_can_mst(struct intel_dp *intel_dp)
 {
-	u8 buf[1];
+	u8 mstm_cap;
 
 	if (!i915.enable_dp_mst)
 		return false;
@@ -3754,10 +3805,10 @@ intel_dp_can_mst(struct intel_dp *intel_dp)
 	if (intel_dp->dpcd[DP_DPCD_REV] < 0x12)
 		return false;
 
-	if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1)
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1)
 		return false;
 
-	return buf[0] & DP_MST_CAP;
+	return mstm_cap & DP_MST_CAP;
 }
 
 static void
@@ -3903,9 +3954,8 @@ stop:
 static bool
 intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector)
 {
-	return drm_dp_dpcd_read(&intel_dp->aux,
-				       DP_DEVICE_SERVICE_IRQ_VECTOR,
-				       sink_irq_vector, 1) == 1;
+	return drm_dp_dpcd_readb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
+				 sink_irq_vector) == 1;
 }
 
 static bool
@@ -3926,7 +3976,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 {
 	int status = 0;
 	int min_lane_count = 1;
-	int common_rates[DP_MAX_SUPPORTED_RATES] = {};
 	int link_rate_index, test_link_rate;
 	uint8_t test_lane_count, test_link_bw;
 	/* (DP CTS 1.2)
@@ -3943,7 +3992,7 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 	test_lane_count &= DP_MAX_LANE_COUNT_MASK;
 	/* Validate the requested lane count */
 	if (test_lane_count < min_lane_count ||
-	    test_lane_count > intel_dp->max_sink_lane_count)
+	    test_lane_count > intel_dp->max_link_lane_count)
 		return DP_TEST_NAK;
 
 	status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE,
@@ -3954,9 +4003,9 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 	}
 	/* Validate the requested link rate */
 	test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw);
-	link_rate_index = intel_dp_link_rate_index(intel_dp,
-						   common_rates,
-						   test_link_rate);
+	link_rate_index = intel_dp_rate_index(intel_dp->common_rates,
+					      intel_dp->num_common_rates,
+					      test_link_rate);
 	if (link_rate_index < 0)
 		return DP_TEST_NAK;
 
@@ -3969,13 +4018,13 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
 {
 	uint8_t test_pattern;
-	uint16_t test_misc;
+	uint8_t test_misc;
 	__be16 h_width, v_height;
 	int status = 0;
 
 	/* Read the TEST_PATTERN (DP CTS 3.1.5) */
-	status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN,
-				  &test_pattern, 1);
+	status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN,
+				   &test_pattern);
 	if (status <= 0) {
 		DRM_DEBUG_KMS("Test pattern read failed\n");
 		return DP_TEST_NAK;
@@ -3997,8 +4046,8 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
 		return DP_TEST_NAK;
 	}
 
-	status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0,
-				  &test_misc, 1);
+	status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0,
+				   &test_misc);
 	if (status <= 0) {
 		DRM_DEBUG_KMS("TEST MISC read failed\n");
 		return DP_TEST_NAK;
@@ -4057,10 +4106,8 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
 		 */
 		block += intel_connector->detect_edid->extensions;
 
-		if (!drm_dp_dpcd_write(&intel_dp->aux,
-					DP_TEST_EDID_CHECKSUM,
-					&block->checksum,
-					1))
+		if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM,
+				       block->checksum) <= 0)
 			DRM_DEBUG_KMS("Failed to write EDID checksum\n");
 
 		test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -4224,9 +4271,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 	if (!to_intel_crtc(intel_encoder->base.crtc)->active)
 		return;
 
-	/* FIXME: we need to synchronize this sort of stuff with hardware
-	 * readout. Currently fast link training doesn't work on boot-up. */
-	if (!intel_dp->lane_count)
+	/*
+	 * Validate the cached values of intel_dp->link_rate and
+	 * intel_dp->lane_count before attempting to retrain.
+	 */
+	if (!intel_dp_link_params_valid(intel_dp))
 		return;
 
 	/* Retrain if Channel EQ or CR not ok */
@@ -4613,11 +4662,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
 		      yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
 
 	if (intel_dp->reset_link_params) {
-		/* Set the max lane count for sink */
-		intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd);
+		/* Initial max link lane count */
+		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
 
-		/* Set the max link BW for sink */
-		intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp);
+		/* Initial max link rate */
+		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
 
 		intel_dp->reset_link_params = false;
 	}
@@ -5127,7 +5176,7 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port)
 	return intel_bios_is_port_edp(dev_priv, port);
 }
 
-void
+static void
 intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
 {
 	struct intel_connector *intel_connector = to_intel_connector(connector);
@@ -5932,6 +5981,29 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port)
 	}
 }
 
+static void intel_dp_modeset_retry_work_fn(struct work_struct *work)
+{
+	struct intel_connector *intel_connector =
+		container_of(work, struct intel_connector, modeset_retry_work);
+	struct drm_connector *connector = &intel_connector->base;
+	struct drm_device *dev = connector->dev;
+
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
+		      connector->name);
+
+	/*
+	 * Flag the link status as BAD while holding the mode_config mutex;
+	 * together with the uevent below this asks userspace for a modeset.
+	 */
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_connector_set_link_status_property(connector,
+						    DRM_MODE_LINK_STATUS_BAD);
+	mutex_unlock(&dev->mode_config.mutex);
+
+	/* Send Hotplug uevent so userspace can reprobe */
+	drm_kms_helper_hotplug_event(dev);
+}
+
 bool
 intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 			struct intel_connector *intel_connector)
@@ -5944,11 +6016,17 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	enum port port = intel_dig_port->port;
 	int type;
 
+	/* Initialize the work for modeset in case of link train failure */
+	INIT_WORK(&intel_connector->modeset_retry_work,
+		  intel_dp_modeset_retry_work_fn);
+
 	if (WARN(intel_dig_port->max_lanes < 1,
 		 "Not enough lanes (%d) for DP on port %c\n",
 		 intel_dig_port->max_lanes, port_name(port)))
 		return false;
 
+	intel_dp_set_source_rates(intel_dp);
+
 	intel_dp->reset_link_params = true;
 	intel_dp->pps_pipe = INVALID_PIPE;
 	intel_dp->active_pipe = INVALID_PIPE;
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index 0048b520baf7..b79c1c0e404c 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -146,7 +146,8 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
 		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
 	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
 
-	if (intel_dp->num_sink_rates)
+	/* eDP 1.4 rate select method. */
+	if (!link_bw)
 		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
 				  &rate_select, 1);
 
@@ -313,6 +314,24 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp)
 void
 intel_dp_start_link_train(struct intel_dp *intel_dp)
 {
-	intel_dp_link_training_clock_recovery(intel_dp);
-	intel_dp_link_training_channel_equalization(intel_dp);
+	struct intel_connector *intel_connector = intel_dp->attached_connector;
+
+	if (!intel_dp_link_training_clock_recovery(intel_dp))
+		goto failure_handling;
+	if (!intel_dp_link_training_channel_equalization(intel_dp))
+		goto failure_handling;
+
+	DRM_DEBUG_KMS("Link Training Passed at Link Rate = %d, Lane count = %d\n",
+		      intel_dp->link_rate, intel_dp->lane_count);
+	return;
+
+ failure_handling:
+	DRM_DEBUG_KMS("Link Training failed at link rate = %d, lane count = %d\n",
+		      intel_dp->link_rate, intel_dp->lane_count);
+	/* A non-zero return means no lower rate or lane count is left to try */
+	if (!intel_dp_get_link_train_fallback_values(intel_dp,
+						     intel_dp->link_rate,
+						     intel_dp->lane_count))
+		/* Schedule a Hotplug Uevent to userspace to start modeset */
+		schedule_work(&intel_connector->modeset_retry_work);
 }
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index c1f62eb07c07..5af22a7c11bf 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -56,7 +56,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	 * for MST we always configure max link bw - the spec doesn't
 	 * seem to suggest we should do otherwise.
 	 */
-	lane_count = drm_dp_max_lane_count(intel_dp->dpcd);
+	lane_count = intel_dp_max_lane_count(intel_dp);
 
 	pipe_config->lane_count = lane_count;
 
@@ -294,14 +294,6 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
 	return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port);
 }
 
-static int
-intel_dp_mst_set_property(struct drm_connector *connector,
-			  struct drm_property *property,
-			  uint64_t val)
-{
-	return 0;
-}
-
 static void
 intel_dp_mst_connector_destroy(struct drm_connector *connector)
 {
@@ -318,8 +310,7 @@ static const struct drm_connector_funcs intel_dp_mst_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.detect = intel_dp_mst_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.set_property = intel_dp_mst_set_property,
-	.atomic_get_property = intel_connector_atomic_get_property,
+	.set_property = drm_atomic_helper_connector_set_property,
 	.late_register = intel_connector_register,
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_dp_mst_connector_destroy,
@@ -343,7 +334,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
 	int max_rate, mode_rate, max_lanes, max_link_clock;
 
 	max_link_clock = intel_dp_max_link_rate(intel_dp);
-	max_lanes = drm_dp_max_lane_count(intel_dp->dpcd);
+	max_lanes = intel_dp_max_lane_count(intel_dp);
 
 	max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
 	mode_rate = intel_dp_link_required(mode->clock, bpp);
@@ -459,7 +450,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 		drm_mode_connector_attach_encoder(&intel_connector->base,
 						  &intel_dp->mst_encoders[i]->base.base);
 	}
-	intel_dp_add_properties(intel_dp, connector);
 
 	drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0);
 	drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index aaee3949a422..54f3ff840812 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -88,7 +88,6 @@
 	int cpu, ret, timeout = (US) * 1000; \
 	u64 base; \
 	_WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
-	BUILD_BUG_ON((US) > 50000); \
 	if (!(ATOMIC)) { \
 		preempt_disable(); \
 		cpu = smp_processor_id(); \
@@ -130,8 +129,14 @@
 	ret__; \
 })
 
-#define wait_for_atomic(COND, MS)	_wait_for_atomic((COND), (MS) * 1000, 1)
-#define wait_for_atomic_us(COND, US)	_wait_for_atomic((COND), (US), 1)
+#define wait_for_atomic_us(COND, US) \
+({ \
+	BUILD_BUG_ON(!__builtin_constant_p(US)); \
+	BUILD_BUG_ON((US) > 50000); \
+	_wait_for_atomic((COND), (US), 1); \
+})
+/* Millisecond flavour: MS must also be a build-time constant of at most 50. */
+#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
 
 #define KHz(x) (1000 * (x))
 #define MHz(x) KHz(1000 * (x))
@@ -321,6 +326,9 @@ struct intel_connector {
 	void *port; /* store this opaque as its illegal to dereference it */
 
 	struct intel_dp *mst_port;
+
+	/* Work struct to schedule a uevent on link train failure */
+	struct work_struct modeset_retry_work;
 };
 
 struct dpll {
@@ -949,13 +957,20 @@ struct intel_dp {
 	uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
 	uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
 	uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE];
-	/* sink rates as reported by DP_SUPPORTED_LINK_RATES */
-	uint8_t num_sink_rates;
+	/* source rates */
+	int num_source_rates;
+	const int *source_rates;
+	/* sink rates as reported by DP_MAX_LINK_RATE/DP_SUPPORTED_LINK_RATES */
+	int num_sink_rates;
 	int sink_rates[DP_MAX_SUPPORTED_RATES];
-	/* Max lane count for the sink as per DPCD registers */
-	uint8_t max_sink_lane_count;
-	/* Max link BW for the sink as per DPCD registers */
-	int max_sink_link_bw;
+	bool use_rate_select;
+	/* intersection of source and sink rates */
+	int num_common_rates;
+	int common_rates[DP_MAX_SUPPORTED_RATES];
+	/* Max lane count for the current link */
+	int max_link_lane_count;
+	/* Max rate for the current link */
+	int max_link_rate;
 	/* sink or branch descriptor */
 	struct intel_dp_desc desc;
 	struct drm_dp_aux aux;
@@ -1492,10 +1507,10 @@ void intel_edp_backlight_off(struct intel_dp *intel_dp);
 void intel_edp_panel_vdd_on(struct intel_dp *intel_dp);
 void intel_edp_panel_on(struct intel_dp *intel_dp);
 void intel_edp_panel_off(struct intel_dp *intel_dp);
-void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector);
 void intel_dp_mst_suspend(struct drm_device *dev);
 void intel_dp_mst_resume(struct drm_device *dev);
 int intel_dp_max_link_rate(struct intel_dp *intel_dp);
+int intel_dp_max_lane_count(struct intel_dp *intel_dp);
 int intel_dp_rate_select(struct intel_dp *intel_dp, int rate);
 void intel_dp_hot_plug(struct intel_encoder *intel_encoder);
 void intel_power_sequencer_reset(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 3ffe8b1f1d48..fc0ef492252a 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
 		val |= (ULPS_STATE_ENTER | DEVICE_READY);
 		I915_WRITE(MIPI_DEVICE_READY(port), val);
 
-		/* Wait for ULPS Not active */
+		/* Wait for ULPS active */
 		if (intel_wait_for_register(dev_priv,
-				MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE,
-				GLK_ULPS_NOT_ACTIVE, 20))
-			DRM_ERROR("ULPS is still active\n");
+				MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20))
+			DRM_ERROR("ULPS not active\n");
 
 		/* Exit ULPS */
 		val = I915_READ(MIPI_DEVICE_READY(port));
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 6025839ed3b7..c1544a53095d 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = {
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_dvo_destroy,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.atomic_get_property = intel_connector_atomic_get_property,
+	.set_property = drm_atomic_helper_connector_set_property,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 };
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 854e8e0c836b..6d3d83876da9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -26,69 +26,177 @@
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
-static const struct engine_info {
+/* Haswell does have the CXT_SIZE register however it does not appear to be
+ * valid. Now, docs explain in dwords what is in the context object. The full
+ * size is 70720 bytes, however, the power context and execlist context will
+ * never be saved (power context is stored elsewhere, and execlists don't work
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
+ */
+#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
+/* Same as Haswell, but 72064 bytes now. */
+#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
+#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
+
+struct engine_class_info {
 	const char *name;
-	unsigned int exec_id;
+	int (*init_legacy)(struct intel_engine_cs *engine);
+	int (*init_execlists)(struct intel_engine_cs *engine);
+};
+
+static const struct engine_class_info intel_engine_classes[] = {
+	[RENDER_CLASS] = {
+		.name = "rcs",
+		.init_legacy = intel_init_render_ring_buffer,
+		.init_execlists = logical_render_ring_init,
+	},
+	[COPY_ENGINE_CLASS] = {
+		.name = "bcs",
+		.init_legacy = intel_init_blt_ring_buffer,
+		.init_execlists = logical_xcs_ring_init,
+	},
+	[VIDEO_DECODE_CLASS] = {
+		.name = "vcs",
+		.init_legacy = intel_init_bsd_ring_buffer,
+		.init_execlists = logical_xcs_ring_init,
+	},
+	[VIDEO_ENHANCEMENT_CLASS] = {
+		.name = "vecs",
+		.init_legacy = intel_init_vebox_ring_buffer,
+		.init_execlists = logical_xcs_ring_init,
+	},
+};
+
+struct engine_info {
 	unsigned int hw_id;
+	unsigned int uabi_id;
+	u8 class;
+	u8 instance;
 	u32 mmio_base;
 	unsigned irq_shift;
-	int (*init_legacy)(struct intel_engine_cs *engine);
-	int (*init_execlists)(struct intel_engine_cs *engine);
-} intel_engines[] = {
+};
+
+static const struct engine_info intel_engines[] = {
 	[RCS] = {
-		.name = "rcs",
 		.hw_id = RCS_HW,
-		.exec_id = I915_EXEC_RENDER,
+		.uabi_id = I915_EXEC_RENDER,
+		.class = RENDER_CLASS,
+		.instance = 0,
 		.mmio_base = RENDER_RING_BASE,
 		.irq_shift = GEN8_RCS_IRQ_SHIFT,
-		.init_execlists = logical_render_ring_init,
-		.init_legacy = intel_init_render_ring_buffer,
 	},
 	[BCS] = {
-		.name = "bcs",
 		.hw_id = BCS_HW,
-		.exec_id = I915_EXEC_BLT,
+		.uabi_id = I915_EXEC_BLT,
+		.class = COPY_ENGINE_CLASS,
+		.instance = 0,
 		.mmio_base = BLT_RING_BASE,
 		.irq_shift = GEN8_BCS_IRQ_SHIFT,
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_blt_ring_buffer,
 	},
 	[VCS] = {
-		.name = "vcs",
 		.hw_id = VCS_HW,
-		.exec_id = I915_EXEC_BSD,
+		.uabi_id = I915_EXEC_BSD,
+		.class = VIDEO_DECODE_CLASS,
+		.instance = 0,
 		.mmio_base = GEN6_BSD_RING_BASE,
 		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_bsd_ring_buffer,
 	},
 	[VCS2] = {
-		.name = "vcs2",
 		.hw_id = VCS2_HW,
-		.exec_id = I915_EXEC_BSD,
+		.uabi_id = I915_EXEC_BSD,
+		.class = VIDEO_DECODE_CLASS,
+		.instance = 1,
 		.mmio_base = GEN8_BSD2_RING_BASE,
 		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_bsd2_ring_buffer,
 	},
 	[VECS] = {
-		.name = "vecs",
 		.hw_id = VECS_HW,
-		.exec_id = I915_EXEC_VEBOX,
+		.uabi_id = I915_EXEC_VEBOX,
+		.class = VIDEO_ENHANCEMENT_CLASS,
+		.instance = 0,
 		.mmio_base = VEBOX_RING_BASE,
 		.irq_shift = GEN8_VECS_IRQ_SHIFT,
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_vebox_ring_buffer,
 	},
 };
 
+/**
+ * __intel_engine_context_size() - return the size of the context for an engine
+ * @dev_priv: i915 device private
+ * @class: engine class
+ *
+ * Each engine class may require a different amount of space for a context
+ * image. 0 is returned for render on gen 1-5 and other classes before gen 8.
+ *
+ * Return: size (in bytes) of an engine class specific context image
+ *
+ * Note: this size includes the HWSP, which is part of the context image
+ * in LRC mode, but does not include the "shared data page" used with
+ * GuC submission. The caller should account for this if using the GuC.
+ */
+static u32
+__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+{
+	u32 cxt_size;
+
+	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
+
+	switch (class) {
+	case RENDER_CLASS:
+		switch (INTEL_GEN(dev_priv)) {
+		default:
+			MISSING_CASE(INTEL_GEN(dev_priv));
+		case 9:
+			return GEN9_LR_CONTEXT_RENDER_SIZE;
+		case 8:
+			return i915.enable_execlists ?
+			       GEN8_LR_CONTEXT_RENDER_SIZE :
+			       GEN8_CXT_TOTAL_SIZE;
+		case 7:
+			if (IS_HASWELL(dev_priv))
+				return HSW_CXT_TOTAL_SIZE;
+
+			cxt_size = I915_READ(GEN7_CXT_SIZE);
+			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
+					PAGE_SIZE);
+		case 6:
+			cxt_size = I915_READ(CXT_SIZE);
+			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
+					PAGE_SIZE);
+		case 5:
+		case 4:
+		case 3:
+		case 2:
+		/* For the special day when i810 gets merged. */
+		case 1:
+			return 0;
+		}
+		break;
+	default:
+		MISSING_CASE(class);
+	case VIDEO_DECODE_CLASS:
+	case VIDEO_ENHANCEMENT_CLASS:
+	case COPY_ENGINE_CLASS:
+		if (INTEL_GEN(dev_priv) < 8)
+			return 0;
+		return GEN8_LR_CONTEXT_OTHER_SIZE;
+	}
+}
+
 static int
 intel_engine_setup(struct drm_i915_private *dev_priv,
 		   enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
+	const struct engine_class_info *class_info;
 	struct intel_engine_cs *engine;
 
+	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
+	class_info = &intel_engine_classes[info->class];
+
 	GEM_BUG_ON(dev_priv->engine[id]);
 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
 	if (!engine)
@@ -96,11 +204,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 
 	engine->id = id;
 	engine->i915 = dev_priv;
-	engine->name = info->name;
-	engine->exec_id = info->exec_id;
+	WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
+			 class_info->name, info->instance) >=
+		sizeof(engine->name));
+	engine->uabi_id = info->uabi_id;
 	engine->hw_id = engine->guc_id = info->hw_id;
 	engine->mmio_base = info->mmio_base;
 	engine->irq_shift = info->irq_shift;
+	engine->class = info->class;
+	engine->instance = info->instance;
+
+	engine->context_size = __intel_engine_context_size(dev_priv,
+							   engine->class);
+	if (WARN_ON(engine->context_size > BIT(20)))
+		engine->context_size = 0;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
@@ -112,18 +229,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 }
 
 /**
- * intel_engines_init_early() - allocate the Engine Command Streamers
+ * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_early(struct drm_i915_private *dev_priv)
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
-	unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
-	unsigned int mask = 0;
+	const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	unsigned int mask = 0;
 	unsigned int i;
 	int err;
 
@@ -150,6 +267,12 @@ int intel_engines_init_early(struct drm_i915_private *dev_priv)
 	if (WARN_ON(mask != ring_mask))
 		device_info->ring_mask = mask;
 
+	/* We always presume we have at least RCS available for later probing */
+	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
+		err = -ENODEV;
+		goto cleanup;
+	}
+
 	device_info->num_rings = hweight32(mask);
 
 	return 0;
@@ -161,7 +284,7 @@ cleanup:
 }
 
 /**
- * intel_engines_init() - allocate, populate and init the Engine Command Streamers
+ * intel_engines_init() - init the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
@@ -175,12 +298,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 	int err = 0;
 
 	for_each_engine(engine, dev_priv, id) {
+		const struct engine_class_info *class_info =
+			&intel_engine_classes[engine->class];
 		int (*init)(struct intel_engine_cs *engine);
 
 		if (i915.enable_execlists)
-			init = intel_engines[id].init_execlists;
+			init = class_info->init_execlists;
 		else
-			init = intel_engines[id].init_legacy;
+			init = class_info->init_legacy;
 		if (!init) {
 			kfree(engine);
 			dev_priv->engine[id] = NULL;
@@ -223,6 +348,9 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	GEM_BUG_ON(!intel_engine_is_idle(engine));
+	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
+
 	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
 	 * so long as the semaphore value in the register/page is greater
 	 * than the sync value), so whenever we reset the seqno,
@@ -253,13 +381,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
-	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-	engine->hangcheck.seqno = seqno;
-
 	/* After manually advancing the seqno, fake the interrupt in case
 	 * there are any waiters for that seqno.
 	 */
 	intel_engine_wakeup(engine);
+
+	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
 }
 
 static void intel_engine_init_timeline(struct intel_engine_cs *engine)
@@ -1086,11 +1213,18 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	/* More white lies, if wedged, hw state is inconsistent */
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return true;
+
 	/* Any inflight/incomplete requests? */
 	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
 			       intel_engine_last_submit(engine)))
 		return false;
 
+	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
+		return true;
+
 	/* Interrupt/tasklet pending? */
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index cb36cbf3818f..6156845641a3 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -23,8 +23,8 @@
 #ifndef _INTEL_GUC_FWIF_H
 #define _INTEL_GUC_FWIF_H
 
-#define GFXCORE_FAMILY_GEN9		12
-#define GFXCORE_FAMILY_UNKNOWN		0x7fffffff
+#define GUC_CORE_FAMILY_GEN9		12
+#define GUC_CORE_FAMILY_UNKNOWN		0x7fffffff
 
 #define GUC_CLIENT_PRIORITY_KMD_HIGH	0
 #define GUC_CLIENT_PRIORITY_HIGH	1
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 8a1a023e48b2..d9045b6e897b 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -61,6 +61,9 @@
 #define KBL_FW_MAJOR 9
 #define KBL_FW_MINOR 14
 
+#define GLK_FW_MAJOR 10
+#define GLK_FW_MINOR 56
+
 #define GUC_FW_PATH(platform, major, minor) \
        "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
 
@@ -73,6 +76,8 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
 #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
 MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
 
+#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR)
+
 
 static u32 get_gttype(struct drm_i915_private *dev_priv)
 {
@@ -86,11 +91,11 @@ static u32 get_core_family(struct drm_i915_private *dev_priv)
 
 	switch (gen) {
 	case 9:
-		return GFXCORE_FAMILY_GEN9;
+		return GUC_CORE_FAMILY_GEN9;
 
 	default:
-		WARN(1, "GEN%d does not support GuC operation!\n", gen);
-		return GFXCORE_FAMILY_UNKNOWN;
+		MISSING_CASE(gen);
+		return GUC_CORE_FAMILY_UNKNOWN;
 	}
 }
 
@@ -280,10 +285,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-	/* init WOPCM */
-	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
-	I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE);
-
 	/* Enable MIA caching. GuC clock gating is disabled. */
 	I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
 
@@ -405,6 +406,10 @@ int intel_guc_select_fw(struct intel_guc *guc)
 		guc->fw.path = I915_KBL_GUC_UCODE;
 		guc->fw.major_ver_wanted = KBL_FW_MAJOR;
 		guc->fw.minor_ver_wanted = KBL_FW_MINOR;
+	} else if (IS_GEMINILAKE(dev_priv)) {
+		guc->fw.path = I915_GLK_GUC_UCODE;
+		guc->fw.major_ver_wanted = GLK_FW_MAJOR;
+		guc->fw.minor_ver_wanted = GLK_FW_MINOR;
 	} else {
 		DRM_ERROR("No GuC firmware known for platform with GuC!\n");
 		return -ENOENT;
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 6fb63a3c65b0..16d3b8719cab 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc)
 	void *vaddr;
 	struct rchan *guc_log_relay_chan;
 	size_t n_subbufs, subbuf_size;
-	int ret = 0;
+	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	GEM_BUG_ON(guc_log_has_runtime(guc));
 
+	ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
+	if (ret)
+		return ret;
+
 	/* Create a WC (Uncached for read) vmalloc mapping of log
 	 * buffer pages, so that we can directly get the data
 	 * (up-to-date) from memory.
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index dce742243ba6..9b0ece427bdc 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -407,7 +407,7 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
 				 "%s, ", engine->name);
 	msg[len-2] = '\0';
 
-	return i915_handle_error(i915, hung, msg);
+	return i915_handle_error(i915, hung, "%s", msg);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 1d623b5e09d6..52f0b2d5fad2 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1327,6 +1327,11 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state)
 			return false;
 	}
 
+	/* Display Wa #1139 */
+	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) &&
+	    crtc_state->base.adjusted_mode.htotal > 5460)
+		return false;
+
 	return true;
 }
 
@@ -1392,7 +1397,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 	}
 
 	if (!pipe_config->bw_constrained) {
-		DRM_DEBUG_KMS("forcing pipe bpc to %i for HDMI\n", desired_bpp);
+		DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp);
 		pipe_config->pipe_bpp = desired_bpp;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 9ee819666a4c..88b4cf3f764a 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -52,6 +52,10 @@
 #define KBL_HUC_FW_MINOR 00
 #define KBL_BLD_NUM 1810
 
+#define GLK_HUC_FW_MAJOR 01
+#define GLK_HUC_FW_MINOR 07
+#define GLK_BLD_NUM 1748
+
 #define HUC_FW_PATH(platform, major, minor, bld_num) \
 	"i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
 	__stringify(minor) "_" __stringify(bld_num) ".bin"
@@ -68,6 +72,9 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE);
 	KBL_HUC_FW_MINOR, KBL_BLD_NUM)
 MODULE_FIRMWARE(I915_KBL_HUC_UCODE);
 
+#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \
+	GLK_HUC_FW_MINOR, GLK_BLD_NUM)
+
 /**
  * huc_ucode_xfer() - DMA's the firmware
  * @dev_priv: the drm_i915_private device
@@ -99,11 +106,6 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv)
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-	/* init WOPCM */
-	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
-	I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE |
-			HUC_LOADING_AGENT_GUC);
-
 	/* Set the source address for the uCode */
 	offset = guc_ggtt_offset(vma) + huc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
@@ -169,6 +171,10 @@ void intel_huc_select_fw(struct intel_huc *huc)
 		huc->fw.path = I915_KBL_HUC_UCODE;
 		huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR;
 		huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR;
+	} else if (IS_GEMINILAKE(dev_priv)) {
+		huc->fw.path = I915_GLK_HUC_UCODE;
+		huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR;
+		huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR;
 	} else {
 		DRM_ERROR("No HuC firmware known for platform with HuC!\n");
 		return;
@@ -186,68 +192,36 @@ void intel_huc_select_fw(struct intel_huc *huc)
  * earlier call to intel_huc_init(), so here we need only check that
  * is succeeded, and then transfer the image to the h/w.
  *
- * Return:	non-zero code on error
  */
-int intel_huc_init_hw(struct intel_huc *huc)
+void intel_huc_init_hw(struct intel_huc *huc)
 {
 	struct drm_i915_private *dev_priv = huc_to_i915(huc);
 	int err;
 
-	if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE)
-		return 0;
-
 	DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n",
 		huc->fw.path,
 		intel_uc_fw_status_repr(huc->fw.fetch_status),
 		intel_uc_fw_status_repr(huc->fw.load_status));
 
-	if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS &&
-	    huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL)
-		return -ENOEXEC;
+	if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
+		return;
 
 	huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING;
 
-	switch (huc->fw.fetch_status) {
-	case INTEL_UC_FIRMWARE_FAIL:
-		/* something went wrong :( */
-		err = -EIO;
-		goto fail;
-
-	case INTEL_UC_FIRMWARE_NONE:
-	case INTEL_UC_FIRMWARE_PENDING:
-	default:
-		/* "can't happen" */
-		WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n",
-			huc->fw.path,
-			intel_uc_fw_status_repr(huc->fw.fetch_status),
-			huc->fw.fetch_status);
-		err = -ENXIO;
-		goto fail;
-
-	case INTEL_UC_FIRMWARE_SUCCESS:
-		break;
-	}
-
 	err = huc_ucode_xfer(dev_priv);
-	if (err)
-		goto fail;
 
-	huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS;
+	huc->fw.load_status = err ?
+		INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS;
 
 	DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n",
 		huc->fw.path,
 		intel_uc_fw_status_repr(huc->fw.fetch_status),
 		intel_uc_fw_status_repr(huc->fw.load_status));
 
-	return 0;
-
-fail:
-	if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING)
-		huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
-
-	DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err);
+	if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
+		DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err);
 
-	return err;
+	return;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 25d8e76489e4..3bf65288ffff 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -63,6 +63,7 @@
 #include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 
 #include "i915_drv.h"
 #include <linux/delay.h>
@@ -110,6 +111,11 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
 	pinfo.size_data = sizeof(*pdata);
 	pinfo.dma_mask = DMA_BIT_MASK(32);
 
+	pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes;
+	pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */
+	pdata->port[0].pipe = -1;
+	pdata->port[1].pipe = -1;
+	pdata->port[2].pipe = -1;
 	spin_lock_init(&pdata->lpe_audio_slock);
 
 	platdev = platform_device_register_full(&pinfo);
@@ -121,6 +127,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
 
 	kfree(rsc);
 
+	pm_runtime_forbid(&platdev->dev);
+	pm_runtime_set_active(&platdev->dev);
+	pm_runtime_enable(&platdev->dev);
+
 	return platdev;
 
 err:
@@ -144,44 +154,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
 
 static void lpe_audio_irq_unmask(struct irq_data *d)
 {
-	struct drm_i915_private *dev_priv = d->chip_data;
-	unsigned long irqflags;
-	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		val |= I915_LPE_PIPE_C_INTERRUPT;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->irq_mask &= ~val;
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-	POSTING_READ(VLV_IMR);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static void lpe_audio_irq_mask(struct irq_data *d)
 {
-	struct drm_i915_private *dev_priv = d->chip_data;
-	unsigned long irqflags;
-	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		val |= I915_LPE_PIPE_C_INTERRUPT;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->irq_mask |= val;
-	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IIR, val);
-	POSTING_READ(VLV_IIR);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static struct irq_chip lpe_audio_irqchip = {
@@ -325,8 +301,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 
 	desc = irq_to_desc(dev_priv->lpe_audio.irq);
 
-	lpe_audio_irq_mask(&desc->irq_data);
-
 	lpe_audio_platdev_destroy(dev_priv);
 
 	irq_free_desc(dev_priv->lpe_audio.irq);
@@ -337,53 +311,47 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
  * intel_lpe_audio_notify() - notify lpe audio event
  * audio driver and i915
  * @dev_priv: the i915 drm device private data
+ * @pipe: pipe
+ * @port: port
  * @eld : ELD data
- * @pipe: pipe id
- * @port: port id
- * @tmds_clk_speed: tmds clock frequency in Hz
+ * @ls_clock: Link symbol clock in kHz
+ * @dp_output: Driving a DP output?
  *
  * Notify lpe audio driver of eld change.
  */
 void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
-			    void *eld, int port, int pipe, int tmds_clk_speed,
-			    bool dp_output, int link_rate)
+			    enum pipe pipe, enum port port,
+			    const void *eld, int ls_clock, bool dp_output)
 {
-	unsigned long irq_flags;
-	struct intel_hdmi_lpe_audio_pdata *pdata = NULL;
+	unsigned long irqflags;
+	struct intel_hdmi_lpe_audio_pdata *pdata;
+	struct intel_hdmi_lpe_audio_port_pdata *ppdata;
 	u32 audio_enable;
 
 	if (!HAS_LPE_AUDIO(dev_priv))
 		return;
 
-	pdata = dev_get_platdata(
-		&(dev_priv->lpe_audio.platdev->dev));
+	pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev);
+	ppdata = &pdata->port[port - PORT_B];
 
-	spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags);
+	spin_lock_irqsave(&pdata->lpe_audio_slock, irqflags);
 
 	audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port));
 
 	if (eld != NULL) {
-		memcpy(pdata->eld.eld_data, eld,
-			HDMI_MAX_ELD_BYTES);
-		pdata->eld.port_id = port;
-		pdata->eld.pipe_id = pipe;
-		pdata->hdmi_connected = true;
-
-		pdata->dp_output = dp_output;
-		if (tmds_clk_speed)
-			pdata->tmds_clock_speed = tmds_clk_speed;
-		if (link_rate)
-			pdata->link_rate = link_rate;
+		memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES);
+		ppdata->pipe = pipe;
+		ppdata->ls_clock = ls_clock;
+		ppdata->dp_output = dp_output;
 
 		/* Unmute the amp for both DP and HDMI */
 		I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
 			   audio_enable & ~VLV_AMP_MUTE);
-
 	} else {
-		memset(pdata->eld.eld_data, 0,
-			HDMI_MAX_ELD_BYTES);
-		pdata->hdmi_connected = false;
-		pdata->dp_output = false;
+		memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES);
+		ppdata->pipe = -1;
+		ppdata->ls_clock = 0;
+		ppdata->dp_output = false;
 
 		/* Mute the amp for both DP and HDMI */
 		I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
@@ -391,10 +359,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
 	}
 
 	if (pdata->notify_audio_lpe)
-		pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev);
-	else
-		pdata->notify_pending = true;
+		pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev, port - PORT_B);
 
-	spin_unlock_irqrestore(&pdata->lpe_audio_slock,
-			irq_flags);
+	spin_unlock_irqrestore(&pdata->lpe_audio_slock, irqflags);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8f7c631fc1f..0909549ad320 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,10 +138,6 @@
 #include "i915_drv.h"
 #include "intel_mocs.h"
 
-#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
-
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
 #define RING_EXECLIST0_VALID		(1 << 0x4)
@@ -326,8 +322,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -515,6 +510,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 	struct execlist_port *port = engine->execlist_port;
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
+	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
+	 * not be relinquished until the device is idle (see
+	 * i915_gem_idle_work_handler()). As a precaution, we make sure
+	 * that all ELSP are drained i.e. we have processed the CSB,
+	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
+	 */
+	GEM_BUG_ON(!dev_priv->gt.awake);
+
 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
 	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
@@ -771,7 +775,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
 		goto unpin_vma;
 	}
 
-	ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias);
+	ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias);
 	if (ret)
 		goto unpin_map;
 
@@ -1139,14 +1143,11 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static u32 port_seqno(struct execlist_port *port)
-{
-	return port->request ? port->request->global_seqno : 0;
-}
-
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
+	unsigned int n;
 	int ret;
 
 	ret = intel_mocs_init_engine(engine);
@@ -1167,16 +1168,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
-		DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n",
-				 engine->name,
-				 port_seqno(&engine->execlist_port[0]),
-				 port_seqno(&engine->execlist_port[1]));
-		engine->execlist_port[0].count = 0;
-		engine->execlist_port[1].count = 0;
-		execlists_submit_ports(engine);
+
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+		if (!port[n].request)
+			break;
+
+		DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
+				 engine->name, n,
+				 port[n].request->global_seqno);
+
+		/* Discard the current inflight count */
+		port[n].count = 0;
 	}
 
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+		execlists_submit_ports(engine);
+
 	return 0;
 }
 
@@ -1907,44 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx,
 	return 0;
 }
 
-/**
- * intel_lr_context_size() - return the size of the context for an engine
- * @engine: which engine to find the context size for
- *
- * Each engine may require a different amount of space for a context image,
- * so when allocating (or copying) an image, this function can be used to
- * find the right size for the specific engine.
- *
- * Return: size (in bytes) of an engine-specific context image
- *
- * Note: this size includes the HWSP, which is part of the context image
- * in LRC mode, but does not include the "shared data page" used with
- * GuC submission. The caller should account for this if using the GuC.
- */
-uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
-{
-	int ret = 0;
-
-	WARN_ON(INTEL_GEN(engine->i915) < 8);
-
-	switch (engine->id) {
-	case RCS:
-		if (INTEL_GEN(engine->i915) >= 9)
-			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
-		else
-			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
-		break;
-	case VCS:
-	case BCS:
-	case VECS:
-	case VCS2:
-		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
-		break;
-	}
-
-	return ret;
-}
-
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine)
 {
@@ -1957,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
 	WARN_ON(ce->state);
 
-	context_size = round_up(intel_lr_context_size(engine),
-				I915_GTT_PAGE_SIZE);
+	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
 
 	/* One extra page as the sharing data between driver and GuC */
 	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
@@ -2036,8 +2004,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
-			ce->ring->head = ce->ring->tail = 0;
-			intel_ring_update_space(ce->ring);
+			intel_ring_reset(ce->ring, 0);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index e8015e7bf4e9..52b3a1fd4059 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_i915_private;
 struct i915_gem_context;
 
-uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
-
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index 206ee4f0150e..647426c75b0a 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -513,16 +513,20 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
 	struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
 	struct intel_crtc_state *pipe_config;
 	struct drm_atomic_state *state;
+	struct drm_modeset_acquire_ctx ctx;
 	int ret = 0;
 
-	drm_modeset_lock_all(dev);
+	drm_modeset_acquire_init(&ctx, 0);
+
 	state = drm_atomic_state_alloc(dev);
 	if (!state) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
 
-	state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx;
+	state->acquire_ctx = &ctx;
+
+retry:
 	pipe_config = intel_atomic_get_crtc_state(state, crtc);
 	if (IS_ERR(pipe_config)) {
 		ret = PTR_ERR(pipe_config);
@@ -537,10 +541,17 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
 	ret = drm_atomic_commit(state);
 
 put_state:
+	if (ret == -EDEADLK) {
+		drm_atomic_state_clear(state);
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+
 	drm_atomic_state_put(state);
 unlock:
 	WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret);
-	drm_modeset_unlock_all(dev);
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
 }
 
 static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 570bd603f401..cacb65fa2dd5 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1029,7 +1029,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
 	if (dev_priv->wm.pri_latency[level] == 0)
 		return USHRT_MAX;
 
-	if (!plane_state->base.visible)
+	if (!intel_wm_plane_visible(crtc_state, plane_state))
 		return 0;
 
 	cpp = plane_state->base.fb->format->cpp[0];
@@ -1039,7 +1039,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
 	if (WARN_ON(htotal == 0))
 		htotal = 1;
 
-	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
+	if (plane->id == PLANE_CURSOR) {
 		/*
 		 * FIXME the formula gives values that are
 		 * too big for the cursor FIFO, and hence we
@@ -1203,7 +1203,7 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state,
 	int level;
 	bool dirty = false;
 
-	if (!plane_state->base.visible) {
+	if (!intel_wm_plane_visible(crtc_state, plane_state)) {
 		dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
 		goto out;
 	}
@@ -8135,9 +8135,9 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
 	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
-	if (intel_wait_for_register_fw(dev_priv,
-				       GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-				       500)) {
+	if (__intel_wait_for_register_fw(dev_priv,
+					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+					 500, 0, NULL)) {
 		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
 		return -ETIMEDOUT;
 	}
@@ -8180,9 +8180,9 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
 	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
-	if (intel_wait_for_register_fw(dev_priv,
-				       GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-				       500)) {
+	if (__intel_wait_for_register_fw(dev_priv,
+					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+					 500, 0, NULL)) {
 		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66a2b8b83972..29b5afac7856 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -538,9 +538,9 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
-	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
-				       RING_VALID, RING_VALID,
-				       50)) {
+	if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base),
+				    RING_VALID, RING_VALID,
+				    50)) {
 		DRM_ERROR("%s initialization failed "
 			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
 			  engine->name,
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
 	i915_gem_request_submit(request);
 
-	assert_ring_tail_valid(request->ring, request->tail);
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1259,6 +1259,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	GEM_BUG_ON(engine->id != RCS);
+
 	dev_priv->status_page_dmah =
 		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
 	if (!dev_priv->status_page_dmah)
@@ -1270,17 +1272,18 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 	return 0;
 }
 
-int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
+int intel_ring_pin(struct intel_ring *ring,
+		   struct drm_i915_private *i915,
+		   unsigned int offset_bias)
 {
-	unsigned int flags;
-	enum i915_map_type map;
+	enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
 	struct i915_vma *vma = ring->vma;
+	unsigned int flags;
 	void *addr;
 	int ret;
 
 	GEM_BUG_ON(ring->vaddr);
 
-	map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
 
 	flags = PIN_GLOBAL;
 	if (offset_bias)
@@ -1316,11 +1319,23 @@ err:
 	return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!list_empty(&ring->request_list));
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->tail);
+
 	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else
@@ -1338,7 +1353,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 
 	obj = i915_gem_object_create_stolen(dev_priv, size);
 	if (!obj)
-		obj = i915_gem_object_create(dev_priv, size);
+		obj = i915_gem_object_create_internal(dev_priv, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
@@ -1369,8 +1384,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	if (!ring)
 		return ERR_PTR(-ENOMEM);
 
-	ring->engine = engine;
-
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ring->size = size;
@@ -1424,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx)
 			    PIN_GLOBAL | PIN_HIGH);
 }
 
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	obj = i915_gem_object_create(i915, engine->context_size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	/*
+	 * Try to make the context utilize L3 as well as LLC.
+	 *
+	 * On VLV we don't have L3 controls in the PTEs so we
+	 * shouldn't touch the cache level, especially as that
+	 * would make the object snooped which might have a
+	 * negative performance impact.
+	 *
+	 * Snooping is required on non-llc platforms in execlist
+	 * mode, but since all GGTT accesses use PAT entry 0 we
+	 * get snooping anyway regardless of cache_level.
+	 *
+	 * This is only applicable for Ivy Bridge devices since
+	 * later platforms don't have L3 control bits in the PTE.
+	 */
+	if (IS_IVYBRIDGE(i915)) {
+		/* Ignore any error, regard it as a simple optimisation */
+		i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
+	}
+
+	vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
+	if (IS_ERR(vma))
+		i915_gem_object_put(obj);
+
+	return vma;
+}
+
 static int intel_ring_context_pin(struct intel_engine_cs *engine,
 				  struct i915_gem_context *ctx)
 {
@@ -1436,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
 		return 0;
 	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
+	if (!ce->state && engine->context_size) {
+		struct i915_vma *vma;
+
+		vma = alloc_context_vma(engine);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
+			goto error;
+		}
+
+		ce->state = vma;
+	}
+
 	if (ce->state) {
 		ret = context_pin(ctx);
 		if (ret)
@@ -1481,78 +1544,70 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine,
 
 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_ring *ring;
-	int ret;
-
-	WARN_ON(engine->buffer);
+	int err;
 
 	intel_engine_setup_common(engine);
 
-	ret = intel_engine_init_common(engine);
-	if (ret)
-		goto error;
+	err = intel_engine_init_common(engine);
+	if (err)
+		goto err;
+
+	if (HWS_NEEDS_PHYSICAL(engine->i915))
+		err = init_phys_status_page(engine);
+	else
+		err = init_status_page(engine);
+	if (err)
+		goto err;
 
 	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
 	if (IS_ERR(ring)) {
-		ret = PTR_ERR(ring);
-		goto error;
-	}
-
-	if (HWS_NEEDS_PHYSICAL(dev_priv)) {
-		WARN_ON(engine->id != RCS);
-		ret = init_phys_status_page(engine);
-		if (ret)
-			goto error;
-	} else {
-		ret = init_status_page(engine);
-		if (ret)
-			goto error;
+		err = PTR_ERR(ring);
+		goto err_hws;
 	}
 
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE);
-	if (ret) {
-		intel_ring_free(ring);
-		goto error;
-	}
+	err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE);
+	if (err)
+		goto err_ring;
+
+	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
 	return 0;
 
-error:
-	intel_engine_cleanup(engine);
-	return ret;
+err_ring:
+	intel_ring_free(ring);
+err_hws:
+	if (HWS_NEEDS_PHYSICAL(engine->i915))
+		cleanup_phys_status_page(engine);
+	else
+		cleanup_status_page(engine);
+err:
+	intel_engine_cleanup_common(engine);
+	return err;
 }
 
 void intel_engine_cleanup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv = engine->i915;
 
-	dev_priv = engine->i915;
+	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+		(I915_READ_MODE(engine) & MODE_IDLE) == 0);
 
-	if (engine->buffer) {
-		WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-			(I915_READ_MODE(engine) & MODE_IDLE) == 0);
-
-		intel_ring_unpin(engine->buffer);
-		intel_ring_free(engine->buffer);
-		engine->buffer = NULL;
-	}
+	intel_ring_unpin(engine->buffer);
+	intel_ring_free(engine->buffer);
 
 	if (engine->cleanup)
 		engine->cleanup(engine);
 
-	if (HWS_NEEDS_PHYSICAL(dev_priv)) {
-		WARN_ON(engine->id != RCS);
+	if (HWS_NEEDS_PHYSICAL(dev_priv))
 		cleanup_phys_status_page(engine);
-	} else {
+	else
 		cleanup_status_page(engine);
-	}
 
 	intel_engine_cleanup_common(engine);
 
-	engine->i915 = NULL;
 	dev_priv->engine[engine->id] = NULL;
 	kfree(engine);
 }
@@ -1562,8 +1617,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Restart from the beginning of the rings for convenience */
 	for_each_engine(engine, dev_priv, id)
-		engine->buffer->head = engine->buffer->tail;
+		intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1672,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		unsigned space;
 
 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->tail,
+		space = __intel_ring_space(target->postfix, ring->emit,
 					   ring->size);
 		if (space >= bytes)
 			break;
@@ -1641,8 +1697,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->tail;
-	int remain_usable = ring->effective_size - ring->tail;
+	int remain_actual = ring->size - ring->emit;
+	int remain_usable = ring->effective_size - ring->emit;
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;
@@ -1678,17 +1734,18 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
 	if (unlikely(need_wrap)) {
 		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->tail, 0, remain_actual);
-		ring->tail = 0;
+		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		ring->emit = 0;
 		ring->space -= remain_actual;
 	}
 
-	GEM_BUG_ON(ring->tail > ring->size - bytes);
-	cs = ring->vaddr + ring->tail;
-	ring->tail += bytes;
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	cs = ring->vaddr + ring->emit;
+	GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes));
+	ring->emit += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);
 
@@ -1699,7 +1756,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
 	int num_dwords =
-		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	u32 *cs;
 
 	if (num_dwords == 0)
@@ -1736,11 +1793,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
 	I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
 
 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
-	if (intel_wait_for_register_fw(dev_priv,
-				       GEN6_BSD_SLEEP_PSMI_CONTROL,
-				       GEN6_BSD_SLEEP_INDICATOR,
-				       0,
-				       50))
+	if (__intel_wait_for_register_fw(dev_priv,
+					 GEN6_BSD_SLEEP_PSMI_CONTROL,
+					 GEN6_BSD_SLEEP_INDICATOR,
+					 0,
+					 1000, 0, NULL))
 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
 
 	/* Now that the ring is fully powered up, update the tail */
@@ -2182,20 +2239,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 	return intel_init_ring_buffer(engine);
 }
 
-/**
- * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
- */
-int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
-
-	engine->emit_flush = gen6_bsd_ring_flush;
-
-	return intel_init_ring_buffer(engine);
-}
-
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..02d741ef99ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -139,12 +139,11 @@ struct intel_ring {
 	struct i915_vma *vma;
 	void *vaddr;
 
-	struct intel_engine_cs *engine;
-
 	struct list_head request_list;
 
 	u32 head;
 	u32 tail;
+	u32 emit;
 
 	int space;
 	int size;
@@ -189,15 +188,22 @@ enum intel_engine_id {
 	VECS
 };
 
+#define INTEL_ENGINE_CS_MAX_NAME 8
+
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
-	const char	*name;
+	char name[INTEL_ENGINE_CS_MAX_NAME];
 	enum intel_engine_id id;
-	unsigned int exec_id;
+	unsigned int uabi_id;
 	unsigned int hw_id;
 	unsigned int guc_id;
-	u32		mmio_base;
+
+	u8 class;
+	u8 instance;
+	u32 context_size;
+	u32 mmio_base;
 	unsigned int irq_shift;
+
 	struct intel_ring *buffer;
 	struct intel_timeline *timeline;
 
@@ -487,7 +493,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+int intel_ring_pin(struct intel_ring *ring,
+		   struct drm_i915_private *i915,
+		   unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -511,7 +521,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -540,7 +550,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Validate and store the new tail. Whilst writes to the tail are
+	 * strictly ordered, there is no serialisation between readers and
+	 * writers: i915_gem_request_retire() may read the tail just as
+	 * execlists updates it, since although the breadcrumb is complete,
+	 * the context switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
@@ -551,7 +573,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
@@ -652,7 +673,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 			   struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+				   bool wakeup);
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index e077c2a9e694..784df024e230 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -48,41 +48,6 @@ struct intel_tv {
 	struct intel_encoder base;
 
 	int type;
-	const char *tv_format;
-	int margin[4];
-	u32 save_TV_H_CTL_1;
-	u32 save_TV_H_CTL_2;
-	u32 save_TV_H_CTL_3;
-	u32 save_TV_V_CTL_1;
-	u32 save_TV_V_CTL_2;
-	u32 save_TV_V_CTL_3;
-	u32 save_TV_V_CTL_4;
-	u32 save_TV_V_CTL_5;
-	u32 save_TV_V_CTL_6;
-	u32 save_TV_V_CTL_7;
-	u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3;
-
-	u32 save_TV_CSC_Y;
-	u32 save_TV_CSC_Y2;
-	u32 save_TV_CSC_U;
-	u32 save_TV_CSC_U2;
-	u32 save_TV_CSC_V;
-	u32 save_TV_CSC_V2;
-	u32 save_TV_CLR_KNOBS;
-	u32 save_TV_CLR_LEVEL;
-	u32 save_TV_WIN_POS;
-	u32 save_TV_WIN_SIZE;
-	u32 save_TV_FILTER_CTL_1;
-	u32 save_TV_FILTER_CTL_2;
-	u32 save_TV_FILTER_CTL_3;
-
-	u32 save_TV_H_LUMA[60];
-	u32 save_TV_H_CHROMA[60];
-	u32 save_TV_V_LUMA[43];
-	u32 save_TV_V_CHROMA[43];
-
-	u32 save_TV_DAC;
-	u32 save_TV_CTL;
 };
 
 struct video_levels {
@@ -873,32 +838,18 @@ intel_disable_tv(struct intel_encoder *encoder,
 	I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE);
 }
 
-static const struct tv_mode *
-intel_tv_mode_lookup(const char *tv_format)
+static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
-		const struct tv_mode *tv_mode = &tv_modes[i];
+	int format = conn_state->tv.mode;
 
-		if (!strcmp(tv_format, tv_mode->name))
-			return tv_mode;
-	}
-	return NULL;
-}
-
-static const struct tv_mode *
-intel_tv_mode_find(struct intel_tv *intel_tv)
-{
-	return intel_tv_mode_lookup(intel_tv->tv_format);
+	return &tv_modes[format];
 }
 
 static enum drm_mode_status
 intel_tv_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
 	if (mode->clock > max_dotclk)
@@ -925,8 +876,7 @@ intel_tv_compute_config(struct intel_encoder *encoder,
 			struct intel_crtc_state *pipe_config,
 			struct drm_connector_state *conn_state)
 {
-	struct intel_tv *intel_tv = enc_to_tv(encoder);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
 
 	if (!tv_mode)
 		return false;
@@ -1032,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	struct intel_tv *intel_tv = enc_to_tv(encoder);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
 	u32 tv_ctl;
 	u32 scctl1, scctl2, scctl3;
 	int i, j;
@@ -1135,12 +1085,12 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 	else
 		ysize = 2*tv_mode->nbr_end + 1;
 
-	xpos += intel_tv->margin[TV_MARGIN_LEFT];
-	ypos += intel_tv->margin[TV_MARGIN_TOP];
-	xsize -= (intel_tv->margin[TV_MARGIN_LEFT] +
-		  intel_tv->margin[TV_MARGIN_RIGHT]);
-	ysize -= (intel_tv->margin[TV_MARGIN_TOP] +
-		  intel_tv->margin[TV_MARGIN_BOTTOM]);
+	xpos += conn_state->tv.margins.left;
+	ypos += conn_state->tv.margins.top;
+	xsize -= (conn_state->tv.margins.left +
+		  conn_state->tv.margins.right);
+	ysize -= (conn_state->tv.margins.top +
+		  conn_state->tv.margins.bottom);
 	I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
 	I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
 
@@ -1288,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
 static void intel_tv_find_better_format(struct drm_connector *connector)
 {
 	struct intel_tv *intel_tv = intel_attached_tv(connector);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int i;
 
 	if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) ==
@@ -1304,9 +1254,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
 			break;
 	}
 
-	intel_tv->tv_format = tv_mode->name;
-	drm_object_property_set_value(&connector->base,
-		connector->dev->mode_config.tv_mode_property, i);
+	connector->state->tv.mode = i;
 }
 
 /**
@@ -1347,16 +1295,15 @@ intel_tv_detect(struct drm_connector *connector,
 				connector_status_connected;
 		} else
 			status = connector_status_unknown;
-	} else
-		return connector->status;
 
-	if (status != connector_status_connected)
-		return status;
-
-	intel_tv->type = type;
-	intel_tv_find_better_format(connector);
+		if (status == connector_status_connected) {
+			intel_tv->type = type;
+			intel_tv_find_better_format(connector);
+		}
 
-	return connector_status_connected;
+		return status;
+	} else
+		return connector->status;
 }
 
 static const struct input_res {
@@ -1376,12 +1323,9 @@ static const struct input_res {
  * Chose preferred mode  according to line number of TV format
  */
 static void
-intel_tv_chose_preferred_modes(struct drm_connector *connector,
+intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode,
 			       struct drm_display_mode *mode_ptr)
 {
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
-
 	if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480)
 		mode_ptr->type |= DRM_MODE_TYPE_PREFERRED;
 	else if (tv_mode->nbr_end > 480) {
@@ -1404,8 +1348,7 @@ static int
 intel_tv_get_modes(struct drm_connector *connector)
 {
 	struct drm_display_mode *mode_ptr;
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
-	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int j, count = 0;
 	u64 tmp;
 
@@ -1448,7 +1391,7 @@ intel_tv_get_modes(struct drm_connector *connector)
 		mode_ptr->clock = (int) tmp;
 
 		mode_ptr->type = DRM_MODE_TYPE_DRIVER;
-		intel_tv_chose_preferred_modes(connector, mode_ptr);
+		intel_tv_choose_preferred_modes(tv_mode, mode_ptr);
 		drm_mode_probed_add(connector, mode_ptr);
 		count++;
 	}
@@ -1463,74 +1406,47 @@ intel_tv_destroy(struct drm_connector *connector)
 	kfree(connector);
 }
 
-
-static int
-intel_tv_set_property(struct drm_connector *connector, struct drm_property *property,
-		      uint64_t val)
-{
-	struct drm_device *dev = connector->dev;
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
-	struct drm_crtc *crtc = intel_tv->base.base.crtc;
-	int ret = 0;
-	bool changed = false;
-
-	ret = drm_object_property_set_value(&connector->base, property, val);
-	if (ret < 0)
-		goto out;
-
-	if (property == dev->mode_config.tv_left_margin_property &&
-		intel_tv->margin[TV_MARGIN_LEFT] != val) {
-		intel_tv->margin[TV_MARGIN_LEFT] = val;
-		changed = true;
-	} else if (property == dev->mode_config.tv_right_margin_property &&
-		intel_tv->margin[TV_MARGIN_RIGHT] != val) {
-		intel_tv->margin[TV_MARGIN_RIGHT] = val;
-		changed = true;
-	} else if (property == dev->mode_config.tv_top_margin_property &&
-		intel_tv->margin[TV_MARGIN_TOP] != val) {
-		intel_tv->margin[TV_MARGIN_TOP] = val;
-		changed = true;
-	} else if (property == dev->mode_config.tv_bottom_margin_property &&
-		intel_tv->margin[TV_MARGIN_BOTTOM] != val) {
-		intel_tv->margin[TV_MARGIN_BOTTOM] = val;
-		changed = true;
-	} else if (property == dev->mode_config.tv_mode_property) {
-		if (val >= ARRAY_SIZE(tv_modes)) {
-			ret = -EINVAL;
-			goto out;
-		}
-		if (!strcmp(intel_tv->tv_format, tv_modes[val].name))
-			goto out;
-
-		intel_tv->tv_format = tv_modes[val].name;
-		changed = true;
-	} else {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (changed && crtc)
-		intel_crtc_restore_mode(crtc);
-out:
-	return ret;
-}
-
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.late_register = intel_connector_register,
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_tv_destroy,
-	.set_property = intel_tv_set_property,
-	.atomic_get_property = intel_connector_atomic_get_property,
+	.set_property = drm_atomic_helper_connector_set_property,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 };
 
+static int intel_tv_atomic_check(struct drm_connector *connector,
+				 struct drm_connector_state *new_state)
+{
+	struct drm_crtc_state *new_crtc_state;
+	struct drm_connector_state *old_state;
+
+	if (!new_state->crtc)
+		return 0;
+
+	old_state = drm_atomic_get_old_connector_state(new_state->state, connector);
+	new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc);
+
+	if (old_state->tv.mode != new_state->tv.mode ||
+	    old_state->tv.margins.left != new_state->tv.margins.left ||
+	    old_state->tv.margins.right != new_state->tv.margins.right ||
+	    old_state->tv.margins.top != new_state->tv.margins.top ||
+	    old_state->tv.margins.bottom != new_state->tv.margins.bottom) {
+		/* The TV mode or a margin changed: force a modeset. */
+
+		new_crtc_state->connectors_changed = true;
+	}
+
+	return 0;
+}
+
 static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = {
 	.detect_ctx = intel_tv_detect,
 	.mode_valid = intel_tv_mode_valid,
 	.get_modes = intel_tv_get_modes,
+	.atomic_check = intel_tv_atomic_check,
 };
 
 static const struct drm_encoder_funcs intel_tv_enc_funcs = {
@@ -1548,6 +1464,7 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 	u32 tv_dac_on, tv_dac_off, save_tv_dac;
 	const char *tv_format_names[ARRAY_SIZE(tv_modes)];
 	int i, initial_mode = 0;
+	struct drm_connector_state *state;
 
 	if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED)
 		return;
@@ -1593,6 +1510,7 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 
 	intel_encoder = &intel_tv->base;
 	connector = &intel_connector->base;
+	state = connector->state;
 
 	/* The documentation, for the older chipsets at least, recommend
 	 * using a polling method rather than hotplug detection for TVs.
@@ -1630,12 +1548,12 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 	intel_tv->type = DRM_MODE_CONNECTOR_Unknown;
 
 	/* BIOS margin values */
-	intel_tv->margin[TV_MARGIN_LEFT] = 54;
-	intel_tv->margin[TV_MARGIN_TOP] = 36;
-	intel_tv->margin[TV_MARGIN_RIGHT] = 46;
-	intel_tv->margin[TV_MARGIN_BOTTOM] = 37;
+	state->tv.margins.left = 54;
+	state->tv.margins.top = 36;
+	state->tv.margins.right = 46;
+	state->tv.margins.bottom = 37;
 
-	intel_tv->tv_format = tv_modes[initial_mode].name;
+	state->tv.mode = initial_mode;
 
 	drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs);
 	connector->interlace_allowed = false;
@@ -1649,17 +1567,17 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 				      tv_format_names);
 
 	drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,
-				   initial_mode);
+				   state->tv.mode);
 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_left_margin_property,
-				   intel_tv->margin[TV_MARGIN_LEFT]);
+				   state->tv.margins.left);
 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_top_margin_property,
-				   intel_tv->margin[TV_MARGIN_TOP]);
+				   state->tv.margins.top);
 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_right_margin_property,
-				   intel_tv->margin[TV_MARGIN_RIGHT]);
+				   state->tv.margins.right);
 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_bottom_margin_property,
-				   intel_tv->margin[TV_MARGIN_BOTTOM]);
+				   state->tv.margins.bottom);
 }
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index c117424f1f50..7fd75ca031c3 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -99,7 +99,7 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 
 	mutex_init(&guc->send_mutex);
-	guc->send = intel_guc_send_mmio;
+	guc->send = intel_guc_send_nop;
 }
 
 static void fetch_uc_fw(struct drm_i915_private *dev_priv,
@@ -252,13 +252,27 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
 	__intel_uc_fw_fini(&dev_priv->huc.fw);
 }
 
+static int guc_enable_communication(struct intel_guc *guc)
+{
+	/* XXX: placeholder for alternate setup */
+	guc->send = intel_guc_send_mmio;
+	return 0;
+}
+
+static void guc_disable_communication(struct intel_guc *guc)
+{
+	guc->send = intel_guc_send_nop;
+}
+
 int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 {
+	struct intel_guc *guc = &dev_priv->guc;
 	int ret, attempts;
 
 	if (!i915.enable_guc_loading)
 		return 0;
 
+	guc_disable_communication(guc);
 	gen9_reset_guc_interrupts(dev_priv);
 
 	/* We need to notify the guc whenever we change the GGTT */
@@ -274,6 +288,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 			goto err_guc;
 	}
 
+	/* init WOPCM */
+	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
+	I915_WRITE(DMA_GUC_WOPCM_OFFSET,
+		   GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
+
 	/* WaEnableuKernelHeaderValidFix:skl */
 	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
 	if (IS_GEN9(dev_priv))
@@ -303,6 +322,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_submission;
 
+	ret = guc_enable_communication(guc);
+	if (ret)
+		goto err_submission;
+
 	intel_guc_auth_huc(dev_priv);
 	if (i915.enable_guc_submission) {
 		if (i915.guc_log_level >= 0)
@@ -325,6 +348,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	 * marks the GPU as wedged until reset).
 	 */
 err_interrupts:
+	guc_disable_communication(guc);
 	gen9_disable_guc_interrupts(dev_priv);
 err_submission:
 	if (i915.enable_guc_submission)
@@ -359,17 +383,10 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 	i915_ggtt_disable_guc(dev_priv);
 }
 
-/*
- * Read GuC command/status register (SOFT_SCRATCH_0)
- * Return true if it contains a response rather than a command
- */
-static bool guc_recv(struct intel_guc *guc, u32 *status)
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
-	u32 val = I915_READ(SOFT_SCRATCH(0));
-	*status = val;
-	return INTEL_GUC_RECV_IS_RESPONSE(val);
+	WARN(1, "Unexpected send: action=%#x\n", *action);
+	return -ENODEV;
 }
 
 /*
@@ -399,13 +416,14 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
 	I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
 
 	/*
-	 * Fast commands should complete in less than 10us, so sample quickly
-	 * up to that length of time, then switch to a slower sleep-wait loop.
-	 * No inte_guc_send command should ever take longer than 10ms.
+	 * No GuC command should ever take longer than 10ms.
+	 * Fast commands should still complete in 10us.
 	 */
-	ret = wait_for_us(guc_recv(guc, &status), 10);
-	if (ret)
-		ret = wait_for(guc_recv(guc, &status), 10);
+	ret = __intel_wait_for_register_fw(dev_priv,
+					   SOFT_SCRATCH(0),
+					   INTEL_GUC_RECV_MASK,
+					   INTEL_GUC_RECV_MASK,
+					   10, 10, &status);
 	if (status != INTEL_GUC_STATUS_SUCCESS) {
 		/*
 		 * Either the GuC explicitly returned an error (which
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 4b7f73aeddac..1e0eecdcedc0 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -227,6 +227,7 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
 int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
 static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 {
@@ -266,7 +267,7 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 
 /* intel_huc.c */
 void intel_huc_select_fw(struct intel_huc *huc);
-int intel_huc_init_hw(struct intel_huc *huc);
+void intel_huc_init_hw(struct intel_huc *huc);
 void intel_guc_auth_huc(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 6d1ea26b2493..aa9d3065853c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -29,6 +29,7 @@
 #include <linux/pm_runtime.h>
 
 #define FORCEWAKE_ACK_TIMEOUT_MS 50
+#define GT_FIFO_TIMEOUT_MS	 10
 
 #define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__))
 
@@ -172,22 +173,6 @@ static void fw_domains_get_with_thread_status(struct drm_i915_private *dev_priv,
 	__gen6_gt_wait_for_thread_c0(dev_priv);
 }
 
-static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
-{
-	u32 gtfifodbg;
-
-	gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
-	if (WARN(gtfifodbg, "GT wake FIFO error 0x%x\n", gtfifodbg))
-		__raw_i915_write32(dev_priv, GTFIFODBG, gtfifodbg);
-}
-
-static void fw_domains_put_with_fifo(struct drm_i915_private *dev_priv,
-				     enum forcewake_domains fw_domains)
-{
-	fw_domains_put(dev_priv, fw_domains);
-	gen6_gt_check_fifodbg(dev_priv);
-}
-
 static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv)
 {
 	u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL);
@@ -195,30 +180,27 @@ static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv)
 	return count & GT_FIFO_FREE_ENTRIES_MASK;
 }
 
-static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
+static void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
-	int ret = 0;
+	u32 n;
 
 	/* On VLV, FIFO will be shared by both SW and HW.
 	 * So, we need to read the FREE_ENTRIES everytime */
 	if (IS_VALLEYVIEW(dev_priv))
-		dev_priv->uncore.fifo_count = fifo_free_entries(dev_priv);
-
-	if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
-		int loop = 500;
-		u32 fifo = fifo_free_entries(dev_priv);
-
-		while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
-			udelay(10);
-			fifo = fifo_free_entries(dev_priv);
+		n = fifo_free_entries(dev_priv);
+	else
+		n = dev_priv->uncore.fifo_count;
+
+	if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) {
+		if (wait_for_atomic((n = fifo_free_entries(dev_priv)) >
+				    GT_FIFO_NUM_RESERVED_ENTRIES,
+				    GT_FIFO_TIMEOUT_MS)) {
+			DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n);
+			return;
 		}
-		if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
-			++ret;
-		dev_priv->uncore.fifo_count = fifo;
 	}
-	dev_priv->uncore.fifo_count--;
 
-	return ret;
+	dev_priv->uncore.fifo_count = n - 1;
 }
 
 static enum hrtimer_restart
@@ -384,15 +366,35 @@ vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
 }
 
 static bool
+gen6_check_for_fifo_debug(struct drm_i915_private *dev_priv)
+{
+	u32 fifodbg;
+
+	fifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
+	/* Log any pending GT FIFO error, then write it back to clear it. */
+	if (unlikely(fifodbg)) {
+		DRM_DEBUG_DRIVER("GTFIFODBG = 0x%08x\n", fifodbg);
+		__raw_i915_write32(dev_priv, GTFIFODBG, fifodbg);
+	}
+
+	return fifodbg;
+}
+
+static bool
 check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
 {
+	bool ret = false;
+
 	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv))
-		return fpga_check_for_unclaimed_mmio(dev_priv);
+		ret |= fpga_check_for_unclaimed_mmio(dev_priv);
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		return vlv_check_for_unclaimed_mmio(dev_priv);
+		ret |= vlv_check_for_unclaimed_mmio(dev_priv);
 
-	return false;
+	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
+		ret |= gen6_check_for_fifo_debug(dev_priv);
+
+	return ret;
 }
 
 static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
@@ -404,11 +406,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
 	if (check_for_unclaimed_mmio(dev_priv))
 		DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
 
-	/* clear out old GT FIFO errors */
-	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
-		__raw_i915_write32(dev_priv, GTFIFODBG,
-				   __raw_i915_read32(dev_priv, GTFIFODBG));
-
 	/* WaDisableShadowRegForCpd:chv */
 	if (IS_CHERRYVIEW(dev_priv)) {
 		__raw_i915_write32(dev_priv, GTFIFOCTL,
@@ -1047,15 +1044,10 @@ __gen2_write(32)
 #define __gen6_write(x) \
 static void \
 gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
-	u32 __fifo_ret = 0; \
 	GEN6_WRITE_HEADER; \
-	if (NEEDS_FORCE_WAKE(offset)) { \
-		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
-	} \
+	if (NEEDS_FORCE_WAKE(offset)) \
+		__gen6_gt_wait_for_fifo(dev_priv); \
 	__raw_i915_write##x(dev_priv, reg, val); \
-	if (unlikely(__fifo_ret)) { \
-		gen6_gt_check_fifodbg(dev_priv); \
-	} \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -1108,19 +1100,19 @@ __gen6_write(32)
 #undef GEN6_WRITE_FOOTER
 #undef GEN6_WRITE_HEADER
 
-#define ASSIGN_WRITE_MMIO_VFUNCS(x) \
+#define ASSIGN_WRITE_MMIO_VFUNCS(i915, x) \
 do { \
-	dev_priv->uncore.funcs.mmio_writeb = x##_write8; \
-	dev_priv->uncore.funcs.mmio_writew = x##_write16; \
-	dev_priv->uncore.funcs.mmio_writel = x##_write32; \
+	(i915)->uncore.funcs.mmio_writeb = x##_write8; \
+	(i915)->uncore.funcs.mmio_writew = x##_write16; \
+	(i915)->uncore.funcs.mmio_writel = x##_write32; \
 } while (0)
 
-#define ASSIGN_READ_MMIO_VFUNCS(x) \
+#define ASSIGN_READ_MMIO_VFUNCS(i915, x) \
 do { \
-	dev_priv->uncore.funcs.mmio_readb = x##_read8; \
-	dev_priv->uncore.funcs.mmio_readw = x##_read16; \
-	dev_priv->uncore.funcs.mmio_readl = x##_read32; \
-	dev_priv->uncore.funcs.mmio_readq = x##_read64; \
+	(i915)->uncore.funcs.mmio_readb = x##_read8; \
+	(i915)->uncore.funcs.mmio_readw = x##_read16; \
+	(i915)->uncore.funcs.mmio_readl = x##_read32; \
+	(i915)->uncore.funcs.mmio_readq = x##_read64; \
 } while (0)
 
 
@@ -1190,11 +1182,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 			       FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
-		if (!IS_CHERRYVIEW(dev_priv))
-			dev_priv->uncore.funcs.force_wake_put =
-				fw_domains_put_with_fifo;
-		else
-			dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
+		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA,
@@ -1202,11 +1190,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		dev_priv->uncore.funcs.force_wake_get =
 			fw_domains_get_with_thread_status;
-		if (IS_HASWELL(dev_priv))
-			dev_priv->uncore.funcs.force_wake_put =
-				fw_domains_put_with_fifo;
-		else
-			dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
+		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
 	} else if (IS_IVYBRIDGE(dev_priv)) {
@@ -1223,8 +1207,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 		 */
 		dev_priv->uncore.funcs.force_wake_get =
 			fw_domains_get_with_thread_status;
-		dev_priv->uncore.funcs.force_wake_put =
-			fw_domains_put_with_fifo;
+		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 
 		/* We need to init first for ECOBUS access and then
 		 * determine later if we want to reinit, in case of MT access is
@@ -1242,7 +1225,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 		spin_lock_irq(&dev_priv->uncore.lock);
 		fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER);
 		ecobus = __raw_i915_read32(dev_priv, ECOBUS);
-		fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER);
+		fw_domains_put(dev_priv, FORCEWAKE_RENDER);
 		spin_unlock_irq(&dev_priv->uncore.lock);
 
 		if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
@@ -1254,8 +1237,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	} else if (IS_GEN6(dev_priv)) {
 		dev_priv->uncore.funcs.force_wake_get =
 			fw_domains_get_with_thread_status;
-		dev_priv->uncore.funcs.force_wake_put =
-			fw_domains_put_with_fifo;
+		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE, FORCEWAKE_ACK);
 	}
@@ -1310,34 +1292,34 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 		i915_pmic_bus_access_notifier;
 
 	if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) {
-		ASSIGN_WRITE_MMIO_VFUNCS(gen2);
-		ASSIGN_READ_MMIO_VFUNCS(gen2);
+		ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen2);
+		ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen2);
 	} else if (IS_GEN5(dev_priv)) {
-		ASSIGN_WRITE_MMIO_VFUNCS(gen5);
-		ASSIGN_READ_MMIO_VFUNCS(gen5);
+		ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen5);
+		ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen5);
 	} else if (IS_GEN(dev_priv, 6, 7)) {
-		ASSIGN_WRITE_MMIO_VFUNCS(gen6);
+		ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen6);
 
 		if (IS_VALLEYVIEW(dev_priv)) {
 			ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges);
-			ASSIGN_READ_MMIO_VFUNCS(fwtable);
+			ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
 		} else {
-			ASSIGN_READ_MMIO_VFUNCS(gen6);
+			ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
 		}
 	} else if (IS_GEN8(dev_priv)) {
 		if (IS_CHERRYVIEW(dev_priv)) {
 			ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges);
-			ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
-			ASSIGN_READ_MMIO_VFUNCS(fwtable);
+			ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
+			ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
 
 		} else {
-			ASSIGN_WRITE_MMIO_VFUNCS(gen8);
-			ASSIGN_READ_MMIO_VFUNCS(gen6);
+			ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8);
+			ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
 		}
 	} else {
 		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
-		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
-		ASSIGN_READ_MMIO_VFUNCS(fwtable);
+		ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
+		ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
 		if (HAS_DECOUPLED_MMIO(dev_priv)) {
 			dev_priv->uncore.funcs.mmio_readl =
 						gen9_decoupled_read32;
@@ -1353,8 +1335,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 
 	i915_check_and_clear_faults(dev_priv);
 }
-#undef ASSIGN_WRITE_MMIO_VFUNCS
-#undef ASSIGN_READ_MMIO_VFUNCS
 
 void intel_uncore_fini(struct drm_i915_private *dev_priv)
 {
@@ -1534,7 +1514,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
 	 */
 	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
 
-	/* Spin waiting for the device to ack the reset requests */
+	/* Wait for the device to ack the reset requests */
 	return intel_wait_for_register_fw(dev_priv,
 					  GEN6_GDRST, hw_domain_mask, 0,
 					  500);
@@ -1585,19 +1565,23 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
 }
 
 /**
- * intel_wait_for_register_fw - wait until register matches expected state
+ * __intel_wait_for_register_fw - wait until register matches expected state
  * @dev_priv: the i915 device
  * @reg: the register to read
  * @mask: mask to apply to register value
  * @value: expected value
- * @timeout_ms: timeout in millisecond
+ * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait
+ * @slow_timeout_ms: slow timeout in millisecond
+ * @out_value: optional placeholder to hold register value
  *
  * This routine waits until the target register @reg contains the expected
  * @value after applying the @mask, i.e. it waits until ::
  *
  *     (I915_READ_FW(reg) & mask) == value
  *
- * Otherwise, the wait will timeout after @timeout_ms milliseconds.
+ * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
+ * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
+ * must not be larger than 20,000 microseconds.
  *
  * Note that this routine assumes the caller holds forcewake asserted, it is
  * not suitable for very long waits. See intel_wait_for_register() if you
@@ -1606,16 +1590,31 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
  *
  * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
  */
-int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
-			       i915_reg_t reg,
-			       const u32 mask,
-			       const u32 value,
-			       const unsigned long timeout_ms)
-{
-#define done ((I915_READ_FW(reg) & mask) == value)
-	int ret = wait_for_us(done, 2);
+int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
+				 i915_reg_t reg,
+				 u32 mask,
+				 u32 value,
+				 unsigned int fast_timeout_us,
+				 unsigned int slow_timeout_ms,
+				 u32 *out_value)
+{
+	u32 reg_value;
+#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
+	int ret;
+
+	/* Catch any overuse of this function */
+	might_sleep_if(slow_timeout_ms);
+	GEM_BUG_ON(fast_timeout_us > 20000);
+
+	ret = -ETIMEDOUT;
+	if (fast_timeout_us && fast_timeout_us <= 20000)
+		ret = _wait_for_atomic(done, fast_timeout_us, 0);
 	if (ret)
-		ret = wait_for(done, timeout_ms);
+		ret = wait_for(done, slow_timeout_ms);
+
+	if (out_value)
+		*out_value = reg_value;
+
 	return ret;
 #undef done
 }
@@ -1639,18 +1638,26 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
  */
 int intel_wait_for_register(struct drm_i915_private *dev_priv,
 			    i915_reg_t reg,
-			    const u32 mask,
-			    const u32 value,
-			    const unsigned long timeout_ms)
+			    u32 mask,
+			    u32 value,
+			    unsigned int timeout_ms)
 {
-
 	unsigned fw =
 		intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
 	int ret;
 
-	intel_uncore_forcewake_get(dev_priv, fw);
-	ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2);
-	intel_uncore_forcewake_put(dev_priv, fw);
+	might_sleep();
+
+	spin_lock_irq(&dev_priv->uncore.lock);
+	intel_uncore_forcewake_get__locked(dev_priv, fw);
+
+	ret = __intel_wait_for_register_fw(dev_priv,
+					   reg, mask, value,
+					   2, 0, NULL);
+
+	intel_uncore_forcewake_put__locked(dev_priv, fw);
+	spin_unlock_irq(&dev_priv->uncore.lock);
+
 	if (ret)
 		ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
 			       timeout_ms);
@@ -1658,7 +1665,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
-static int gen8_request_engine_reset(struct intel_engine_cs *engine)
+static int gen8_reset_engine_start(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	int ret;
@@ -1677,7 +1684,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine)
+static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
@@ -1692,14 +1699,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
 	unsigned int tmp;
 
 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		if (gen8_request_engine_reset(engine))
+		if (gen8_reset_engine_start(engine))
 			goto not_ready;
 
 	return gen6_reset_engines(dev_priv, engine_mask);
 
 not_ready:
 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		gen8_unrequest_engine_reset(engine);
+		gen8_reset_engine_cancel(engine);
 
 	return -EIO;
 }
@@ -1754,17 +1761,12 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
 int intel_guc_reset(struct drm_i915_private *dev_priv)
 {
 	int ret;
-	unsigned long irqflags;
 
 	if (!HAS_GUC(dev_priv))
 		return -EINVAL;
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
 	ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
 	return ret;
@@ -1873,5 +1875,6 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_uncore.c"
 #include "selftests/intel_uncore.c"
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index f08d0179b3df..95d4aebc0181 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj,
 	typeof(v) *map;
 	int err;
 
-	/* XXX GTT write followed by WC write go missing */
-	i915_gem_object_flush_gtt_write_domain(obj);
-
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	err = i915_gem_object_set_to_wc_domain(obj, true);
 	if (err)
 		return err;
 
@@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj,
 	typeof(v) map;
 	int err;
 
-	/* XXX WC write followed by GTT write go missing */
-	i915_gem_object_flush_gtt_write_domain(obj);
-
-	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	err = i915_gem_object_set_to_wc_domain(obj, false);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 67d82bf1407f..8f011c447e41 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (offset >= obj->base.size)
 			continue;
 
-		i915_gem_object_flush_gtt_write_domain(obj);
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
@@ -545,7 +545,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		}
 
 		mutex_lock(&i915->drm.struct_mutex);
+		intel_runtime_pm_get(i915);
 		err = make_obj_busy(obj);
+		intel_runtime_pm_put(i915);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 98b7aac41eec..6664cb2eb0b8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	err = i915_gem_object_set_to_wc_domain(obj, true);
 	if (err)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
new file mode 100644
index 000000000000..6df00cc02c12
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+#include "mock_timeline.h"
+
+struct __igt_sync {
+	const char *name;
+	u32 seqno;
+	bool expected;
+	bool set;
+};
+
+static int __igt_sync(struct intel_timeline *tl,
+		      u64 ctx,
+		      const struct __igt_sync *p,
+		      const char *name)
+{
+	int ret;
+
+	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
+		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
+		       name, p->name, ctx, p->seqno, yesno(p->expected));
+		return -EINVAL;
+	}
+
+	if (p->set) {
+		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int igt_sync(void *arg)
+{
+	const struct __igt_sync pass[] = {
+		{ "unset", 0, false, false },
+		{ "new", 0, false, true },
+		{ "0a", 0, true, true },
+		{ "1a", 1, false, true },
+		{ "1b", 1, true, true },
+		{ "0b", 0, true, false },
+		{ "2a", 2, false, true },
+		{ "4", 4, false, true },
+		{ "INT_MAX", INT_MAX, false, true },
+		{ "INT_MAX-1", INT_MAX-1, true, false },
+		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
+		{ "INT_MAX", INT_MAX, true, false },
+		{ "UINT_MAX", UINT_MAX, false, true },
+		{ "wrap", 0, false, true },
+		{ "unwrap", UINT_MAX, true, false },
+		{},
+	}, *p;
+	struct intel_timeline *tl;
+	int order, offset;
+	int ret;
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	for (p = pass; p->name; p++) {
+		for (order = 1; order < 64; order++) {
+			for (offset = -1; offset <= (order > 1); offset++) {
+				u64 ctx = BIT_ULL(order) + offset;
+
+				ret = __igt_sync(tl, ctx, p, "1");
+				if (ret)
+					goto out;
+			}
+		}
+	}
+	mock_timeline_destroy(tl);
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	for (order = 1; order < 64; order++) {
+		for (offset = -1; offset <= (order > 1); offset++) {
+			u64 ctx = BIT_ULL(order) + offset;
+
+			for (p = pass; p->name; p++) {
+				ret = __igt_sync(tl, ctx, p, "2");
+				if (ret)
+					goto out;
+			}
+		}
+	}
+
+out:
+	mock_timeline_destroy(tl);
+	return ret;
+}
+
+static unsigned int random_engine(struct rnd_state *rnd)
+{
+	return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
+}
+
+static int bench_sync(void *arg)
+{
+#define M (1 << 20)
+	struct rnd_state prng;
+	struct intel_timeline *tl;
+	unsigned long end_time, count;
+	u64 prng32_1M;
+	ktime_t kt;
+	int order, last_order;
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	/* Lookups from cache are very fast and so the random number generation
+	 * and the loop itself become a significant factor in the per-iteration
+	 * timings. We measure the prng overhead (scaled by M, using div64_ul()
+	 * as an open-coded 64b '/' does not link on 32b) and subtract it below.
+	 */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		u32 x;
+
+		/* Make sure the compiler doesn't optimise away the prng call */
+		WRITE_ONCE(x, prandom_u32_state(&prng));
+
+		count++;
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
+		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+	prng32_1M = div64_ul(ktime_to_ns(kt) * M, count);
+
+	/* Benchmark (only) setting random context ids */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		u64 id = i915_prandom_u64_state(&prng);
+
+		__intel_timeline_sync_set(tl, id, 0);
+		count++;
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M);
+	pr_info("%s: %lu random insertions, %lluns/insert\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	/* Benchmark looking up the exact same context ids as we just set */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	end_time = count;
+	kt = ktime_get();
+	while (end_time--) {
+		u64 id = i915_prandom_u64_state(&prng);
+
+		if (!__intel_timeline_sync_is_later(tl, id, 0)) {
+			mock_timeline_destroy(tl);
+			pr_err("Lookup of %llu failed\n", id);
+			return -EINVAL;
+		}
+	}
+	kt = ktime_sub(ktime_get(), kt);
+	kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M);
+	pr_info("%s: %lu random lookups, %lluns/lookup\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	mock_timeline_destroy(tl);
+	cond_resched();
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	/* Benchmark setting the first N (in order) contexts */
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		__intel_timeline_sync_set(tl, count++, 0);
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	/* Benchmark looking up the exact same context ids as we just set */
+	end_time = count;
+	kt = ktime_get();
+	while (end_time--) {
+		if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
+			pr_err("Lookup of %lu failed\n", end_time);
+			mock_timeline_destroy(tl);
+			return -EINVAL;
+		}
+	}
+	kt = ktime_sub(ktime_get(), kt);
+	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	mock_timeline_destroy(tl);
+	cond_resched();
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	/* Benchmark searching for a random context id and maybe changing it */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		u32 id = random_engine(&prng);
+		u32 seqno = prandom_u32_state(&prng);
+
+		if (!__intel_timeline_sync_is_later(tl, id, seqno))
+			__intel_timeline_sync_set(tl, id, seqno);
+
+		count++;
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M);
+	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+	mock_timeline_destroy(tl);
+	cond_resched();
+
+	/* Benchmark searching for a known context id and changing the seqno */
+	for (last_order = 1, order = 1; order < 32;
+	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
+		unsigned int mask = BIT(order) - 1;
+
+		tl = mock_timeline(0);
+		if (!tl)
+			return -ENOMEM;
+
+		count = 0;
+		kt = ktime_get();
+		end_time = jiffies + HZ/10;
+		do {
+			/* Without assuming too many details of the underlying
+			 * implementation, try to identify its phase-changes
+			 * (if any)!
+			 */
+			u64 id = (u64)(count & mask) << order;
+
+			__intel_timeline_sync_is_later(tl, id, 0);
+			__intel_timeline_sync_set(tl, id, 0);
+
+			count++;
+		} while (!time_after(jiffies, end_time));
+		kt = ktime_sub(ktime_get(), kt);
+		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
+			__func__, count, order,
+			(long long)div64_ul(ktime_to_ns(kt), count));
+		mock_timeline_destroy(tl);
+		cond_resched();
+	}
+
+	return 0;
+#undef M
+}
+
+int i915_gem_timeline_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_sync),
+		SUBTEST(bench_sync),
+	};
+
+	return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index be9a9ebf5692..76c1f149a0a0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -10,8 +10,10 @@
  */
 selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
 selftest(scatterlist, scatterlist_mock_selftests)
+selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
+selftest(timelines, i915_gem_timeline_mock_selftests)
 selftest(requests, i915_gem_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c
index c17c83c30637..d044bf9a6feb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.c
+++ b/drivers/gpu/drm/i915/selftests/i915_random.c
@@ -30,6 +30,17 @@
 
 #include "i915_random.h"
 
+u64 i915_prandom_u64_state(struct rnd_state *rnd)
+{
+	u64 x;
+
+	x = prandom_u32_state(rnd);
+	x <<= 32;
+	x |= prandom_u32_state(rnd);
+
+	return x;
+}
+
 static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
 {
 	return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h
index b9c334ce6cd9..6c9379871384 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.h
+++ b/drivers/gpu/drm/i915/selftests/i915_random.h
@@ -41,6 +41,8 @@
 #define I915_RND_SUBSTATE(name__, parent__) \
 	struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__)))
 
+u64 i915_prandom_u64_state(struct rnd_state *rnd);
+
 unsigned int *i915_random_order(unsigned int count,
 				struct rnd_state *state);
 void i915_random_reorder(unsigned int *order,
diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
new file mode 100644
index 000000000000..bcab3d00a785
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+static char *
+__sync_print(struct i915_syncmap *p,
+	     char *buf, unsigned long *sz,
+	     unsigned int depth,
+	     unsigned int last,
+	     unsigned int idx)
+{
+	unsigned long len;
+	unsigned int i, X;
+
+	if (depth) {
+		unsigned int d;
+
+		for (d = 0; d < depth - 1; d++) {
+			if (last & BIT(depth - d - 1))
+				len = scnprintf(buf, *sz, "|   ");
+			else
+				len = scnprintf(buf, *sz, "    ");
+			buf += len;
+			*sz -= len;
+		}
+		len = scnprintf(buf, *sz, "%x-> ", idx);
+		buf += len;
+		*sz -= len;
+	}
+
+	/* We mark bits after the prefix as "X" */
+	len = scnprintf(buf, *sz, "0x%016llx", p->prefix << p->height << SHIFT);
+	buf += len;
+	*sz -= len;
+	X = (p->height + SHIFT) / 4;
+	scnprintf(buf - X, *sz + X, "%*s", X, "XXXXXXXXXXXXXXXXX");
+
+	if (!p->height) {
+		for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
+			len = scnprintf(buf, *sz, " %x:%x,",
+					i, __sync_seqno(p)[i]);
+			buf += len;
+			*sz -= len;
+		}
+		buf -= 1;
+		*sz += 1;
+	}
+
+	len = scnprintf(buf, *sz, "\n");
+	buf += len;
+	*sz -= len;
+
+	if (p->height) {
+		for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
+			buf = __sync_print(__sync_child(p)[i], buf, sz,
+					   depth + 1,
+					   last << 1 | !!(p->bitmap >> (i + 1)),
+					   i);
+		}
+	}
+
+	return buf;
+}
+
+static bool
+i915_syncmap_print_to_buf(struct i915_syncmap *p, char *buf, unsigned long sz)
+{
+	if (!p)
+		return false;
+
+	while (p->parent)
+		p = p->parent;
+
+	__sync_print(p, buf, &sz, 0, 1, 0);
+	return true;
+}
+
+static int check_syncmap_free(struct i915_syncmap **sync)
+{
+	i915_syncmap_free(sync);
+	if (*sync) {
+		pr_err("sync not cleared after free\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dump_syncmap(struct i915_syncmap *sync, int err)
+{
+	char *buf;
+
+	if (!err)
+		return check_syncmap_free(&sync);
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		goto skip;
+
+	if (i915_syncmap_print_to_buf(sync, buf, PAGE_SIZE))
+		pr_err("%s", buf);
+
+	kfree(buf);
+
+skip:
+	i915_syncmap_free(&sync);
+	return err;
+}
+
+static int igt_syncmap_init(void *arg)
+{
+	struct i915_syncmap *sync = (void *)~0ul;
+
+	/*
+	 * Cursory check that we can initialise a random pointer and transform
+	 * it into the root pointer of a syncmap.
+	 */
+
+	i915_syncmap_init(&sync);
+	return check_syncmap_free(&sync);
+}
+
+static int check_seqno(struct i915_syncmap *leaf, unsigned int idx, u32 seqno)
+{
+	if (leaf->height) {
+		pr_err("%s: not a leaf, height is %d\n",
+		       __func__, leaf->height);
+		return -EINVAL;
+	}
+
+	if (__sync_seqno(leaf)[idx] != seqno) {
+		pr_err("%s: seqno[%d], found %x, expected %x\n",
+		       __func__, idx, __sync_seqno(leaf)[idx], seqno);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_one(struct i915_syncmap **sync, u64 context, u32 seqno)
+{
+	int err;
+
+	err = i915_syncmap_set(sync, context, seqno);
+	if (err)
+		return err;
+
+	if ((*sync)->height) {
+		pr_err("Inserting first context=%llx did not return leaf (height=%d, prefix=%llx\n",
+		       context, (*sync)->height, (*sync)->prefix);
+		return -EINVAL;
+	}
+
+	if ((*sync)->parent) {
+		pr_err("Inserting first context=%llx created branches!\n",
+		       context);
+		return -EINVAL;
+	}
+
+	if (hweight32((*sync)->bitmap) != 1) {
+		pr_err("First bitmap does not contain a single entry, found %x (count=%d)!\n",
+		       (*sync)->bitmap, hweight32((*sync)->bitmap));
+		return -EINVAL;
+	}
+
+	err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno);
+	if (err)
+		return err;
+
+	if (!i915_syncmap_is_later(sync, context, seqno)) {
+		pr_err("Lookup of first context=%llx/seqno=%x failed!\n",
+		       context, seqno);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igt_syncmap_one(void *arg)
+{
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	struct i915_syncmap *sync;
+	unsigned long max = 1;
+	int err;
+
+	/*
+	 * Check that inserting a new id, creates a leaf and only that leaf.
+	 */
+
+	i915_syncmap_init(&sync);
+
+	do {
+		u64 context = i915_prandom_u64_state(&prng);
+		unsigned long loop;
+
+		err = check_syncmap_free(&sync);
+		if (err)
+			goto out;
+
+		for (loop = 0; loop <= max; loop++) {
+			err = check_one(&sync, context,
+					prandom_u32_state(&prng));
+			if (err)
+				goto out;
+		}
+		max++;
+	} while (!__igt_timeout(end_time, NULL));
+	pr_debug("%s: Completed %lu single insertions\n",
+		 __func__, max * (max - 1) / 2);
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int check_leaf(struct i915_syncmap **sync, u64 context, u32 seqno)
+{
+	int err;
+
+	err = i915_syncmap_set(sync, context, seqno);
+	if (err)
+		return err;
+
+	if ((*sync)->height) {
+		pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n",
+		       context, (*sync)->height, (*sync)->prefix);
+		return -EINVAL;
+	}
+
+	if (hweight32((*sync)->bitmap) != 1) {
+		pr_err("First entry into leaf (context=%llx) does not contain a single entry, found %x (count=%d)!\n",
+		       context, (*sync)->bitmap, hweight32((*sync)->bitmap));
+		return -EINVAL;
+	}
+
+	err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno);
+	if (err)
+		return err;
+
+	if (!i915_syncmap_is_later(sync, context, seqno)) {
+		pr_err("Lookup of first entry context=%llx/seqno=%x failed!\n",
+		       context, seqno);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igt_syncmap_join_above(void *arg)
+{
+	struct i915_syncmap *sync;
+	unsigned int pass, order;
+	int err;
+
+	i915_syncmap_init(&sync);
+
+	/*
+	 * When we have a new id that doesn't fit inside the existing tree,
+	 * we need to add a new layer above.
+	 *
+	 * 1: 0x00000001
+	 * 2: 0x00000010
+	 * 3: 0x00000100
+	 * 4: 0x00001000
+	 * ...
+	 * Each pass the common prefix shrinks and we have to insert a join.
+	 * Each join will only contain two branches, the latest of which
+	 * is always a leaf.
+	 *
+	 * If we then reuse the same set of contexts, we expect to build an
+	 * identical tree.
+	 */
+	for (pass = 0; pass < 3; pass++) {
+		for (order = 0; order < 64; order += SHIFT) {
+			u64 context = BIT_ULL(order);
+			struct i915_syncmap *join;
+
+			err = check_leaf(&sync, context, 0);
+			if (err)
+				goto out;
+
+			join = sync->parent;
+			if (!join) /* very first insert will have no parents */
+				continue;
+
+			if (!join->height) {
+				pr_err("Parent with no height!\n");
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (hweight32(join->bitmap) != 2) {
+				pr_err("Join does not have 2 children: %x (%d)\n",
+				       join->bitmap, hweight32(join->bitmap));
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (__sync_child(join)[__sync_branch_idx(join, context)] != sync) {
+				pr_err("Leaf misplaced in parent!\n");
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	}
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_join_below(void *arg)
+{
+	struct i915_syncmap *sync;
+	unsigned int step, order, idx;
+	int err;
+
+	i915_syncmap_init(&sync);
+
+	/*
+	 * Check that we can split a compacted branch by replacing it with
+	 * a join.
+	 */
+	for (step = 0; step < KSYNCMAP; step++) {
+		for (order = 64 - SHIFT; order > 0; order -= SHIFT) {
+			u64 context = step * BIT_ULL(order);
+
+			err = i915_syncmap_set(&sync, context, 0);
+			if (err)
+				goto out;
+
+			if (sync->height) {
+				pr_err("Inserting context=%llx (order=%d, step=%d) did not return leaf (height=%d, prefix=%llx\n",
+				       context, order, step, sync->height, sync->prefix);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	}
+
+	for (step = 0; step < KSYNCMAP; step++) {
+		for (order = SHIFT; order < 64; order += SHIFT) {
+			u64 context = step * BIT_ULL(order);
+
+			if (!i915_syncmap_is_later(&sync, context, 0)) {
+				pr_err("1: context %llx (order=%d, step=%d) not found\n",
+				       context, order, step);
+				err = -EINVAL;
+				goto out;
+			}
+
+			for (idx = 1; idx < KSYNCMAP; idx++) {
+				if (i915_syncmap_is_later(&sync, context + idx, 0)) {
+					pr_err("1: context %llx (order=%d, step=%d) should not exist\n",
+					       context + idx, order, step);
+					err = -EINVAL;
+					goto out;
+				}
+			}
+		}
+	}
+
+	for (order = SHIFT; order < 64; order += SHIFT) {
+		for (step = 0; step < KSYNCMAP; step++) {
+			u64 context = step * BIT_ULL(order);
+
+			if (!i915_syncmap_is_later(&sync, context, 0)) {
+				pr_err("2: context %llx (order=%d, step=%d) not found\n",
+				       context, order, step);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	}
+
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_neighbours(void *arg)
+{
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	struct i915_syncmap *sync;
+	int err;
+
+	/*
+	 * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP
+	 * neighbouring ids, they all fit into the same leaf.
+	 */
+
+	i915_syncmap_init(&sync);
+	do {
+		u64 context = i915_prandom_u64_state(&prng) & ~MASK;
+		unsigned int idx;
+
+		if (i915_syncmap_is_later(&sync, context, 0)) /* Skip repeats */
+			continue;
+
+		for (idx = 0; idx < KSYNCMAP; idx++) {
+			err = i915_syncmap_set(&sync, context + idx, 0);
+			if (err)
+				goto out;
+
+			if (sync->height) {
+				pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n",
+				       context, sync->height, sync->prefix);
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (sync->bitmap != BIT(idx + 1) - 1) {
+				pr_err("Inserting neighbouring context=0x%llx+%d, did not fit into the same leaf bitmap=%x (%d), expected %lx (%d)\n",
+				       context, idx,
+				       sync->bitmap, hweight32(sync->bitmap),
+				       BIT(idx + 1) - 1, idx + 1);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	} while (!__igt_timeout(end_time, NULL));
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_compact(void *arg)
+{
+	struct i915_syncmap *sync;
+	unsigned int idx, order;
+	int err;
+
+	i915_syncmap_init(&sync);
+
+	/*
+	 * The syncmap are "space efficient" compressed radix trees - any
+	 * branch with only one child is skipped and replaced by the child.
+	 *
+	 * If we construct a tree with ids that are neighbouring at a non-zero
+	 * height, we form a join but each child of that join is directly a
+	 * leaf holding the single id.
+	 */
+	for (order = SHIFT; order < 64; order += SHIFT) {
+		err = check_syncmap_free(&sync);
+		if (err)
+			goto out;
+
+		/* Create neighbours in the parent */
+		for (idx = 0; idx < KSYNCMAP; idx++) {
+			u64 context = idx * BIT_ULL(order) + idx; /* checked below: child idx holds only bit idx */
+
+			err = i915_syncmap_set(&sync, context, 0);
+			if (err)
+				goto out;
+
+			if (sync->height) {
+				pr_err("Inserting context=%llx (order=%d, idx=%d) did not return leaf (height=%d, prefix=%llx)\n",
+				       context, order, idx,
+				       sync->height, sync->prefix);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		sync = sync->parent; /* step up from the last leaf to what should be the join */
+		if (sync->parent) {
+			pr_err("Parent (join) of last leaf was not the sync!\n");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (sync->height != order) {
+			pr_err("Join does not have the expected height, found %d, expected %d\n",
+			       sync->height, order);
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (sync->bitmap != BIT(KSYNCMAP) - 1) {
+			pr_err("Join is not full!, found %x (%d) expected %lx (%d)\n",
+			       sync->bitmap, hweight32(sync->bitmap),
+			       BIT(KSYNCMAP) - 1, KSYNCMAP);
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* Each of our children should be a leaf */
+		for (idx = 0; idx < KSYNCMAP; idx++) {
+			struct i915_syncmap *leaf = __sync_child(sync)[idx];
+
+			if (leaf->height) {
+				pr_err("Child %d is not a leaf!\n", idx);
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (leaf->parent != sync) {
+				pr_err("Child %d is not attached to us!\n",
+				       idx);
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (!is_power_of_2(leaf->bitmap)) {
+				pr_err("Child %d holds more than one id, found %x (%d)\n",
+				       idx, leaf->bitmap, hweight32(leaf->bitmap));
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (leaf->bitmap != BIT(idx)) {
+				pr_err("Child %d has wrong seqno idx, found %d, expected %d\n",
+				       idx, ilog2(leaf->bitmap), idx);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	}
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_random(void *arg)
+{
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	struct i915_syncmap *sync;
+	unsigned long count, phase, i;
+	u32 seqno;
+	int err;
+
+	i915_syncmap_init(&sync);
+
+	/*
+	 * Having tried to test the individual operations within i915_syncmap,
+	 * run a smoketest exploring the entire u64 space with random
+	 * insertions.
+	 */
+	/* Phase 1: insert random contexts at seqno 0 until jiffies passes the deadline */
+	count = 0;
+	phase = jiffies + HZ/100 + 1;
+	do {
+		u64 context = i915_prandom_u64_state(&prng);
+
+		err = i915_syncmap_set(&sync, context, 0);
+		if (err)
+			goto out;
+
+		count++;
+	} while (!time_after(jiffies, phase));
+	seqno = 0;
+	/* Phase 2: phase is reused as a pass counter; loop until the IGT timeout */
+	phase = 0;
+	do {
+		I915_RND_STATE(ctx); /* NOTE(review): presumably reseeds so ctx replays the contexts inserted above - confirm */
+		u32 last_seqno = seqno;
+		bool expect;
+
+		seqno = prandom_u32_state(&prng);
+		expect = seqno_later(last_seqno, seqno); /* answer a context last set to last_seqno should give */
+
+		for (i = 0; i < count; i++) {
+			u64 context = i915_prandom_u64_state(&ctx);
+
+			if (i915_syncmap_is_later(&sync, context, seqno) != expect) {
+				pr_err("context=%llu, last=%u this=%u did not match expectation (%d)\n",
+				       context, last_seqno, seqno, expect);
+				err = -EINVAL;
+				goto out;
+			}
+
+			err = i915_syncmap_set(&sync, context, seqno); /* becomes last_seqno on the next pass */
+			if (err)
+				goto out;
+		}
+
+		phase++;
+	} while (!__igt_timeout(end_time, NULL));
+	pr_debug("Completed %lu passes, each of %lu contexts\n", phase, count);
+out:
+	return dump_syncmap(sync, err);
+}
+
+int i915_syncmap_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_syncmap_init),
+		SUBTEST(igt_syncmap_one),
+		SUBTEST(igt_syncmap_join_above),
+		SUBTEST(igt_syncmap_join_below),
+		SUBTEST(igt_syncmap_neighbours),
+		SUBTEST(igt_syncmap_compact),
+		SUBTEST(igt_syncmap_random),
+	};
+	/* NOTE(review): NULL second argument - presumably no shared data for the subtests; confirm */
+	return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 0ad624a1db90..b8e53bdc3cc4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -112,7 +112,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	if (!ring)
 		return NULL;
 
-	ring->engine = engine;
 	ring->size = sz;
 	ring->effective_size = sz;
 	ring->vaddr = (void *)(ring + 1);
@@ -141,7 +140,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 
 	/* minimal engine setup for requests */
 	engine->base.i915 = i915;
-	engine->base.name = name;
+	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
 	engine->base.id = id++;
 	engine->base.status_page.page_addr = (void *)(engine + 1);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 6a8258eacdcb..f321bdfe0b5b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -30,6 +30,7 @@
 #include "mock_gem_device.h"
 #include "mock_gem_object.h"
 #include "mock_gtt.h"
+#include "mock_uncore.h"
 
 void mock_device_flush(struct drm_i915_private *i915)
 {
@@ -143,6 +144,7 @@ struct drm_i915_private *mock_gem_device(void)
 	mkwrite_device_info(i915)->gen = -1;
 
 	spin_lock_init(&i915->mm.object_stat_lock);
+	mock_uncore_init(i915);
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
new file mode 100644
index 000000000000..47b1f47c5812
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "mock_timeline.h"
+
+struct intel_timeline *mock_timeline(u64 context)
+{
+	static struct lock_class_key class; /* static: the same key is passed on every call */
+	struct intel_timeline *tl;
+
+	tl = kzalloc(sizeof(*tl), GFP_KERNEL);
+	if (!tl)
+		return NULL;
+	/* NOTE(review): NULL second argument presumably means "no parent timeline" - confirm */
+	__intel_timeline_init(tl, NULL, context, &class, "mock");
+
+	return tl; /* released by mock_timeline_destroy() */
+}
+
+void mock_timeline_destroy(struct intel_timeline *tl)
+{
+	__intel_timeline_fini(tl); /* counterpart of the __intel_timeline_init() in mock_timeline() */
+	kfree(tl); /* tl was kzalloc()ed by mock_timeline() */
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h
new file mode 100644
index 000000000000..c27ff4639b8b
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MOCK_TIMELINE__
+#define __MOCK_TIMELINE__
+
+#include "../i915_gem_timeline.h"
+/* mock_timeline() returns NULL when its allocation fails; free the result with mock_timeline_destroy(). */
+struct intel_timeline *mock_timeline(u64 context);
+void mock_timeline_destroy(struct intel_timeline *tl);
+
+#endif /* !__MOCK_TIMELINE__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c
new file mode 100644
index 000000000000..8ef14c7e5e38
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "mock_uncore.h"
+
+#define __nop_write(x) \
+static void \
+nop_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { }
+__nop_write(8)
+__nop_write(16)
+__nop_write(32)
+/* The write stubs above discard the value; the read stubs below always return 0. */
+#define __nop_read(x) \
+static u##x \
+nop_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { return 0; }
+__nop_read(8)
+__nop_read(16)
+__nop_read(32)
+__nop_read(64)
+/* NOTE(review): presumably installs the nop_* stubs as this device's MMIO accessors - confirm against the ASSIGN_* macros */
+void mock_uncore_init(struct drm_i915_private *i915)
+{
+	ASSIGN_WRITE_MMIO_VFUNCS(i915, nop);
+	ASSIGN_READ_MMIO_VFUNCS(i915, nop);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h
new file mode 100644
index 000000000000..d79aa3ca4d51
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MOCK_UNCORE_H
+#define __MOCK_UNCORE_H
+struct drm_i915_private; /* forward declaration keeps this header self-contained */
+void mock_uncore_init(struct drm_i915_private *i915);
+
+#endif /* !__MOCK_UNCORE_H */
diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h
index e9892b4c3af1..b6121c8fe539 100644
--- a/include/drm/intel_lpe_audio.h
+++ b/include/drm/intel_lpe_audio.h
@@ -31,20 +31,20 @@ struct platform_device;
 
 #define HDMI_MAX_ELD_BYTES	128
 
-struct intel_hdmi_lpe_audio_eld {
-	int port_id;
-	int pipe_id;
-	unsigned char eld_data[HDMI_MAX_ELD_BYTES];
+struct intel_hdmi_lpe_audio_port_pdata {
+	u8 eld[HDMI_MAX_ELD_BYTES]; /* copied into the audio driver's per-port context on hotplug */
+	int port;
+	int pipe; /* negative while nothing is connected (handled as hot-unplug) */
+	int ls_clock; /* link rate when dp_output is set, TMDS clock otherwise */
+	bool dp_output;
 };
 
 struct intel_hdmi_lpe_audio_pdata {
-	bool notify_pending;
-	int tmds_clock_speed;
-	bool hdmi_connected;
-	bool dp_output;
-	int link_rate;
-	struct intel_hdmi_lpe_audio_eld eld;
-	void (*notify_audio_lpe)(struct platform_device *pdev);
+	struct intel_hdmi_lpe_audio_port_pdata port[3]; /* for ports B,C,D */
+	int num_ports; /* the audio driver creates one PCM device per port */
+	int num_pipes; /* pipes polled by the audio driver's IRQ handler */
+	/* Set by the audio driver at probe and cleared on free, both under lpe_audio_slock */
+	void (*notify_audio_lpe)(struct platform_device *pdev, int port); /* port: 0==B,1==C,2==D */
 	spinlock_t lpe_audio_slock;
 };
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3554495bef13..f24a80d2d42e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE	 44
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
+ * user specified buffers for post-mortem debugging of GPU hangs. See
+ * EXEC_OBJECT_CAPTURE.
+ */
+#define I915_PARAM_HAS_EXEC_CAPTURE	 45
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -666,6 +672,8 @@ struct drm_i915_gem_relocation_entry {
 #define I915_GEM_DOMAIN_VERTEX		0x00000020
 /** GTT domain - aperture and scanout */
 #define I915_GEM_DOMAIN_GTT		0x00000040
+/** WC domain - uncached access */
+#define I915_GEM_DOMAIN_WC		0x00000080
 /** @} */
 
 struct drm_i915_gem_exec_object {
@@ -773,8 +781,15 @@ struct drm_i915_gem_exec_object2 {
  * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
  */
 #define EXEC_OBJECT_ASYNC		(1<<6)
+/* Request that the contents of this execobject be copied into the error
+ * state upon a GPU hang involving this batch for post-mortem debugging.
+ * These buffers are recorded in no particular order as "user" in
+ * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see
+ * if the kernel supports this flag.
+ */
+#define EXEC_OBJECT_CAPTURE		(1<<7)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1)
 	__u64 flags;
 
 	union {
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index c505b019e09c..909391d5270c 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -42,6 +42,11 @@
 #include <drm/intel_lpe_audio.h>
 #include "intel_hdmi_audio.h"
 
+#define for_each_pipe(card_ctx, pipe) \
+	for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++) /* pipe = 0 .. num_pipes - 1 */
+#define for_each_port(card_ctx, port) \
+	for ((port) = 0; (port) < (card_ctx)->num_ports; (port)++) /* port = 0 .. num_ports - 1 */
+
 /*standard module options for ALSA. This module supports only one card*/
 static int hdmi_card_index = SNDRV_DEFAULT_IDX1;
 static char *hdmi_card_id = SNDRV_DEFAULT_STR1;
@@ -189,15 +194,30 @@ static void had_substream_put(struct snd_intelhad *intelhaddata)
 	spin_unlock_irqrestore(&intelhaddata->had_spinlock, flags);
 }
 
+static u32 had_config_offset(int pipe)
+{
+	switch (pipe) { /* result is added to mmio_start by the raw register accessors */
+	default: /* any other pipe value falls through to pipe 0's offset */
+	case 0:
+		return AUDIO_HDMI_CONFIG_A;
+	case 1:
+		return AUDIO_HDMI_CONFIG_B;
+	case 2:
+		return AUDIO_HDMI_CONFIG_C;
+	}
+}
+
 /* Register access functions */
-static u32 had_read_register_raw(struct snd_intelhad *ctx, u32 reg)
+static u32 had_read_register_raw(struct snd_intelhad_card *card_ctx,
+				 int pipe, u32 reg) /* reg is relative to the pipe's config offset */
 {
-	return ioread32(ctx->mmio_start + ctx->had_config_offset + reg);
+	return ioread32(card_ctx->mmio_start + had_config_offset(pipe) + reg);
 }
 
-static void had_write_register_raw(struct snd_intelhad *ctx, u32 reg, u32 val)
+static void had_write_register_raw(struct snd_intelhad_card *card_ctx,
+				   int pipe, u32 reg, u32 val) /* reg is relative to the pipe's config offset */
 {
-	iowrite32(val, ctx->mmio_start + ctx->had_config_offset + reg);
+	iowrite32(val, card_ctx->mmio_start + had_config_offset(pipe) + reg);
 }
 
 static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val)
@@ -205,13 +225,13 @@ static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val)
 	if (!ctx->connected)
 		*val = 0;
 	else
-		*val = had_read_register_raw(ctx, reg);
+		*val = had_read_register_raw(ctx->card_ctx, ctx->pipe, reg);
 }
 
 static void had_write_register(struct snd_intelhad *ctx, u32 reg, u32 val)
 {
 	if (ctx->connected)
-		had_write_register_raw(ctx, reg, val);
+		had_write_register_raw(ctx->card_ctx, ctx->pipe, reg, val); /* skipped entirely while !connected */
 }
 
 /*
@@ -1358,6 +1378,9 @@ static void had_process_hot_plug(struct snd_intelhad *intelhaddata)
 		return;
 	}
 
+	/* Disable Audio */
+	had_enable_audio(intelhaddata, false);
+
 	intelhaddata->connected = true;
 	dev_dbg(intelhaddata->dev,
 		"%s @ %d:DEBUG PLUG/UNPLUG : HAD_DRV_CONNECTED\n",
@@ -1519,22 +1542,32 @@ static const struct snd_kcontrol_new had_controls[] = {
  */
 static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
 {
-	struct snd_intelhad *ctx = dev_id;
-	u32 audio_stat;
+	struct snd_intelhad_card *card_ctx = dev_id;
+	u32 audio_stat[3] = {};
+	int pipe, port;
+
+	for_each_pipe(card_ctx, pipe) {
+		/* use raw register access to ack IRQs even while disconnected */
+		audio_stat[pipe] = had_read_register_raw(card_ctx, pipe,
+							 AUD_HDMI_STATUS) &
+			(HDMI_AUDIO_UNDERRUN | HDMI_AUDIO_BUFFER_DONE);
+
+		if (audio_stat[pipe])
+			had_write_register_raw(card_ctx, pipe,
+					       AUD_HDMI_STATUS, audio_stat[pipe]);
+	}
 
-	/* use raw register access to ack IRQs even while disconnected */
-	audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS);
+	for_each_port(card_ctx, port) {
+		struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+		int pipe = ctx->pipe;
 
-	if (audio_stat & HDMI_AUDIO_UNDERRUN) {
-		had_write_register_raw(ctx, AUD_HDMI_STATUS,
-				       HDMI_AUDIO_UNDERRUN);
-		had_process_buffer_underrun(ctx);
-	}
+		if (pipe < 0)
+			continue;
 
-	if (audio_stat & HDMI_AUDIO_BUFFER_DONE) {
-		had_write_register_raw(ctx, AUD_HDMI_STATUS,
-				       HDMI_AUDIO_BUFFER_DONE);
-		had_process_buffer_done(ctx);
+		if (audio_stat[pipe] & HDMI_AUDIO_BUFFER_DONE)
+			had_process_buffer_done(ctx);
+		if (audio_stat[pipe] & HDMI_AUDIO_UNDERRUN)
+			had_process_buffer_underrun(ctx);
 	}
 
 	return IRQ_HANDLED;
@@ -1543,9 +1576,10 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
 /*
  * monitor plug/unplug notification from i915; just kick off the work
  */
-static void notify_audio_lpe(struct platform_device *pdev)
+static void notify_audio_lpe(struct platform_device *pdev, int port)
 {
-	struct snd_intelhad *ctx = platform_get_drvdata(pdev);
+	struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev);
+	struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; /* port is used as an index without a range check */
 
 	schedule_work(&ctx->hdmi_audio_wq);
 }
@@ -1556,47 +1590,51 @@ static void had_audio_wq(struct work_struct *work)
 	struct snd_intelhad *ctx =
 		container_of(work, struct snd_intelhad, hdmi_audio_wq);
 	struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data;
+	struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port];
 
 	pm_runtime_get_sync(ctx->dev);
 	mutex_lock(&ctx->mutex);
-	if (!pdata->hdmi_connected) {
-		dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n",
-			__func__);
+	if (ppdata->pipe < 0) {
+		dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n",
+			__func__, ctx->port);
+
 		memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */
+
+		ctx->dp_output = false;
+		ctx->tmds_clock_speed = 0;
+		ctx->link_rate = 0;
+
+		/* Shut down the stream */
 		had_process_hot_unplug(ctx);
-	} else {
-		struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld;
 
+		ctx->pipe = -1;
+	} else {
 		dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n",
-			__func__, eld->port_id,	pdata->tmds_clock_speed);
+			__func__, ctx->port, ppdata->ls_clock);
 
-		switch (eld->pipe_id) {
-		case 0:
-			ctx->had_config_offset = AUDIO_HDMI_CONFIG_A;
-			break;
-		case 1:
-			ctx->had_config_offset = AUDIO_HDMI_CONFIG_B;
-			break;
-		case 2:
-			ctx->had_config_offset = AUDIO_HDMI_CONFIG_C;
-			break;
-		default:
-			dev_dbg(ctx->dev, "Invalid pipe %d\n",
-				eld->pipe_id);
-			break;
-		}
-
-		memcpy(ctx->eld, eld->eld_data, sizeof(ctx->eld));
+		memcpy(ctx->eld, ppdata->eld, sizeof(ctx->eld));
 
-		ctx->dp_output = pdata->dp_output;
-		ctx->tmds_clock_speed = pdata->tmds_clock_speed;
-		ctx->link_rate = pdata->link_rate;
+		ctx->dp_output = ppdata->dp_output;
+		if (ctx->dp_output) {
+			ctx->tmds_clock_speed = 0;
+			ctx->link_rate = ppdata->ls_clock;
+		} else {
+			ctx->tmds_clock_speed = ppdata->ls_clock;
+			ctx->link_rate = 0;
+		}
 
+		/*
+		 * Shut down the stream before we change
+		 * the pipe assignment for this pcm device
+		 */
 		had_process_hot_plug(ctx);
 
-		/* Process mode change if stream is active */
+		ctx->pipe = ppdata->pipe;
+
+		/* Restart the stream if necessary */
 		had_process_mode_change(ctx);
 	}
+
 	mutex_unlock(&ctx->mutex);
 	pm_runtime_mark_last_busy(ctx->dev);
 	pm_runtime_put_autosuspend(ctx->dev);
@@ -1605,11 +1643,17 @@ static void had_audio_wq(struct work_struct *work)
 /*
  * Jack interface
  */
-static int had_create_jack(struct snd_intelhad *ctx)
+static int had_create_jack(struct snd_intelhad *ctx,
+			   struct snd_pcm *pcm)
 {
+	char hdmi_str[32];
 	int err;
 
-	err = snd_jack_new(ctx->card, "HDMI/DP", SND_JACK_AVOUT, &ctx->jack,
+	snprintf(hdmi_str, sizeof(hdmi_str),
+		 "HDMI/DP,pcm=%d", pcm->device);
+
+	err = snd_jack_new(ctx->card_ctx->card, hdmi_str,
+			   SND_JACK_AVOUT, &ctx->jack,
 			   true, false);
 	if (err < 0)
 		return err;
@@ -1623,13 +1667,18 @@ static int had_create_jack(struct snd_intelhad *ctx)
 
 static int hdmi_lpe_audio_runtime_suspend(struct device *dev)
 {
-	struct snd_intelhad *ctx = dev_get_drvdata(dev);
-	struct snd_pcm_substream *substream;
+	struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev);
+	int port;
 
-	substream = had_substream_get(ctx);
-	if (substream) {
-		snd_pcm_suspend(substream);
-		had_substream_put(ctx);
+	for_each_port(card_ctx, port) {
+		struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+		struct snd_pcm_substream *substream;
+
+		substream = had_substream_get(ctx);
+		if (substream) {
+			snd_pcm_suspend(substream);
+			had_substream_put(ctx);
+		}
 	}
 
 	return 0;
@@ -1637,12 +1686,12 @@ static int hdmi_lpe_audio_runtime_suspend(struct device *dev)
 
 static int __maybe_unused hdmi_lpe_audio_suspend(struct device *dev)
 {
-	struct snd_intelhad *ctx = dev_get_drvdata(dev);
+	struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); /* drvdata is the card context set in probe */
 	int err;
 
 	err = hdmi_lpe_audio_runtime_suspend(dev);
 	if (!err)
-		snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D3hot);
+		snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D3hot); /* only after the runtime suspend succeeded */
 	return err;
 }
 
@@ -1654,24 +1703,34 @@ static int hdmi_lpe_audio_runtime_resume(struct device *dev)
 
 static int __maybe_unused hdmi_lpe_audio_resume(struct device *dev)
 {
-	struct snd_intelhad *ctx = dev_get_drvdata(dev);
+	struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); /* drvdata is the card context set in probe */
 
 	hdmi_lpe_audio_runtime_resume(dev);
-	snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D0);
+	snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D0); /* the runtime resume result is not checked */
 	return 0;
 }
 
 /* release resources */
 static void hdmi_lpe_audio_free(struct snd_card *card)
 {
-	struct snd_intelhad *ctx = card->private_data;
+	struct snd_intelhad_card *card_ctx = card->private_data;
+	struct intel_hdmi_lpe_audio_pdata *pdata = card_ctx->dev->platform_data;
+	int port;
 
-	cancel_work_sync(&ctx->hdmi_audio_wq);
+	spin_lock_irq(&pdata->lpe_audio_slock);
+	pdata->notify_audio_lpe = NULL; /* unhook the notifier that schedules the per-port work */
+	spin_unlock_irq(&pdata->lpe_audio_slock);
 
-	if (ctx->mmio_start)
-		iounmap(ctx->mmio_start);
-	if (ctx->irq >= 0)
-		free_irq(ctx->irq, ctx);
+	for_each_port(card_ctx, port) {
+		struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+
+		cancel_work_sync(&ctx->hdmi_audio_wq);
+	}
+	/* Release the IRQ before unmapping: its handler accesses MMIO unconditionally */
+	if (card_ctx->irq >= 0)
+		free_irq(card_ctx->irq, card_ctx);
+	if (card_ctx->mmio_start)
+		iounmap(card_ctx->mmio_start);
 }
 
 /*
@@ -1683,12 +1742,12 @@ static void hdmi_lpe_audio_free(struct snd_card *card)
 static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 {
 	struct snd_card *card;
-	struct snd_intelhad *ctx;
+	struct snd_intelhad_card *card_ctx;
 	struct snd_pcm *pcm;
 	struct intel_hdmi_lpe_audio_pdata *pdata;
 	int irq;
 	struct resource *res_mmio;
-	int i, ret;
+	int port, ret;
 
 	pdata = pdev->dev.platform_data;
 	if (!pdata) {
@@ -1711,39 +1770,30 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
 	/* create a card instance with ALSA framework */
 	ret = snd_card_new(&pdev->dev, hdmi_card_index, hdmi_card_id,
-			   THIS_MODULE, sizeof(*ctx), &card);
+			   THIS_MODULE, sizeof(*card_ctx), &card);
 	if (ret)
 		return ret;
 
-	ctx = card->private_data;
-	spin_lock_init(&ctx->had_spinlock);
-	mutex_init(&ctx->mutex);
-	ctx->connected = false;
-	ctx->dev = &pdev->dev;
-	ctx->card = card;
-	ctx->aes_bits = SNDRV_PCM_DEFAULT_CON_SPDIF;
+	card_ctx = card->private_data;
+	card_ctx->dev = &pdev->dev;
+	card_ctx->card = card;
 	strcpy(card->driver, INTEL_HAD);
 	strcpy(card->shortname, "Intel HDMI/DP LPE Audio");
 	strcpy(card->longname, "Intel HDMI/DP LPE Audio");
 
-	ctx->irq = -1;
-	ctx->tmds_clock_speed = DIS_SAMPLE_RATE_148_5;
-	INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+	card_ctx->irq = -1;
 
 	card->private_free = hdmi_lpe_audio_free;
 
-	/* assume pipe A as default */
-	ctx->had_config_offset = AUDIO_HDMI_CONFIG_A;
-
-	platform_set_drvdata(pdev, ctx);
+	platform_set_drvdata(pdev, card_ctx);
 
 	dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n",
 		__func__, (unsigned int)res_mmio->start,
 		(unsigned int)res_mmio->end);
 
-	ctx->mmio_start = ioremap_nocache(res_mmio->start,
-					  (size_t)(resource_size(res_mmio)));
-	if (!ctx->mmio_start) {
+	card_ctx->mmio_start = ioremap_nocache(res_mmio->start,
+					       (size_t)(resource_size(res_mmio)));
+	if (!card_ctx->mmio_start) {
 		dev_err(&pdev->dev, "Could not get ioremap\n");
 		ret = -EACCES;
 		goto err;
@@ -1751,54 +1801,89 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
 	/* setup interrupt handler */
 	ret = request_irq(irq, display_pipe_interrupt_handler, 0,
-			  pdev->name, ctx);
+			  pdev->name, card_ctx);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "request_irq failed\n");
 		goto err;
 	}
 
-	ctx->irq = irq;
-
-	ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS,
-			  MAX_CAP_STREAMS, &pcm);
-	if (ret)
-		goto err;
-
-	/* setup private data which can be retrieved when required */
-	pcm->private_data = ctx;
-	pcm->info_flags = 0;
-	strncpy(pcm->name, card->shortname, strlen(card->shortname));
-	/* setup the ops for playabck */
-	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops);
+	card_ctx->irq = irq;
 
 	/* only 32bit addressable */
 	dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 
-	/* allocate dma pages;
-	 * try to allocate 600k buffer as default which is large enough
-	 */
-	snd_pcm_lib_preallocate_pages_for_all(pcm,
-			SNDRV_DMA_TYPE_DEV, NULL,
-			HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER);
+	init_channel_allocations();
 
-	/* create controls */
-	for (i = 0; i < ARRAY_SIZE(had_controls); i++) {
-		ret = snd_ctl_add(card, snd_ctl_new1(&had_controls[i], ctx));
-		if (ret < 0)
+	card_ctx->num_pipes = pdata->num_pipes;
+	card_ctx->num_ports = pdata->num_ports;
+
+	/*
+	 * Initialise every port context before creating any PCM device:
+	 * hdmi_lpe_audio_free() cancels the work item of all ports.
+	 */
+	for_each_port(card_ctx, port) {
+		struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+
+		ctx->card_ctx = card_ctx;
+		ctx->dev = card_ctx->dev;
+		ctx->port = port;
+		ctx->pipe = -1;
+
+		spin_lock_init(&ctx->had_spinlock);
+		mutex_init(&ctx->mutex);
+		INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+	}
+
+	for_each_port(card_ctx, port) {
+		struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+		int i;
+
+		ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS,
+				  MAX_CAP_STREAMS, &pcm);
+		if (ret)
 			goto err;
-	}
 
-	init_channel_allocations();
+		/* setup private data which can be retrieved when required */
+		pcm->private_data = ctx;
+		pcm->info_flags = 0;
+		strncpy(pcm->name, card->shortname, strlen(card->shortname));
+		/* setup the ops for playback */
+		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops);
 
-	/* Register channel map controls */
-	ret = had_register_chmap_ctls(ctx, pcm);
-	if (ret < 0)
-		goto err;
+		/* allocate dma pages;
+		 * try to allocate 600k buffer as default which is large enough
+		 */
+		snd_pcm_lib_preallocate_pages_for_all(pcm,
+						      SNDRV_DMA_TYPE_DEV, NULL,
+						      HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER);
+
+		/* create controls */
+		for (i = 0; i < ARRAY_SIZE(had_controls); i++) {
+			struct snd_kcontrol *kctl;
+
+			kctl = snd_ctl_new1(&had_controls[i], ctx);
+			if (!kctl) {
+				ret = -ENOMEM;
+				goto err;
+			}
 
-	ret = had_create_jack(ctx);
-	if (ret < 0)
-		goto err;
+			kctl->id.device = pcm->device;
+
+			ret = snd_ctl_add(card, kctl);
+			if (ret < 0)
+				goto err;
+		}
+
+		/* Register channel map controls */
+		ret = had_register_chmap_ctls(ctx, pcm);
+		if (ret < 0)
+			goto err;
+
+		ret = had_create_jack(ctx, pcm);
+		if (ret < 0)
+			goto err;
+	}
 
 	ret = snd_card_register(card);
 	if (ret)
@@ -1806,19 +1881,18 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
 	spin_lock_irq(&pdata->lpe_audio_slock);
 	pdata->notify_audio_lpe = notify_audio_lpe;
-	pdata->notify_pending = false;
 	spin_unlock_irq(&pdata->lpe_audio_slock);
 
-	/* runtime PM isn't enabled as default, since it won't save much on
-	 * BYT/CHT devices; user who want the runtime PM should adjust the
-	 * power/ontrol and power/autosuspend_delay_ms sysfs entries instead
-	 */
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_mark_last_busy(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
 
 	dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__);
-	schedule_work(&ctx->hdmi_audio_wq);
+	for_each_port(card_ctx, port) {
+		struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+
+		schedule_work(&ctx->hdmi_audio_wq);
+	}
 
 	return 0;
 
@@ -1834,9 +1908,9 @@ err:
  */
 static int hdmi_lpe_audio_remove(struct platform_device *pdev)
 {
-	struct snd_intelhad *ctx = platform_get_drvdata(pdev);
+	struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); /* drvdata is the card context set in probe */
 
-	snd_card_free(ctx->card);
+	snd_card_free(card_ctx->card); /* probe set card->private_free to hdmi_lpe_audio_free() */
 	return 0;
 }
 
diff --git a/sound/x86/intel_hdmi_audio.h b/sound/x86/intel_hdmi_audio.h
index 2d3e389f76b3..0d91bb5dbab7 100644
--- a/sound/x86/intel_hdmi_audio.h
+++ b/sound/x86/intel_hdmi_audio.h
@@ -32,7 +32,6 @@
 
 #include "intel_hdmi_lpe_audio.h"
 
-#define PCM_INDEX		0
 #define MAX_PB_STREAMS		1
 #define MAX_CAP_STREAMS		0
 #define BYTES_PER_WORD		0x4
@@ -101,7 +100,7 @@ struct pcm_stream_info {
  * @chmap: holds channel map info
  */
 struct snd_intelhad {
-	struct snd_card	*card;
+	struct snd_intelhad_card *card_ctx;
 	bool		connected;
 	struct		pcm_stream_info stream_info;
 	unsigned char	eld[HDMI_MAX_ELD_BYTES];
@@ -112,6 +111,8 @@ struct snd_intelhad {
 	struct snd_pcm_chmap *chmap;
 	int tmds_clock_speed;
 	int link_rate;
+	int port; /* fixed */
+	int pipe; /* can change dynamically */
 
 	/* ring buffer (BD) position index */
 	unsigned int bd_head;
@@ -123,9 +124,6 @@ struct snd_intelhad {
 	unsigned int period_bytes;	/* PCM period size in bytes */
 
 	/* internal stuff */
-	int irq;
-	void __iomem *mmio_start;
-	unsigned int had_config_offset;
 	union aud_cfg aud_config;	/* AUD_CONFIG reg value cache */
 	struct work_struct hdmi_audio_wq;
 	struct mutex mutex; /* for protecting chmap and eld */
@@ -133,4 +131,16 @@ struct snd_intelhad {
 	struct snd_jack *jack;
 };
 
+struct snd_intelhad_card {
+	struct snd_card	*card;
+	struct device *dev; /* &pdev->dev of the platform device, set in probe */
+
+	/* internal stuff */
+	int irq; /* -1 until request_irq() has succeeded in probe */
+	void __iomem *mmio_start; /* ioremap of the platform MMIO resource */
+	int num_pipes; /* copied from the platform data in probe */
+	int num_ports; /* copied from the platform data in probe */
+	struct snd_intelhad pcm_ctx[3]; /* one for each port */
+};
+
 #endif /* _INTEL_HDMI_AUDIO_ */