diff options
72 files changed, 3344 insertions, 1386 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2cf04504e494..7b05fb802f4c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -16,6 +16,7 @@ i915-y := i915_drv.o \ i915_params.o \ i915_pci.o \ i915_suspend.o \ + i915_syncmap.o \ i915_sw_fence.o \ i915_sysfs.o \ intel_csr.o \ diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index bada32b33237..1256fe21850b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; @@ -330,8 +329,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7af100f84410..2a1a3347495a 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine, find_reg(engine, is_master, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", - reg_addr, *cmd, engine->exec_id); + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", + reg_addr, *cmd, engine->name); return false; } @@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n", *cmd, desc->bits[i].mask, desc->bits[i].expected, - dword, engine->exec_id); + dword, engine->name); return false; } } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d689e511744e..870c470177b5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2568,8 +2568,7 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) static int i915_guc_log_control_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; if (!dev_priv->guc.log.vma) return -EINVAL; @@ -2581,14 +2580,13 @@ static int i915_guc_log_control_get(void *data, u64 *val) static int i915_guc_log_control_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; int ret; if (!dev_priv->guc.log.vma) return -EINVAL; - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) return ret; @@ -2596,7 +2594,7 @@ static int i915_guc_log_control_set(void *data, u64 val) ret = i915_guc_log_control(dev_priv, val); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3036d4835b0f..452c26505018 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: case I915_PARAM_HAS_EXEC_FENCE: + case I915_PARAM_HAS_EXEC_CAPTURE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -834,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_uc_init_early(dev_priv); i915_memcpy_init_early(dev_priv); - ret = intel_engines_init_early(dev_priv); - if (ret) - return ret; - ret = i915_workqueues_init(dev_priv); if (ret < 0) goto err_engines; @@ -855,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_audio_hooks(dev_priv); ret = i915_gem_load_init(dev_priv); if (ret < 0) - goto err_workqueues; + goto err_irq; intel_display_crc_init(dev_priv); @@ -867,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, return 0; -err_workqueues: +err_irq: + intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); err_engines: i915_engines_cleanup(dev_priv); @@ -882,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); + intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); } @@ -947,14 +946,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) ret = i915_mmio_setup(dev_priv); if (ret < 0) - goto put_bridge; + goto err_bridge; intel_uncore_init(dev_priv); + + ret = intel_engines_init_mmio(dev_priv); + if (ret) + goto err_uncore; + i915_gem_init_mmio(dev_priv); return 0; -put_bridge: +err_uncore: + intel_uncore_fini(dev_priv); +err_bridge: pci_dev_put(dev_priv->bridge_dev); return ret; @@ -1213,9 +1219,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) struct drm_i915_private *dev_priv; int ret; - /* Enable nuclear pageflip on ILK+, except vlv/chv */ - if (!i915.nuclear_pageflip && - (match_info->gen < 5 || match_info->has_gmch_display)) + /* Enable nuclear pageflip on ILK+ */ + if (!i915.nuclear_pageflip && match_info->gen < 5) driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9b0949f6c1a..b20ed16da0ad 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170403" -#define DRIVER_TIMESTAMP 1491198738 +#define DRIVER_DATE "20170502" +#define DRIVER_TIMESTAMP 1493710187 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -822,7 +822,6 @@ struct intel_csr { func(has_gmch_display); \ func(has_guc); \ func(has_hotplug); \ - func(has_hw_contexts); \ func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ @@ -1025,6 +1024,9 @@ struct i915_gpu_state { u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + struct drm_i915_error_object **user_bo; + long user_bo_count; + struct drm_i915_error_object *wa_ctx; struct drm_i915_error_request { @@ -1511,11 +1513,7 @@ struct i915_gem_mm { /** LRU list of objects with fence regs on them. */ struct list_head fence_list; - /** - * Are we in a non-interruptible section of code like - * modesetting? - */ - bool interruptible; + u64 unordered_timeline; /* the indicator for dispatch video commands on two BSD rings */ atomic_t bsd_engine_dispatch_index; @@ -1566,7 +1564,7 @@ struct i915_gpu_error { * * This is a counter which gets incremented when reset is triggered, * - * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set + * Before the reset commences, the I915_RESET_BACKOFF bit is set * meaning that any waiters holding onto the struct_mutex should * relinquish the lock immediately in order for the reset to start. * @@ -2362,7 +2360,6 @@ struct drm_i915_private { */ struct mutex av_mutex; - uint32_t hw_context_size; struct list_head context_list; u32 fdi_rx_config; @@ -2870,7 +2867,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical) -#define HAS_HW_CONTEXTS(dev_priv) ((dev_priv)->info.has_hw_contexts) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) #define USES_PPGTT(dev_priv) (i915.enable_ppgtt) @@ -3026,7 +3022,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init_mmio(struct drm_i915_private *dev_priv); int intel_engines_init(struct drm_i915_private *dev_priv); /* intel_hotplug.c */ @@ -3063,6 +3059,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, const char *fmt, ...); extern void intel_irq_init(struct drm_i915_private *dev_priv); +extern void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); @@ -3091,14 +3088,26 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms); + u32 mask, + u32 value, + unsigned int timeout_ms); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value); +static inline int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms); + u32 mask, + u32 value, + unsigned int timeout_ms) +{ + return __intel_wait_for_register_fw(dev_priv, reg, mask, value, + 2, timeout_ms, NULL); +} static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { @@ -3447,8 +3456,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, - bool write); +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); +int __must_check +i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); struct i915_vma * __must_check @@ -3711,8 +3721,8 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv); void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv); void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv); void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int tmds_clk_speed, - bool dp_output, int link_rate); + enum pipe pipe, enum port port, + const void *eld, int ls_clock, bool dp_output); /* intel_i2c.c */ extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 532a577ff7a1..f9c6b9b5002c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,8 +46,6 @@ #include <linux/dma-buf.h> static void i915_gem_flush_free_objects(struct drm_i915_private *i915); -static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { @@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, args->size, &args->handle); } +static inline enum fb_op_origin +fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) +{ + return (domain == I915_GEM_DOMAIN_GTT ? + obj->frontbuffer_ggtt_origin : ORIGIN_CPU); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + + if (!(obj->base.write_domain & flush_domains)) + return; + + /* No actual flushing is required for the GTT write domain. Writes + * to it "immediately" go to main memory as far as we know, so there's + * no chipset flush. It also doesn't land in render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour). + */ + wmb(); + + switch (obj->base.write_domain) { + case I915_GEM_DOMAIN_GTT: + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } + } + + intel_fb_obj_flush(obj, + fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + } + + obj->base.write_domain = 0; +} + static inline int __copy_to_user_swizzled(char __user *cpu_vaddr, const char *gpu_vaddr, int gpu_offset, @@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). @@ -1501,13 +1554,6 @@ err: return ret; } -static inline enum fb_op_origin -write_origin(struct drm_i915_gem_object *obj, unsigned domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915; @@ -1591,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out_unpin; - if (read_domains & I915_GEM_DOMAIN_GTT) - err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + if (read_domains & I915_GEM_DOMAIN_WC) + err = i915_gem_object_set_to_wc_domain(obj, write_domain); + else if (read_domains & I915_GEM_DOMAIN_GTT) + err = i915_gem_object_set_to_gtt_domain(obj, write_domain); else - err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + err = i915_gem_object_set_to_cpu_domain(obj, write_domain); /* And bump the LRU for this access */ i915_gem_object_bump_inactive_ggtt(obj); @@ -1602,7 +1650,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); if (write_domain != 0) - intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); + intel_fb_obj_invalidate(obj, + fb_write_origin(obj, write_domain)); out_unpin: i915_gem_object_unpin_pages(obj); @@ -1737,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) * into userspace. (This view is aligned and sized appropriately for * fenced access.) * + * 2 - Recognise WC as a separate cache domain so that we can flush the + * delayed writes via GTT before performing direct access via WC. + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -1764,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 1; + return 2; } static inline struct i915_ggtt_view @@ -3144,6 +3196,7 @@ i915_gem_idle_work_handler(struct work_struct *work) intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); } + i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3320,56 +3373,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return ret; } -/** Flushes the GTT write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. - * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - * - * We also have to wait a bit for the writes to land from the GTT. - * An uncached read (i.e. mmio) seems to be ideal for the round-trip - * timing. This issue has only been observed when switching quickly - * between GTT writes and CPU reads from inside the kernel on recent hw, - * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). - */ - wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - if (intel_runtime_pm_get_if_in_use(dev_priv)) { - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } - } - - intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); - - obj->base.write_domain = 0; -} - -/** Flushes the CPU write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) -{ - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) - return; - - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - obj->base.write_domain = 0; -} - static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) @@ -3390,6 +3393,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) } /** + * Moves a single object to the WC read, and possibly write domain. + * @obj: object to act on + * @write: ask for write access or read only + * + * This function returns when the move is complete, including waiting on + * flushes to occur. + */ +int +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) +{ + int ret; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); + if (ret) + return ret; + + if (obj->base.write_domain == I915_GEM_DOMAIN_WC) + return 0; + + /* Flush and acquire obj->pages so that we are coherent through + * direct access in memory with previous cached writes through + * shmemfs and that our cache domain tracking remains valid. + * For example, if the obj->filp was moved to swap without us + * being notified and releasing the pages, we would mistakenly + * continue to assume that the obj remained out of the CPU cached + * domain. + */ + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * WC domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) + mb(); + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); + obj->base.read_domains |= I915_GEM_DOMAIN_WC; + if (write) { + obj->base.read_domains = I915_GEM_DOMAIN_WC; + obj->base.write_domain = I915_GEM_DOMAIN_WC; + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + return 0; +} + +/** * Moves a single object to the GTT read, and possibly write domain. * @obj: object to act on * @write: ask for write access or read only @@ -3428,7 +3494,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3802,7 +3868,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { @@ -3996,7 +4062,7 @@ __busy_set_if_active(const struct dma_fence *fence, if (i915_gem_request_completed(rq)) return 0; - return flag(rq->engine->exec_id); + return flag(rq->engine->uabi_id); } static __always_inline unsigned int @@ -4195,7 +4261,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) * catch if we ever need to fix it. In the meantime, if you do spot * such a local variable, please consider fixing! */ - if (WARN_ON(size >> PAGE_SHIFT > INT_MAX)) + if (size >> PAGE_SHIFT > INT_MAX) return ERR_PTR(-E2BIG); if (overflows_type(size, obj->base.size)) @@ -4302,6 +4368,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); + cond_resched(); + llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); @@ -4349,8 +4417,11 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ - while ((freed = llist_del_all(&i915->mm.free_list))) + while ((freed = llist_del_all(&i915->mm.free_list))) { __i915_gem_free_objects(i915, freed); + if (need_resched()) + break; + } } static void __i915_gem_free_object_rcu(struct rcu_head *head) @@ -4415,10 +4486,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * try to take over. The only way to remove the earlier state * is by resetting. However, resetting on earlier gen is tricky as * it may impact the display and we are uncertain about the stability - * of the reset, so we only reset recent machines with logical - * context support (that must be reset to remove any stray contexts). + * of the reset, so this could be applied to even earlier gen. */ - if (HAS_HW_CONTEXTS(i915)) { + if (INTEL_GEN(i915) >= 5) { int reset = intel_gpu_reset(i915, ALL_ENGINES); WARN_ON(reset && reset != -ENODEV); } @@ -4676,7 +4746,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_clflush_init(dev_priv); + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; @@ -4822,8 +4892,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->pending_flip_queue); - dev_priv->mm.interruptible = true; - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); spin_lock_init(&dev_priv->fb_tracking.lock); @@ -4864,9 +4932,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); + /* Discard all purgeable objects, let userspace recover those as + * required after resuming. + */ i915_gem_shrink_all(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; } @@ -4891,12 +4960,13 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * we update that state just before writing out the image. * * To try and reduce the hibernation image, we manually shrink - * the objects as well. + * the objects as well, see i915_gem_freeze() */ - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); + i915_gem_drain_freed_objects(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); for (p = phases; *p; p++) { list_for_each_entry(obj, *p, global_link) { obj->base.read_domains = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 5a49487368ca..ee54597465b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -25,6 +25,8 @@ #ifndef __I915_GEM_H__ #define __I915_GEM_H__ +#include <linux/bug.h> + #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index ffd01e02fe94..ffac7a1f0caf 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -27,7 +27,6 @@ #include "i915_gem_clflush.h" static DEFINE_SPINLOCK(clflush_lock); -static u64 clflush_context; struct clflush { struct dma_fence dma; /* Must be first for dma_fence_free() */ @@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, dma_fence_init(&clflush->dma, &i915_clflush_ops, &clflush_lock, - clflush_context, + to_i915(obj->base.dev)->mm.unordered_timeline, 0); i915_sw_fence_init(&clflush->wait, i915_clflush_notify); @@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); } } - -void i915_gem_clflush_init(struct drm_i915_private *i915) -{ - clflush_context = dma_fence_context_alloc(1); -} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h index b62d61a2d15f..2455a7820937 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -28,7 +28,6 @@ struct drm_i915_private; struct drm_i915_gem_object; -void i915_gem_clflush_init(struct drm_i915_private *i915); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c4966913..31a73c39239f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,33 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static int get_context_size(struct drm_i915_private *dev_priv) -{ - int ret; - u32 reg; - - switch (INTEL_GEN(dev_priv)) { - case 6: - reg = I915_READ(CXT_SIZE); - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; - break; - case 7: - reg = I915_READ(GEN7_CXT_SIZE); - if (IS_HASWELL(dev_priv)) - ret = HSW_CXT_TOTAL_SIZE; - else - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; - break; - case 8: - ret = GEN8_CXT_TOTAL_SIZE; - break; - default: - BUG(); - } - - return ret; -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -151,45 +124,6 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * -alloc_context_obj(struct drm_i915_private *dev_priv, u64 size) -{ - struct drm_i915_gem_object *obj; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return obj; - - /* - * Try to make the context utilize L3 as well as LLC. - * - * On VLV we don't have L3 controls in the PTEs so we - * shouldn't touch the cache level, especially as that - * would make the object snooped which might have a - * negative performance impact. - * - * Snooping is required on non-llc platforms in execlist - * mode, but since all GGTT accesses use PAT entry 0 we - * get snooping anyway regardless of cache_level. - * - * This is only applicable for Ivy Bridge devices since - * later platforms don't have L3 control bits in the PTE. - */ - if (IS_IVYBRIDGE(dev_priv)) { - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); - /* Failure shouldn't ever happen this early */ - if (WARN_ON(ret)) { - i915_gem_object_put(obj); - return ERR_PTR(ret); - } - } - - return obj; -} - static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); @@ -266,26 +200,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_out; - } - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - ret = PTR_ERR(vma); - goto err_out; - } - - ctx->engine[RCS].state = vma; - } - /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; if (file_priv) { @@ -443,21 +357,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->context_hw_ida); - if (i915.enable_execlists) { - /* NB: intentionally left blank. We will allocate our own - * backing objects as we need them, thank you very much */ - dev_priv->hw_context_size = 0; - } else if (HAS_HW_CONTEXTS(dev_priv)) { - dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), - I915_GTT_PAGE_SIZE); - if (dev_priv->hw_context_size > (1<<20)) { - DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", - dev_priv->hw_context_size); - dev_priv->hw_context_size = 0; - } - } - ctx = i915_gem_create_context(dev_priv, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", @@ -477,8 +376,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); DRM_DEBUG_DRIVER("%s context support initialized\n", - i915.enable_execlists ? "LR" : - dev_priv->hw_context_size ? "HW" : "fake"); + dev_priv->engine[RCS]->context_size ? "logical" : + "fake"); return 0; } @@ -941,11 +840,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) return 0; } -static bool contexts_enabled(struct drm_device *dev) -{ - return i915.enable_execlists || to_i915(dev)->hw_context_size; -} - static bool client_is_banned(struct drm_i915_file_private *file_priv) { return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; @@ -954,12 +848,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_context_create *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int ret; - if (!contexts_enabled(dev)) + if (!dev_priv->engine[RCS]->context_size) return -ENODEV; if (args->pad != 0) @@ -977,7 +872,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(to_i915(dev), file_priv); + ctx = i915_gem_create_context(dev_priv, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3e59c8ef27b..af1965774e7b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { + struct i915_gem_capture_list *capture; + + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (unlikely(!capture)) + return -ENOMEM; + + capture->next = req->capture_list; + capture->vma = vma; + req->capture_list = capture; + } + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 5ddbc9499775..9074303c8888 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence) if (i915_fence_signaled(fence)) return false; - intel_engine_enable_signaling(to_request(fence)); + intel_engine_enable_signaling(to_request(fence), true); return true; } @@ -214,12 +214,12 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) } /* Finally reset hw state */ - tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); + tl->seqno = seqno; list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].sync_seqno, 0, - sizeof(timeline->engine[id].sync_seqno)); + memset(timeline->engine[id].global_sync, 0, + sizeof(timeline->engine[id].global_sync)); } return 0; @@ -271,6 +271,48 @@ void i915_gem_retire_noop(struct i915_gem_active *active, /* Space left intentionally blank */ } +static void advance_ring(struct drm_i915_gem_request *request) +{ + unsigned int tail; + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + if (list_is_last(&request->ring_link, &request->ring->request_list)) { + /* We may race here with execlists resubmitting this request + * as we retire it. The resubmission will move the ring->tail + * forwards (to request->wa_tail). We either read the + * current value that was written to hw, or the value that + * is just about to be. Either works, if we miss the last two + * noops - they are safe to be replayed on a reset. + */ + tail = READ_ONCE(request->ring->tail); + } else { + tail = request->postfix; + } + list_del(&request->ring_link); + + request->ring->head = tail; +} + +static void free_capture_list(struct drm_i915_gem_request *request) +{ + struct i915_gem_capture_list *capture; + + capture = request->capture_list; + while (capture) { + struct i915_gem_capture_list *next = capture->next; + + kfree(capture); + capture = next; + } +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -287,16 +329,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(&request->link); spin_unlock_irq(&engine->timeline->lock); - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - list_del(&request->ring_link); - request->ring->head = request->postfix; if (!--request->i915->gt.active_requests) { GEM_BUG_ON(!request->i915->gt.awake); mod_delayed_work(request->i915->wq, @@ -304,6 +336,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) msecs_to_jiffies(100)); } unreserve_seqno(request->engine); + advance_ring(request); + + free_capture_list(request); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -402,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request); + intel_engine_enable_signaling(request, false); spin_unlock(&request->lock); engine->emit_breadcrumb(request, @@ -603,6 +638,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->global_seqno = 0; req->file_priv = NULL; req->batch = NULL; + req->capture_list = NULL; /* * Reserve space in the ring buffer for all the commands required to @@ -623,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GPU processing the request, we never over-estimate the * position of the head. */ - req->head = req->ring->tail; + req->head = req->ring->emit; /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); @@ -651,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, int ret; GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); if (i915_gem_request_completed(from)) return 0; @@ -663,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - if (to->timeline == from->timeline) - return 0; - if (to->engine == from->engine) { ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, @@ -674,55 +708,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, } seqno = i915_gem_request_global_seqno(from); - if (!seqno) { - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - GFP_KERNEL); - return ret < 0 ? ret : 0; - } + if (!seqno) + goto await_dma_fence; - if (seqno <= to->timeline->sync_seqno[from->engine->id]) - return 0; + if (!to->engine->semaphore.sync_to) { + if (!__i915_gem_request_started(from, seqno)) + goto await_dma_fence; - trace_i915_gem_ring_sync_to(to, from); - if (!i915.semaphores) { - if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) { - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - GFP_KERNEL); - if (ret < 0) - return ret; - } + if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2)) + goto await_dma_fence; } else { + GEM_BUG_ON(!from->engine->semaphore.signal); + + if (seqno <= to->timeline->global_sync[from->engine->id]) + return 0; + + trace_i915_gem_ring_sync_to(to, from); ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; + + to->timeline->global_sync[from->engine->id] = seqno; } - to->timeline->sync_seqno[from->engine->id] = seqno; return 0; + +await_dma_fence: + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + GFP_KERNEL); + return ret < 0 ? ret : 0; } int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, struct dma_fence *fence) { - struct dma_fence_array *array; + struct dma_fence **child = &fence; + unsigned int nchild = 1; int ret; - int i; - - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return 0; - - if (dma_fence_is_i915(fence)) - return i915_gem_request_await_request(req, to_request(fence)); - - if (!dma_fence_is_array(fence)) { - ret = i915_sw_fence_await_dma_fence(&req->submit, - fence, I915_FENCE_TIMEOUT, - GFP_KERNEL); - return ret < 0 ? ret : 0; - } /* Note that if the fence-array was created in signal-on-any mode, * we should *not* decompose it into its individual fences. However, @@ -731,21 +755,46 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, * amdgpu and we should not see any incoming fence-array from * sync-file being in signal-on-any mode. */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } + + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_add_request() which ensures + * that requests are submitted in-order through each ring. + */ + if (fence->context == req->fence.context) + continue; - array = to_dma_fence_array(fence); - for (i = 0; i < array->num_fences; i++) { - struct dma_fence *child = array->fences[i]; + /* Squash repeated waits to the same timelines */ + if (fence->context != req->i915->mm.unordered_timeline && + intel_timeline_sync_is_later(req->timeline, fence)) + continue; - if (dma_fence_is_i915(child)) + if (dma_fence_is_i915(fence)) ret = i915_gem_request_await_request(req, - to_request(child)); + to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&req->submit, - child, I915_FENCE_TIMEOUT, + ret = i915_sw_fence_await_dma_fence(&req->submit, fence, + I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) return ret; - } + + /* Record the latest fence used against each timeline */ + if (fence->context != req->i915->mm.unordered_timeline) + intel_timeline_sync_set(req->timeline, fence); + } while (--nchild); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index a211c53c813f..4ccab5affd3c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -73,6 +73,11 @@ struct i915_priotree { #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) }; +struct i915_gem_capture_list { + struct i915_gem_capture_list *next; + struct i915_vma *vma; +}; + /** * Request queue structure. * @@ -167,6 +172,12 @@ struct drm_i915_gem_request { * error state dump only). */ struct i915_vma *batch; + /** Additional buffers requested by userspace to be captured upon + * a GPU hang. The vma/obj on this list are protected by their + * active reference - all objects on this list must also be + * on the active_list (of their final request). + */ + struct i915_gem_capture_list *capture_list; struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 129ed303a6c4..0e7352d82ca4 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -35,9 +35,9 @@ #include "i915_drv.h" #include "i915_trace.h" -static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) +static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) { - switch (mutex_trylock_recursive(&dev->struct_mutex)) { + switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) { case MUTEX_TRYLOCK_FAILED: return false; @@ -53,12 +53,12 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) BUG(); } -static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) +static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) { if (!unlock) return; - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); /* expedite the RCU grace period to free some request slabs */ synchronize_rcu_expedited(); @@ -156,7 +156,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, unsigned long count = 0; bool unlock; - if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return 0; trace_i915_gem_shrink(dev_priv, target, flags); @@ -244,7 +244,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, i915_gem_retire_requests(dev_priv); - i915_gem_shrinker_unlock(&dev_priv->drm, unlock); + shrinker_unlock(dev_priv, unlock); return count; } @@ -284,12 +284,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); - struct drm_device *dev = &dev_priv->drm; struct drm_i915_gem_object *obj; unsigned long count; bool unlock; - if (!i915_gem_shrinker_lock(dev, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return 0; i915_gem_retire_requests(dev_priv); @@ -304,7 +303,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; } - i915_gem_shrinker_unlock(dev, unlock); + shrinker_unlock(dev_priv, unlock); return count; } @@ -314,11 +313,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); - struct drm_device *dev = &dev_priv->drm; unsigned long freed; bool unlock; - if (!i915_gem_shrinker_lock(dev, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return SHRINK_STOP; freed = i915_gem_shrink(dev_priv, @@ -332,26 +330,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - i915_gem_shrinker_unlock(dev, unlock); + shrinker_unlock(dev_priv, unlock); return freed; } -struct shrinker_lock_uninterruptible { - bool was_interruptible; - bool unlock; -}; - static bool -i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, - struct shrinker_lock_uninterruptible *slu, - int timeout_ms) +shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, + int timeout_ms) { unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && - i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) + shrinker_lock(dev_priv, unlock)) break; schedule_timeout_killable(1); @@ -364,29 +356,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, } } while (1); - slu->was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; return true; } -static void -i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv, - struct shrinker_lock_uninterruptible *slu) -{ - dev_priv->mm.interruptible = slu->was_interruptible; - i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock); -} - static int i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) { struct drm_i915_private *dev_priv = container_of(nb, struct drm_i915_private, mm.oom_notifier); - struct shrinker_lock_uninterruptible slu; struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; + bool unlock; - if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) + if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) return NOTIFY_DONE; freed_pages = i915_gem_shrink_all(dev_priv); @@ -415,7 +397,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) bound += obj->base.size >> PAGE_SHIFT; } - i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); + shrinker_unlock(dev_priv, unlock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -435,12 +417,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr { struct drm_i915_private *dev_priv = container_of(nb, struct drm_i915_private, mm.vmap_notifier); - struct shrinker_lock_uninterruptible slu; struct i915_vma *vma, *next; unsigned long freed_pages = 0; + bool unlock; int ret; - if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) + if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) return NOTIFY_DONE; /* Force everything onto the inactive lists */ @@ -465,7 +447,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr } out: - i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); + shrinker_unlock(dev_priv, unlock); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index b596ca7ee058..c597ce277a04 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -23,6 +23,32 @@ */ #include "i915_drv.h" +#include "i915_syncmap.h" + +static void __intel_timeline_init(struct intel_timeline *tl, + struct i915_gem_timeline *parent, + u64 context, + struct lock_class_key *lockclass, + const char *lockname) +{ + tl->fence_context = context; + tl->common = parent; +#ifdef CONFIG_DEBUG_SPINLOCK + __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); +#else + spin_lock_init(&tl->lock); +#endif + init_request_active(&tl->last_request, NULL); + INIT_LIST_HEAD(&tl->requests); + i915_syncmap_init(&tl->sync); +} + +static void __intel_timeline_fini(struct intel_timeline *tl) +{ + GEM_BUG_ON(!list_empty(&tl->requests)); + + i915_syncmap_free(&tl->sync); +} static int __i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *timeline, @@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915, lockdep_assert_held(&i915->drm.struct_mutex); + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); if (!timeline->name) @@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915, /* Called during early_init before we know how many engines there are */ fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - tl->fence_context = fences++; - tl->common = timeline; -#ifdef CONFIG_DEBUG_SPINLOCK - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); -#else - spin_lock_init(&tl->lock); -#endif - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - } + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) + __intel_timeline_init(&timeline->engine[i], + timeline, fences++, + lockclass, lockname); return 0; } @@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) &class, "&global_timeline->lock"); } +/** + * i915_gem_timelines_mark_idle -- called when the driver idles + * @i915 - the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_gem_timelines_mark_idle(struct drm_i915_private *i915) +{ + struct i915_gem_timeline *timeline; + int i; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { + struct intel_timeline *tl = &timeline->engine[i]; + + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&tl->sync); + } + } +} + void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) { int i; lockdep_assert_held(&timeline->i915->drm.struct_mutex); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - GEM_BUG_ON(!list_empty(&tl->requests)); - } + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) + __intel_timeline_fini(&timeline->engine[i]); list_del(&timeline->link); kfree(timeline->name); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_gem_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 6c53e14cab2a..bfb5eb94c64d 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -27,7 +27,9 @@ #include <linux/list.h> +#include "i915_utils.h" #include "i915_gem_request.h" +#include "i915_syncmap.h" struct i915_gem_timeline; @@ -55,7 +57,25 @@ struct intel_timeline { * struct_mutex. */ struct i915_gem_active last_request; - u32 sync_seqno[I915_NUM_ENGINES]; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + /** + * Separately to the inter-context seqno map above, we track the last + * barrier (e.g. semaphore wait) to the global engine timelines. Note + * that this tracks global_seqno rather than the context.seqno, and + * so it is subject to the limitations of hw wraparound and that we + * may need to revoke global_seqno (on pre-emption). + */ + u32 global_sync[I915_NUM_ENGINES]; struct i915_gem_timeline *common; }; @@ -73,6 +93,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *tl, const char *name); int i915_gem_timeline_init__global(struct drm_i915_private *i915); +void i915_gem_timelines_mark_idle(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8effc59f5cb5..ec526d92f908 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], NULL, obj); } + for (j = 0; j < ee->user_bo_count; j++) + print_error_obj(m, dev_priv->engine[i], + "user", ee->user_bo[j]); + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i]->name, @@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = container_of(error_ref, typeof(*error), ref); - int i; + long i, j; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { struct drm_i915_error_engine *ee = &error->engine[i]; + for (j = 0; j < ee->user_bo_count; j++) + i915_error_object_free(ee->user_bo[j]); + kfree(ee->user_bo); + i915_error_object_free(ee->batchbuffer); i915_error_object_free(ee->wa_batchbuffer); i915_error_object_free(ee->ringbuffer); @@ -1346,6 +1354,35 @@ static void record_context(struct drm_i915_error_context *e, e->active = ctx->active_count; } +static void request_record_user_bo(struct drm_i915_gem_request *request, + struct drm_i915_error_engine *ee) +{ + struct i915_gem_capture_list *c; + struct drm_i915_error_object **bo; + long count; + + count = 0; + for (c = request->capture_list; c; c = c->next) + count++; + + bo = NULL; + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); + if (!bo) + return; + + count = 0; + for (c = request->capture_list; c; c = c->next) { + bo[count] = i915_error_object_create(request->i915, c->vma); + if (!bo[count]) + break; + count++; + } + + ee->user_bo = bo; + ee->user_bo_count = count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { @@ -1392,6 +1429,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_batchbuffer = i915_error_object_create(dev_priv, engine->scratch); + request_record_user_bo(request, ee); ee->ctx = i915_error_object_create(dev_priv, @@ -1560,6 +1598,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->done_reg = I915_READ(DONE_REG); } + if (INTEL_GEN(dev_priv) >= 5) + error->ccid = I915_READ(CCID); + /* 3: Feature specific registers */ if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { error->gam_ecochk = I915_READ(GAM_ECOCHK); @@ -1567,9 +1608,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } /* 4: Everything else */ - if (HAS_HW_CONTEXTS(dev_priv)) - error->ccid = I915_READ(CCID); - if (INTEL_GEN(dev_priv) >= 8) { error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1642fff9cf13..7e85b5ab8ae2 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, GEM_BUG_ON(freespace < wqi_size); /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = rq->tail; - assert_ring_tail_valid(rq->ring, rq->tail); - tail >>= 3; + tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we @@ -651,7 +649,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq) trace_dma_fence_enable_signal(&rq->fence); spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); - intel_engine_enable_signaling(rq); + intel_engine_enable_signaling(rq, true); spin_unlock(&rq->lock); } @@ -1053,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) dev_priv->engine[RCS]->status_page.ggtt_offset; for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fd97fe00cd0d..c99f51c587c7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1236,7 +1236,7 @@ out: static void ivybridge_parity_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, l3_parity.error_work); + container_of(work, typeof(*dev_priv), l3_parity.error_work); u32 error_status, row, bank, subbank; char *parity_event[6]; uint32_t misccpctl; @@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 pipestat_mask; u32 enable_mask; enum pipe pipe; - u32 val; pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | PIPE_CRC_DONE_INTERRUPT_STATUS; @@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask = I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_LPE_PIPE_A_INTERRUPT | + I915_LPE_PIPE_B_INTERRUPT; + if (IS_CHERRYVIEW(dev_priv)) - enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT; + enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | + I915_LPE_PIPE_C_INTERRUPT; WARN_ON(dev_priv->irq_mask != ~0); - val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT | - I915_LPE_PIPE_C_INTERRUPT); - - enable_mask |= val; - dev_priv->irq_mask = ~enable_mask; GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); @@ -4233,11 +4230,15 @@ static void i965_irq_uninstall(struct drm_device * dev) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + int i; intel_hpd_init_work(dev_priv); INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); + for (i = 0; i < MAX_L3_SLICES; ++i) + dev_priv->l3_parity.remap_info[i] = NULL; if (HAS_GUC_SCHED(dev_priv)) dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; @@ -4363,6 +4364,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv) } /** + * intel_irq_fini - deinitializes IRQ support + * @i915: i915 device instance + * + * This function deinitializes all the IRQ support. + */ +void intel_irq_fini(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < MAX_L3_SLICES; ++i) + kfree(i915->l3_parity.remap_info[i]); +} + +/** * intel_irq_install - enables the hardware interrupt * @dev_priv: i915 device instance * diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f87b0c4e564d..f80db2ccd92f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS @@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = { .has_runtime_pm = 1, .has_rc6 = 1, .has_gmbus_irq = 1, - .has_hw_contexts = 1, .has_gmch_display = 1, .has_hotplug = 1, .has_aliasing_ppgtt = 1, @@ -340,7 +337,6 @@ static const struct intel_device_info intel_cherryview_info = { .has_resource_streamer = 1, .has_rc6 = 1, .has_gmbus_irq = 1, - .has_hw_contexts = 1, .has_logical_ring_contexts = 1, .has_gmch_display = 1, .has_aliasing_ppgtt = 1, @@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_logical_ring_contexts = 1, \ .has_guc = 1, \ .has_decoupled_mmio = 1, \ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 11b12f412492..ee8170cda93e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VECS_HW 3 #define VCS2_HW 4 +/* Engine class */ + +#define RENDER_CLASS 0 +#define VIDEO_DECODE_CLASS 1 +#define VIDEO_ENHANCEMENT_CLASS 2 +#define COPY_ENGINE_CLASS 3 +#define OTHER_CLASS 4 + /* PCI config space */ #define MCHBAR_I915 0x44 @@ -3362,16 +3370,6 @@ enum skl_disp_power_wells { #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) #define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -/* Haswell does have the CXT_SIZE register however it does not appear to be - * valid. Now, docs explain in dwords what is in the context object. The full - * size is 70720 bytes, however, the power context and execlist context will - * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size, including the extra state required for the - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. - */ -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. */ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) enum { INTEL_ADVANCED_CONTEXT = 0, diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c new file mode 100644 index 000000000000..0087acf731a8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_syncmap.c @@ -0,0 +1,412 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/slab.h> + +#include "i915_syncmap.h" + +#include "i915_gem.h" /* GEM_BUG_ON() */ +#include "i915_selftest.h" + +#define SHIFT ilog2(KSYNCMAP) +#define MASK (KSYNCMAP - 1) + +/* + * struct i915_syncmap is a layer of a radixtree that maps a u64 fence + * context id to the last u32 fence seqno waited upon from that context. + * Unlike lib/radixtree it uses a parent pointer that allows traversal back to + * the root. This allows us to access the whole tree via a single pointer + * to the most recently used layer. We expect fence contexts to be dense + * and most reuse to be on the same i915_gem_context but on neighbouring + * engines (i.e. on adjacent contexts) and reuse the same leaf, a very + * effective lookup cache. If the new lookup is not on the same leaf, we + * expect it to be on the neighbouring branch. + * + * A leaf holds an array of u32 seqno, and has height 0. The bitmap field + * allows us to store whether a particular seqno is valid (i.e. allows us + * to distinguish unset from 0). + * + * A branch holds an array of layer pointers, and has height > 0, and always + * has at least 2 layers (either branches or leaves) below it. + * + * For example, + * for x in + * 0 1 2 0x10 0x11 0x200 0x201 + * 0x500000 0x500001 0x503000 0x503001 + * 0xE<<60: + * i915_syncmap_set(&sync, x, lower_32_bits(x)); + * will build a tree like: + * 0xXXXXXXXXXXXXXXXX + * 0-> 0x0000000000XXXXXX + * | 0-> 0x0000000000000XXX + * | | 0-> 0x00000000000000XX + * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 + * | | | 1-> 0x000000000000001X 0:10, 1:11 + * | | 2-> 0x000000000000020X 0:200, 1:201 + * | 5-> 0x000000000050XXXX + * | 0-> 0x000000000050000X 0:500000, 1:500001 + * | 3-> 0x000000000050300X 0:503000, 1:503001 + * e-> 0xe00000000000000X e:e + */ + +struct i915_syncmap { + u64 prefix; + unsigned int height; + unsigned int bitmap; + struct i915_syncmap *parent; + /* + * Following this header is an array of either seqno or child pointers: + * union { + * u32 seqno[KSYNCMAP]; + * struct i915_syncmap *child[KSYNCMAP]; + * }; + */ +}; + +/** + * i915_syncmap_init -- initialise the #i915_syncmap + * @root - pointer to the #i915_syncmap + */ +void i915_syncmap_init(struct i915_syncmap **root) +{ + BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); + BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); + BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap)); + *root = NULL; +} + +static inline u32 *__sync_seqno(struct i915_syncmap *p) +{ + GEM_BUG_ON(p->height); + return (u32 *)(p + 1); +} + +static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) +{ + GEM_BUG_ON(!p->height); + return (struct i915_syncmap **)(p + 1); +} + +static inline unsigned int +__sync_branch_idx(const struct i915_syncmap *p, u64 id) +{ + return (id >> p->height) & MASK; +} + +static inline unsigned int +__sync_leaf_idx(const struct i915_syncmap *p, u64 id) +{ + GEM_BUG_ON(p->height); + return id & MASK; +} + +static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) +{ + return id >> p->height >> SHIFT; +} + +static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) +{ + GEM_BUG_ON(p->height); + return id >> SHIFT; +} + +static inline bool seqno_later(u32 a, u32 b) +{ + return (s32)(a - b) >= 0; +} + +/** + * i915_syncmap_is_later -- compare against the last know sync point + * @root - pointer to the #i915_syncmap + * @id - the context id (other timeline) we are synchronising to + * @seqno - the sequence number along the other timeline + * + * If we have already synchronised this @root timeline with another (@id) then + * we can omit any repeated or earlier synchronisation requests. If the two + * timelines are already coupled, we can also omit the dependency between the + * two as that is already known via the timeline. + * + * Returns true if the two timelines are already synchronised wrt to @seqno, + * false if not and the synchronisation must be emitted. + */ +bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p; + unsigned int idx; + + p = *root; + if (!p) + return false; + + if (likely(__sync_leaf_prefix(p, id) == p->prefix)) + goto found; + + /* First climb the tree back to a parent branch */ + do { + p = p->parent; + if (!p) + return false; + + if (__sync_branch_prefix(p, id) == p->prefix) + break; + } while (1); + + /* And then descend again until we find our leaf */ + do { + if (!p->height) + break; + + p = __sync_child(p)[__sync_branch_idx(p, id)]; + if (!p) + return false; + + if (__sync_branch_prefix(p, id) != p->prefix) + return false; + } while (1); + + *root = p; +found: + idx = __sync_leaf_idx(p, id); + if (!(p->bitmap & BIT(idx))) + return false; + + return seqno_later(__sync_seqno(p)[idx], seqno); +} + +static struct i915_syncmap * +__sync_alloc_leaf(struct i915_syncmap *parent, u64 id) +{ + struct i915_syncmap *p; + + p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); + if (unlikely(!p)) + return NULL; + + p->parent = parent; + p->height = 0; + p->bitmap = 0; + p->prefix = __sync_leaf_prefix(p, id); + return p; +} + +static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) +{ + unsigned int idx = __sync_leaf_idx(p, id); + + p->bitmap |= BIT(idx); + __sync_seqno(p)[idx] = seqno; +} + +static inline void __sync_set_child(struct i915_syncmap *p, + unsigned int idx, + struct i915_syncmap *child) +{ + p->bitmap |= BIT(idx); + __sync_child(p)[idx] = child; +} + +static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p = *root; + unsigned int idx; + + if (!p) { + p = __sync_alloc_leaf(NULL, id); + if (unlikely(!p)) + return -ENOMEM; + + goto found; + } + + /* Caller handled the likely cached case */ + GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); + + /* Climb back up the tree until we find a common prefix */ + do { + if (!p->parent) + break; + + p = p->parent; + + if (__sync_branch_prefix(p, id) == p->prefix) + break; + } while (1); + + /* + * No shortcut, we have to descend the tree to find the right layer + * containing this fence. + * + * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences + * or lower layers. Leaf nodes (height = 0) contain the fences, all + * other nodes (height > 0) are internal layers that point to a lower + * node. Each internal layer has at least 2 descendents. + * + * Starting at the top, we check whether the current prefix matches. If + * it doesn't, we have gone past our target and need to insert a join + * into the tree, and a new leaf node for the target as a descendent + * of the join, as well as the original layer. + * + * The matching prefix means we are still following the right branch + * of the tree. If it has height 0, we have found our leaf and just + * need to replace the fence slot with ourselves. If the height is + * not zero, our slot contains the next layer in the tree (unless + * it is empty, in which case we can add ourselves as a new leaf). + * As descend the tree the prefix grows (and height decreases). + */ + do { + struct i915_syncmap *next; + + if (__sync_branch_prefix(p, id) != p->prefix) { + unsigned int above; + + /* Insert a join above the current layer */ + next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), + GFP_KERNEL); + if (unlikely(!next)) + return -ENOMEM; + + /* Compute the height at which these two diverge */ + above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); + above = round_up(above, SHIFT); + next->height = above + p->height; + next->prefix = __sync_branch_prefix(next, id); + + /* Insert the join into the parent */ + if (p->parent) { + idx = __sync_branch_idx(p->parent, id); + __sync_child(p->parent)[idx] = next; + GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); + } + next->parent = p->parent; + + /* Compute the idx of the other branch, not our id! */ + idx = p->prefix >> (above - SHIFT) & MASK; + __sync_set_child(next, idx, p); + p->parent = next; + + /* Ascend to the join */ + p = next; + } else { + if (!p->height) + break; + } + + /* Descend into the next layer */ + GEM_BUG_ON(!p->height); + idx = __sync_branch_idx(p, id); + next = __sync_child(p)[idx]; + if (!next) { + next = __sync_alloc_leaf(p, id); + if (unlikely(!next)) + return -ENOMEM; + + __sync_set_child(p, idx, next); + p = next; + break; + } + + p = next; + } while (1); + +found: + GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); + __sync_set_seqno(p, id, seqno); + *root = p; + return 0; +} + +/** + * i915_syncmap_set -- mark the most recent syncpoint between contexts + * @root - pointer to the #i915_syncmap + * @id - the context id (other timeline) we have synchronised to + * @seqno - the sequence number along the other timeline + * + * When we synchronise this @root timeline with another (@id), we also know + * that we have synchronized with all previous seqno along that timeline. If + * we then have a request to synchronise with the same seqno or older, we can + * omit it, see i915_syncmap_is_later() + * + * Returns 0 on success, or a negative error code. + */ +int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p = *root; + + /* + * We expect to be called in sequence following is_later(id), which + * should have preloaded the root for us. + */ + if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { + __sync_set_seqno(p, id, seqno); + return 0; + } + + return __sync_set(root, id, seqno); +} + +static void __sync_free(struct i915_syncmap *p) +{ + if (p->height) { + unsigned int i; + + while ((i = ffs(p->bitmap))) { + p->bitmap &= ~0u << i; + __sync_free(__sync_child(p)[i - 1]); + } + } + + kfree(p); +} + +/** + * i915_syncmap_free -- free all memory associated with the syncmap + * @root - pointer to the #i915_syncmap + * + * Either when the timeline is to be freed and we no longer need the sync + * point tracking, or when the fences are all known to be signaled and the + * sync point tracking is redundant, we can free the #i915_syncmap to recover + * its allocations. + * + * Will reinitialise the @root pointer so that the #i915_syncmap is ready for + * reuse. + */ +void i915_syncmap_free(struct i915_syncmap **root) +{ + struct i915_syncmap *p; + + p = *root; + if (!p) + return; + + while (p->parent) + p = p->parent; + + __sync_free(p); + *root = NULL; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_syncmap.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_syncmap.h b/drivers/gpu/drm/i915/i915_syncmap.h new file mode 100644 index 000000000000..0653f70bee82 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_syncmap.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SYNCMAP_H__ +#define __I915_SYNCMAP_H__ + +#include <linux/types.h> + +struct i915_syncmap; +#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */ + +void i915_syncmap_init(struct i915_syncmap **root); +int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno); +bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno); +void i915_syncmap_free(struct i915_syncmap **root); + +#endif /* __I915_SYNCMAP_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index f3fdfda5e558..1eef3fae4db3 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -181,13 +181,10 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct drm_device *dev = &dev_priv->drm; struct i915_gem_context *ctx; - u32 *temp = NULL; /* Just here to make handling failures easy */ int slice = (int)(uintptr_t)attr->private; + u32 **remap_info; int ret; - if (!HAS_HW_CONTEXTS(dev_priv)) - return -ENXIO; - ret = l3_access_valid(dev_priv, offset); if (ret) return ret; @@ -196,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (ret) return ret; - if (!dev_priv->l3_parity.remap_info[slice]) { - temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!temp) { - mutex_unlock(&dev->struct_mutex); - return -ENOMEM; + remap_info = &dev_priv->l3_parity.remap_info[slice]; + if (!*remap_info) { + *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!*remap_info) { + ret = -ENOMEM; + goto out; } } @@ -208,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj, * aren't propagated. Since I cannot find a stable way to reset the GPU * at this point it is left as a TODO. */ - if (temp) - dev_priv->l3_parity.remap_info[slice] = temp; - - memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count); + memcpy(*remap_info + (offset/4), buf, count); /* NB: We defer the remapping until we switch to the context */ list_for_each_entry(ctx, &dev_priv->context_list, link) ctx->remap_slice |= (1<<slice); + ret = count; + +out: mutex_unlock(&dev->struct_mutex); - return count; + return ret; } static struct bin_attribute dpf_attrs = { diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 52c207e81f41..d805b6e6fe71 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, (int) port, (int) pipe); } - switch (intel_encoder->type) { - case INTEL_OUTPUT_HDMI: - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, - crtc_state->port_clock, - false, 0); - break; - case INTEL_OUTPUT_DP: - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, - adjusted_mode->crtc_clock, - true, crtc_state->port_clock); - break; - default: - break; - } + intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld, + crtc_state->port_clock, + intel_encoder->type == INTEL_OUTPUT_DP); } /** @@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder) (int) port, (int) pipe); } - intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0); + intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false); } /** diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9ccbf26124c6..183afcb036aa 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -64,10 +64,12 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n", engine->name, __builtin_return_address(0), yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted))); + &engine->irq_posted)), + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine)); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -665,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } -void intel_engine_enable_signaling(struct drm_i915_gem_request *request) +void intel_engine_enable_signaling(struct drm_i915_gem_request *request, + bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; - bool first, wakeup; + bool first; u32 seqno; /* Note that we may be called from an interrupt handler on another @@ -703,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ - wakeup = __intel_engine_add_wait(engine, &request->signaling.wait); + wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); /* Now insert ourselves into the retirement ordered list of signals * on this engine. We track the oldest seqno as that will be the diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index dd3ad52b7dfe..763010f8ad89 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk) static int glk_calc_cdclk(int max_pixclk) { - if (max_pixclk > 2 * 158400) + /* + * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk + * as a temporary workaround. Use a higher cdclk instead. (Note that + * intel_compute_max_dotclk() limits the max pixel clock to 99% of max + * cdclk.) + */ + if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100)) return 316800; - else if (max_pixclk > 2 * 79200) + else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100)) return 158400; else return 79200; @@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) int max_cdclk_freq = dev_priv->max_cdclk_freq; if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; + /* + * FIXME: Limiting to 99% as a temporary workaround. See + * glk_calc_cdclk() for details. + */ + return 2 * max_cdclk_freq * 99 / 100; else if (INTEL_INFO(dev_priv)->gen >= 9 || IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) return max_cdclk_freq; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2797bf37c3ac..84a1f5e85153 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -777,13 +777,6 @@ out: return ret; } -static int intel_crt_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - return 0; -} - void intel_crt_reset(struct drm_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->dev); @@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = { .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_crt_destroy, - .set_property = intel_crt_set_property, + .set_property = drm_atomic_helper_connector_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_get_property = intel_connector_atomic_get_property, }; static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3617927af269..85b9e2f521a0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4861,12 +4861,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) { if (intel_crtc->overlay) { struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); mutex_lock(&dev->struct_mutex); - dev_priv->mm.interruptible = false; (void) intel_overlay_switch_off(intel_crtc->overlay); - dev_priv->mm.interruptible = true; mutex_unlock(&dev->struct_mutex); } @@ -9566,6 +9563,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, */ if (!crtc) { DRM_DEBUG_KMS("no pipe available for load-detect\n"); + ret = -ENODEV; goto fail; } @@ -9622,6 +9620,7 @@ found: DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); if (IS_ERR(fb)) { DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); + ret = PTR_ERR(fb); goto fail; } @@ -14934,6 +14933,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; + init_llist_head(&dev_priv->atomic_helper.free_list); INIT_WORK(&dev_priv->atomic_helper.free_work, intel_atomic_helper_free_state_worker); @@ -15561,13 +15561,6 @@ void intel_display_resume(struct drm_device *dev) if (state) state->acquire_ctx = &ctx; - /* - * This is a cludge because with real atomic modeset mode_config.mutex - * won't be taken. Unfortunately some probed state like - * audio_codec_enable is still protected by mode_config.mutex, so lock - * it here for now. - */ - mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); while (1) { @@ -15583,7 +15576,6 @@ void intel_display_resume(struct drm_device *dev) drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); - mutex_unlock(&dev->mode_config.mutex); if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ee77b519835c..08834f74d396 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -133,36 +133,55 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); -static int -intel_dp_max_link_bw(struct intel_dp *intel_dp) +static int intel_dp_num_rates(u8 link_bw_code) { - int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE]; - - switch (max_link_bw) { + switch (link_bw_code) { + default: + WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", + link_bw_code); case DP_LINK_BW_1_62: + return 1; case DP_LINK_BW_2_7: + return 2; case DP_LINK_BW_5_4: - break; - default: - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", - max_link_bw); - max_link_bw = DP_LINK_BW_1_62; - break; + return 3; } - return max_link_bw; } -static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp) +/* update sink rates from dpcd */ +static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - u8 source_max, sink_max; + int i, num_rates; - source_max = intel_dig_port->max_lanes; - sink_max = intel_dp->max_sink_lane_count; + num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); + + for (i = 0; i < num_rates; i++) + intel_dp->sink_rates[i] = default_rates[i]; + + intel_dp->num_sink_rates = num_rates; +} + +/* Theoretical max between source and sink */ +static int intel_dp_max_common_rate(struct intel_dp *intel_dp) +{ + return intel_dp->common_rates[intel_dp->num_common_rates - 1]; +} + +/* Theoretical max between source and sink */ +static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + int source_max = intel_dig_port->max_lanes; + int sink_max = drm_dp_max_lane_count(intel_dp->dpcd); return min(source_max, sink_max); } +int intel_dp_max_lane_count(struct intel_dp *intel_dp) +{ + return intel_dp->max_link_lane_count; +} + int intel_dp_link_required(int pixel_clock, int bpp) { @@ -205,34 +224,25 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp) return max_dotclk; } -static int -intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) -{ - if (intel_dp->num_sink_rates) { - *sink_rates = intel_dp->sink_rates; - return intel_dp->num_sink_rates; - } - - *sink_rates = default_rates; - - return (intel_dp->max_sink_link_bw >> 3) + 1; -} - -static int -intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) +static void +intel_dp_set_source_rates(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + const int *source_rates; int size; + /* This should only be done once */ + WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates); + if (IS_GEN9_LP(dev_priv)) { - *source_rates = bxt_rates; + source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); } else if (IS_GEN9_BC(dev_priv)) { - *source_rates = skl_rates; + source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { - *source_rates = default_rates; + source_rates = default_rates; size = ARRAY_SIZE(default_rates); } @@ -240,7 +250,8 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (!intel_dp_source_supports_hbr2(intel_dp)) size--; - return size; + intel_dp->source_rates = source_rates; + intel_dp->num_source_rates = size; } static int intersect_rates(const int *source_rates, int source_len, @@ -266,50 +277,83 @@ static int intersect_rates(const int *source_rates, int source_len, return k; } -static int intel_dp_common_rates(struct intel_dp *intel_dp, - int *common_rates) +/* return index of rate in rates array, or -1 if not found */ +static int intel_dp_rate_index(const int *rates, int len, int rate) { - const int *source_rates, *sink_rates; - int source_len, sink_len; + int i; - sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); - source_len = intel_dp_source_rates(intel_dp, &source_rates); + for (i = 0; i < len; i++) + if (rate == rates[i]) + return i; - return intersect_rates(source_rates, source_len, - sink_rates, sink_len, - common_rates); + return -1; } -static int intel_dp_link_rate_index(struct intel_dp *intel_dp, - int *common_rates, int link_rate) +static void intel_dp_set_common_rates(struct intel_dp *intel_dp) { - int common_len; - int index; + WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates); + + intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates, + intel_dp->num_source_rates, + intel_dp->sink_rates, + intel_dp->num_sink_rates, + intel_dp->common_rates); - common_len = intel_dp_common_rates(intel_dp, common_rates); - for (index = 0; index < common_len; index++) { - if (link_rate == common_rates[common_len - index - 1]) - return common_len - index - 1; + /* Paranoia, there should always be something in common. */ + if (WARN_ON(intel_dp->num_common_rates == 0)) { + intel_dp->common_rates[0] = default_rates[0]; + intel_dp->num_common_rates = 1; } +} - return -1; +/* get length of common rates potentially limited by max_rate */ +static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp, + int max_rate) +{ + const int *common_rates = intel_dp->common_rates; + int i, common_len = intel_dp->num_common_rates; + + /* Limit results by potentially reduced max rate */ + for (i = 0; i < common_len; i++) { + if (common_rates[common_len - i - 1] <= max_rate) + return common_len - i; + } + + return 0; +} + +static bool intel_dp_link_params_valid(struct intel_dp *intel_dp) +{ + /* + * FIXME: we need to synchronize the current link parameters with + * hardware readout. Currently fast link training doesn't work on + * boot-up. + */ + if (intel_dp->link_rate == 0 || + intel_dp->link_rate > intel_dp->max_link_rate) + return false; + + if (intel_dp->lane_count == 0 || + intel_dp->lane_count > intel_dp_max_lane_count(intel_dp)) + return false; + + return true; } int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count) { - int common_rates[DP_MAX_SUPPORTED_RATES]; - int link_rate_index; + int index; - link_rate_index = intel_dp_link_rate_index(intel_dp, - common_rates, - link_rate); - if (link_rate_index > 0) { - intel_dp->max_sink_link_bw = drm_dp_link_rate_to_bw_code(common_rates[link_rate_index - 1]); - intel_dp->max_sink_lane_count = lane_count; + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + link_rate); + if (index > 0) { + intel_dp->max_link_rate = intel_dp->common_rates[index - 1]; + intel_dp->max_link_lane_count = lane_count; } else if (lane_count > 1) { - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); - intel_dp->max_sink_lane_count = lane_count >> 1; + intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp); + intel_dp->max_link_lane_count = lane_count >> 1; } else { DRM_ERROR("Link Training Unsuccessful\n"); return -1; @@ -1486,24 +1530,21 @@ static void snprintf_int_array(char *str, size_t len, static void intel_dp_print_rates(struct intel_dp *intel_dp) { - const int *source_rates, *sink_rates; - int source_len, sink_len, common_len; - int common_rates[DP_MAX_SUPPORTED_RATES]; char str[128]; /* FIXME: too big for stack? */ if ((drm_debug & DRM_UT_KMS) == 0) return; - source_len = intel_dp_source_rates(intel_dp, &source_rates); - snprintf_int_array(str, sizeof(str), source_rates, source_len); + snprintf_int_array(str, sizeof(str), + intel_dp->source_rates, intel_dp->num_source_rates); DRM_DEBUG_KMS("source rates: %s\n", str); - sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); - snprintf_int_array(str, sizeof(str), sink_rates, sink_len); + snprintf_int_array(str, sizeof(str), + intel_dp->sink_rates, intel_dp->num_sink_rates); DRM_DEBUG_KMS("sink rates: %s\n", str); - common_len = intel_dp_common_rates(intel_dp, common_rates); - snprintf_int_array(str, sizeof(str), common_rates, common_len); + snprintf_int_array(str, sizeof(str), + intel_dp->common_rates, intel_dp->num_common_rates); DRM_DEBUG_KMS("common rates: %s\n", str); } @@ -1538,39 +1579,34 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp) return true; } -static int rate_to_index(int find, const int *rates) -{ - int i = 0; - - for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i) - if (find == rates[i]) - break; - - return i; -} - int intel_dp_max_link_rate(struct intel_dp *intel_dp) { - int rates[DP_MAX_SUPPORTED_RATES] = {}; int len; - len = intel_dp_common_rates(intel_dp, rates); + len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate); if (WARN_ON(len <= 0)) return 162000; - return rates[len - 1]; + return intel_dp->common_rates[len - 1]; } int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) { - return rate_to_index(rate, intel_dp->sink_rates); + int i = intel_dp_rate_index(intel_dp->sink_rates, + intel_dp->num_sink_rates, rate); + + if (WARN_ON(i < 0)) + i = 0; + + return i; } void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, uint8_t *link_bw, uint8_t *rate_select) { - if (intel_dp->num_sink_rates) { + /* eDP 1.4 rate select method. */ + if (intel_dp->use_rate_select) { *link_bw = 0; *rate_select = intel_dp_rate_select(intel_dp, port_clock); @@ -1618,14 +1654,13 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; - int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; - int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int common_len; uint8_t link_bw, rate_select; - common_len = intel_dp_common_rates(intel_dp, common_rates); + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_link_rate); /* No common link rates between source and sink */ WARN_ON(common_len <= 0); @@ -1662,16 +1697,18 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Use values requested by Compliance Test Request */ if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - link_rate_index = intel_dp_link_rate_index(intel_dp, - common_rates, - intel_dp->compliance.test_link_rate); - if (link_rate_index >= 0) - min_clock = max_clock = link_rate_index; + int index; + + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); + if (index >= 0) + min_clock = max_clock = index; min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", - max_lane_count, common_rates[max_clock], + max_lane_count, intel_dp->common_rates[max_clock], adjusted_mode->crtc_clock); /* Walk through all bpp values. Luckily they're all nicely spaced with 2 @@ -1707,7 +1744,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, lane_count <= max_lane_count; lane_count <<= 1) { - link_clock = common_rates[clock]; + link_clock = intel_dp->common_rates[clock]; link_avail = intel_dp_max_data_rate(link_clock, lane_count); @@ -1739,7 +1776,7 @@ found: pipe_config->lane_count = lane_count; pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = common_rates[clock]; + pipe_config->port_clock = intel_dp->common_rates[clock]; intel_dp_compute_rate(intel_dp, pipe_config->port_clock, &link_bw, &rate_select); @@ -3642,9 +3679,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) uint8_t frame_sync_cap; dev_priv->psr.sink_support = true; - drm_dp_dpcd_read(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap, 1); + drm_dp_dpcd_readb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap); dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; /* PSR2 needs frame sync as well */ dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; @@ -3695,6 +3732,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } + if (intel_dp->num_sink_rates) + intel_dp->use_rate_select = true; + else + intel_dp_set_sink_rates(intel_dp); + + intel_dp_set_common_rates(intel_dp); + return true; } @@ -3702,11 +3746,18 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) static bool intel_dp_get_dpcd(struct intel_dp *intel_dp) { + u8 sink_count; + if (!intel_dp_read_dpcd(intel_dp)) return false; - if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, - &intel_dp->sink_count, 1) < 0) + /* Don't clobber cached eDP rates. */ + if (!is_edp(intel_dp)) { + intel_dp_set_sink_rates(intel_dp); + intel_dp_set_common_rates(intel_dp); + } + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &sink_count) <= 0) return false; /* @@ -3714,7 +3765,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * a member variable in intel_dp will track any changes * between short pulse interrupts. */ - intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + intel_dp->sink_count = DP_GET_SINK_COUNT(sink_count); /* * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that @@ -3743,7 +3794,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) static bool intel_dp_can_mst(struct intel_dp *intel_dp) { - u8 buf[1]; + u8 mstm_cap; if (!i915.enable_dp_mst) return false; @@ -3754,10 +3805,10 @@ intel_dp_can_mst(struct intel_dp *intel_dp) if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) return false; - if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1) + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1) return false; - return buf[0] & DP_MST_CAP; + return mstm_cap & DP_MST_CAP; } static void @@ -3903,9 +3954,8 @@ stop: static bool intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector) { - return drm_dp_dpcd_read(&intel_dp->aux, - DP_DEVICE_SERVICE_IRQ_VECTOR, - sink_irq_vector, 1) == 1; + return drm_dp_dpcd_readb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, + sink_irq_vector) == 1; } static bool @@ -3926,7 +3976,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { int status = 0; int min_lane_count = 1; - int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int link_rate_index, test_link_rate; uint8_t test_lane_count, test_link_bw; /* (DP CTS 1.2) @@ -3943,7 +3992,7 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) test_lane_count &= DP_MAX_LANE_COUNT_MASK; /* Validate the requested lane count */ if (test_lane_count < min_lane_count || - test_lane_count > intel_dp->max_sink_lane_count) + test_lane_count > intel_dp->max_link_lane_count) return DP_TEST_NAK; status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, @@ -3954,9 +4003,9 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) } /* Validate the requested link rate */ test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); - link_rate_index = intel_dp_link_rate_index(intel_dp, - common_rates, - test_link_rate); + link_rate_index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + test_link_rate); if (link_rate_index < 0) return DP_TEST_NAK; @@ -3969,13 +4018,13 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { uint8_t test_pattern; - uint16_t test_misc; + uint8_t test_misc; __be16 h_width, v_height; int status = 0; /* Read the TEST_PATTERN (DP CTS 3.1.5) */ - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, - &test_pattern, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern); if (status <= 0) { DRM_DEBUG_KMS("Test pattern read failed\n"); return DP_TEST_NAK; @@ -3997,8 +4046,8 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) return DP_TEST_NAK; } - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, - &test_misc, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0, + &test_misc); if (status <= 0) { DRM_DEBUG_KMS("TEST MISC read failed\n"); return DP_TEST_NAK; @@ -4057,10 +4106,8 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) */ block += intel_connector->detect_edid->extensions; - if (!drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_EDID_CHECKSUM, - &block->checksum, - 1)) + if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM, + block->checksum) <= 0) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; @@ -4224,9 +4271,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!to_intel_crtc(intel_encoder->base.crtc)->active) return; - /* FIXME: we need to synchronize this sort of stuff with hardware - * readout. Currently fast link training doesn't work on boot-up. */ - if (!intel_dp->lane_count) + /* + * Validate the cached values of intel_dp->link_rate and + * intel_dp->lane_count before attempting to retrain. + */ + if (!intel_dp_link_params_valid(intel_dp)) return; /* Retrain if Channel EQ or CR not ok */ @@ -4613,11 +4662,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(drm_dp_tps3_supported(intel_dp->dpcd))); if (intel_dp->reset_link_params) { - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + /* Initial max link lane count */ + intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + /* Initial max link rate */ + intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp); intel_dp->reset_link_params = false; } @@ -5127,7 +5176,7 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port) return intel_bios_is_port_edp(dev_priv, port); } -void +static void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); @@ -5932,6 +5981,29 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) } } +static void intel_dp_modeset_retry_work_fn(struct work_struct *work) +{ + struct intel_connector *intel_connector; + struct drm_connector *connector; + + intel_connector = container_of(work, typeof(*intel_connector), + modeset_retry_work); + connector = &intel_connector->base; + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, + connector->name); + + /* Grab the locks before changing connector property*/ + mutex_lock(&connector->dev->mode_config.mutex); + /* Set connector link status to BAD and send a Uevent to notify + * userspace to do a modeset. + */ + drm_mode_connector_set_link_status_property(connector, + DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); + /* Send Hotplug uevent so userspace can reprobe */ + drm_kms_helper_hotplug_event(connector->dev); +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5944,11 +6016,17 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, enum port port = intel_dig_port->port; int type; + /* Initialize the work for modeset in case of link train failure */ + INIT_WORK(&intel_connector->modeset_retry_work, + intel_dp_modeset_retry_work_fn); + if (WARN(intel_dig_port->max_lanes < 1, "Not enough lanes (%d) for DP on port %c\n", intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp_set_source_rates(intel_dp); + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 0048b520baf7..b79c1c0e404c 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -146,7 +146,8 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); - if (intel_dp->num_sink_rates) + /* eDP 1.4 rate select method. */ + if (!link_bw) drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, &rate_select, 1); @@ -313,6 +314,24 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp) void intel_dp_start_link_train(struct intel_dp *intel_dp) { - intel_dp_link_training_clock_recovery(intel_dp); - intel_dp_link_training_channel_equalization(intel_dp); + struct intel_connector *intel_connector = intel_dp->attached_connector; + + if (!intel_dp_link_training_clock_recovery(intel_dp)) + goto failure_handling; + if (!intel_dp_link_training_channel_equalization(intel_dp)) + goto failure_handling; + + DRM_DEBUG_KMS("Link Training Passed at Link Rate = %d, Lane count = %d", + intel_dp->link_rate, intel_dp->lane_count); + return; + + failure_handling: + DRM_DEBUG_KMS("Link Training failed at link rate = %d, lane count = %d", + intel_dp->link_rate, intel_dp->lane_count); + if (!intel_dp_get_link_train_fallback_values(intel_dp, + intel_dp->link_rate, + intel_dp->lane_count)) + /* Schedule a Hotplug Uevent to userspace to start modeset */ + schedule_work(&intel_connector->modeset_retry_work); + return; } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c1f62eb07c07..5af22a7c11bf 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -56,7 +56,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. */ - lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + lane_count = intel_dp_max_lane_count(intel_dp); pipe_config->lane_count = lane_count; @@ -294,14 +294,6 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); } -static int -intel_dp_mst_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - return 0; -} - static void intel_dp_mst_connector_destroy(struct drm_connector *connector) { @@ -318,8 +310,7 @@ static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = intel_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_dp_mst_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_dp_mst_connector_destroy, @@ -343,7 +334,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int max_rate, mode_rate, max_lanes, max_link_clock; max_link_clock = intel_dp_max_link_rate(intel_dp); - max_lanes = drm_dp_max_lane_count(intel_dp->dpcd); + max_lanes = intel_dp_max_lane_count(intel_dp); max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); mode_rate = intel_dp_link_required(mode->clock, bpp); @@ -459,7 +450,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_mode_connector_attach_encoder(&intel_connector->base, &intel_dp->mst_encoders[i]->base.base); } - intel_dp_add_properties(intel_dp, connector); drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index aaee3949a422..54f3ff840812 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -88,7 +88,6 @@ int cpu, ret, timeout = (US) * 1000; \ u64 base; \ _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - BUILD_BUG_ON((US) > 50000); \ if (!(ATOMIC)) { \ preempt_disable(); \ cpu = smp_processor_id(); \ @@ -130,8 +129,14 @@ ret__; \ }) -#define wait_for_atomic(COND, MS) _wait_for_atomic((COND), (MS) * 1000, 1) -#define wait_for_atomic_us(COND, US) _wait_for_atomic((COND), (US), 1) +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) @@ -321,6 +326,9 @@ struct intel_connector { void *port; /* store this opaque as its illegal to dereference it */ struct intel_dp *mst_port; + + /* Work struct to schedule a uevent on link train failure */ + struct work_struct modeset_retry_work; }; struct dpll { @@ -949,13 +957,20 @@ struct intel_dp { uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; - /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ - uint8_t num_sink_rates; + /* source rates */ + int num_source_rates; + const int *source_rates; + /* sink rates as reported by DP_MAX_LINK_RATE/DP_SUPPORTED_LINK_RATES */ + int num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; - /* Max lane count for the sink as per DPCD registers */ - uint8_t max_sink_lane_count; - /* Max link BW for the sink as per DPCD registers */ - int max_sink_link_bw; + bool use_rate_select; + /* intersection of source and sink rates */ + int num_common_rates; + int common_rates[DP_MAX_SUPPORTED_RATES]; + /* Max lane count for the current link */ + int max_link_lane_count; + /* Max rate for the current link */ + int max_link_rate; /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; @@ -1492,10 +1507,10 @@ void intel_edp_backlight_off(struct intel_dp *intel_dp); void intel_edp_panel_vdd_on(struct intel_dp *intel_dp); void intel_edp_panel_on(struct intel_dp *intel_dp); void intel_edp_panel_off(struct intel_dp *intel_dp); -void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector); void intel_dp_mst_suspend(struct drm_device *dev); void intel_dp_mst_resume(struct drm_device *dev); int intel_dp_max_link_rate(struct intel_dp *intel_dp); +int intel_dp_max_lane_count(struct intel_dp *intel_dp); int intel_dp_rate_select(struct intel_dp *intel_dp, int rate); void intel_dp_hot_plug(struct intel_encoder *intel_encoder); void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 3ffe8b1f1d48..fc0ef492252a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) val |= (ULPS_STATE_ENTER | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - /* Wait for ULPS Not active */ + /* Wait for ULPS active */ if (intel_wait_for_register(dev_priv, - MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, - GLK_ULPS_NOT_ACTIVE, 20)) - DRM_ERROR("ULPS is still active\n"); + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20)) + DRM_ERROR("ULPS not active\n"); /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 6025839ed3b7..c1544a53095d 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = { .early_unregister = intel_connector_unregister, .destroy = intel_dvo_destroy, .fill_modes = drm_helper_probe_single_connector_modes, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, }; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 854e8e0c836b..6d3d83876da9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,69 +26,177 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" -static const struct engine_info { +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +/* Same as Haswell, but 72064 bytes now. */ +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) + +struct engine_class_info { const char *name; - unsigned int exec_id; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); +}; + +static const struct engine_class_info intel_engine_classes[] = { + [RENDER_CLASS] = { + .name = "rcs", + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + }, + [COPY_ENGINE_CLASS] = { + .name = "bcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + }, + [VIDEO_DECODE_CLASS] = { + .name = "vcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + }, + [VIDEO_ENHANCEMENT_CLASS] = { + .name = "vecs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + }, +}; + +struct engine_info { unsigned int hw_id; + unsigned int uabi_id; + u8 class; + u8 instance; u32 mmio_base; unsigned irq_shift; - int (*init_legacy)(struct intel_engine_cs *engine); - int (*init_execlists)(struct intel_engine_cs *engine); -} intel_engines[] = { +}; + +static const struct engine_info intel_engines[] = { [RCS] = { - .name = "rcs", .hw_id = RCS_HW, - .exec_id = I915_EXEC_RENDER, + .uabi_id = I915_EXEC_RENDER, + .class = RENDER_CLASS, + .instance = 0, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init_execlists = logical_render_ring_init, - .init_legacy = intel_init_render_ring_buffer, }, [BCS] = { - .name = "bcs", .hw_id = BCS_HW, - .exec_id = I915_EXEC_BLT, + .uabi_id = I915_EXEC_BLT, + .class = COPY_ENGINE_CLASS, + .instance = 0, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_blt_ring_buffer, }, [VCS] = { - .name = "vcs", .hw_id = VCS_HW, - .exec_id = I915_EXEC_BSD, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 0, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { - .name = "vcs2", .hw_id = VCS2_HW, - .exec_id = I915_EXEC_BSD, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 1, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd2_ring_buffer, }, [VECS] = { - .name = "vecs", .hw_id = VECS_HW, - .exec_id = I915_EXEC_VEBOX, + .uabi_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 0, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_vebox_ring_buffer, }, }; +/** + * ___intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. + * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC. + */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +{ + u32 cxt_size; + + BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); + + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return i915.enable_execlists ? + GEN8_LR_CONTEXT_RENDER_SIZE : + GEN8_CXT_TOTAL_SIZE; + case 7: + if (IS_HASWELL(dev_priv)) + return HSW_CXT_TOTAL_SIZE; + + cxt_size = I915_READ(GEN7_CXT_SIZE); + return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 6: + cxt_size = I915_READ(CXT_SIZE); + return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 5: + case 4: + case 3: + case 2: + /* For the special day when i810 gets merged. */ + case 1: + return 0; + } + break; + default: + MISSING_CASE(class); + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + if (INTEL_GEN(dev_priv) < 8) + return 0; + return GEN8_LR_CONTEXT_OTHER_SIZE; + } +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; + const struct engine_class_info *class_info; struct intel_engine_cs *engine; + GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); + class_info = &intel_engine_classes[info->class]; + GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) @@ -96,11 +204,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; - engine->name = info->name; - engine->exec_id = info->exec_id; + WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", + class_info->name, info->instance) >= + sizeof(engine->name)); + engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; + engine->class = info->class; + engine->instance = info->instance; + + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; @@ -112,18 +229,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init_early() - allocate the Engine Command Streamers + * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init_early(struct drm_i915_private *dev_priv) +int intel_engines_init_mmio(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; - unsigned int mask = 0; + const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned int mask = 0; unsigned int i; int err; @@ -150,6 +267,12 @@ int intel_engines_init_early(struct drm_i915_private *dev_priv) if (WARN_ON(mask != ring_mask)) device_info->ring_mask = mask; + /* We always presume we have at least RCS available for later probing */ + if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) { + err = -ENODEV; + goto cleanup; + } + device_info->num_rings = hweight32(mask); return 0; @@ -161,7 +284,7 @@ cleanup: } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init() - init the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. @@ -175,12 +298,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv) int err = 0; for_each_engine(engine, dev_priv, id) { + const struct engine_class_info *class_info = + &intel_engine_classes[engine->class]; int (*init)(struct intel_engine_cs *engine); if (i915.enable_execlists) - init = intel_engines[id].init_execlists; + init = class_info->init_execlists; else - init = intel_engines[id].init_legacy; + init = class_info->init_legacy; if (!init) { kfree(engine); dev_priv->engine[id] = NULL; @@ -223,6 +348,9 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(!intel_engine_is_idle(engine)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + /* Our semaphore implementation is strictly monotonic (i.e. we proceed * so long as the semaphore value in the register/page is greater * than the sync value), so whenever we reset the seqno, @@ -253,13 +381,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->hangcheck.seqno = seqno; - /* After manually advancing the seqno, fake the interrupt in case * there are any waiters for that seqno. */ intel_engine_wakeup(engine); + + GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } static void intel_engine_init_timeline(struct intel_engine_cs *engine) @@ -1086,11 +1213,18 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + /* More white lies, if wedged, hw state is inconsistent */ + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return true; + /* Any inflight/incomplete requests? */ if (!i915_seqno_passed(intel_engine_get_seqno(engine), intel_engine_last_submit(engine))) return false; + if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) + return true; + /* Interrupt/tasklet pending? */ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index cb36cbf3818f..6156845641a3 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,8 +23,8 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GFXCORE_FAMILY_GEN9 12 -#define GFXCORE_FAMILY_UNKNOWN 0x7fffffff +#define GUC_CORE_FAMILY_GEN9 12 +#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 8a1a023e48b2..d9045b6e897b 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -61,6 +61,9 @@ #define KBL_FW_MAJOR 9 #define KBL_FW_MINOR 14 +#define GLK_FW_MAJOR 10 +#define GLK_FW_MINOR 56 + #define GUC_FW_PATH(platform, major, minor) \ "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" @@ -73,6 +76,8 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); +#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) + static u32 get_gttype(struct drm_i915_private *dev_priv) { @@ -86,11 +91,11 @@ static u32 get_core_family(struct drm_i915_private *dev_priv) switch (gen) { case 9: - return GFXCORE_FAMILY_GEN9; + return GUC_CORE_FAMILY_GEN9; default: - WARN(1, "GEN%d does not support GuC operation!\n", gen); - return GFXCORE_FAMILY_UNKNOWN; + MISSING_CASE(gen); + return GUC_CORE_FAMILY_UNKNOWN; } } @@ -280,10 +285,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE); - /* Enable MIA caching. GuC clock gating is disabled. */ I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); @@ -405,6 +406,10 @@ int intel_guc_select_fw(struct intel_guc *guc) guc->fw.path = I915_KBL_GUC_UCODE; guc->fw.major_ver_wanted = KBL_FW_MAJOR; guc->fw.minor_ver_wanted = KBL_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + guc->fw.path = I915_GLK_GUC_UCODE; + guc->fw.major_ver_wanted = GLK_FW_MAJOR; + guc->fw.minor_ver_wanted = GLK_FW_MINOR; } else { DRM_ERROR("No GuC firmware known for platform with GuC!\n"); return -ENOENT; diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 6fb63a3c65b0..16d3b8719cab 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc) void *vaddr; struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; - int ret = 0; + int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); GEM_BUG_ON(guc_log_has_runtime(guc)); + ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true); + if (ret) + return ret; + /* Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data * (up-to-date) from memory. diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index dce742243ba6..9b0ece427bdc 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -407,7 +407,7 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, msg); + return i915_handle_error(i915, hung, "%s", msg); } /* diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1d623b5e09d6..52f0b2d5fad2 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1327,6 +1327,11 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) return false; } + /* Display Wa #1139 */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && + crtc_state->base.adjusted_mode.htotal > 5460) + return false; + return true; } @@ -1392,7 +1397,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, } if (!pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing pipe bpc to %i for HDMI\n", desired_bpp); + DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp); pipe_config->pipe_bpp = desired_bpp; } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 9ee819666a4c..88b4cf3f764a 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -52,6 +52,10 @@ #define KBL_HUC_FW_MINOR 00 #define KBL_BLD_NUM 1810 +#define GLK_HUC_FW_MAJOR 01 +#define GLK_HUC_FW_MINOR 07 +#define GLK_BLD_NUM 1748 + #define HUC_FW_PATH(platform, major, minor, bld_num) \ "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ __stringify(minor) "_" __stringify(bld_num) ".bin" @@ -68,6 +72,9 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE); KBL_HUC_FW_MINOR, KBL_BLD_NUM) MODULE_FIRMWARE(I915_KBL_HUC_UCODE); +#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ + GLK_HUC_FW_MINOR, GLK_BLD_NUM) + /** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device @@ -99,11 +106,6 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE | - HUC_LOADING_AGENT_GUC); - /* Set the source address for the uCode */ offset = guc_ggtt_offset(vma) + huc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); @@ -169,6 +171,10 @@ void intel_huc_select_fw(struct intel_huc *huc) huc->fw.path = I915_KBL_HUC_UCODE; huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + huc->fw.path = I915_GLK_HUC_UCODE; + huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; } else { DRM_ERROR("No HuC firmware known for platform with HuC!\n"); return; @@ -186,68 +192,36 @@ void intel_huc_select_fw(struct intel_huc *huc) * earlier call to intel_huc_init(), so here we need only check that * is succeeded, and then transfer the image to the h/w. * - * Return: non-zero code on error */ -int intel_huc_init_hw(struct intel_huc *huc) +void intel_huc_init_hw(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); int err; - if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE) - return 0; - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", huc->fw.path, intel_uc_fw_status_repr(huc->fw.fetch_status), intel_uc_fw_status_repr(huc->fw.load_status)); - if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS && - huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL) - return -ENOEXEC; + if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return; huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - switch (huc->fw.fetch_status) { - case INTEL_UC_FIRMWARE_FAIL: - /* something went wrong :( */ - err = -EIO; - goto fail; - - case INTEL_UC_FIRMWARE_NONE: - case INTEL_UC_FIRMWARE_PENDING: - default: - /* "can't happen" */ - WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - huc->fw.fetch_status); - err = -ENXIO; - goto fail; - - case INTEL_UC_FIRMWARE_SUCCESS: - break; - } - err = huc_ucode_xfer(dev_priv); - if (err) - goto fail; - huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; + huc->fw.load_status = err ? + INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", huc->fw.path, intel_uc_fw_status_repr(huc->fw.fetch_status), intel_uc_fw_status_repr(huc->fw.load_status)); - return 0; - -fail: - if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING) - huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; - - DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); + if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); - return err; + return; } /** diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 25d8e76489e4..3bf65288ffff 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -63,6 +63,7 @@ #include <linux/acpi.h> #include <linux/device.h> #include <linux/pci.h> +#include <linux/pm_runtime.h> #include "i915_drv.h" #include <linux/delay.h> @@ -110,6 +111,11 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); + pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes; + pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */ + pdata->port[0].pipe = -1; + pdata->port[1].pipe = -1; + pdata->port[2].pipe = -1; spin_lock_init(&pdata->lpe_audio_slock); platdev = platform_device_register_full(&pinfo); @@ -121,6 +127,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) kfree(rsc); + pm_runtime_forbid(&platdev->dev); + pm_runtime_set_active(&platdev->dev); + pm_runtime_enable(&platdev->dev); + return platdev; err: @@ -144,44 +154,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) static void lpe_audio_irq_unmask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask &= ~val; - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - POSTING_READ(VLV_IMR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static void lpe_audio_irq_mask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask |= val; - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - POSTING_READ(VLV_IIR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static struct irq_chip lpe_audio_irqchip = { @@ -325,8 +301,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_irq_mask(&desc->irq_data); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); @@ -337,53 +311,47 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) * intel_lpe_audio_notify() - notify lpe audio event * audio driver and i915 * @dev_priv: the i915 drm device private data + * @pipe: pipe + * @port: port * @eld : ELD data - * @pipe: pipe id - * @port: port id - * @tmds_clk_speed: tmds clock frequency in Hz + * @ls_clock: Link symbol clock in kHz + * @dp_output: Driving a DP output? * * Notify lpe audio driver of eld change. */ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int tmds_clk_speed, - bool dp_output, int link_rate) + enum pipe pipe, enum port port, + const void *eld, int ls_clock, bool dp_output) { - unsigned long irq_flags; - struct intel_hdmi_lpe_audio_pdata *pdata = NULL; + unsigned long irqflags; + struct intel_hdmi_lpe_audio_pdata *pdata; + struct intel_hdmi_lpe_audio_port_pdata *ppdata; u32 audio_enable; if (!HAS_LPE_AUDIO(dev_priv)) return; - pdata = dev_get_platdata( - &(dev_priv->lpe_audio.platdev->dev)); + pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev); + ppdata = &pdata->port[port - PORT_B]; - spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags); + spin_lock_irqsave(&pdata->lpe_audio_slock, irqflags); audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port)); if (eld != NULL) { - memcpy(pdata->eld.eld_data, eld, - HDMI_MAX_ELD_BYTES); - pdata->eld.port_id = port; - pdata->eld.pipe_id = pipe; - pdata->hdmi_connected = true; - - pdata->dp_output = dp_output; - if (tmds_clk_speed) - pdata->tmds_clock_speed = tmds_clk_speed; - if (link_rate) - pdata->link_rate = link_rate; + memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES); + ppdata->pipe = pipe; + ppdata->ls_clock = ls_clock; + ppdata->dp_output = dp_output; /* Unmute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), audio_enable & ~VLV_AMP_MUTE); - } else { - memset(pdata->eld.eld_data, 0, - HDMI_MAX_ELD_BYTES); - pdata->hdmi_connected = false; - pdata->dp_output = false; + memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES); + ppdata->pipe = -1; + ppdata->ls_clock = 0; + ppdata->dp_output = false; /* Mute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), @@ -391,10 +359,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, } if (pdata->notify_audio_lpe) - pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev); - else - pdata->notify_pending = true; + pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev, port - PORT_B); - spin_unlock_irqrestore(&pdata->lpe_audio_slock, - irq_flags); + spin_unlock_irqrestore(&pdata->lpe_audio_slock, irqflags); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8f7c631fc1f..0909549ad320 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -138,10 +138,6 @@ #include "i915_drv.h" #include "intel_mocs.h" -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) @@ -326,8 +322,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; - assert_ring_tail_valid(rq->ring, rq->tail); - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -515,6 +510,15 @@ static void intel_lrc_irq_handler(unsigned long data) struct execlist_port *port = engine->execlist_port; struct drm_i915_private *dev_priv = engine->i915; + /* We can skip acquiring intel_runtime_pm_get() here as it was taken + * on our behalf by the request (see i915_gem_mark_busy()) and it will + * not be relinquished until the device is idle (see + * i915_gem_idle_work_handler()). As a precaution, we make sure + * that all ELSP are drained i.e. we have processed the CSB, + * before allowing ourselves to idle and calling intel_runtime_pm_put(). + */ + GEM_BUG_ON(!dev_priv->gt.awake); + intel_uncore_forcewake_get(dev_priv, engine->fw_domains); /* Prefer doing test_and_clear_bit() as a two stage operation to avoid @@ -771,7 +775,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine, goto unpin_vma; } - ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias); + ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias); if (ret) goto unpin_map; @@ -1139,14 +1143,11 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u32 port_seqno(struct execlist_port *port) -{ - return port->request ? port->request->global_seqno : 0; -} - static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + unsigned int n; int ret; ret = intel_mocs_init_engine(engine); @@ -1167,16 +1168,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { - DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", - engine->name, - port_seqno(&engine->execlist_port[0]), - port_seqno(&engine->execlist_port[1])); - engine->execlist_port[0].count = 0; - engine->execlist_port[1].count = 0; - execlists_submit_ports(engine); + + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + if (!port[n].request) + break; + + DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n", + engine->name, n, + port[n].request->global_seqno); + + /* Discard the current inflight count */ + port[n].count = 0; } + if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) + execlists_submit_ports(engine); + return 0; } @@ -1907,44 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, return 0; } -/** - * intel_lr_context_size() - return the size of the context for an engine - * @engine: which engine to find the context size for - * - * Each engine may require a different amount of space for a context image, - * so when allocating (or copying) an image, this function can be used to - * find the right size for the specific engine. - * - * Return: size (in bytes) of an engine-specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. - */ -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) -{ - int ret = 0; - - WARN_ON(INTEL_GEN(engine->i915) < 8); - - switch (engine->id) { - case RCS: - if (INTEL_GEN(engine->i915) >= 9) - ret = GEN9_LR_CONTEXT_RENDER_SIZE; - else - ret = GEN8_LR_CONTEXT_RENDER_SIZE; - break; - case VCS: - case BCS: - case VECS: - case VCS2: - ret = GEN8_LR_CONTEXT_OTHER_SIZE; - break; - } - - return ret; -} - static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1957,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), - I915_GTT_PAGE_SIZE); + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; @@ -2036,8 +2004,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = ce->ring->tail = 0; - intel_ring_update_space(ce->ring); + intel_ring_reset(ce->ring, 0); } } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index e8015e7bf4e9..52b3a1fd4059 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_i915_private; struct i915_gem_context; -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); - void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 206ee4f0150e..647426c75b0a 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -513,16 +513,20 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); struct intel_crtc_state *pipe_config; struct drm_atomic_state *state; + struct drm_modeset_acquire_ctx ctx; int ret = 0; - drm_modeset_lock_all(dev); + drm_modeset_acquire_init(&ctx, 0); + state = drm_atomic_state_alloc(dev); if (!state) { ret = -ENOMEM; goto unlock; } - state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx; + state->acquire_ctx = &ctx; + +retry: pipe_config = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(pipe_config)) { ret = PTR_ERR(pipe_config); @@ -537,10 +541,17 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, ret = drm_atomic_commit(state); put_state: + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + drm_modeset_backoff(&ctx); + goto retry; + } + drm_atomic_state_put(state); unlock: WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret); - drm_modeset_unlock_all(dev); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 570bd603f401..cacb65fa2dd5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1029,7 +1029,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!plane_state->base.visible) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; cpp = plane_state->base.fb->format->cpp[0]; @@ -1039,7 +1039,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, if (WARN_ON(htotal == 0)) htotal = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { + if (plane->id == PLANE_CURSOR) { /* * FIXME the formula gives values that are * too big for the cursor FIFO, and hence we @@ -1203,7 +1203,7 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, int level; bool dirty = false; - if (!plane_state->base.visible) { + if (!intel_wm_plane_visible(crtc_state, plane_state)) { dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); goto out; } @@ -8135,9 +8135,9 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val I915_WRITE_FW(GEN6_PCODE_DATA1, 0); I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (intel_wait_for_register_fw(dev_priv, - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + if (__intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500, 0, NULL)) { DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); return -ETIMEDOUT; } @@ -8180,9 +8180,9 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN6_PCODE_DATA1, 0); I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (intel_wait_for_register_fw(dev_priv, - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + if (__intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500, 0, NULL)) { DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); return -ETIMEDOUT; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 66a2b8b83972..29b5afac7856 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size) void intel_ring_update_space(struct intel_ring *ring) { - ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); + ring->space = __intel_ring_space(ring->head, ring->emit, ring->size); } static int @@ -538,9 +538,9 @@ static int init_ring_common(struct intel_engine_cs *engine) I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID); /* If the head is still not zero, the ring is dead */ - if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base), - RING_VALID, RING_VALID, - 50)) { + if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 50)) { DRM_ERROR("%s initialization failed " "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", engine->name, @@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); - assert_ring_tail_valid(request->ring, request->tail); - I915_WRITE_TAIL(request->engine, request->tail); + I915_WRITE_TAIL(request->engine, + intel_ring_set_tail(request->ring, request->tail)); } static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) @@ -1259,6 +1259,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(engine->id != RCS); + dev_priv->status_page_dmah = drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) @@ -1270,17 +1272,18 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias) +int intel_ring_pin(struct intel_ring *ring, + struct drm_i915_private *i915, + unsigned int offset_bias) { - unsigned int flags; - enum i915_map_type map; + enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; struct i915_vma *vma = ring->vma; + unsigned int flags; void *addr; int ret; GEM_BUG_ON(ring->vaddr); - map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; flags = PIN_GLOBAL; if (offset_bias) @@ -1316,11 +1319,23 @@ err: return PTR_ERR(addr); } +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + GEM_BUG_ON(!list_empty(&ring->request_list)); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + void intel_ring_unpin(struct intel_ring *ring) { GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->tail); + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else @@ -1338,7 +1353,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) obj = i915_gem_object_create_stolen(dev_priv, size); if (!obj) - obj = i915_gem_object_create(dev_priv, size); + obj = i915_gem_object_create_internal(dev_priv, size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1369,8 +1384,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) if (!ring) return ERR_PTR(-ENOMEM); - ring->engine = engine; - INIT_LIST_HEAD(&ring->request_list); ring->size = size; @@ -1424,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx) PIN_GLOBAL | PIN_HIGH); } +static struct i915_vma * +alloc_context_vma(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create(i915, engine->context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Try to make the context utilize L3 as well as LLC. + * + * On VLV we don't have L3 controls in the PTEs so we + * shouldn't touch the cache level, especially as that + * would make the object snooped which might have a + * negative performance impact. + * + * Snooping is required on non-llc platforms in execlist + * mode, but since all GGTT accesses use PAT entry 0 we + * get snooping anyway regardless of cache_level. + * + * This is only applicable for Ivy Bridge devices since + * later platforms don't have L3 control bits in the PTE. + */ + if (IS_IVYBRIDGE(i915)) { + /* Ignore any error, regard it as a simple optimisation */ + i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); + } + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + static int intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { @@ -1436,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ + if (!ce->state && engine->context_size) { + struct i915_vma *vma; + + vma = alloc_context_vma(engine); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error; + } + + ce->state = vma; + } + if (ce->state) { ret = context_pin(ctx); if (ret) @@ -1481,78 +1544,70 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring; - int ret; - - WARN_ON(engine->buffer); + int err; intel_engine_setup_common(engine); - ret = intel_engine_init_common(engine); - if (ret) - goto error; + err = intel_engine_init_common(engine); + if (err) + goto err; + + if (HWS_NEEDS_PHYSICAL(engine->i915)) + err = init_phys_status_page(engine); + else + err = init_status_page(engine); + if (err) + goto err; ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error; - } - - if (HWS_NEEDS_PHYSICAL(dev_priv)) { - WARN_ON(engine->id != RCS); - ret = init_phys_status_page(engine); - if (ret) - goto error; - } else { - ret = init_status_page(engine); - if (ret) - goto error; + err = PTR_ERR(ring); + goto err_hws; } /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); - if (ret) { - intel_ring_free(ring); - goto error; - } + err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE); + if (err) + goto err_ring; + + GEM_BUG_ON(engine->buffer); engine->buffer = ring; return 0; -error: - intel_engine_cleanup(engine); - return ret; +err_ring: + intel_ring_free(ring); +err_hws: + if (HWS_NEEDS_PHYSICAL(engine->i915)) + cleanup_phys_status_page(engine); + else + cleanup_status_page(engine); +err: + intel_engine_cleanup_common(engine); + return err; } void intel_engine_cleanup(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv; + struct drm_i915_private *dev_priv = engine->i915; - dev_priv = engine->i915; + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (I915_READ_MODE(engine) & MODE_IDLE) == 0); - if (engine->buffer) { - WARN_ON(INTEL_GEN(dev_priv) > 2 && - (I915_READ_MODE(engine) & MODE_IDLE) == 0); - - intel_ring_unpin(engine->buffer); - intel_ring_free(engine->buffer); - engine->buffer = NULL; - } + intel_ring_unpin(engine->buffer); + intel_ring_free(engine->buffer); if (engine->cleanup) engine->cleanup(engine); - if (HWS_NEEDS_PHYSICAL(dev_priv)) { - WARN_ON(engine->id != RCS); + if (HWS_NEEDS_PHYSICAL(dev_priv)) cleanup_phys_status_page(engine); - } else { + else cleanup_status_page(engine); - } intel_engine_cleanup_common(engine); - engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); } @@ -1562,8 +1617,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + /* Restart from the beginning of the rings for convenience */ for_each_engine(engine, dev_priv, id) - engine->buffer->head = engine->buffer->tail; + intel_ring_reset(engine->buffer, 0); } static int ring_request_alloc(struct drm_i915_gem_request *request) @@ -1616,7 +1672,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) unsigned space; /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ring->tail, + space = __intel_ring_space(target->postfix, ring->emit, ring->size); if (space >= bytes) break; @@ -1641,8 +1697,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; - int remain_actual = ring->size - ring->tail; - int remain_usable = ring->effective_size - ring->tail; + int remain_actual = ring->size - ring->emit; + int remain_usable = ring->effective_size - ring->emit; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -1678,17 +1734,18 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (unlikely(need_wrap)) { GEM_BUG_ON(remain_actual > ring->space); - GEM_BUG_ON(ring->tail + remain_actual > ring->size); + GEM_BUG_ON(ring->emit + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->tail, 0, remain_actual); - ring->tail = 0; + memset(ring->vaddr + ring->emit, 0, remain_actual); + ring->emit = 0; ring->space -= remain_actual; } - GEM_BUG_ON(ring->tail > ring->size - bytes); - cs = ring->vaddr + ring->tail; - ring->tail += bytes; + GEM_BUG_ON(ring->emit > ring->size - bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes)); + ring->emit += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); @@ -1699,7 +1756,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { int num_dwords = - (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); u32 *cs; if (num_dwords == 0) @@ -1736,11 +1793,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); /* Wait for the ring not to be idle, i.e. for it to wake up. */ - if (intel_wait_for_register_fw(dev_priv, - GEN6_BSD_SLEEP_PSMI_CONTROL, - GEN6_BSD_SLEEP_INDICATOR, - 0, - 50)) + if (__intel_wait_for_register_fw(dev_priv, + GEN6_BSD_SLEEP_PSMI_CONTROL, + GEN6_BSD_SLEEP_INDICATOR, + 0, + 1000, 0, NULL)) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ @@ -2182,20 +2239,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) return intel_init_ring_buffer(engine); } -/** - * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) - */ -int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - intel_ring_default_vfuncs(dev_priv, engine); - - engine->emit_flush = gen6_bsd_ring_flush; - - return intel_init_ring_buffer(engine); -} - int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a82a0807f64d..02d741ef99ad 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -139,12 +139,11 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct intel_engine_cs *engine; - struct list_head request_list; u32 head; u32 tail; + u32 emit; int space; int size; @@ -189,15 +188,22 @@ enum intel_engine_id { VECS }; +#define INTEL_ENGINE_CS_MAX_NAME 8 + struct intel_engine_cs { struct drm_i915_private *i915; - const char *name; + char name[INTEL_ENGINE_CS_MAX_NAME]; enum intel_engine_id id; - unsigned int exec_id; + unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; - u32 mmio_base; + + u8 class; + u8 instance; + u32 context_size; + u32 mmio_base; unsigned int irq_shift; + struct intel_ring *buffer; struct intel_timeline *timeline; @@ -487,7 +493,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); +int intel_ring_pin(struct intel_ring *ring, + struct drm_i915_private *i915, + unsigned int offset_bias); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +void intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); @@ -511,7 +521,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); + GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); } static inline u32 @@ -540,7 +550,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) GEM_BUG_ON(tail >= ring->size); } -void intel_ring_update_space(struct intel_ring *ring); +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_gem_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); @@ -551,7 +573,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); -int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); @@ -652,7 +673,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_enable_signaling(struct drm_i915_gem_request *request, + bool wakeup); void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index e077c2a9e694..784df024e230 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -48,41 +48,6 @@ struct intel_tv { struct intel_encoder base; int type; - const char *tv_format; - int margin[4]; - u32 save_TV_H_CTL_1; - u32 save_TV_H_CTL_2; - u32 save_TV_H_CTL_3; - u32 save_TV_V_CTL_1; - u32 save_TV_V_CTL_2; - u32 save_TV_V_CTL_3; - u32 save_TV_V_CTL_4; - u32 save_TV_V_CTL_5; - u32 save_TV_V_CTL_6; - u32 save_TV_V_CTL_7; - u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3; - - u32 save_TV_CSC_Y; - u32 save_TV_CSC_Y2; - u32 save_TV_CSC_U; - u32 save_TV_CSC_U2; - u32 save_TV_CSC_V; - u32 save_TV_CSC_V2; - u32 save_TV_CLR_KNOBS; - u32 save_TV_CLR_LEVEL; - u32 save_TV_WIN_POS; - u32 save_TV_WIN_SIZE; - u32 save_TV_FILTER_CTL_1; - u32 save_TV_FILTER_CTL_2; - u32 save_TV_FILTER_CTL_3; - - u32 save_TV_H_LUMA[60]; - u32 save_TV_H_CHROMA[60]; - u32 save_TV_V_LUMA[43]; - u32 save_TV_V_CHROMA[43]; - - u32 save_TV_DAC; - u32 save_TV_CTL; }; struct video_levels { @@ -873,32 +838,18 @@ intel_disable_tv(struct intel_encoder *encoder, I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE); } -static const struct tv_mode * -intel_tv_mode_lookup(const char *tv_format) +static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state) { - int i; - - for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { - const struct tv_mode *tv_mode = &tv_modes[i]; + int format = conn_state->tv.mode; - if (!strcmp(tv_format, tv_mode->name)) - return tv_mode; - } - return NULL; -} - -static const struct tv_mode * -intel_tv_mode_find(struct intel_tv *intel_tv) -{ - return intel_tv_mode_lookup(intel_tv->tv_format); + return &tv_modes[format]; } static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; if (mode->clock > max_dotclk) @@ -925,8 +876,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_tv *intel_tv = enc_to_tv(encoder); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); if (!tv_mode) return false; @@ -1032,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); u32 tv_ctl; u32 scctl1, scctl2, scctl3; int i, j; @@ -1135,12 +1085,12 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, else ysize = 2*tv_mode->nbr_end + 1; - xpos += intel_tv->margin[TV_MARGIN_LEFT]; - ypos += intel_tv->margin[TV_MARGIN_TOP]; - xsize -= (intel_tv->margin[TV_MARGIN_LEFT] + - intel_tv->margin[TV_MARGIN_RIGHT]); - ysize -= (intel_tv->margin[TV_MARGIN_TOP] + - intel_tv->margin[TV_MARGIN_BOTTOM]); + xpos += conn_state->tv.margins.left; + ypos += conn_state->tv.margins.top; + xsize -= (conn_state->tv.margins.left + + conn_state->tv.margins.right); + ysize -= (conn_state->tv.margins.top + + conn_state->tv.margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1288,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, static void intel_tv_find_better_format(struct drm_connector *connector) { struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == @@ -1304,9 +1254,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - intel_tv->tv_format = tv_mode->name; - drm_object_property_set_value(&connector->base, - connector->dev->mode_config.tv_mode_property, i); + connector->state->tv.mode = i; } /** @@ -1347,16 +1295,15 @@ intel_tv_detect(struct drm_connector *connector, connector_status_connected; } else status = connector_status_unknown; - } else - return connector->status; - if (status != connector_status_connected) - return status; - - intel_tv->type = type; - intel_tv_find_better_format(connector); + if (status == connector_status_connected) { + intel_tv->type = type; + intel_tv_find_better_format(connector); + } - return connector_status_connected; + return status; + } else + return connector->status; } static const struct input_res { @@ -1376,12 +1323,9 @@ static const struct input_res { * Chose preferred mode according to line number of TV format */ static void -intel_tv_chose_preferred_modes(struct drm_connector *connector, +intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, struct drm_display_mode *mode_ptr) { - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; else if (tv_mode->nbr_end > 480) { @@ -1404,8 +1348,7 @@ static int intel_tv_get_modes(struct drm_connector *connector) { struct drm_display_mode *mode_ptr; - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int j, count = 0; u64 tmp; @@ -1448,7 +1391,7 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->clock = (int) tmp; mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_chose_preferred_modes(connector, mode_ptr); + intel_tv_choose_preferred_modes(tv_mode, mode_ptr); drm_mode_probed_add(connector, mode_ptr); count++; } @@ -1463,74 +1406,47 @@ intel_tv_destroy(struct drm_connector *connector) kfree(connector); } - -static int -intel_tv_set_property(struct drm_connector *connector, struct drm_property *property, - uint64_t val) -{ - struct drm_device *dev = connector->dev; - struct intel_tv *intel_tv = intel_attached_tv(connector); - struct drm_crtc *crtc = intel_tv->base.base.crtc; - int ret = 0; - bool changed = false; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret < 0) - goto out; - - if (property == dev->mode_config.tv_left_margin_property && - intel_tv->margin[TV_MARGIN_LEFT] != val) { - intel_tv->margin[TV_MARGIN_LEFT] = val; - changed = true; - } else if (property == dev->mode_config.tv_right_margin_property && - intel_tv->margin[TV_MARGIN_RIGHT] != val) { - intel_tv->margin[TV_MARGIN_RIGHT] = val; - changed = true; - } else if (property == dev->mode_config.tv_top_margin_property && - intel_tv->margin[TV_MARGIN_TOP] != val) { - intel_tv->margin[TV_MARGIN_TOP] = val; - changed = true; - } else if (property == dev->mode_config.tv_bottom_margin_property && - intel_tv->margin[TV_MARGIN_BOTTOM] != val) { - intel_tv->margin[TV_MARGIN_BOTTOM] = val; - changed = true; - } else if (property == dev->mode_config.tv_mode_property) { - if (val >= ARRAY_SIZE(tv_modes)) { - ret = -EINVAL; - goto out; - } - if (!strcmp(intel_tv->tv_format, tv_modes[val].name)) - goto out; - - intel_tv->tv_format = tv_modes[val].name; - changed = true; - } else { - ret = -EINVAL; - goto out; - } - - if (changed && crtc) - intel_crtc_restore_mode(crtc); -out: - return ret; -} - static const struct drm_connector_funcs intel_tv_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_tv_destroy, - .set_property = intel_tv_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, }; +static int intel_tv_atomic_check(struct drm_connector *connector, + struct drm_connector_state *new_state) +{ + struct drm_crtc_state *new_crtc_state; + struct drm_connector_state *old_state; + + if (!new_state->crtc) + return 0; + + old_state = drm_atomic_get_old_connector_state(new_state->state, connector); + new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc); + + if (old_state->tv.mode != new_state->tv.mode || + old_state->tv.margins.left != new_state->tv.margins.left || + old_state->tv.margins.right != new_state->tv.margins.right || + old_state->tv.margins.top != new_state->tv.margins.top || + old_state->tv.margins.bottom != new_state->tv.margins.bottom) { + /* Force a modeset. */ + + new_crtc_state->connectors_changed = true; + } + + return 0; +} + static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = { .detect_ctx = intel_tv_detect, .mode_valid = intel_tv_mode_valid, .get_modes = intel_tv_get_modes, + .atomic_check = intel_tv_atomic_check, }; static const struct drm_encoder_funcs intel_tv_enc_funcs = { @@ -1548,6 +1464,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) u32 tv_dac_on, tv_dac_off, save_tv_dac; const char *tv_format_names[ARRAY_SIZE(tv_modes)]; int i, initial_mode = 0; + struct drm_connector_state *state; if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) return; @@ -1593,6 +1510,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder = &intel_tv->base; connector = &intel_connector->base; + state = connector->state; /* The documentation, for the older chipsets at least, recommend * using a polling method rather than hotplug detection for TVs. @@ -1630,12 +1548,12 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_tv->type = DRM_MODE_CONNECTOR_Unknown; /* BIOS margin values */ - intel_tv->margin[TV_MARGIN_LEFT] = 54; - intel_tv->margin[TV_MARGIN_TOP] = 36; - intel_tv->margin[TV_MARGIN_RIGHT] = 46; - intel_tv->margin[TV_MARGIN_BOTTOM] = 37; + state->tv.margins.left = 54; + state->tv.margins.top = 36; + state->tv.margins.right = 46; + state->tv.margins.bottom = 37; - intel_tv->tv_format = tv_modes[initial_mode].name; + state->tv.mode = initial_mode; drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs); connector->interlace_allowed = false; @@ -1649,17 +1567,17 @@ intel_tv_init(struct drm_i915_private *dev_priv) tv_format_names); drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, - initial_mode); + state->tv.mode); drm_object_attach_property(&connector->base, dev->mode_config.tv_left_margin_property, - intel_tv->margin[TV_MARGIN_LEFT]); + state->tv.margins.left); drm_object_attach_property(&connector->base, dev->mode_config.tv_top_margin_property, - intel_tv->margin[TV_MARGIN_TOP]); + state->tv.margins.top); drm_object_attach_property(&connector->base, dev->mode_config.tv_right_margin_property, - intel_tv->margin[TV_MARGIN_RIGHT]); + state->tv.margins.right); drm_object_attach_property(&connector->base, dev->mode_config.tv_bottom_margin_property, - intel_tv->margin[TV_MARGIN_BOTTOM]); + state->tv.margins.bottom); } diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c117424f1f50..7fd75ca031c3 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -99,7 +99,7 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_mmio; + guc->send = intel_guc_send_nop; } static void fetch_uc_fw(struct drm_i915_private *dev_priv, @@ -252,13 +252,27 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) __intel_uc_fw_fini(&dev_priv->huc.fw); } +static int guc_enable_communication(struct intel_guc *guc) +{ + /* XXX: placeholder for alternate setup */ + guc->send = intel_guc_send_mmio; + return 0; +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + guc->send = intel_guc_send_nop; +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { + struct intel_guc *guc = &dev_priv->guc; int ret, attempts; if (!i915.enable_guc_loading) return 0; + guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); /* We need to notify the guc whenever we change the GGTT */ @@ -274,6 +288,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_guc; } + /* init WOPCM */ + I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); + I915_WRITE(DMA_GUC_WOPCM_OFFSET, + GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC); + /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ if (IS_GEN9(dev_priv)) @@ -303,6 +322,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_submission; + ret = guc_enable_communication(guc); + if (ret) + goto err_submission; + intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { if (i915.guc_log_level >= 0) @@ -325,6 +348,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) * marks the GPU as wedged until reset). */ err_interrupts: + guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); err_submission: if (i915.enable_guc_submission) @@ -359,17 +383,10 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) i915_ggtt_disable_guc(dev_priv); } -/* - * Read GuC command/status register (SOFT_SCRATCH_0) - * Return true if it contains a response rather than a command - */ -static bool guc_recv(struct intel_guc *guc, u32 *status) +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - u32 val = I915_READ(SOFT_SCRATCH(0)); - *status = val; - return INTEL_GUC_RECV_IS_RESPONSE(val); + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; } /* @@ -399,13 +416,14 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); /* - * Fast commands should complete in less than 10us, so sample quickly - * up to that length of time, then switch to a slower sleep-wait loop. - * No inte_guc_send command should ever take longer than 10ms. + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. */ - ret = wait_for_us(guc_recv(guc, &status), 10); - if (ret) - ret = wait_for(guc_recv(guc, &status), 10); + ret = __intel_wait_for_register_fw(dev_priv, + SOFT_SCRATCH(0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); if (status != INTEL_GUC_STATUS_SUCCESS) { /* * Either the GuC explicitly returned an error (which diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 4b7f73aeddac..1e0eecdcedc0 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -227,6 +227,7 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { @@ -266,7 +267,7 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) /* intel_huc.c */ void intel_huc_select_fw(struct intel_huc *huc); -int intel_huc_init_hw(struct intel_huc *huc); +void intel_huc_init_hw(struct intel_huc *huc); void intel_guc_auth_huc(struct drm_i915_private *dev_priv); #endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6d1ea26b2493..aa9d3065853c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -29,6 +29,7 @@ #include <linux/pm_runtime.h> #define FORCEWAKE_ACK_TIMEOUT_MS 50 +#define GT_FIFO_TIMEOUT_MS 10 #define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__)) @@ -172,22 +173,6 @@ static void fw_domains_get_with_thread_status(struct drm_i915_private *dev_priv, __gen6_gt_wait_for_thread_c0(dev_priv); } -static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv) -{ - u32 gtfifodbg; - - gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG); - if (WARN(gtfifodbg, "GT wake FIFO error 0x%x\n", gtfifodbg)) - __raw_i915_write32(dev_priv, GTFIFODBG, gtfifodbg); -} - -static void fw_domains_put_with_fifo(struct drm_i915_private *dev_priv, - enum forcewake_domains fw_domains) -{ - fw_domains_put(dev_priv, fw_domains); - gen6_gt_check_fifodbg(dev_priv); -} - static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) { u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL); @@ -195,30 +180,27 @@ static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) return count & GT_FIFO_FREE_ENTRIES_MASK; } -static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) +static void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) { - int ret = 0; + u32 n; /* On VLV, FIFO will be shared by both SW and HW. * So, we need to read the FREE_ENTRIES everytime */ if (IS_VALLEYVIEW(dev_priv)) - dev_priv->uncore.fifo_count = fifo_free_entries(dev_priv); - - if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { - int loop = 500; - u32 fifo = fifo_free_entries(dev_priv); - - while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { - udelay(10); - fifo = fifo_free_entries(dev_priv); + n = fifo_free_entries(dev_priv); + else + n = dev_priv->uncore.fifo_count; + + if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) { + if (wait_for_atomic((n = fifo_free_entries(dev_priv)) > + GT_FIFO_NUM_RESERVED_ENTRIES, + GT_FIFO_TIMEOUT_MS)) { + DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n); + return; } - if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES)) - ++ret; - dev_priv->uncore.fifo_count = fifo; } - dev_priv->uncore.fifo_count--; - return ret; + dev_priv->uncore.fifo_count = n - 1; } static enum hrtimer_restart @@ -384,15 +366,35 @@ vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) } static bool +gen6_check_for_fifo_debug(struct drm_i915_private *dev_priv) +{ + u32 fifodbg; + + fifodbg = __raw_i915_read32(dev_priv, GTFIFODBG); + + if (unlikely(fifodbg)) { + DRM_DEBUG_DRIVER("GTFIFODBG = 0x08%x\n", fifodbg); + __raw_i915_write32(dev_priv, GTFIFODBG, fifodbg); + } + + return fifodbg; +} + +static bool check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) { + bool ret = false; + if (HAS_FPGA_DBG_UNCLAIMED(dev_priv)) - return fpga_check_for_unclaimed_mmio(dev_priv); + ret |= fpga_check_for_unclaimed_mmio(dev_priv); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return vlv_check_for_unclaimed_mmio(dev_priv); + ret |= vlv_check_for_unclaimed_mmio(dev_priv); - return false; + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) + ret |= gen6_check_for_fifo_debug(dev_priv); + + return ret; } static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, @@ -404,11 +406,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, if (check_for_unclaimed_mmio(dev_priv)) DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); - /* clear out old GT FIFO errors */ - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) - __raw_i915_write32(dev_priv, GTFIFODBG, - __raw_i915_read32(dev_priv, GTFIFODBG)); - /* WaDisableShadowRegForCpd:chv */ if (IS_CHERRYVIEW(dev_priv)) { __raw_i915_write32(dev_priv, GTFIFOCTL, @@ -1047,15 +1044,10 @@ __gen2_write(32) #define __gen6_write(x) \ static void \ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - u32 __fifo_ret = 0; \ GEN6_WRITE_HEADER; \ - if (NEEDS_FORCE_WAKE(offset)) { \ - __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ - } \ + if (NEEDS_FORCE_WAKE(offset)) \ + __gen6_gt_wait_for_fifo(dev_priv); \ __raw_i915_write##x(dev_priv, reg, val); \ - if (unlikely(__fifo_ret)) { \ - gen6_gt_check_fifodbg(dev_priv); \ - } \ GEN6_WRITE_FOOTER; \ } @@ -1108,19 +1100,19 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define ASSIGN_WRITE_MMIO_VFUNCS(x) \ +#define ASSIGN_WRITE_MMIO_VFUNCS(i915, x) \ do { \ - dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ - dev_priv->uncore.funcs.mmio_writew = x##_write16; \ - dev_priv->uncore.funcs.mmio_writel = x##_write32; \ + (i915)->uncore.funcs.mmio_writeb = x##_write8; \ + (i915)->uncore.funcs.mmio_writew = x##_write16; \ + (i915)->uncore.funcs.mmio_writel = x##_write32; \ } while (0) -#define ASSIGN_READ_MMIO_VFUNCS(x) \ +#define ASSIGN_READ_MMIO_VFUNCS(i915, x) \ do { \ - dev_priv->uncore.funcs.mmio_readb = x##_read8; \ - dev_priv->uncore.funcs.mmio_readw = x##_read16; \ - dev_priv->uncore.funcs.mmio_readl = x##_read32; \ - dev_priv->uncore.funcs.mmio_readq = x##_read64; \ + (i915)->uncore.funcs.mmio_readb = x##_read8; \ + (i915)->uncore.funcs.mmio_readw = x##_read16; \ + (i915)->uncore.funcs.mmio_readl = x##_read32; \ + (i915)->uncore.funcs.mmio_readq = x##_read64; \ } while (0) @@ -1190,11 +1182,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get; - if (!IS_CHERRYVIEW(dev_priv)) - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; - else - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_VLV, FORCEWAKE_ACK_VLV); fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA, @@ -1202,11 +1190,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - if (IS_HASWELL(dev_priv)) - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; - else - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_ACK_HSW); } else if (IS_IVYBRIDGE(dev_priv)) { @@ -1223,8 +1207,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) */ dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; /* We need to init first for ECOBUS access and then * determine later if we want to reinit, in case of MT access is @@ -1242,7 +1225,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) spin_lock_irq(&dev_priv->uncore.lock); fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); ecobus = __raw_i915_read32(dev_priv, ECOBUS); - fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); + fw_domains_put(dev_priv, FORCEWAKE_RENDER); spin_unlock_irq(&dev_priv->uncore.lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { @@ -1254,8 +1237,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } else if (IS_GEN6(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); } @@ -1310,34 +1292,34 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) i915_pmic_bus_access_notifier; if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen2); - ASSIGN_READ_MMIO_VFUNCS(gen2); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen2); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen2); } else if (IS_GEN5(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen5); - ASSIGN_READ_MMIO_VFUNCS(gen5); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen5); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen5); } else if (IS_GEN(dev_priv, 6, 7)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen6); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen6); if (IS_VALLEYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); } else { - ASSIGN_READ_MMIO_VFUNCS(gen6); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } } else if (IS_GEN8(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); } else { - ASSIGN_WRITE_MMIO_VFUNCS(gen8); - ASSIGN_READ_MMIO_VFUNCS(gen6); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } } else { ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); if (HAS_DECOUPLED_MMIO(dev_priv)) { dev_priv->uncore.funcs.mmio_readl = gen9_decoupled_read32; @@ -1353,8 +1335,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); } -#undef ASSIGN_WRITE_MMIO_VFUNCS -#undef ASSIGN_READ_MMIO_VFUNCS void intel_uncore_fini(struct drm_i915_private *dev_priv) { @@ -1534,7 +1514,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, */ __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); - /* Spin waiting for the device to ack the reset requests */ + /* Wait for the device to ack the reset requests */ return intel_wait_for_register_fw(dev_priv, GEN6_GDRST, hw_domain_mask, 0, 500); @@ -1585,19 +1565,23 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, } /** - * intel_wait_for_register_fw - wait until register matches expected state + * __intel_wait_for_register_fw - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read * @mask: mask to apply to register value * @value: expected value - * @timeout_ms: timeout in millisecond + * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait + * @slow_timeout_ms: slow timeout in millisecond + * @out_value: optional placeholder to hold registry value * * This routine waits until the target register @reg contains the expected * @value after applying the @mask, i.e. it waits until :: * * (I915_READ_FW(reg) & mask) == value * - * Otherwise, the wait will timeout after @timeout_ms milliseconds. + * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds. + * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us + * must be not larger than 20,0000 microseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is * not suitable for very long waits. See intel_wait_for_register() if you @@ -1606,16 +1590,31 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * * Returns 0 if the register matches the desired condition, or -ETIMEOUT. */ -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms) -{ -#define done ((I915_READ_FW(reg) & mask) == value) - int ret = wait_for_us(done, 2); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value) +{ + u32 reg_value; +#define done (((reg_value = I915_READ_FW(reg)) & mask) == value) + int ret; + + /* Catch any overuse of this function */ + might_sleep_if(slow_timeout_ms); + GEM_BUG_ON(fast_timeout_us > 20000); + + ret = -ETIMEDOUT; + if (fast_timeout_us && fast_timeout_us <= 20000) + ret = _wait_for_atomic(done, fast_timeout_us, 0); if (ret) - ret = wait_for(done, timeout_ms); + ret = wait_for(done, slow_timeout_ms); + + if (out_value) + *out_value = reg_value; + return ret; #undef done } @@ -1639,18 +1638,26 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, */ int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms) + u32 mask, + u32 value, + unsigned int timeout_ms) { - unsigned fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); int ret; - intel_uncore_forcewake_get(dev_priv, fw); - ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2); - intel_uncore_forcewake_put(dev_priv, fw); + might_sleep(); + + spin_lock_irq(&dev_priv->uncore.lock); + intel_uncore_forcewake_get__locked(dev_priv, fw); + + ret = __intel_wait_for_register_fw(dev_priv, + reg, mask, value, + 2, 0, NULL); + + intel_uncore_forcewake_put__locked(dev_priv, fw); + spin_unlock_irq(&dev_priv->uncore.lock); + if (ret) ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, timeout_ms); @@ -1658,7 +1665,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, return ret; } -static int gen8_request_engine_reset(struct intel_engine_cs *engine) +static int gen8_reset_engine_start(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; @@ -1677,7 +1684,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine) return ret; } -static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine) +static void gen8_reset_engine_cancel(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1692,14 +1699,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, unsigned int tmp; for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - if (gen8_request_engine_reset(engine)) + if (gen8_reset_engine_start(engine)) goto not_ready; return gen6_reset_engines(dev_priv, engine_mask); not_ready: for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen8_unrequest_engine_reset(engine); + gen8_reset_engine_cancel(engine); return -EIO; } @@ -1754,17 +1761,12 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) int intel_guc_reset(struct drm_i915_private *dev_priv) { int ret; - unsigned long irqflags; if (!HAS_GUC(dev_priv)) return -EINVAL; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; @@ -1873,5 +1875,6 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index f08d0179b3df..95d4aebc0181 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj, typeof(v) *map; int err; - /* XXX GTT write followed by WC write go missing */ - i915_gem_object_flush_gtt_write_domain(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, true); + err = i915_gem_object_set_to_wc_domain(obj, true); if (err) return err; @@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj, typeof(v) map; int err; - /* XXX WC write followed by GTT write go missing */ - i915_gem_object_flush_gtt_write_domain(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, false); + err = i915_gem_object_set_to_wc_domain(obj, false); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 67d82bf1407f..8f011c447e41 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -545,7 +545,9 @@ static int igt_mmap_offset_exhaustion(void *arg) } mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); err = make_obj_busy(obj); + intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 98b7aac41eec..6664cb2eb0b8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (err) goto err; - err = i915_gem_object_set_to_gtt_domain(obj, true); + err = i915_gem_object_set_to_wc_domain(obj, true); if (err) goto err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c new file mode 100644 index 000000000000..6df00cc02c12 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c @@ -0,0 +1,301 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_gem_device.h" +#include "mock_timeline.h" + +struct __igt_sync { + const char *name; + u32 seqno; + bool expected; + bool set; +}; + +static int __igt_sync(struct intel_timeline *tl, + u64 ctx, + const struct __igt_sync *p, + const char *name) +{ + int ret; + + if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", + name, p->name, ctx, p->seqno, yesno(p->expected)); + return -EINVAL; + } + + if (p->set) { + ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + if (ret) + return ret; + } + + return 0; +} + +static int igt_sync(void *arg) +{ + const struct __igt_sync pass[] = { + { "unset", 0, false, false }, + { "new", 0, false, true }, + { "0a", 0, true, true }, + { "1a", 1, false, true }, + { "1b", 1, true, true }, + { "0b", 0, true, false }, + { "2a", 2, false, true }, + { "4", 4, false, true }, + { "INT_MAX", INT_MAX, false, true }, + { "INT_MAX-1", INT_MAX-1, true, false }, + { "INT_MAX+1", (u32)INT_MAX+1, false, true }, + { "INT_MAX", INT_MAX, true, false }, + { "UINT_MAX", UINT_MAX, false, true }, + { "wrap", 0, false, true }, + { "unwrap", UINT_MAX, true, false }, + {}, + }, *p; + struct intel_timeline *tl; + int order, offset; + int ret; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + for (p = pass; p->name; p++) { + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + ret = __igt_sync(tl, ctx, p, "1"); + if (ret) + goto out; + } + } + } + mock_timeline_destroy(tl); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + for (p = pass; p->name; p++) { + ret = __igt_sync(tl, ctx, p, "2"); + if (ret) + goto out; + } + } + } + +out: + mock_timeline_destroy(tl); + return ret; +} + +static unsigned int random_engine(struct rnd_state *rnd) +{ + return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32; +} + +static int bench_sync(void *arg) +{ +#define M (1 << 20) + struct rnd_state prng; + struct intel_timeline *tl; + unsigned long end_time, count; + u64 prng32_1M; + ktime_t kt; + int order, last_order; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Lookups from cache are very fast and so the random number generation + * and the loop itself becomes a significant factor in the per-iteration + * timings. We try to compensate the results by measuring the overhead + * of the prng and subtract it from the reported results. + */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = ktime_to_ns(kt) * M / count; + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __intel_timeline_sync_set(tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__intel_timeline_sync_is_later(tl, id, 0)) { + mock_timeline_destroy(tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_destroy(tl); + cond_resched(); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __intel_timeline_sync_set(tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_destroy(tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_destroy(tl); + cond_resched(); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__intel_timeline_sync_is_later(tl, id, seqno)) + __intel_timeline_sync_set(tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_destroy(tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order < 32; + ({ int tmp = last_order; last_order = order; order += tmp; })) { + unsigned int mask = BIT(order) - 1; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __intel_timeline_sync_is_later(tl, id, 0); + __intel_timeline_sync_set(tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_destroy(tl); + cond_resched(); + } + + return 0; +#undef M +} + +int i915_gem_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index be9a9ebf5692..76c1f149a0a0 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -10,8 +10,10 @@ */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) +selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(timelines, i915_gem_timeline_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index c17c83c30637..d044bf9a6feb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -30,6 +30,17 @@ #include "i915_random.h" +u64 i915_prandom_u64_state(struct rnd_state *rnd) +{ + u64 x; + + x = prandom_u32_state(rnd); + x <<= 32; + x |= prandom_u32_state(rnd); + + return x; +} + static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) { return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index b9c334ce6cd9..6c9379871384 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -41,6 +41,8 @@ #define I915_RND_SUBSTATE(name__, parent__) \ struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) +u64 i915_prandom_u64_state(struct rnd_state *rnd); + unsigned int *i915_random_order(unsigned int count, struct rnd_state *state); void i915_random_reorder(unsigned int *order, diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c new file mode 100644 index 000000000000..bcab3d00a785 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -0,0 +1,616 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +static char * +__sync_print(struct i915_syncmap *p, + char *buf, unsigned long *sz, + unsigned int depth, + unsigned int last, + unsigned int idx) +{ + unsigned long len; + unsigned int i, X; + + if (depth) { + unsigned int d; + + for (d = 0; d < depth - 1; d++) { + if (last & BIT(depth - d - 1)) + len = scnprintf(buf, *sz, "| "); + else + len = scnprintf(buf, *sz, " "); + buf += len; + *sz -= len; + } + len = scnprintf(buf, *sz, "%x-> ", idx); + buf += len; + *sz -= len; + } + + /* We mark bits after the prefix as "X" */ + len = scnprintf(buf, *sz, "0x%016llx", p->prefix << p->height << SHIFT); + buf += len; + *sz -= len; + X = (p->height + SHIFT) / 4; + scnprintf(buf - X, *sz + X, "%*s", X, "XXXXXXXXXXXXXXXXX"); + + if (!p->height) { + for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { + len = scnprintf(buf, *sz, " %x:%x,", + i, __sync_seqno(p)[i]); + buf += len; + *sz -= len; + } + buf -= 1; + *sz += 1; + } + + len = scnprintf(buf, *sz, "\n"); + buf += len; + *sz -= len; + + if (p->height) { + for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { + buf = __sync_print(__sync_child(p)[i], buf, sz, + depth + 1, + last << 1 | !!(p->bitmap >> (i + 1)), + i); + } + } + + return buf; +} + +static bool +i915_syncmap_print_to_buf(struct i915_syncmap *p, char *buf, unsigned long sz) +{ + if (!p) + return false; + + while (p->parent) + p = p->parent; + + __sync_print(p, buf, &sz, 0, 1, 0); + return true; +} + +static int check_syncmap_free(struct i915_syncmap **sync) +{ + i915_syncmap_free(sync); + if (*sync) { + pr_err("sync not cleared after free\n"); + return -EINVAL; + } + + return 0; +} + +static int dump_syncmap(struct i915_syncmap *sync, int err) +{ + char *buf; + + if (!err) + return check_syncmap_free(&sync); + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + goto skip; + + if (i915_syncmap_print_to_buf(sync, buf, PAGE_SIZE)) + pr_err("%s", buf); + + kfree(buf); + +skip: + i915_syncmap_free(&sync); + return err; +} + +static int igt_syncmap_init(void *arg) +{ + struct i915_syncmap *sync = (void *)~0ul; + + /* + * Cursory check that we can initialise a random pointer and transform + * it into the root pointer of a syncmap. + */ + + i915_syncmap_init(&sync); + return check_syncmap_free(&sync); +} + +static int check_seqno(struct i915_syncmap *leaf, unsigned int idx, u32 seqno) +{ + if (leaf->height) { + pr_err("%s: not a leaf, height is %d\n", + __func__, leaf->height); + return -EINVAL; + } + + if (__sync_seqno(leaf)[idx] != seqno) { + pr_err("%s: seqno[%d], found %x, expected %x\n", + __func__, idx, __sync_seqno(leaf)[idx], seqno); + return -EINVAL; + } + + return 0; +} + +static int check_one(struct i915_syncmap **sync, u64 context, u32 seqno) +{ + int err; + + err = i915_syncmap_set(sync, context, seqno); + if (err) + return err; + + if ((*sync)->height) { + pr_err("Inserting first context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, (*sync)->height, (*sync)->prefix); + return -EINVAL; + } + + if ((*sync)->parent) { + pr_err("Inserting first context=%llx created branches!\n", + context); + return -EINVAL; + } + + if (hweight32((*sync)->bitmap) != 1) { + pr_err("First bitmap does not contain a single entry, found %x (count=%d)!\n", + (*sync)->bitmap, hweight32((*sync)->bitmap)); + return -EINVAL; + } + + err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno); + if (err) + return err; + + if (!i915_syncmap_is_later(sync, context, seqno)) { + pr_err("Lookup of first context=%llx/seqno=%x failed!\n", + context, seqno); + return -EINVAL; + } + + return 0; +} + +static int igt_syncmap_one(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + unsigned long max = 1; + int err; + + /* + * Check that inserting a new id, creates a leaf and only that leaf. + */ + + i915_syncmap_init(&sync); + + do { + u64 context = i915_prandom_u64_state(&prng); + unsigned long loop; + + err = check_syncmap_free(&sync); + if (err) + goto out; + + for (loop = 0; loop <= max; loop++) { + err = check_one(&sync, context, + prandom_u32_state(&prng)); + if (err) + goto out; + } + max++; + } while (!__igt_timeout(end_time, NULL)); + pr_debug("%s: Completed %lu single insertions\n", + __func__, max * (max - 1) / 2); +out: + return dump_syncmap(sync, err); +} + +static int check_leaf(struct i915_syncmap **sync, u64 context, u32 seqno) +{ + int err; + + err = i915_syncmap_set(sync, context, seqno); + if (err) + return err; + + if ((*sync)->height) { + pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, (*sync)->height, (*sync)->prefix); + return -EINVAL; + } + + if (hweight32((*sync)->bitmap) != 1) { + pr_err("First entry into leaf (context=%llx) does not contain a single entry, found %x (count=%d)!\n", + context, (*sync)->bitmap, hweight32((*sync)->bitmap)); + return -EINVAL; + } + + err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno); + if (err) + return err; + + if (!i915_syncmap_is_later(sync, context, seqno)) { + pr_err("Lookup of first entry context=%llx/seqno=%x failed!\n", + context, seqno); + return -EINVAL; + } + + return 0; +} + +static int igt_syncmap_join_above(void *arg) +{ + struct i915_syncmap *sync; + unsigned int pass, order; + int err; + + i915_syncmap_init(&sync); + + /* + * When we have a new id that doesn't fit inside the existing tree, + * we need to add a new layer above. + * + * 1: 0x00000001 + * 2: 0x00000010 + * 3: 0x00000100 + * 4: 0x00001000 + * ... + * Each pass the common prefix shrinks and we have to insert a join. + * Each join will only contain two branches, the latest of which + * is always a leaf. + * + * If we then reuse the same set of contexts, we expect to build an + * identical tree. + */ + for (pass = 0; pass < 3; pass++) { + for (order = 0; order < 64; order += SHIFT) { + u64 context = BIT_ULL(order); + struct i915_syncmap *join; + + err = check_leaf(&sync, context, 0); + if (err) + goto out; + + join = sync->parent; + if (!join) /* very first insert will have no parents */ + continue; + + if (!join->height) { + pr_err("Parent with no height!\n"); + err = -EINVAL; + goto out; + } + + if (hweight32(join->bitmap) != 2) { + pr_err("Join does not have 2 children: %x (%d)\n", + join->bitmap, hweight32(join->bitmap)); + err = -EINVAL; + goto out; + } + + if (__sync_child(join)[__sync_branch_idx(join, context)] != sync) { + pr_err("Leaf misplaced in parent!\n"); + err = -EINVAL; + goto out; + } + } + } +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_join_below(void *arg) +{ + struct i915_syncmap *sync; + unsigned int step, order, idx; + int err; + + i915_syncmap_init(&sync); + + /* + * Check that we can split a compacted branch by replacing it with + * a join. + */ + for (step = 0; step < KSYNCMAP; step++) { + for (order = 64 - SHIFT; order > 0; order -= SHIFT) { + u64 context = step * BIT_ULL(order); + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx (order=%d, step=%d) did not return leaf (height=%d, prefix=%llx\n", + context, order, step, sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + } + } + + for (step = 0; step < KSYNCMAP; step++) { + for (order = SHIFT; order < 64; order += SHIFT) { + u64 context = step * BIT_ULL(order); + + if (!i915_syncmap_is_later(&sync, context, 0)) { + pr_err("1: context %llx (order=%d, step=%d) not found\n", + context, order, step); + err = -EINVAL; + goto out; + } + + for (idx = 1; idx < KSYNCMAP; idx++) { + if (i915_syncmap_is_later(&sync, context + idx, 0)) { + pr_err("1: context %llx (order=%d, step=%d) should not exist\n", + context + idx, order, step); + err = -EINVAL; + goto out; + } + } + } + } + + for (order = SHIFT; order < 64; order += SHIFT) { + for (step = 0; step < KSYNCMAP; step++) { + u64 context = step * BIT_ULL(order); + + if (!i915_syncmap_is_later(&sync, context, 0)) { + pr_err("2: context %llx (order=%d, step=%d) not found\n", + context, order, step); + err = -EINVAL; + goto out; + } + } + } + +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_neighbours(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + int err; + + /* + * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP + * neighbouring ids, they all fit into the same leaf. + */ + + i915_syncmap_init(&sync); + do { + u64 context = i915_prandom_u64_state(&prng) & ~MASK; + unsigned int idx; + + if (i915_syncmap_is_later(&sync, context, 0)) /* Skip repeats */ + continue; + + for (idx = 0; idx < KSYNCMAP; idx++) { + err = i915_syncmap_set(&sync, context + idx, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + + if (sync->bitmap != BIT(idx + 1) - 1) { + pr_err("Inserting neighbouring context=0x%llx+%d, did not fit into the same leaf bitmap=%x (%d), expected %lx (%d)\n", + context, idx, + sync->bitmap, hweight32(sync->bitmap), + BIT(idx + 1) - 1, idx + 1); + err = -EINVAL; + goto out; + } + } + } while (!__igt_timeout(end_time, NULL)); +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_compact(void *arg) +{ + struct i915_syncmap *sync; + unsigned int idx, order; + int err; + + i915_syncmap_init(&sync); + + /* + * The syncmap are "space efficient" compressed radix trees - any + * branch with only one child is skipped and replaced by the child. + * + * If we construct a tree with ids that are neighbouring at a non-zero + * height, we form a join but each child of that join is directly a + * leaf holding the single id. + */ + for (order = SHIFT; order < 64; order += SHIFT) { + err = check_syncmap_free(&sync); + if (err) + goto out; + + /* Create neighbours in the parent */ + for (idx = 0; idx < KSYNCMAP; idx++) { + u64 context = idx * BIT_ULL(order) + idx; + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx (order=%d, idx=%d) did not return leaf (height=%d, prefix=%llx\n", + context, order, idx, + sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + } + + sync = sync->parent; + if (sync->parent) { + pr_err("Parent (join) of last leaf was not the sync!\n"); + err = -EINVAL; + goto out; + } + + if (sync->height != order) { + pr_err("Join does not have the expected height, found %d, expected %d\n", + sync->height, order); + err = -EINVAL; + goto out; + } + + if (sync->bitmap != BIT(KSYNCMAP) - 1) { + pr_err("Join is not full!, found %x (%d) expected %lx (%d)\n", + sync->bitmap, hweight32(sync->bitmap), + BIT(KSYNCMAP) - 1, KSYNCMAP); + err = -EINVAL; + goto out; + } + + /* Each of our children should be a leaf */ + for (idx = 0; idx < KSYNCMAP; idx++) { + struct i915_syncmap *leaf = __sync_child(sync)[idx]; + + if (leaf->height) { + pr_err("Child %d is a not leaf!\n", idx); + err = -EINVAL; + goto out; + } + + if (leaf->parent != sync) { + pr_err("Child %d is not attached to us!\n", + idx); + err = -EINVAL; + goto out; + } + + if (!is_power_of_2(leaf->bitmap)) { + pr_err("Child %d holds more than one id, found %x (%d)\n", + idx, leaf->bitmap, hweight32(leaf->bitmap)); + err = -EINVAL; + goto out; + } + + if (leaf->bitmap != BIT(idx)) { + pr_err("Child %d has wrong seqno idx, found %d, expected %d\n", + idx, ilog2(leaf->bitmap), idx); + err = -EINVAL; + goto out; + } + } + } +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_random(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + unsigned long count, phase, i; + u32 seqno; + int err; + + i915_syncmap_init(&sync); + + /* + * Having tried to test the individual operations within i915_syncmap, + * run a smoketest exploring the entire u64 space with random + * insertions. + */ + + count = 0; + phase = jiffies + HZ/100 + 1; + do { + u64 context = i915_prandom_u64_state(&prng); + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + count++; + } while (!time_after(jiffies, phase)); + seqno = 0; + + phase = 0; + do { + I915_RND_STATE(ctx); + u32 last_seqno = seqno; + bool expect; + + seqno = prandom_u32_state(&prng); + expect = seqno_later(last_seqno, seqno); + + for (i = 0; i < count; i++) { + u64 context = i915_prandom_u64_state(&ctx); + + if (i915_syncmap_is_later(&sync, context, seqno) != expect) { + pr_err("context=%llu, last=%u this=%u did not match expectation (%d)\n", + context, last_seqno, seqno, expect); + err = -EINVAL; + goto out; + } + + err = i915_syncmap_set(&sync, context, seqno); + if (err) + goto out; + } + + phase++; + } while (!__igt_timeout(end_time, NULL)); + pr_debug("Completed %lu passes, each of %lu contexts\n", phase, count); +out: + return dump_syncmap(sync, err); +} + +int i915_syncmap_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_syncmap_init), + SUBTEST(igt_syncmap_one), + SUBTEST(igt_syncmap_join_above), + SUBTEST(igt_syncmap_join_below), + SUBTEST(igt_syncmap_neighbours), + SUBTEST(igt_syncmap_compact), + SUBTEST(igt_syncmap_random), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 0ad624a1db90..b8e53bdc3cc4 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -112,7 +112,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->engine = engine; ring->size = sz; ring->effective_size = sz; ring->vaddr = (void *)(ring + 1); @@ -141,7 +140,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; - engine->base.name = name; + snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id++; engine->base.status_page.page_addr = (void *)(engine + 1); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 6a8258eacdcb..f321bdfe0b5b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -30,6 +30,7 @@ #include "mock_gem_device.h" #include "mock_gem_object.h" #include "mock_gtt.h" +#include "mock_uncore.h" void mock_device_flush(struct drm_i915_private *i915) { @@ -143,6 +144,7 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; spin_lock_init(&i915->mm.object_stat_lock); + mock_uncore_init(i915); init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c new file mode 100644 index 000000000000..47b1f47c5812 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -0,0 +1,45 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_timeline.h" + +struct intel_timeline *mock_timeline(u64 context) +{ + static struct lock_class_key class; + struct intel_timeline *tl; + + tl = kzalloc(sizeof(*tl), GFP_KERNEL); + if (!tl) + return NULL; + + __intel_timeline_init(tl, NULL, context, &class, "mock"); + + return tl; +} + +void mock_timeline_destroy(struct intel_timeline *tl) +{ + __intel_timeline_fini(tl); + kfree(tl); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h new file mode 100644 index 000000000000..c27ff4639b8b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +#include "../i915_gem_timeline.h" + +struct intel_timeline *mock_timeline(u64 context); +void mock_timeline_destroy(struct intel_timeline *tl); + +#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c new file mode 100644 index 000000000000..8ef14c7e5e38 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -0,0 +1,46 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_uncore.h" + +#define __nop_write(x) \ +static void \ +nop_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { } +__nop_write(8) +__nop_write(16) +__nop_write(32) + +#define __nop_read(x) \ +static u##x \ +nop_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { return 0; } +__nop_read(8) +__nop_read(16) +__nop_read(32) +__nop_read(64) + +void mock_uncore_init(struct drm_i915_private *i915) +{ + ASSIGN_WRITE_MMIO_VFUNCS(i915, nop); + ASSIGN_READ_MMIO_VFUNCS(i915, nop); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h new file mode 100644 index 000000000000..d79aa3ca4d51 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_UNCORE_H +#define __MOCK_UNCORE_H + +void mock_uncore_init(struct drm_i915_private *i915); + +#endif /* !__MOCK_UNCORE_H */ diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index e9892b4c3af1..b6121c8fe539 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -31,20 +31,20 @@ struct platform_device; #define HDMI_MAX_ELD_BYTES 128 -struct intel_hdmi_lpe_audio_eld { - int port_id; - int pipe_id; - unsigned char eld_data[HDMI_MAX_ELD_BYTES]; +struct intel_hdmi_lpe_audio_port_pdata { + u8 eld[HDMI_MAX_ELD_BYTES]; + int port; + int pipe; + int ls_clock; + bool dp_output; }; struct intel_hdmi_lpe_audio_pdata { - bool notify_pending; - int tmds_clock_speed; - bool hdmi_connected; - bool dp_output; - int link_rate; - struct intel_hdmi_lpe_audio_eld eld; - void (*notify_audio_lpe)(struct platform_device *pdev); + struct intel_hdmi_lpe_audio_port_pdata port[3]; /* for ports B,C,D */ + int num_ports; + int num_pipes; + + void (*notify_audio_lpe)(struct platform_device *pdev, int port); /* port: 0==B,1==C,2==D */ spinlock_t lpe_audio_slock; }; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3554495bef13..f24a80d2d42e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE 44 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified bufffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + typedef struct drm_i915_getparam { __s32 param; /* @@ -666,6 +672,8 @@ struct drm_i915_gem_relocation_entry { #define I915_GEM_DOMAIN_VERTEX 0x00000020 /** GTT domain - aperture and scanout */ #define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 /** @} */ struct drm_i915_gem_exec_object { @@ -773,8 +781,15 @@ struct drm_i915_gem_exec_object2 { * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. */ #define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) __u64 flags; union { diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index c505b019e09c..909391d5270c 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -42,6 +42,11 @@ #include <drm/intel_lpe_audio.h> #include "intel_hdmi_audio.h" +#define for_each_pipe(card_ctx, pipe) \ + for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++) +#define for_each_port(card_ctx, port) \ + for ((port) = 0; (port) < (card_ctx)->num_ports; (port)++) + /*standard module options for ALSA. This module supports only one card*/ static int hdmi_card_index = SNDRV_DEFAULT_IDX1; static char *hdmi_card_id = SNDRV_DEFAULT_STR1; @@ -189,15 +194,30 @@ static void had_substream_put(struct snd_intelhad *intelhaddata) spin_unlock_irqrestore(&intelhaddata->had_spinlock, flags); } +static u32 had_config_offset(int pipe) +{ + switch (pipe) { + default: + case 0: + return AUDIO_HDMI_CONFIG_A; + case 1: + return AUDIO_HDMI_CONFIG_B; + case 2: + return AUDIO_HDMI_CONFIG_C; + } +} + /* Register access functions */ -static u32 had_read_register_raw(struct snd_intelhad *ctx, u32 reg) +static u32 had_read_register_raw(struct snd_intelhad_card *card_ctx, + int pipe, u32 reg) { - return ioread32(ctx->mmio_start + ctx->had_config_offset + reg); + return ioread32(card_ctx->mmio_start + had_config_offset(pipe) + reg); } -static void had_write_register_raw(struct snd_intelhad *ctx, u32 reg, u32 val) +static void had_write_register_raw(struct snd_intelhad_card *card_ctx, + int pipe, u32 reg, u32 val) { - iowrite32(val, ctx->mmio_start + ctx->had_config_offset + reg); + iowrite32(val, card_ctx->mmio_start + had_config_offset(pipe) + reg); } static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) @@ -205,13 +225,13 @@ static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) if (!ctx->connected) *val = 0; else - *val = had_read_register_raw(ctx, reg); + *val = had_read_register_raw(ctx->card_ctx, ctx->pipe, reg); } static void had_write_register(struct snd_intelhad *ctx, u32 reg, u32 val) { if (ctx->connected) - had_write_register_raw(ctx, reg, val); + had_write_register_raw(ctx->card_ctx, ctx->pipe, reg, val); } /* @@ -1358,6 +1378,9 @@ static void had_process_hot_plug(struct snd_intelhad *intelhaddata) return; } + /* Disable Audio */ + had_enable_audio(intelhaddata, false); + intelhaddata->connected = true; dev_dbg(intelhaddata->dev, "%s @ %d:DEBUG PLUG/UNPLUG : HAD_DRV_CONNECTED\n", @@ -1519,22 +1542,32 @@ static const struct snd_kcontrol_new had_controls[] = { */ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) { - struct snd_intelhad *ctx = dev_id; - u32 audio_stat; + struct snd_intelhad_card *card_ctx = dev_id; + u32 audio_stat[3] = {}; + int pipe, port; + + for_each_pipe(card_ctx, pipe) { + /* use raw register access to ack IRQs even while disconnected */ + audio_stat[pipe] = had_read_register_raw(card_ctx, pipe, + AUD_HDMI_STATUS) & + (HDMI_AUDIO_UNDERRUN | HDMI_AUDIO_BUFFER_DONE); + + if (audio_stat[pipe]) + had_write_register_raw(card_ctx, pipe, + AUD_HDMI_STATUS, audio_stat[pipe]); + } - /* use raw register access to ack IRQs even while disconnected */ - audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + int pipe = ctx->pipe; - if (audio_stat & HDMI_AUDIO_UNDERRUN) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_UNDERRUN); - had_process_buffer_underrun(ctx); - } + if (pipe < 0) + continue; - if (audio_stat & HDMI_AUDIO_BUFFER_DONE) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_BUFFER_DONE); - had_process_buffer_done(ctx); + if (audio_stat[pipe] & HDMI_AUDIO_BUFFER_DONE) + had_process_buffer_done(ctx); + if (audio_stat[pipe] & HDMI_AUDIO_UNDERRUN) + had_process_buffer_underrun(ctx); } return IRQ_HANDLED; @@ -1543,9 +1576,10 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) /* * monitor plug/unplug notification from i915; just kick off the work */ -static void notify_audio_lpe(struct platform_device *pdev) +static void notify_audio_lpe(struct platform_device *pdev, int port) { - struct snd_intelhad *ctx = platform_get_drvdata(pdev); + struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; schedule_work(&ctx->hdmi_audio_wq); } @@ -1556,47 +1590,51 @@ static void had_audio_wq(struct work_struct *work) struct snd_intelhad *ctx = container_of(work, struct snd_intelhad, hdmi_audio_wq); struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; + struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port]; pm_runtime_get_sync(ctx->dev); mutex_lock(&ctx->mutex); - if (!pdata->hdmi_connected) { - dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n", - __func__); + if (ppdata->pipe < 0) { + dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n", + __func__, ctx->port); + memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */ + + ctx->dp_output = false; + ctx->tmds_clock_speed = 0; + ctx->link_rate = 0; + + /* Shut down the stream */ had_process_hot_unplug(ctx); - } else { - struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld; + ctx->pipe = -1; + } else { dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n", - __func__, eld->port_id, pdata->tmds_clock_speed); + __func__, ctx->port, ppdata->ls_clock); - switch (eld->pipe_id) { - case 0: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - break; - case 1: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_B; - break; - case 2: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_C; - break; - default: - dev_dbg(ctx->dev, "Invalid pipe %d\n", - eld->pipe_id); - break; - } - - memcpy(ctx->eld, eld->eld_data, sizeof(ctx->eld)); + memcpy(ctx->eld, ppdata->eld, sizeof(ctx->eld)); - ctx->dp_output = pdata->dp_output; - ctx->tmds_clock_speed = pdata->tmds_clock_speed; - ctx->link_rate = pdata->link_rate; + ctx->dp_output = ppdata->dp_output; + if (ctx->dp_output) { + ctx->tmds_clock_speed = 0; + ctx->link_rate = ppdata->ls_clock; + } else { + ctx->tmds_clock_speed = ppdata->ls_clock; + ctx->link_rate = 0; + } + /* + * Shut down the stream before we change + * the pipe assignment for this pcm device + */ had_process_hot_plug(ctx); - /* Process mode change if stream is active */ + ctx->pipe = ppdata->pipe; + + /* Restart the stream if necessary */ had_process_mode_change(ctx); } + mutex_unlock(&ctx->mutex); pm_runtime_mark_last_busy(ctx->dev); pm_runtime_put_autosuspend(ctx->dev); @@ -1605,11 +1643,17 @@ static void had_audio_wq(struct work_struct *work) /* * Jack interface */ -static int had_create_jack(struct snd_intelhad *ctx) +static int had_create_jack(struct snd_intelhad *ctx, + struct snd_pcm *pcm) { + char hdmi_str[32]; int err; - err = snd_jack_new(ctx->card, "HDMI/DP", SND_JACK_AVOUT, &ctx->jack, + snprintf(hdmi_str, sizeof(hdmi_str), + "HDMI/DP,pcm=%d", pcm->device); + + err = snd_jack_new(ctx->card_ctx->card, hdmi_str, + SND_JACK_AVOUT, &ctx->jack, true, false); if (err < 0) return err; @@ -1623,13 +1667,18 @@ static int had_create_jack(struct snd_intelhad *ctx) static int hdmi_lpe_audio_runtime_suspend(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); - struct snd_pcm_substream *substream; + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); + int port; - substream = had_substream_get(ctx); - if (substream) { - snd_pcm_suspend(substream); - had_substream_put(ctx); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + struct snd_pcm_substream *substream; + + substream = had_substream_get(ctx); + if (substream) { + snd_pcm_suspend(substream); + had_substream_put(ctx); + } } return 0; @@ -1637,12 +1686,12 @@ static int hdmi_lpe_audio_runtime_suspend(struct device *dev) static int __maybe_unused hdmi_lpe_audio_suspend(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); int err; err = hdmi_lpe_audio_runtime_suspend(dev); if (!err) - snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D3hot); + snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D3hot); return err; } @@ -1654,24 +1703,34 @@ static int hdmi_lpe_audio_runtime_resume(struct device *dev) static int __maybe_unused hdmi_lpe_audio_resume(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); hdmi_lpe_audio_runtime_resume(dev); - snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D0); + snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D0); return 0; } /* release resources */ static void hdmi_lpe_audio_free(struct snd_card *card) { - struct snd_intelhad *ctx = card->private_data; + struct snd_intelhad_card *card_ctx = card->private_data; + struct intel_hdmi_lpe_audio_pdata *pdata = card_ctx->dev->platform_data; + int port; - cancel_work_sync(&ctx->hdmi_audio_wq); + spin_lock_irq(&pdata->lpe_audio_slock); + pdata->notify_audio_lpe = NULL; + spin_unlock_irq(&pdata->lpe_audio_slock); - if (ctx->mmio_start) - iounmap(ctx->mmio_start); - if (ctx->irq >= 0) - free_irq(ctx->irq, ctx); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + + cancel_work_sync(&ctx->hdmi_audio_wq); + } + + if (card_ctx->mmio_start) + iounmap(card_ctx->mmio_start); + if (card_ctx->irq >= 0) + free_irq(card_ctx->irq, card_ctx); } /* @@ -1683,12 +1742,12 @@ static void hdmi_lpe_audio_free(struct snd_card *card) static int hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; - struct snd_intelhad *ctx; + struct snd_intelhad_card *card_ctx; struct snd_pcm *pcm; struct intel_hdmi_lpe_audio_pdata *pdata; int irq; struct resource *res_mmio; - int i, ret; + int port, ret; pdata = pdev->dev.platform_data; if (!pdata) { @@ -1711,39 +1770,30 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* create a card instance with ALSA framework */ ret = snd_card_new(&pdev->dev, hdmi_card_index, hdmi_card_id, - THIS_MODULE, sizeof(*ctx), &card); + THIS_MODULE, sizeof(*card_ctx), &card); if (ret) return ret; - ctx = card->private_data; - spin_lock_init(&ctx->had_spinlock); - mutex_init(&ctx->mutex); - ctx->connected = false; - ctx->dev = &pdev->dev; - ctx->card = card; - ctx->aes_bits = SNDRV_PCM_DEFAULT_CON_SPDIF; + card_ctx = card->private_data; + card_ctx->dev = &pdev->dev; + card_ctx->card = card; strcpy(card->driver, INTEL_HAD); strcpy(card->shortname, "Intel HDMI/DP LPE Audio"); strcpy(card->longname, "Intel HDMI/DP LPE Audio"); - ctx->irq = -1; - ctx->tmds_clock_speed = DIS_SAMPLE_RATE_148_5; - INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + card_ctx->irq = -1; card->private_free = hdmi_lpe_audio_free; - /* assume pipe A as default */ - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - - platform_set_drvdata(pdev, ctx); + platform_set_drvdata(pdev, card_ctx); dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n", __func__, (unsigned int)res_mmio->start, (unsigned int)res_mmio->end); - ctx->mmio_start = ioremap_nocache(res_mmio->start, - (size_t)(resource_size(res_mmio))); - if (!ctx->mmio_start) { + card_ctx->mmio_start = ioremap_nocache(res_mmio->start, + (size_t)(resource_size(res_mmio))); + if (!card_ctx->mmio_start) { dev_err(&pdev->dev, "Could not get ioremap\n"); ret = -EACCES; goto err; @@ -1751,54 +1801,79 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* setup interrupt handler */ ret = request_irq(irq, display_pipe_interrupt_handler, 0, - pdev->name, ctx); + pdev->name, card_ctx); if (ret < 0) { dev_err(&pdev->dev, "request_irq failed\n"); goto err; } - ctx->irq = irq; - - ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS, - MAX_CAP_STREAMS, &pcm); - if (ret) - goto err; - - /* setup private data which can be retrieved when required */ - pcm->private_data = ctx; - pcm->info_flags = 0; - strncpy(pcm->name, card->shortname, strlen(card->shortname)); - /* setup the ops for playabck */ - snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); + card_ctx->irq = irq; /* only 32bit addressable */ dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - /* allocate dma pages; - * try to allocate 600k buffer as default which is large enough - */ - snd_pcm_lib_preallocate_pages_for_all(pcm, - SNDRV_DMA_TYPE_DEV, NULL, - HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER); + init_channel_allocations(); - /* create controls */ - for (i = 0; i < ARRAY_SIZE(had_controls); i++) { - ret = snd_ctl_add(card, snd_ctl_new1(&had_controls[i], ctx)); - if (ret < 0) + card_ctx->num_pipes = pdata->num_pipes; + card_ctx->num_ports = pdata->num_ports; + + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + int i; + + ctx->card_ctx = card_ctx; + ctx->dev = card_ctx->dev; + ctx->port = port; + ctx->pipe = -1; + + INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + + ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS, + MAX_CAP_STREAMS, &pcm); + if (ret) goto err; - } - init_channel_allocations(); + /* setup private data which can be retrieved when required */ + pcm->private_data = ctx; + pcm->info_flags = 0; + strncpy(pcm->name, card->shortname, strlen(card->shortname)); + /* setup the ops for playabck */ + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); - /* Register channel map controls */ - ret = had_register_chmap_ctls(ctx, pcm); - if (ret < 0) - goto err; + /* allocate dma pages; + * try to allocate 600k buffer as default which is large enough + */ + snd_pcm_lib_preallocate_pages_for_all(pcm, + SNDRV_DMA_TYPE_DEV, NULL, + HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER); + + /* create controls */ + for (i = 0; i < ARRAY_SIZE(had_controls); i++) { + struct snd_kcontrol *kctl; + + kctl = snd_ctl_new1(&had_controls[i], ctx); + if (!kctl) { + ret = -ENOMEM; + goto err; + } - ret = had_create_jack(ctx); - if (ret < 0) - goto err; + kctl->id.device = pcm->device; + + ret = snd_ctl_add(card, kctl); + if (ret < 0) + goto err; + } + + /* Register channel map controls */ + ret = had_register_chmap_ctls(ctx, pcm); + if (ret < 0) + goto err; + + ret = had_create_jack(ctx, pcm); + if (ret < 0) + goto err; + } ret = snd_card_register(card); if (ret) @@ -1806,19 +1881,18 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) spin_lock_irq(&pdata->lpe_audio_slock); pdata->notify_audio_lpe = notify_audio_lpe; - pdata->notify_pending = false; spin_unlock_irq(&pdata->lpe_audio_slock); - /* runtime PM isn't enabled as default, since it won't save much on - * BYT/CHT devices; user who want the runtime PM should adjust the - * power/ontrol and power/autosuspend_delay_ms sysfs entries instead - */ pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_set_active(&pdev->dev); dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__); - schedule_work(&ctx->hdmi_audio_wq); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + + schedule_work(&ctx->hdmi_audio_wq); + } return 0; @@ -1834,9 +1908,9 @@ err: */ static int hdmi_lpe_audio_remove(struct platform_device *pdev) { - struct snd_intelhad *ctx = platform_get_drvdata(pdev); + struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); - snd_card_free(ctx->card); + snd_card_free(card_ctx->card); return 0; } diff --git a/sound/x86/intel_hdmi_audio.h b/sound/x86/intel_hdmi_audio.h index 2d3e389f76b3..0d91bb5dbab7 100644 --- a/sound/x86/intel_hdmi_audio.h +++ b/sound/x86/intel_hdmi_audio.h @@ -32,7 +32,6 @@ #include "intel_hdmi_lpe_audio.h" -#define PCM_INDEX 0 #define MAX_PB_STREAMS 1 #define MAX_CAP_STREAMS 0 #define BYTES_PER_WORD 0x4 @@ -101,7 +100,7 @@ struct pcm_stream_info { * @chmap: holds channel map info */ struct snd_intelhad { - struct snd_card *card; + struct snd_intelhad_card *card_ctx; bool connected; struct pcm_stream_info stream_info; unsigned char eld[HDMI_MAX_ELD_BYTES]; @@ -112,6 +111,8 @@ struct snd_intelhad { struct snd_pcm_chmap *chmap; int tmds_clock_speed; int link_rate; + int port; /* fixed */ + int pipe; /* can change dynamically */ /* ring buffer (BD) position index */ unsigned int bd_head; @@ -123,9 +124,6 @@ struct snd_intelhad { unsigned int period_bytes; /* PCM period size in bytes */ /* internal stuff */ - int irq; - void __iomem *mmio_start; - unsigned int had_config_offset; union aud_cfg aud_config; /* AUD_CONFIG reg value cache */ struct work_struct hdmi_audio_wq; struct mutex mutex; /* for protecting chmap and eld */ @@ -133,4 +131,16 @@ struct snd_intelhad { struct snd_jack *jack; }; +struct snd_intelhad_card { + struct snd_card *card; + struct device *dev; + + /* internal stuff */ + int irq; + void __iomem *mmio_start; + int num_pipes; + int num_ports; + struct snd_intelhad pcm_ctx[3]; /* one for each port */ +}; + #endif /* _INTEL_HDMI_AUDIO_ */ |