diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-25 11:48:26 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-25 11:48:26 -0800 |
commit | 4971f090aa7f6ce5daa094ce4334f6618f93a7eb (patch) | |
tree | 45d75782b7dedbec76a3ab82d2769f7707668071 /drivers/gpu/drm/i915/i915_scheduler.c | |
parent | c76cd634eb5bfd497617ea224a54a03b545c8c4d (diff) | |
parent | 2a3c83f5fe0770d13bbb71b23674886ff4111f44 (diff) |
Merge tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Core:
- shared fencing staging removal
- drop transactional atomic helpers and move helpers to new location
- DP/MST atomic cleanup
- Leasing cleanups and drop EXPORT_SYMBOL
- Convert drivers to atomic helpers and generic fbdev.
- removed deprecated obj_ref/unref in favour of get/put
- Improve dumb callback documentation
- MODESET_LOCK_BEGIN/END helpers
panels:
- CDTech panels, Banana Pi Panel, DLC1010GIG,
- Olimex LCD-O-LinuXino, Samsung S6D16D0, Truly NT35597 WQXGA,
- Himax HX8357D, simulated RTSM AEMv8.
- GPD Win2 panel
- AUO G101EVN010
vgem:
- render node support
ttm:
- move global init out of drivers
- fix LRU handling for ghost objects
- Support for simultaneous submissions to multiple engines
scheduler:
- timeout/fault handling changes to help GPU recovery
- helpers for hw with preemption support
i915:
- Scaler/Watermark fixes
- DP MST + powerwell fixes
- PSR fixes
- Break long get/put shmemfs pages
- Icelake fixes
- Icelake DSI video mode enablement
- Engine workaround improvements
amdgpu:
- freesync support
- GPU reset enabled on CI, VI, SOC15 dGPUs
- ABM support in DC
- KFD support for vega12/polaris12
- SDMA paging queue on vega
- More amdkfd code sharing
- DCC scanout on GFX9
- DC kerneldoc
- Updated SMU firmware for GFX8 chips
- XGMI PSP + hive reset support
- GPU reset
- DC trace support
- Powerplay updates for newer Polaris
- Cursor plane update fast path
- kfd dma-buf support
virtio-gpu:
- add EDID support
vmwgfx:
- pageflip with damage support
nouveau:
- Initial Turing TU104/TU106 modesetting support
msm:
- a2xx gpu support for apq8060 and imx5
- a2xx gpummu support
- mdp4 display support for apq8060
- DPU fixes and cleanups
- enhanced profiling support
- debug object naming interface
- get_iova/page pinning decoupling
tegra:
- Tegra194 host1x, VIC and display support enabled
- Audio over HDMI for Tegra186 and Tegra194
exynos:
- DMA/IOMMU refactoring
- plane alpha + blend mode support
- Color format fixes for mixer driver
rcar-du:
- R8A7744 and R8A77470 support
- R8A77965 LVDS support
imx:
- fbdev emulation fix
- multi-tiled scalling fixes
- SPDX identifiers
rockchip
- dw_hdmi support
- dw-mipi-dsi + dual dsi support
- mailbox read size fix
qxl:
- fix cursor pinning
vc4:
- YUV support (scaling + cursor)
v3d:
- enable TFU (Texture Formatting Unit)
mali-dp:
- add support for linear tiled formats
sun4i:
- Display Engine 3 support
- H6 DE3 mixer 0 support
- H6 display engine support
- dw-hdmi support
- H6 HDMI phy support
- implicit fence waiting
- BGRX8888 support
meson:
- Overlay plane support
- implicit fence waiting
- HDMI 1.4 4k modes
bridge:
- i2c fixes for sii902x"
* tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm: (1403 commits)
drm/amd/display: Add fast path for cursor plane updates
drm/amdgpu: Enable GPU recovery by default for CI
drm/amd/display: Fix duplicating scaling/underscan connector state
drm/amd/display: Fix unintialized max_bpc state values
Revert "drm/amd/display: Set RMX_ASPECT as default"
drm/amdgpu: Fix stub function name
drm/msm/dpu: Fix clock issue after bind failure
drm/msm/dpu: Clean up dpu_media_info.h static inline functions
drm/msm/dpu: Further cleanups for static inline functions
drm/msm/dpu: Cleanup the debugfs functions
drm/msm/dpu: Remove dpu_irq and unused functions
drm/msm: Make irq_postinstall optional
drm/msm/dpu: Cleanup callers of dpu_hw_blk_init
drm/msm/dpu: Remove unused functions
drm/msm/dpu: Remove dpu_crtc_is_enabled()
drm/msm/dpu: Remove dpu_crtc_get_mixer_height
drm/msm/dpu: Remove dpu_dbg
drm/msm: dpu: Remove crtc_lock
drm/msm: dpu: Remove vblank_requested flag from dpu_crtc
drm/msm: dpu: Separate crtc assignment from vblank enable
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_scheduler.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_scheduler.c | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c new file mode 100644 index 000000000000..340faea6c08a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -0,0 +1,399 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include <linux/mutex.h> + +#include "i915_drv.h" +#include "i915_request.h" +#include "i915_scheduler.h" + +static DEFINE_SPINLOCK(schedule_lock); + +static const struct i915_request * +node_to_request(const struct i915_sched_node *node) +{ + return container_of(node, const struct i915_request, sched); +} + +static inline bool node_signaled(const struct i915_sched_node *node) +{ + return i915_request_completed(node_to_request(node)); +} + +void i915_sched_node_init(struct i915_sched_node *node) +{ + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; +} + +static struct i915_dependency * +i915_dependency_alloc(struct drm_i915_private *i915) +{ + return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); +} + +static void +i915_dependency_free(struct drm_i915_private *i915, + struct i915_dependency *dep) +{ + kmem_cache_free(i915->dependencies, dep); +} + +bool __i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) +{ + bool ret = false; + + spin_lock(&schedule_lock); + + if (!node_signaled(signal)) { + INIT_LIST_HEAD(&dep->dfs_link); + list_add(&dep->wait_link, &signal->waiters_list); + list_add(&dep->signal_link, &node->signalers_list); + dep->signaler = signal; + dep->flags = flags; + + ret = true; + } + + spin_unlock(&schedule_lock); + + return ret; +} + +int i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) +{ + struct i915_dependency *dep; + + dep = i915_dependency_alloc(i915); + if (!dep) + return -ENOMEM; + + if (!__i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC)) + i915_dependency_free(i915, dep); + + return 0; +} + +void i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) +{ + struct i915_dependency *dep, *tmp; + + GEM_BUG_ON(!list_empty(&node->link)); + + spin_lock(&schedule_lock); + + /* + * Everyone we depended upon (the fences we wait to be signaled) + * should retire before us and remove themselves from our list. + * However, retirement is run independently on each timeline and + * so we may be called out-of-order. + */ + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!node_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->wait_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + /* Remove ourselves from everyone who depends upon us */ + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->signal_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + spin_unlock(&schedule_lock); +} + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static void assert_priolists(struct intel_engine_execlists * const execlists, + long queue_priority) +{ + struct rb_node *rb; + long last_prio, i; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + GEM_BUG_ON(rb_first_cached(&execlists->queue) != + rb_first(&execlists->queue.rb_root)); + + last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + const struct i915_priolist *p = to_priolist(rb); + + GEM_BUG_ON(p->priority >= last_prio); + last_prio = p->priority; + + GEM_BUG_ON(!p->used); + for (i = 0; i < ARRAY_SIZE(p->requests); i++) { + if (list_empty(&p->requests[i])) + continue; + + GEM_BUG_ON(!(p->used & BIT(i))); + } + } +} + +struct list_head * +i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_priolist *p; + struct rb_node **parent, *rb; + bool first = true; + int idx, i; + + lockdep_assert_held(&engine->timeline.lock); + assert_priolists(execlists, INT_MAX); + + /* buckets sorted from highest [in slot 0] to lowest priority */ + idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; + prio >>= I915_USER_PRIORITY_SHIFT; + if (unlikely(execlists->no_priolist)) + prio = I915_PRIORITY_NORMAL; + +find_priolist: + /* most positive priority is scheduled first, equal priorities fifo */ + rb = NULL; + parent = &execlists->queue.rb_root.rb_node; + while (*parent) { + rb = *parent; + p = to_priolist(rb); + if (prio > p->priority) { + parent = &rb->rb_left; + } else if (prio < p->priority) { + parent = &rb->rb_right; + first = false; + } else { + goto out; + } + } + + if (prio == I915_PRIORITY_NORMAL) { + p = &execlists->default_priolist; + } else { + p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); + /* Convert an allocation failure to a priority bump */ + if (unlikely(!p)) { + prio = I915_PRIORITY_NORMAL; /* recurses just once */ + + /* To maintain ordering with all rendering, after an + * allocation failure we have to disable all scheduling. + * Requests will then be executed in fifo, and schedule + * will ensure that dependencies are emitted in fifo. + * There will be still some reordering with existing + * requests, so if userspace lied about their + * dependencies that reordering may be visible. + */ + execlists->no_priolist = true; + goto find_priolist; + } + } + + p->priority = prio; + for (i = 0; i < ARRAY_SIZE(p->requests); i++) + INIT_LIST_HEAD(&p->requests[i]); + rb_link_node(&p->node, rb, parent); + rb_insert_color_cached(&p->node, &execlists->queue, first); + p->used = 0; + +out: + p->used |= BIT(idx); + return &p->requests[idx]; +} + +static struct intel_engine_cs * +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) +{ + struct intel_engine_cs *engine = node_to_request(node)->engine; + + GEM_BUG_ON(!locked); + + if (engine != locked) { + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); + } + + return engine; +} + +static void __i915_schedule(struct i915_request *rq, + const struct i915_sched_attr *attr) +{ + struct list_head *uninitialized_var(pl); + struct intel_engine_cs *engine, *last; + struct i915_dependency *dep, *p; + struct i915_dependency stack; + const int prio = attr->priority; + LIST_HEAD(dfs); + + /* Needed in order to use the temporary link inside i915_dependency */ + lockdep_assert_held(&schedule_lock); + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + + if (i915_request_completed(rq)) + return; + + if (prio <= READ_ONCE(rq->sched.attr.priority)) + return; + + stack.signaler = &rq->sched; + list_add(&stack.dfs_link, &dfs); + + /* + * Recursively bump all dependent priorities to match the new request. + * + * A naive approach would be to use recursion: + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) + * update_priorities(dep->signal, prio) + * queue_request(node); + * } + * but that may have unlimited recursion depth and so runs a very + * real risk of overunning the kernel stack. Instead, we build + * a flat list of all dependencies starting with the current request. + * As we walk the list of dependencies, we add all of its dependencies + * to the end of the list (this may include an already visited + * request) and continue to walk onwards onto the new dependencies. The + * end result is a topological list of requests in reverse order, the + * last element in the list is the request we must execute first. + */ + list_for_each_entry(dep, &dfs, dfs_link) { + struct i915_sched_node *node = dep->signaler; + + /* + * Within an engine, there can be no cycle, but we may + * refer to the same dependency chain multiple times + * (redundant dependencies are not eliminated) and across + * engines. + */ + list_for_each_entry(p, &node->signalers_list, signal_link) { + GEM_BUG_ON(p == dep); /* no cycles! */ + + if (node_signaled(p->signaler)) + continue; + + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) + list_move_tail(&p->dfs_link, &dfs); + } + } + + /* + * If we didn't need to bump any existing priorities, and we haven't + * yet submitted this request (i.e. there is no potential race with + * execlists_submit_request()), we can set our own priority and skip + * acquiring the engine locks. + */ + if (rq->sched.attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&rq->sched.link)); + rq->sched.attr = *attr; + + if (stack.dfs_link.next == stack.dfs_link.prev) + return; + + __list_del_entry(&stack.dfs_link); + } + + last = NULL; + engine = rq->engine; + spin_lock_irq(&engine->timeline.lock); + + /* Fifo and depth-first replacement ensure our deps execute before us */ + list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_sched_node *node = dep->signaler; + + INIT_LIST_HEAD(&dep->dfs_link); + + engine = sched_lock_engine(node, engine); + + /* Recheck after acquiring the engine->timeline.lock */ + if (prio <= node->attr.priority || node_signaled(node)) + continue; + + node->attr.priority = prio; + if (!list_empty(&node->link)) { + if (last != engine) { + pl = i915_sched_lookup_priolist(engine, prio); + last = engine; + } + list_move_tail(&node->link, pl); + } else { + /* + * If the request is not in the priolist queue because + * it is not yet runnable, then it doesn't contribute + * to our preemption decisions. On the other hand, + * if the request is on the HW, it too is not in the + * queue; but in that case we may still need to reorder + * the inflight requests. + */ + if (!i915_sw_fence_done(&node_to_request(node)->submit)) + continue; + } + + if (prio <= engine->execlists.queue_priority) + continue; + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves. + */ + if (node_to_request(node)->global_seqno && + i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, + node_to_request(node)->global_seqno)) + continue; + + /* Defer (tasklet) submission until after all of our updates. */ + engine->execlists.queue_priority = prio; + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->timeline.lock); +} + +void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) +{ + spin_lock(&schedule_lock); + __i915_schedule(rq, attr); + spin_unlock(&schedule_lock); +} + +void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) +{ + struct i915_sched_attr attr; + + GEM_BUG_ON(bump & ~I915_PRIORITY_MASK); + + if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID) + return; + + spin_lock_bh(&schedule_lock); + + attr = rq->sched.attr; + attr.priority |= bump; + __i915_schedule(rq, &attr); + + spin_unlock_bh(&schedule_lock); +} |