diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 15:49:32 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 15:49:32 -0700 |
commit | f377ea88b862bf7151be96d276f4cb740f8e1c41 (patch) | |
tree | 6205913431c012e285316281b6221a20d4a92128 /drivers/gpu/drm/i915/intel_ringbuffer.c | |
parent | 51e771c0d25b43d0f12b2c7c01939942becbbe28 (diff) | |
parent | 73bf1b7be7aab60d7c651402441dd0b0b4991098 (diff) |
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main pull request for the drm for 4.3. Nouveau is
probably the biggest amount of changes in here, since it missed 4.2.
Highlights below, along with the usual bunch of fixes.
All stuff outside drm should have applicable acks.
Highlights:
- new drivers:
freescale dcu kms driver
- core:
more atomic fixes
disable some dri1 interfaces on kms drivers
drop fb panic handling, this was just getting more broken, as more locking was required.
new core fbdev Kconfig support - instead of each driver enable/disabling it
struct_mutex cleanups
- panel:
more new panels
cleanup Kconfig
- i915:
Skylake support enabled by default
legacy modesetting using atomic infrastructure
Skylake fixes
GEN9 workarounds
- amdgpu:
Fiji support
CGS support for amdgpu
Initial GPU scheduler - off by default
Lots of bug fixes and optimisations.
- radeon:
DP fixes
misc fixes
- amdkfd:
Add Carrizo support for amdkfd using amdgpu.
- nouveau:
long pending cleanup to complete driver,
fully bisectable which makes it larger,
perfmon work
more reclocking improvements
maxwell displayport fixes
- vmwgfx:
new DX device support, supports OpenGL 3.3
screen targets support
- mgag200:
G200eW support
G200e new revision support
- msm:
dragonboard 410c support, msm8x94 support, msm8x74v1 support
yuv format support
dma plane support
mdp5 rotation
initial hdcp
- sti:
atomic support
- exynos:
lots of cleanups
atomic modesetting/pageflipping support
render node support
- tegra:
tegra210 support (dc, dsi, dp/hdmi)
dpms with atomic modesetting support
- atmel:
support for 3 more atmel SoCs
new input formats, PRIME support.
- dwhdmi:
preparing to add audio support
- rockchip:
yuv plane support"
* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (1369 commits)
drm/amdgpu: rename gmc_v8_0_init_compute_vmid
drm/amdgpu: fix vce3 instance handling
drm/amdgpu: remove ib test for the second VCE Ring
drm/amdgpu: properly enable VM fault interrupts
drm/amdgpu: fix warning in scheduler
drm/amdgpu: fix buffer placement under memory pressure
drm/amdgpu/cz: fix cz_dpm_update_low_memory_pstate logic
drm/amdgpu: fix typo in dce11 watermark setup
drm/amdgpu: fix typo in dce10 watermark setup
drm/amdgpu: use top down allocation for non-CPU accessible vram
drm/amdgpu: be explicit about cpu vram access for driver BOs (v2)
drm/amdgpu: set MEC doorbell range for Fiji
drm/amdgpu: implement burst NOP for SDMA
drm/amdgpu: add insert_nop ring func and default implementation
drm/amdgpu: add amdgpu_get_sdma_instance helper function
drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES
drm/amdgpu: add burst_nop flag for sdma
drm/amdgpu: add count field for the SDMA NOP packet v2
drm/amdgpu: use PT for VM sync on unmap
drm/amdgpu: make wait_event uninterruptible in push_job
...
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 408 |
1 files changed, 260 insertions, 148 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3817a6f00d9e..6e6b8db996ef 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -81,7 +81,7 @@ bool intel_ring_stopped(struct intel_engine_cs *ring) return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); } -void __intel_ring_advance(struct intel_engine_cs *ring) +static void __intel_ring_advance(struct intel_engine_cs *ring) { struct intel_ringbuffer *ringbuf = ring->buffer; ringbuf->tail &= ringbuf->size - 1; @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring) } static int -gen2_render_ring_flush(struct intel_engine_cs *ring, +gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; u32 cmd; int ret; @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, } static int -gen4_render_ring_flush(struct intel_engine_cs *ring, +gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; u32 cmd; int ret; @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, (IS_G4X(dev) || IS_GEN5(dev))) cmd |= MI_INVALIDATE_ISP; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -212,13 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, * really our business. That leaves only stall at scoreboard. */ static int -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; - - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -231,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -247,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) } static int -gen6_render_ring_flush(struct intel_engine_cs *ring, - u32 invalidate_domains, u32 flush_domains) +gen6_render_ring_flush(struct drm_i915_gem_request *req, + u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; u32 flags = 0; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = intel_emit_post_sync_nonzero_flush(ring); + ret = intel_emit_post_sync_nonzero_flush(req); if (ret) return ret; @@ -285,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -299,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, } static int -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -318,9 +322,10 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) } static int -gen7_render_ring_flush(struct intel_engine_cs *ring, +gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; u32 flags = 0; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; @@ -362,10 +367,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ - gen7_render_ring_cs_stall_wa(ring); + gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -379,12 +384,13 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, } static int -gen8_emit_pipe_control(struct intel_engine_cs *ring, +gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -400,11 +406,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring, } static int -gen8_render_ring_flush(struct intel_engine_cs *ring, +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { u32 flags = 0; - u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; flags |= PIPE_CONTROL_CS_STALL; @@ -424,7 +430,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(ring, + ret = gen8_emit_pipe_control(req, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD, 0); @@ -432,7 +438,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, return ret; } - return gen8_emit_pipe_control(ring, flags, scratch_addr); + return gen8_emit_pipe_control(req, flags, scratch_addr); } static void ring_write_tail(struct intel_engine_cs *ring, @@ -703,10 +709,10 @@ err: return ret; } -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, - struct intel_context *ctx) +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct i915_workarounds *w = &dev_priv->workarounds; @@ -715,11 +721,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, return 0; ring->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(ring); + ret = intel_ring_flush_all_caches(req); if (ret) return ret; - ret = intel_ring_begin(ring, (w->count * 2 + 2)); + ret = intel_ring_begin(req, (w->count * 2 + 2)); if (ret) return ret; @@ -733,7 +739,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, intel_ring_advance(ring); ring->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(ring); + ret = intel_ring_flush_all_caches(req); if (ret) return ret; @@ -742,16 +748,15 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, return 0; } -static int intel_rcs_ctx_init(struct intel_engine_cs *ring, - struct intel_context *ctx) +static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) { int ret; - ret = intel_ring_workarounds_emit(ring, ctx); + ret = intel_ring_workarounds_emit(req); if (ret != 0) return ret; - ret = i915_gem_render_state_init(ring); + ret = i915_gem_render_state_init(req); if (ret) DRM_ERROR("init render state: %d\n", ret); @@ -775,11 +780,11 @@ static int wa_add(struct drm_i915_private *dev_priv, return 0; } -#define WA_REG(addr, mask, val) { \ +#define WA_REG(addr, mask, val) do { \ const int r = wa_add(dev_priv, (addr), (mask), (val)); \ if (r) \ return r; \ - } + } while (0) #define WA_SET_BIT_MASKED(addr, mask) \ WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) @@ -800,6 +805,11 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + /* WaDisablePartialInstShootdown:bdw */ /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -861,6 +871,11 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + /* WaDisablePartialInstShootdown:chv */ /* WaDisableThreadStallDopClockGating:chv */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -931,8 +946,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */ WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, GEN9_RHWO_OPTIMIZATION_DISABLE); - WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0, - DISABLE_PIXEL_MASK_CAMMING); + /* + * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set + * but we do that in per ctx batchbuffer as there is an issue + * with this register not getting restored on ctx restore + */ } if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) || @@ -1023,13 +1041,6 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HIZ_CHICKEN, BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); - if (INTEL_REVID(dev) == SKL_REVID_C0 || - INTEL_REVID(dev) == SKL_REVID_D0) - /* WaBarrierPerformanceFixDisable:skl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE | - HDC_BARRIER_PERFORMANCE_DISABLE); - if (INTEL_REVID(dev) <= SKL_REVID_D0) { /* *Use Force Non-Coherent whenever executing a 3D context. This @@ -1041,6 +1052,20 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) HDC_FORCE_NON_COHERENT); } + if (INTEL_REVID(dev) == SKL_REVID_C0 || + INTEL_REVID(dev) == SKL_REVID_D0) + /* WaBarrierPerformanceFixDisable:skl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE | + HDC_BARRIER_PERFORMANCE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:skl */ + if (INTEL_REVID(dev) <= SKL_REVID_F0) { + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + } + return skl_tune_iz_hashing(ring); } @@ -1105,9 +1130,9 @@ static int init_render_ring(struct intel_engine_cs *ring) * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. * - * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv */ - if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9) + if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); /* Required for the hardware to program scanline values for waiting */ @@ -1132,7 +1157,7 @@ static int init_render_ring(struct intel_engine_cs *ring) _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); } - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); if (HAS_L3_DPF(dev)) @@ -1155,10 +1180,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) intel_fini_pipe_control(ring); } -static int gen8_rcs_signal(struct intel_engine_cs *signaller, +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 8 + struct intel_engine_cs *signaller = signaller_req->ring; struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *waiter; @@ -1168,7 +1194,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; #undef MBOX_UPDATE_DWORDS - ret = intel_ring_begin(signaller, num_dwords); + ret = intel_ring_begin(signaller_req, num_dwords); if (ret) return ret; @@ -1178,8 +1204,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - seqno = i915_gem_request_get_seqno( - signaller->outstanding_lazy_request); + seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | @@ -1196,10 +1221,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, return 0; } -static int gen8_xcs_signal(struct intel_engine_cs *signaller, +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 6 + struct intel_engine_cs *signaller = signaller_req->ring; struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *waiter; @@ -1209,7 +1235,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; #undef MBOX_UPDATE_DWORDS - ret = intel_ring_begin(signaller, num_dwords); + ret = intel_ring_begin(signaller_req, num_dwords); if (ret) return ret; @@ -1219,8 +1245,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - seqno = i915_gem_request_get_seqno( - signaller->outstanding_lazy_request); + seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); intel_ring_emit(signaller, lower_32_bits(gtt_offset) | @@ -1235,9 +1260,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, return 0; } -static int gen6_signal(struct intel_engine_cs *signaller, +static int gen6_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { + struct intel_engine_cs *signaller = signaller_req->ring; struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *useless; @@ -1248,15 +1274,14 @@ static int gen6_signal(struct intel_engine_cs *signaller, num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); #undef MBOX_UPDATE_DWORDS - ret = intel_ring_begin(signaller, num_dwords); + ret = intel_ring_begin(signaller_req, num_dwords); if (ret) return ret; for_each_ring(useless, dev_priv, i) { u32 mbox_reg = signaller->semaphore.mbox.signal[i]; if (mbox_reg != GEN6_NOSYNC) { - u32 seqno = i915_gem_request_get_seqno( - signaller->outstanding_lazy_request); + u32 seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(signaller, mbox_reg); intel_ring_emit(signaller, seqno); @@ -1272,30 +1297,29 @@ static int gen6_signal(struct intel_engine_cs *signaller, /** * gen6_add_request - Update the semaphore mailbox registers - * - * @ring - ring that is adding a request - * @seqno - return seqno stuck into the ring + * + * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ static int -gen6_add_request(struct intel_engine_cs *ring) +gen6_add_request(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; if (ring->semaphore.signal) - ret = ring->semaphore.signal(ring, 4); + ret = ring->semaphore.signal(req, 4); else - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, MI_USER_INTERRUPT); __intel_ring_advance(ring); @@ -1318,14 +1342,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, */ static int -gen8_ring_sync(struct intel_engine_cs *waiter, +gen8_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { + struct intel_engine_cs *waiter = waiter_req->ring; struct drm_i915_private *dev_priv = waiter->dev->dev_private; int ret; - ret = intel_ring_begin(waiter, 4); + ret = intel_ring_begin(waiter_req, 4); if (ret) return ret; @@ -1343,10 +1368,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter, } static int -gen6_ring_sync(struct intel_engine_cs *waiter, +gen6_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { + struct intel_engine_cs *waiter = waiter_req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; @@ -1361,7 +1387,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter, WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(waiter, 4); + ret = intel_ring_begin(waiter_req, 4); if (ret) return ret; @@ -1392,8 +1418,9 @@ do { \ } while (0) static int -pc_render_add_request(struct intel_engine_cs *ring) +pc_render_add_request(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; @@ -1405,7 +1432,7 @@ pc_render_add_request(struct intel_engine_cs *ring) * incoherence by flushing the 6 PIPE_NOTIFY buffers out to * memory before requesting an interrupt. */ - ret = intel_ring_begin(ring, 32); + ret = intel_ring_begin(req, 32); if (ret) return ret; @@ -1413,8 +1440,7 @@ pc_render_add_request(struct intel_engine_cs *ring) PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, 0); PIPE_CONTROL_FLUSH(ring, scratch_addr); scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ @@ -1433,8 +1459,7 @@ pc_render_add_request(struct intel_engine_cs *ring) PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_NOTIFY); intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, 0); __intel_ring_advance(ring); @@ -1585,13 +1610,14 @@ i8xx_ring_put_irq(struct intel_engine_cs *ring) } static int -bsd_ring_flush(struct intel_engine_cs *ring, +bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -1602,18 +1628,18 @@ bsd_ring_flush(struct intel_engine_cs *ring, } static int -i9xx_add_request(struct intel_engine_cs *ring) +i9xx_add_request(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, MI_USER_INTERRUPT); __intel_ring_advance(ring); @@ -1745,13 +1771,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring) } static int -i965_dispatch_execbuffer(struct intel_engine_cs *ring, +i965_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -1771,14 +1798,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_dispatch_execbuffer(struct intel_engine_cs *ring, +i830_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { + struct intel_engine_cs *ring = req->ring; u32 cs_offset = ring->scratch.gtt_offset; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -1795,7 +1823,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(ring, 6 + 2); + ret = intel_ring_begin(req, 6 + 2); if (ret) return ret; @@ -1818,7 +1846,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, offset = cs_offset; } - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -1833,13 +1861,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, } static int -i915_dispatch_execbuffer(struct intel_engine_cs *ring, +i915_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -2082,7 +2111,6 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) intel_unpin_ringbuffer_obj(ringbuf); intel_destroy_ringbuffer_obj(ringbuf); - i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); if (ring->cleanup) ring->cleanup(ring); @@ -2106,6 +2134,9 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) if (intel_ring_space(ringbuf) >= n) return 0; + /* The whole point of reserving space is to not wait! */ + WARN_ON(ringbuf->reserved_in_use); + list_for_each_entry(request, &ring->request_list, list) { space = __intel_ring_space(request->postfix, ringbuf->tail, ringbuf->size); @@ -2124,18 +2155,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) return 0; } -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) { uint32_t __iomem *virt; - struct intel_ringbuffer *ringbuf = ring->buffer; int rem = ringbuf->size - ringbuf->tail; - if (ringbuf->space < rem) { - int ret = ring_wait_for_space(ring, rem); - if (ret) - return ret; - } - virt = ringbuf->virtual_start + ringbuf->tail; rem /= 4; while (rem--) @@ -2143,21 +2167,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) ringbuf->tail = 0; intel_ring_update_space(ringbuf); - - return 0; } int intel_ring_idle(struct intel_engine_cs *ring) { struct drm_i915_gem_request *req; - int ret; - - /* We need to add any requests required to flush the objects and ring */ - if (ring->outstanding_lazy_request) { - ret = i915_add_request(ring); - if (ret) - return ret; - } /* Wait upon the last request to be completed */ if (list_empty(&ring->request_list)) @@ -2180,33 +2194,126 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) return 0; } -static int __intel_ring_prepare(struct intel_engine_cs *ring, - int bytes) +int intel_ring_reserve_space(struct drm_i915_gem_request *request) +{ + /* + * The first call merely notes the reserve request and is common for + * all back ends. The subsequent localised _begin() call actually + * ensures that the reservation is available. Without the begin, if + * the request creator immediately submitted the request without + * adding any commands to it then there might not actually be + * sufficient room for the submission commands. + */ + intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST); + + return intel_ring_begin(request, 0); +} + +void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size) +{ + WARN_ON(ringbuf->reserved_size); + WARN_ON(ringbuf->reserved_in_use); + + ringbuf->reserved_size = size; +} + +void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf) +{ + WARN_ON(ringbuf->reserved_in_use); + + ringbuf->reserved_size = 0; + ringbuf->reserved_in_use = false; +} + +void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf) +{ + WARN_ON(ringbuf->reserved_in_use); + + ringbuf->reserved_in_use = true; + ringbuf->reserved_tail = ringbuf->tail; +} + +void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf) +{ + WARN_ON(!ringbuf->reserved_in_use); + if (ringbuf->tail > ringbuf->reserved_tail) { + WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size, + "request reserved size too small: %d vs %d!\n", + ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size); + } else { + /* + * The ring was wrapped while the reserved space was in use. + * That means that some unknown amount of the ring tail was + * no-op filled and skipped. Thus simply adding the ring size + * to the tail and doing the above space check will not work. + * Rather than attempt to track how much tail was skipped, + * it is much simpler to say that also skipping the sanity + * check every once in a while is not a big issue. + */ + } + + ringbuf->reserved_size = 0; + ringbuf->reserved_in_use = false; +} + +static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes) { struct intel_ringbuffer *ringbuf = ring->buffer; - int ret; + int remain_usable = ringbuf->effective_size - ringbuf->tail; + int remain_actual = ringbuf->size - ringbuf->tail; + int ret, total_bytes, wait_bytes = 0; + bool need_wrap = false; - if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { - ret = intel_wrap_ring_buffer(ring); - if (unlikely(ret)) - return ret; + if (ringbuf->reserved_in_use) + total_bytes = bytes; + else + total_bytes = bytes + ringbuf->reserved_size; + + if (unlikely(bytes > remain_usable)) { + /* + * Not enough space for the basic request. So need to flush + * out the remainder and then wait for base + reserved. + */ + wait_bytes = remain_actual + total_bytes; + need_wrap = true; + } else { + if (unlikely(total_bytes > remain_usable)) { + /* + * The base request will fit but the reserved space + * falls off the end. So only need to to wait for the + * reserved size after flushing out the remainder. + */ + wait_bytes = remain_actual + ringbuf->reserved_size; + need_wrap = true; + } else if (total_bytes > ringbuf->space) { + /* No wrapping required, just waiting. */ + wait_bytes = total_bytes; + } } - if (unlikely(ringbuf->space < bytes)) { - ret = ring_wait_for_space(ring, bytes); + if (wait_bytes) { + ret = ring_wait_for_space(ring, wait_bytes); if (unlikely(ret)) return ret; + + if (need_wrap) + __wrap_ring_buffer(ringbuf); } return 0; } -int intel_ring_begin(struct intel_engine_cs *ring, +int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct intel_engine_cs *ring; + struct drm_i915_private *dev_priv; int ret; + WARN_ON(req == NULL); + ring = req->ring; + dev_priv = ring->dev->dev_private; + ret = i915_gem_check_wedge(&dev_priv->gpu_error, dev_priv->mm.interruptible); if (ret) @@ -2216,18 +2323,14 @@ int intel_ring_begin(struct intel_engine_cs *ring, if (ret) return ret; - /* Preallocate the olr before touching the ring */ - ret = i915_gem_request_alloc(ring, ring->default_context); - if (ret) - return ret; - ring->buffer->space -= num_dwords * sizeof(uint32_t); return 0; } /* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct intel_engine_cs *ring) +int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; @@ -2235,7 +2338,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(ring, num_dwords); + ret = intel_ring_begin(req, num_dwords); if (ret) return ret; @@ -2252,8 +2355,6 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - BUG_ON(ring->outstanding_lazy_request); - if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) { I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); @@ -2298,13 +2399,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring, _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); } -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { + struct intel_engine_cs *ring = req->ring; uint32_t cmd; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -2342,20 +2444,23 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, } static int -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { + struct intel_engine_cs *ring = req->ring; bool ppgtt = USES_PPGTT(ring->dev) && !(dispatch_flags & I915_DISPATCH_SECURE); int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2365,20 +2470,23 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, } static int -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); @@ -2387,13 +2495,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, } static int -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -2410,14 +2519,15 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct intel_engine_cs *ring, +static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; uint32_t cmd; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -2818,26 +2928,28 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) } int -intel_ring_flush_all_caches(struct intel_engine_cs *ring) +intel_ring_flush_all_caches(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; if (!ring->gpu_caches_dirty) return 0; - ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); + ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS); if (ret) return ret; - trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); + trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS); ring->gpu_caches_dirty = false; return 0; } int -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; uint32_t flush_domains; int ret; @@ -2845,11 +2957,11 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) if (ring->gpu_caches_dirty) flush_domains = I915_GEM_GPU_DOMAINS; - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); if (ret) return ret; - trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); ring->gpu_caches_dirty = false; return 0; |