diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 15:49:32 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 15:49:32 -0700 |
| commit | f377ea88b862bf7151be96d276f4cb740f8e1c41 (patch) | |
| tree | 6205913431c012e285316281b6221a20d4a92128 /drivers/gpu/drm/i915/i915_gem.c | |
| parent | 51e771c0d25b43d0f12b2c7c01939942becbbe28 (diff) | |
| parent | 73bf1b7be7aab60d7c651402441dd0b0b4991098 (diff) | |
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main pull request for the drm for 4.3. Nouveau is
probably the biggest amount of changes in here, since it missed 4.2.
Highlights below, along with the usual bunch of fixes.
All stuff outside drm should have applicable acks.
Highlights:
- new drivers:
freescale dcu kms driver
- core:
more atomic fixes
disable some dri1 interfaces on kms drivers
drop fb panic handling; it was just getting more broken as more locking was required.
new core fbdev Kconfig support, instead of each driver enabling/disabling it
struct_mutex cleanups
- panel:
more new panels
cleanup Kconfig
- i915:
Skylake support enabled by default
legacy modesetting using atomic infrastructure
Skylake fixes
GEN9 workarounds
- amdgpu:
Fiji support
CGS support for amdgpu
Initial GPU scheduler - off by default
Lots of bug fixes and optimisations.
- radeon:
DP fixes
misc fixes
- amdkfd:
Add Carrizo support for amdkfd using amdgpu.
- nouveau:
long-pending cleanup of the complete driver,
fully bisectable, which makes it larger,
perfmon work
more reclocking improvements
maxwell displayport fixes
- vmwgfx:
new DX device support, supports OpenGL 3.3
screen targets support
- mgag200:
G200eW support
G200e new revision support
- msm:
dragonboard 410c support, msm8x94 support, msm8x74v1 support
yuv format support
dma plane support
mdp5 rotation
initial hdcp
- sti:
atomic support
- exynos:
lots of cleanups
atomic modesetting/pageflipping support
render node support
- tegra:
tegra210 support (dc, dsi, dp/hdmi)
dpms with atomic modesetting support
- atmel:
support for three more Atmel SoCs
new input formats, PRIME support.
- dwhdmi:
preparing to add audio support
- rockchip:
yuv plane support"
* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (1369 commits)
drm/amdgpu: rename gmc_v8_0_init_compute_vmid
drm/amdgpu: fix vce3 instance handling
drm/amdgpu: remove ib test for the second VCE Ring
drm/amdgpu: properly enable VM fault interrupts
drm/amdgpu: fix warning in scheduler
drm/amdgpu: fix buffer placement under memory pressure
drm/amdgpu/cz: fix cz_dpm_update_low_memory_pstate logic
drm/amdgpu: fix typo in dce11 watermark setup
drm/amdgpu: fix typo in dce10 watermark setup
drm/amdgpu: use top down allocation for non-CPU accessible vram
drm/amdgpu: be explicit about cpu vram access for driver BOs (v2)
drm/amdgpu: set MEC doorbell range for Fiji
drm/amdgpu: implement burst NOP for SDMA
drm/amdgpu: add insert_nop ring func and default implementation
drm/amdgpu: add amdgpu_get_sdma_instance helper function
drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES
drm/amdgpu: add burst_nop flag for sdma
drm/amdgpu: add count field for the SDMA NOP packet v2
drm/amdgpu: use PT for VM sync on unmap
drm/amdgpu: make wait_event uninterruptible in push_job
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 861 |
1 file changed, 298 insertions, 563 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7a918d1c12ba..4d631a946481 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,11 +46,6 @@ static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj); static void i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); -static void i915_gem_write_fence(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj); -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -66,18 +61,6 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } -static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) -{ - if (obj->tiling_mode) - i915_gem_release_mmap(obj); - - /* As we do not have an associated fence register, we will force - * a tiling change if we ever need to acquire one. - */ - obj->fence_dirty = false; - obj->fence_reg = I915_FENCE_REG_NONE; -} - /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -149,14 +132,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_get_aperture *args = data; - struct drm_i915_gem_object *obj; + struct i915_gtt *ggtt = &dev_priv->gtt; + struct i915_vma *vma; size_t pinned; pinned = 0; mutex_lock(&dev->struct_mutex); - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (i915_gem_obj_is_pinned(obj)) - pinned += i915_gem_obj_ggtt_size(obj); + list_for_each_entry(vma, &ggtt->base.active_list, mm_list) + if (vma->pin_count) + pinned += vma->node.size; + list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list) + if (vma->pin_count) + pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); args->aper_size = dev_priv->gtt.base.total; @@ -347,7 +334,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { unsigned long unwritten; @@ -368,7 +355,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, i915_gem_chipset_flush(dev); out: - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; } @@ -801,7 +788,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); + intel_fb_obj_invalidate(obj, ORIGIN_GTT); while (remain > 0) { /* Operation in this page @@ -832,7 +819,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, } out_flush: - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_GTT); out_unpin: i915_gem_object_ggtt_unpin(obj); out: @@ -945,7 +932,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, if (ret) return ret; - intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); i915_gem_object_pin_pages(obj); @@ -1025,7 +1012,7 @@ out: if (needs_clflush_after) i915_gem_chipset_flush(dev); - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; } @@ -1146,23 +1133,6 @@ i915_gem_check_wedge(struct i915_gpu_error *error, return 0; } -/* - * Compare arbitrary request against outstanding lazy request. 
Emit on match. - */ -int -i915_gem_check_olr(struct drm_i915_gem_request *req) -{ - int ret; - - WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); - - ret = 0; - if (req == req->ring->outstanding_lazy_request) - ret = i915_add_request(req->ring); - - return ret; -} - static void fake_irq(unsigned long data) { wake_up_process((struct task_struct *)data); @@ -1334,6 +1304,33 @@ out: return ret; } +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + struct drm_i915_private *dev_private; + struct drm_i915_file_private *file_priv; + + WARN_ON(!req || !file || req->file_priv); + + if (!req || !file) + return -EINVAL; + + if (req->file_priv) + return -EINVAL; + + dev_private = req->ring->dev->dev_private; + file_priv = file->driver_priv; + + spin_lock(&file_priv->mm.lock); + req->file_priv = file_priv; + list_add_tail(&req->client_list, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + + req->pid = get_pid(task_pid(current)); + + return 0; +} + static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { @@ -1346,6 +1343,9 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) list_del(&request->client_list); request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); + + put_pid(request->pid); + request->pid = NULL; } static void i915_gem_request_retire(struct drm_i915_gem_request *request) @@ -1365,8 +1365,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(&request->list); i915_gem_request_remove_from_client(request); - put_pid(request->pid); - i915_gem_request_unreference(request); } @@ -1415,10 +1413,6 @@ i915_wait_request(struct drm_i915_gem_request *req) if (ret) return ret; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - ret = __i915_wait_request(req, atomic_read(&dev_priv->gpu_error.reset_counter), interruptible, NULL, NULL); @@ -1518,10 +1512,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) return 0; - ret = i915_gem_check_olr(req); - if (ret) - goto err; - requests[n++] = i915_gem_request_reference(req); } else { for (i = 0; i < I915_NUM_RINGS; i++) { @@ -1531,10 +1521,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) continue; - ret = i915_gem_check_olr(req); - if (ret) - goto err; - requests[n++] = i915_gem_request_reference(req); } } @@ -1545,7 +1531,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, NULL, rps); mutex_lock(&dev->struct_mutex); -err: for (i = 0; i < n; i++) { if (ret == 0) i915_gem_object_retire_request(obj, requests[i]); @@ -1613,6 +1598,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + if (write_domain != 0) + intel_fb_obj_invalidate(obj, + write_domain == I915_GEM_DOMAIN_GTT ? + ORIGIN_GTT : ORIGIN_CPU); + unref: drm_gem_object_unreference(&obj->base); unlock: @@ -2349,9 +2339,12 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } void i915_vma_move_to_active(struct i915_vma *vma, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_engine_cs *ring; + + ring = i915_gem_request_get_ring(req); /* Add a reference if we're newly entering the active list. 
*/ if (obj->active == 0) @@ -2359,8 +2352,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->active |= intel_ring_flag(ring); list_move_tail(&obj->ring_list[ring->id], &ring->active_list); - i915_gem_request_assign(&obj->last_read_req[ring->id], - intel_ring_get_request(ring)); + i915_gem_request_assign(&obj->last_read_req[ring->id], req); list_move_tail(&vma->mm_list, &vma->vm->active_list); } @@ -2372,7 +2364,7 @@ i915_gem_object_retire__write(struct drm_i915_gem_object *obj) RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); i915_gem_request_assign(&obj->last_write_req, NULL); - intel_fb_obj_flush(obj, true); + intel_fb_obj_flush(obj, true, ORIGIN_CS); } static void @@ -2393,6 +2385,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) if (obj->active) return; + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + list_move_tail(&obj->global_list, + &to_i915(obj->base.dev)->mm.bound_list); + list_for_each_entry(vma, &obj->vma_list, vma_link) { if (!list_empty(&vma->mm_list)) list_move_tail(&vma->mm_list, &vma->vm->inactive_list); @@ -2472,24 +2471,34 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) return 0; } -int __i915_add_request(struct intel_engine_cs *ring, - struct drm_file *file, - struct drm_i915_gem_object *obj) +/* + * NB: This function is not allowed to fail. Doing so would mean the the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_add_request(struct drm_i915_gem_request *request, + struct drm_i915_gem_object *obj, + bool flush_caches) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - struct drm_i915_gem_request *request; + struct intel_engine_cs *ring; + struct drm_i915_private *dev_priv; struct intel_ringbuffer *ringbuf; u32 request_start; int ret; - request = ring->outstanding_lazy_request; if (WARN_ON(request == NULL)) - return -ENOMEM; + return; - if (i915.enable_execlists) { - ringbuf = request->ctx->engine[ring->id].ringbuf; - } else - ringbuf = ring->buffer; + ring = request->ring; + dev_priv = ring->dev->dev_private; + ringbuf = request->ringbuf; + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + intel_ring_reserved_space_use(ringbuf); request_start = intel_ring_get_tail(ringbuf); /* @@ -2499,14 +2508,13 @@ int __i915_add_request(struct intel_engine_cs *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - if (i915.enable_execlists) { - ret = logical_ring_flush_all_caches(ringbuf, request->ctx); - if (ret) - return ret; - } else { - ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; + if (flush_caches) { + if (i915.enable_execlists) + ret = logical_ring_flush_all_caches(request); + else + ret = intel_ring_flush_all_caches(request); + /* Not allowed to fail! 
*/ + WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); } /* Record the position of the start of the request so that @@ -2516,17 +2524,15 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->postfix = intel_ring_get_tail(ringbuf); - if (i915.enable_execlists) { - ret = ring->emit_request(ringbuf, request); - if (ret) - return ret; - } else { - ret = ring->add_request(ring); - if (ret) - return ret; + if (i915.enable_execlists) + ret = ring->emit_request(request); + else { + ret = ring->add_request(request); request->tail = intel_ring_get_tail(ringbuf); } + /* Not allowed to fail! */ + WARN(ret, "emit|add_request failed: %d!\n", ret); request->head = request_start; @@ -2538,34 +2544,11 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->batch_obj = obj; - if (!i915.enable_execlists) { - /* Hold a reference to the current context so that we can inspect - * it later in case a hangcheck error event fires. - */ - request->ctx = ring->last_context; - if (request->ctx) - i915_gem_context_reference(request->ctx); - } - request->emitted_jiffies = jiffies; ring->last_submitted_seqno = request->seqno; list_add_tail(&request->list, &ring->request_list); - request->file_priv = NULL; - - if (file) { - struct drm_i915_file_private *file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - request->file_priv = file_priv; - list_add_tail(&request->client_list, - &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - request->pid = get_pid(task_pid(current)); - } trace_i915_gem_request_add(request); - ring->outstanding_lazy_request = NULL; i915_queue_hangcheck(ring->dev); @@ -2574,7 +2557,8 @@ int __i915_add_request(struct intel_engine_cs *ring, round_jiffies_up_relative(HZ)); intel_mark_busy(dev_priv->dev); - return 0; + /* Sanity check that the reserved size was large enough. 
*/ + intel_ring_reserved_space_end(ringbuf); } static bool i915_context_is_banned(struct drm_i915_private *dev_priv, @@ -2628,12 +2612,13 @@ void i915_gem_request_free(struct kref *req_ref) typeof(*req), ref); struct intel_context *ctx = req->ctx; + if (req->file_priv) + i915_gem_request_remove_from_client(req); + if (ctx) { if (i915.enable_execlists) { - struct intel_engine_cs *ring = req->ring; - - if (ctx != ring->default_context) - intel_lr_context_unpin(ring, ctx); + if (ctx != req->ring->default_context) + intel_lr_context_unpin(req); } i915_gem_context_unreference(ctx); @@ -2643,36 +2628,63 @@ void i915_gem_request_free(struct kref *req_ref) } int i915_gem_request_alloc(struct intel_engine_cs *ring, - struct intel_context *ctx) + struct intel_context *ctx, + struct drm_i915_gem_request **req_out) { struct drm_i915_private *dev_priv = to_i915(ring->dev); struct drm_i915_gem_request *req; int ret; - if (ring->outstanding_lazy_request) - return 0; + if (!req_out) + return -EINVAL; + + *req_out = NULL; req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (req == NULL) return -ENOMEM; - kref_init(&req->ref); - req->i915 = dev_priv; - ret = i915_gem_get_seqno(ring->dev, &req->seqno); if (ret) goto err; + kref_init(&req->ref); + req->i915 = dev_priv; req->ring = ring; + req->ctx = ctx; + i915_gem_context_reference(req->ctx); if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req, ctx); + ret = intel_logical_ring_alloc_request_extras(req); else ret = intel_ring_alloc_request_extras(req); - if (ret) + if (ret) { + i915_gem_context_unreference(req->ctx); goto err; + } + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_add_request() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + if (i915.enable_execlists) + ret = intel_logical_ring_reserve_space(req); + else + ret = intel_ring_reserve_space(req); + if (ret) { + /* + * At this point, the request is fully allocated even if not + * fully prepared. Thus it can be cleaned up using the proper + * free code. 
+ */ + i915_gem_request_cancel(req); + return ret; + } - ring->outstanding_lazy_request = req; + *req_out = req; return 0; err: @@ -2680,6 +2692,13 @@ err: return ret; } +void i915_gem_request_cancel(struct drm_i915_gem_request *req) +{ + intel_ring_reserved_space_cancel(req->ringbuf); + + i915_gem_request_unreference(req); +} + struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *ring) { @@ -2741,7 +2760,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, list_del(&submit_req->execlist_link); if (submit_req->ctx != ring->default_context) - intel_lr_context_unpin(ring, submit_req->ctx); + intel_lr_context_unpin(submit_req); i915_gem_request_unreference(submit_req); } @@ -2762,30 +2781,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_request_retire(request); } - - /* This may not have been flushed before the reset, so clean it now */ - i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); -} - -void i915_gem_restore_fences(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - - /* - * Commit delayed tiling changes if we have an object still - * attached to the fence, otherwise just clear the fence. - */ - if (reg->obj) { - i915_gem_object_update_fence(reg->obj, reg, - reg->obj->tiling_mode); - } else { - i915_gem_write_fence(dev, i, NULL); - } - } } void i915_gem_reset(struct drm_device *dev) @@ -2947,7 +2942,7 @@ i915_gem_idle_work_handler(struct work_struct *work) static int i915_gem_object_flush_active(struct drm_i915_gem_object *obj) { - int ret, i; + int i; if (!obj->active) return 0; @@ -2962,10 +2957,6 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (list_empty(&req->list)) goto retire; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - if (i915_gem_request_completed(req, true)) { __i915_gem_request_retire__upto(req); retire: @@ -3068,25 +3059,22 @@ out: static int __i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to, - struct drm_i915_gem_request *req) + struct drm_i915_gem_request *from_req, + struct drm_i915_gem_request **to_req) { struct intel_engine_cs *from; int ret; - from = i915_gem_request_get_ring(req); + from = i915_gem_request_get_ring(from_req); if (to == from) return 0; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(from_req, true)) return 0; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - if (!i915_semaphore_is_enabled(obj->base.dev)) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(req, + ret = __i915_wait_request(from_req, atomic_read(&i915->gpu_error.reset_counter), i915->mm.interruptible, NULL, @@ -3094,16 +3082,24 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (ret) return ret; - i915_gem_object_retire_request(obj, req); + i915_gem_object_retire_request(obj, from_req); } else { int idx = intel_ring_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(req); + u32 seqno = i915_gem_request_get_seqno(from_req); + + WARN_ON(!to_req); if (seqno <= from->semaphore.sync_seqno[idx]) return 0; - trace_i915_gem_ring_sync_to(from, to, req); - ret = to->semaphore.sync_to(to, from, seqno); + if (*to_req == NULL) { + ret = i915_gem_request_alloc(to, to->default_context, to_req); + if (ret) + return ret; + } + + trace_i915_gem_ring_sync_to(*to_req, from, 
from_req); + ret = to->semaphore.sync_to(*to_req, from, seqno); if (ret) return ret; @@ -3123,11 +3119,14 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * * @obj: object which may be in use on another ring. * @to: ring we wish to use the object on. May be NULL. + * @to_req: request we wish to use the object for. See below. + * This will be allocated and returned if a request is + * required but not passed in. * * This code is meant to abstract object synchronization with the GPU. * Calling with NULL implies synchronizing the object with the CPU * rather than a particular GPU ring. Conceptually we serialise writes - * between engines inside the GPU. We only allow on engine to write + * between engines inside the GPU. We only allow one engine to write * into a buffer at any time, but multiple readers. To ensure each has * a coherent view of memory, we must: * @@ -3138,11 +3137,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * - If we are a write request (pending_write_domain is set), the new * request must wait for outstanding read requests to complete. * + * For CPU synchronisation (NULL to) no request is required. For syncing with + * rings to_req must be non-NULL. However, a request does not have to be + * pre-allocated. If *to_req is NULL and sync commands will be emitted then a + * request will be allocated automatically and returned through *to_req. Note + * that it is not guaranteed that commands will be emitted (because the system + * might already be idle). Hence there is no need to create a request that + * might never have any work submitted. Note further that if a request is + * returned in *to_req, it is the responsibility of the caller to submit + * that request (after potentially adding more work to it). + * * Returns 0 if successful, else propagates up the lower layer error. */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to) + struct intel_engine_cs *to, + struct drm_i915_gem_request **to_req) { const bool readonly = obj->base.pending_write_domain == 0; struct drm_i915_gem_request *req[I915_NUM_RINGS]; @@ -3164,7 +3174,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, req[n++] = obj->last_read_req[i]; } for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i]); + ret = __i915_gem_object_sync(obj, to, req[i], to_req); if (ret) return ret; } @@ -3275,354 +3285,27 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { if (!i915.enable_execlists) { - ret = i915_switch_context(ring, ring->default_context); + struct drm_i915_gem_request *req; + + ret = i915_gem_request_alloc(ring, ring->default_context, &req); if (ret) return ret; - } - ret = intel_ring_idle(ring); - if (ret) - return ret; - } - - WARN_ON(i915_verify_lists(dev)); - return 0; -} - -static void i965_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int fence_reg; - int fence_pitch_shift; - - if (INTEL_INFO(dev)->gen >= 6) { - fence_reg = FENCE_REG_SANDYBRIDGE_0; - fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; - } else { - fence_reg = FENCE_REG_965_0; - fence_pitch_shift = I965_FENCE_PITCH_SHIFT; - } - - fence_reg += reg * 8; + ret = i915_switch_context(req); + if (ret) { + i915_gem_request_cancel(req); + return ret; + } - /* To w/a incoherency with non-atomic 64-bit register updates, - * we split the 64-bit update into two 32-bit writes. 
In order - * for a partial fence not to be evaluated between writes, we - * precede the update with write to turn off the fence register, - * and only enable the fence as the last step. - * - * For extra levels of paranoia, we make sure each step lands - * before applying the next step. - */ - I915_WRITE(fence_reg, 0); - POSTING_READ(fence_reg); - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - uint64_t val; - - /* Adjust fence size to match tiled area */ - if (obj->tiling_mode != I915_TILING_NONE) { - uint32_t row_size = obj->stride * - (obj->tiling_mode == I915_TILING_Y ? 32 : 8); - size = (size / row_size) * row_size; + i915_add_request_no_flush(req); } - val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & - 0xfffff000) << 32; - val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I965_FENCE_TILING_Y_SHIFT; - val |= I965_FENCE_REG_VALID; - - I915_WRITE(fence_reg + 4, val >> 32); - POSTING_READ(fence_reg + 4); - - I915_WRITE(fence_reg + 0, val); - POSTING_READ(fence_reg); - } else { - I915_WRITE(fence_reg + 4, 0); - POSTING_READ(fence_reg + 4); - } -} - -static void i915_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - int pitch_val; - int tile_width; - - WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", - i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); - - if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) - tile_width = 128; - else - tile_width = 512; - - /* Note: pitch better be a power of two tile widths */ - pitch_val = obj->stride / tile_width; - pitch_val = ffs(pitch_val) - 1; - - val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I915_FENCE_SIZE_BITS(size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } else - val = 0; - - if (reg < 8) - reg = FENCE_REG_830_0 + reg * 4; - else - reg = FENCE_REG_945_8 + (reg - 8) * 4; - - I915_WRITE(reg, val); - POSTING_READ(reg); -} - -static void i830_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t val; - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - uint32_t pitch_val; - - WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", - i915_gem_obj_ggtt_offset(obj), size); - - pitch_val = obj->stride / 128; - pitch_val = ffs(pitch_val) - 1; - - val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I830_FENCE_SIZE_BITS(size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } else - val = 0; - - I915_WRITE(FENCE_REG_830_0 + reg * 4, val); - POSTING_READ(FENCE_REG_830_0 + reg * 4); -} - -inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) -{ - return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; -} - -static void i915_gem_write_fence(struct drm_device *dev, int 
reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. - */ - if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) - mb(); - - WARN(obj && (!obj->stride || !obj->tiling_mode), - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - obj->stride, obj->tiling_mode); - - if (IS_GEN2(dev)) - i830_write_fence_reg(dev, reg, obj); - else if (IS_GEN3(dev)) - i915_write_fence_reg(dev, reg, obj); - else if (INTEL_INFO(dev)->gen >= 4) - i965_write_fence_reg(dev, reg, obj); - - /* And similarly be paranoid that no direct access to this region - * is reordered to before the fence is installed. - */ - if (i915_gem_object_needs_mb(obj)) - mb(); -} - -static inline int fence_number(struct drm_i915_private *dev_priv, - struct drm_i915_fence_reg *fence) -{ - return fence - dev_priv->fence_regs; -} - -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - int reg = fence_number(dev_priv, fence); - - i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); - - if (enable) { - obj->fence_reg = reg; - fence->obj = obj; - list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); - } else { - obj->fence_reg = I915_FENCE_REG_NONE; - fence->obj = NULL; - list_del_init(&fence->lru_list); - } - obj->fence_dirty = false; -} - -static int -i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) -{ - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); - if (ret) - return ret; - - i915_gem_request_assign(&obj->last_fenced_req, NULL); - } - - return 0; -} - -int -i915_gem_object_put_fence(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct drm_i915_fence_reg *fence; - int ret; - - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - - if (obj->fence_reg == I915_FENCE_REG_NONE) - return 0; - - fence = &dev_priv->fence_regs[obj->fence_reg]; - - if (WARN_ON(fence->pin_count)) - return -EBUSY; - - i915_gem_object_fence_lost(obj); - i915_gem_object_update_fence(obj, fence, false); - - return 0; -} - -static struct drm_i915_fence_reg * -i915_find_fence_reg(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_fence_reg *reg, *avail; - int i; - - /* First try to find a free reg */ - avail = NULL; - for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { - reg = &dev_priv->fence_regs[i]; - if (!reg->obj) - return reg; - - if (!reg->pin_count) - avail = reg; - } - - if (avail == NULL) - goto deadlock; - - /* None available, try to steal one or wait for a user to finish */ - list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->pin_count) - continue; - - return reg; - } - -deadlock: - /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(dev)) - return ERR_PTR(-EAGAIN); - - return ERR_PTR(-EDEADLK); -} - -/** - * i915_gem_object_get_fence - set up fencing for an object - * @obj: object to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. 
- * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - */ -int -i915_gem_object_get_fence(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - bool enable = obj->tiling_mode != I915_TILING_NONE; - struct drm_i915_fence_reg *reg; - int ret; - - /* Have we updated the tiling parameters upon the object and so - * will need to serialise the write to the associated fence register? - */ - if (obj->fence_dirty) { - ret = i915_gem_object_wait_fence(obj); + ret = intel_ring_idle(ring); if (ret) return ret; } - /* Just update our place in the LRU if our fence is getting reused. */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - reg = &dev_priv->fence_regs[obj->fence_reg]; - if (!obj->fence_dirty) { - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); - return 0; - } - } else if (enable) { - if (WARN_ON(!obj->map_and_fenceable)) - return -EINVAL; - - reg = i915_find_fence_reg(dev); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - if (reg->obj) { - struct drm_i915_gem_object *old = reg->obj; - - ret = i915_gem_object_wait_fence(old); - if (ret) - return ret; - - i915_gem_object_fence_lost(old); - } - } else - return 0; - - i915_gem_object_update_fence(obj, reg, enable); - + WARN_ON(i915_verify_lists(dev)); return 0; } @@ -3673,9 +3356,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - unsigned long start = + u64 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - unsigned long end = + u64 end = flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3731,7 +3414,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n", + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%llu\n", ggtt_view ? ggtt_view->type : 0, size, flags & PIN_MAPPABLE ? 
"mappable" : "total", @@ -3853,7 +3536,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_GTT); trace_i915_gem_object_change_domain(obj, obj->base.read_domains, @@ -3875,7 +3558,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); trace_i915_gem_object_change_domain(obj, obj->base.read_domains, @@ -3937,9 +3620,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->dirty = 1; } - if (write) - intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); - trace_i915_gem_object_change_domain(obj, old_read_domains, old_write_domain); @@ -4094,12 +3774,13 @@ int i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request, const struct i915_ggtt_view *view) { u32 old_read_domains, old_write_domain; int ret; - ret = i915_gem_object_sync(obj, pipelined); + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); if (ret) return ret; @@ -4210,9 +3891,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - if (write) - intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); - trace_i915_gem_object_change_domain(obj, old_read_domains, old_write_domain); @@ -4253,6 +3931,13 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (time_after_eq(request->emitted_jiffies, recent_enough)) break; + /* + * Note that the request might not have been submitted yet. + * In which case emitted_jiffies will be zero. 
+ */ + if (!request->emitted_jiffies) + continue; + target = request; } reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); @@ -4423,32 +4108,6 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, --vma->pin_count; } -bool -i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); - - WARN_ON(!ggtt_vma || - dev_priv->fence_regs[obj->fence_reg].pin_count > - ggtt_vma->pin_count); - dev_priv->fence_regs[obj->fence_reg].pin_count++; - return true; - } else - return false; -} - -void -i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); - dev_priv->fence_regs[obj->fence_reg].pin_count--; - } -} - int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -4810,8 +4469,9 @@ err: return ret; } -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); @@ -4821,7 +4481,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) if (!HAS_L3_DPF(dev) || !remap_info) return 0; - ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); if (ret) return ret; @@ -4967,7 +4627,7 @@ i915_gem_init_hw(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - int ret, i; + int ret, i, j; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -5004,27 +4664,55 @@ i915_gem_init_hw(struct drm_device *dev) */ init_unused_rings(dev); + BUG_ON(!dev_priv->ring[RCS].default_context); + + ret = i915_ppgtt_init_hw(dev); + if (ret) { + DRM_ERROR("PPGTT enable HW failed %d\n", ret); + goto out; + } + + /* Need to do basic initialisation of all rings first: */ for_each_ring(ring, dev_priv, i) { ret = ring->init_hw(ring); if (ret) goto out; } - for (i = 0; i < NUM_L3_SLICES(dev); i++) - i915_gem_l3_remap(&dev_priv->ring[RCS], i); + /* Now it is safe to go back round and do everything else: */ + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_request *req; - ret = i915_ppgtt_init_hw(dev); - if (ret && ret != -EIO) { - DRM_ERROR("PPGTT enable failed %d\n", ret); - i915_gem_cleanup_ringbuffer(dev); - } + WARN_ON(!ring->default_context); + + ret = i915_gem_request_alloc(ring, ring->default_context, &req); + if (ret) { + i915_gem_cleanup_ringbuffer(dev); + goto out; + } - ret = i915_gem_context_enable(dev_priv); - if (ret && ret != -EIO) { - DRM_ERROR("Context enable failed %d\n", ret); - i915_gem_cleanup_ringbuffer(dev); + if (ring->id == RCS) { + for (j = 0; j < NUM_L3_SLICES(dev); j++) + i915_gem_l3_remap(req, j); + } - goto out; + ret = i915_ppgtt_init_ring(req); + if (ret && ret != -EIO) { + DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); + i915_gem_request_cancel(req); + i915_gem_cleanup_ringbuffer(dev); + goto out; + } + + ret = i915_gem_context_enable(req); + if (ret && ret != -EIO) { + DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); + i915_gem_request_cancel(req); + i915_gem_cleanup_ringbuffer(dev); + 
goto out; + } + + i915_add_request_no_flush(req); } out: @@ -5111,6 +4799,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) for_each_ring(ring, dev_priv, i) dev_priv->gt.cleanup_ring(ring); + + if (i915.enable_execlists) + /* + * Neither the BIOS, ourselves or any other kernel + * expects the system to be in execlists mode on startup, + * so we need to reset the GPU back to legacy mode. + */ + intel_gpu_reset(dev); } static void @@ -5388,3 +5084,42 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) return false; } +/* Allocate a new GEM object and fill it with the supplied data */ +struct drm_i915_gem_object * +i915_gem_object_create_from_data(struct drm_device *dev, + const void *data, size_t size) +{ + struct drm_i915_gem_object *obj; + struct sg_table *sg; + size_t bytes; + int ret; + + obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); + if (IS_ERR_OR_NULL(obj)) + return obj; + + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto fail; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto fail; + + i915_gem_object_pin_pages(obj); + sg = obj->pages; + bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); + i915_gem_object_unpin_pages(obj); + + if (WARN_ON(bytes != size)) { + DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); + ret = -EFAULT; + goto fail; + } + + return obj; + +fail: + drm_gem_object_unreference(&obj->base); + return ERR_PTR(ret); +} |
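
The bulk of this i915_gem.c diff replaces the implicit "outstanding lazy request" (OLR) with explicitly allocated requests: callers now obtain a request up front with i915_gem_request_alloc(), which also reserves ring space so that the final i915_add_request() cannot fail, and every error path before submission must cancel the request with i915_gem_request_cancel(). The sketch below is not part of the patch; it is a minimal illustration of that lifecycle, modelled on the i915_gpu_idle() and i915_gem_init_hw() hunks above, and it assumes the caller already holds dev->struct_mutex and a valid engine with a default context.

```c
/*
 * Illustrative sketch only (not part of the patch): the request
 * lifecycle this series converges on, modelled on the i915_gpu_idle()
 * and i915_gem_init_hw() hunks above. Assumes dev->struct_mutex is
 * held and that "ring" is a valid engine with a default context.
 */
static int example_submit_on_ring(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;

	/* Allocation also reserves ring space, which is what allows the
	 * final i915_add_request() to be declared infallible. */
	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* Emit work against the request; any failure before submission
	 * must cancel the request rather than leak it. */
	ret = i915_switch_context(req);
	if (ret) {
		i915_gem_request_cancel(req);
		return ret;
	}

	/* Submission consumes the reserved space and does not fail. */
	i915_add_request_no_flush(req);
	return 0;
}
```

The same pattern underlies the i915_gem_object_sync() change: when semaphores are in use, a request for the target ring is allocated on demand through the new to_req parameter, and the caller is then responsible for submitting (or cancelling) that request.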