Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_gem.c')
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 326 |
1 file changed, 115 insertions(+), 211 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 445d3bab89e0..ab16164b5eda 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -29,6 +29,8 @@
 #include <linux/sched/signal.h>
 #include <linux/dma-fence-array.h>
 
+#include <drm/drm_exec.h>
+#include <drm/drm_print.h>
 #include <drm/drm_syncobj.h>
 
 #include "uapi/drm/vc4_drm.h"
@@ -76,6 +78,9 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 	u32 i;
 	int ret = 0;
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
+
 	if (!vc4->v3d) {
 		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
 		return -ENODEV;
 	}
@@ -123,7 +128,7 @@
 			goto err_delete_handle;
 		}
 		bo_state[i].handle = handle;
-		bo_state[i].paddr = vc4_bo->base.paddr;
+		bo_state[i].paddr = vc4_bo->base.dma_addr;
 		bo_state[i].size = vc4_bo->base.base.size;
 	}
@@ -196,7 +201,7 @@ vc4_save_hang_state(struct drm_device *dev)
 			continue;
 
 		for (j = 0; j < exec[i]->bo_count; j++) {
-			bo = to_vc4_bo(&exec[i]->bo[j]->base);
+			bo = to_vc4_bo(exec[i]->bo[j]);
 
 			/* Retain BOs just in case they were marked purgeable.
 			 * This prevents the BO from being purged before
@@ -204,8 +209,8 @@ vc4_save_hang_state(struct drm_device *dev)
 			 */
 			WARN_ON(!refcount_read(&bo->usecnt));
 			refcount_inc(&bo->usecnt);
-			drm_gem_object_get(&exec[i]->bo[j]->base);
-			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
+			drm_gem_object_get(exec[i]->bo[j]);
+			kernel_state->bo[k++] = exec[i]->bo[j];
 		}
 
 		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
@@ -322,7 +327,7 @@ vc4_reset_work(struct work_struct *work)
 static void
 vc4_hangcheck_elapsed(struct timer_list *t)
 {
-	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
+	struct vc4_dev *vc4 = timer_container_of(vc4, t, hangcheck.timer);
 	struct drm_device *dev = &vc4->base;
 	uint32_t ct0ca, ct1ca;
 	unsigned long irqflags;
@@ -386,6 +391,9 @@ vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
 	unsigned long timeout_expire;
 	DEFINE_WAIT(wait);
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
+
 	if (vc4->finished_seqno >= seqno)
 		return 0;
 
@@ -468,6 +476,9 @@ vc4_submit_next_bin_job(struct drm_device *dev)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_exec_info *exec;
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return;
+
 again:
 	exec = vc4_first_bin_job(vc4);
 	if (!exec)
 		return;
@@ -485,6 +496,8 @@ again:
 	 * immediately move it to the to-be-rendered queue.
 	 */
 	if (exec->ct0ca != exec->ct0ea) {
+		trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca,
+				    exec->ct0ea);
 		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
 	} else {
 		struct vc4_exec_info *next;
@@ -511,6 +524,9 @@ vc4_submit_next_render_job(struct drm_device *dev)
 	if (!exec)
 		return;
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return;
+
 	/* A previous RCL may have written to one of our textures, and
 	 * our full cache flush at bin time may have occurred before
 	 * that RCL completed.  Flush the texture cache now, but not
@@ -519,6 +535,7 @@ vc4_submit_next_render_job(struct drm_device *dev)
 	 */
 	vc4_flush_texture_caches(dev);
 
+	trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea);
 	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
 }
 
@@ -528,52 +545,33 @@ vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	bool was_empty = list_empty(&vc4->render_job_list);
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return;
+
 	list_move_tail(&exec->head, &vc4->render_job_list);
 	if (was_empty)
 		vc4_submit_next_render_job(dev);
 }
 
 static void
-vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+vc4_attach_fences(struct vc4_exec_info *exec)
 {
 	struct vc4_bo *bo;
 	unsigned i;
 
 	for (i = 0; i < exec->bo_count; i++) {
-		bo = to_vc4_bo(&exec->bo[i]->base);
-		bo->seqno = seqno;
-
-		dma_resv_add_shared_fence(bo->base.base.resv, exec->fence);
-	}
-
-	list_for_each_entry(bo, &exec->unref_list, unref_head) {
-		bo->seqno = seqno;
+		bo = to_vc4_bo(exec->bo[i]);
+		dma_resv_add_fence(bo->base.base.resv, exec->fence,
+				   DMA_RESV_USAGE_READ);
 	}
 
 	for (i = 0; i < exec->rcl_write_bo_count; i++) {
 		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
-		bo->write_seqno = seqno;
-
-		dma_resv_add_excl_fence(bo->base.base.resv, exec->fence);
+		dma_resv_add_fence(bo->base.base.resv, exec->fence,
+				   DMA_RESV_USAGE_WRITE);
 	}
 }
 
-static void
-vc4_unlock_bo_reservations(struct drm_device *dev,
-			   struct vc4_exec_info *exec,
-			   struct ww_acquire_ctx *acquire_ctx)
-{
-	int i;
-
-	for (i = 0; i < exec->bo_count; i++) {
-		struct drm_gem_object *bo = &exec->bo[i]->base;
-
-		dma_resv_unlock(bo->resv);
-	}
-
-	ww_acquire_fini(acquire_ctx);
-}
-
 /* Takes the reservation lock on all the BOs being referenced, so that
  * at queue submit time we can update the reservations.
  *
@@ -582,70 +580,23 @@ vc4_unlock_bo_reservations,
  * to vc4, so we don't attach dma-buf fences to them.
  */
 static int
-vc4_lock_bo_reservations(struct drm_device *dev,
-			 struct vc4_exec_info *exec,
-			 struct ww_acquire_ctx *acquire_ctx)
+vc4_lock_bo_reservations(struct vc4_exec_info *exec,
+			 struct drm_exec *exec_ctx)
 {
-	int contended_lock = -1;
-	int i, ret;
-	struct drm_gem_object *bo;
-
-	ww_acquire_init(acquire_ctx, &reservation_ww_class);
-
-retry:
-	if (contended_lock != -1) {
-		bo = &exec->bo[contended_lock]->base;
-		ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx);
-		if (ret) {
-			ww_acquire_done(acquire_ctx);
-			return ret;
-		}
-	}
-
-	for (i = 0; i < exec->bo_count; i++) {
-		if (i == contended_lock)
-			continue;
-
-		bo = &exec->bo[i]->base;
-
-		ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx);
-		if (ret) {
-			int j;
-
-			for (j = 0; j < i; j++) {
-				bo = &exec->bo[j]->base;
-				dma_resv_unlock(bo->resv);
-			}
-
-			if (contended_lock != -1 && contended_lock >= i) {
-				bo = &exec->bo[contended_lock]->base;
-
-				dma_resv_unlock(bo->resv);
-			}
-
-			if (ret == -EDEADLK) {
-				contended_lock = i;
-				goto retry;
-			}
-
-			ww_acquire_done(acquire_ctx);
-			return ret;
-		}
-	}
-
-	ww_acquire_done(acquire_ctx);
+	int ret;
 
 	/* Reserve space for our shared (read-only) fence references,
 	 * before we commit the CL to the hardware.
 	 */
-	for (i = 0; i < exec->bo_count; i++) {
-		bo = &exec->bo[i]->base;
+	drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec->bo_count);
+	drm_exec_until_all_locked(exec_ctx) {
+		ret = drm_exec_prepare_array(exec_ctx, exec->bo,
+					     exec->bo_count, 1);
+	}
 
-		ret = dma_resv_reserve_shared(bo->resv, 1);
-		if (ret) {
-			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
-			return ret;
-		}
+	if (ret) {
+		drm_exec_fini(exec_ctx);
+		return ret;
 	}
 
 	return 0;
@@ -662,7 +613,7 @@ retry:
  */
 static int
 vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
-		 struct ww_acquire_ctx *acquire_ctx,
+		 struct drm_exec *exec_ctx,
 		 struct drm_syncobj *out_sync)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -689,9 +640,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
 	if (out_sync)
 		drm_syncobj_replace_fence(out_sync, exec->fence);
 
-	vc4_update_bo_seqnos(exec, seqno);
+	vc4_attach_fences(exec);
 
-	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+	drm_exec_fini(exec_ctx);
 
 	list_add_tail(&exec->head, &vc4->bin_job_list);
 
@@ -729,7 +680,6 @@ vc4_cl_lookup_bos(struct drm_device *dev,
 		  struct vc4_exec_info *exec)
 {
 	struct drm_vc4_submit_cl *args = exec->args;
-	uint32_t *handles;
 	int ret = 0;
 	int i;
 
@@ -743,54 +693,18 @@ vc4_cl_lookup_bos(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	exec->bo = kvmalloc_array(exec->bo_count,
-				  sizeof(struct drm_gem_cma_object *),
-				  GFP_KERNEL | __GFP_ZERO);
-	if (!exec->bo) {
-		DRM_ERROR("Failed to allocate validated BO pointers\n");
-		return -ENOMEM;
-	}
-
-	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
-	if (!handles) {
-		ret = -ENOMEM;
-		DRM_ERROR("Failed to allocate incoming GEM handles\n");
-		goto fail;
-	}
-
-	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
-			   exec->bo_count * sizeof(uint32_t))) {
-		ret = -EFAULT;
-		DRM_ERROR("Failed to copy in GEM handles\n");
-		goto fail;
-	}
-
-	spin_lock(&file_priv->table_lock);
-	for (i = 0; i < exec->bo_count; i++) {
-		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
-						     handles[i]);
-		if (!bo) {
-			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
-				  i, handles[i]);
-			ret = -EINVAL;
-			break;
-		}
-
-		drm_gem_object_get(bo);
-		exec->bo[i] = (struct drm_gem_cma_object *)bo;
-	}
-	spin_unlock(&file_priv->table_lock);
+	ret = drm_gem_objects_lookup(file_priv, u64_to_user_ptr(args->bo_handles),
+				     exec->bo_count, &exec->bo);
 
 	if (ret)
 		goto fail_put_bo;
 
 	for (i = 0; i < exec->bo_count; i++) {
-		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
+		ret = vc4_bo_inc_usecnt(to_vc4_bo(exec->bo[i]));
 		if (ret)
 			goto fail_dec_usecnt;
 	}
 
-	kvfree(handles);
 	return 0;
 
 fail_dec_usecnt:
@@ -803,15 +717,13 @@ fail_dec_usecnt:
 	 * step.
 	 */
 	for (i-- ; i >= 0; i--)
-		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
+		vc4_bo_dec_usecnt(to_vc4_bo(exec->bo[i]));
 
 fail_put_bo:
 	/* Release any reference to acquired objects. */
 	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
-		drm_gem_object_put(&exec->bo[i]->base);
+		drm_gem_object_put(exec->bo[i]);
 
-fail:
-	kvfree(handles);
 	kvfree(exec->bo);
 	exec->bo = NULL;
 	return ret;
@@ -854,8 +766,8 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	 */
 	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
 	if (!temp) {
-		DRM_ERROR("Failed to allocate storage for copying "
-			  "in bin/render CLs.\n");
+		drm_err(dev, "Failed to allocate storage for copying "
+			"in bin/render CLs.\n");
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -888,7 +800,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 
 	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
 	if (IS_ERR(bo)) {
-		DRM_ERROR("Couldn't allocate BO for binning\n");
+		drm_err(dev, "Couldn't allocate BO for binning\n");
 		ret = PTR_ERR(bo);
 		goto fail;
 	}
@@ -897,16 +809,16 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
 		      &exec->unref_list);
 
-	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+	exec->ct0ca = exec->exec_bo->dma_addr + bin_offset;
 
 	exec->bin_u = bin;
 
 	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
-	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
+	exec->shader_rec_p = exec->exec_bo->dma_addr + shader_rec_offset;
 	exec->shader_rec_size = args->shader_rec_size;
 
 	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
-	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+	exec->uniforms_p = exec->exec_bo->dma_addr + uniforms_offset;
 	exec->uniforms_size = args->uniforms_size;
 
 	ret = vc4_validate_bin_cl(dev,
@@ -926,12 +838,6 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 		goto fail;
 	}
 
-	/* Block waiting on any previous rendering into the CS's VBO,
-	 * IB, or textures, so that pixels are actually written by the
-	 * time we try to read them.
-	 */
-	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
-
 fail:
 	kvfree(temp);
 	return ret;
@@ -954,10 +860,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 
 	if (exec->bo) {
 		for (i = 0; i < exec->bo_count; i++) {
-			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+			struct vc4_bo *bo = to_vc4_bo(exec->bo[i]);
 
 			vc4_bo_dec_usecnt(bo);
-			drm_gem_object_put(&exec->bo[i]->base);
+			drm_gem_object_put(exec->bo[i]);
 		}
 		kvfree(exec->bo);
 	}
@@ -990,7 +896,9 @@ void
 vc4_job_handle_completed(struct vc4_dev *vc4)
 {
 	unsigned long irqflags;
-	struct vc4_seqno_cb *cb, *cb_temp;
+
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return;
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 	while (!list_empty(&vc4->job_done_list)) {
@@ -1004,45 +912,9 @@ vc4_job_handle_completed(struct vc4_dev *vc4)
 		spin_lock_irqsave(&vc4->job_lock, irqflags);
 	}
 
-	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
-		if (cb->seqno <= vc4->finished_seqno) {
-			list_del_init(&cb->work.entry);
-			schedule_work(&cb->work);
-		}
-	}
-
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 }
 
-static void vc4_seqno_cb_work(struct work_struct *work)
-{
-	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
-
-	cb->func(cb);
-}
-
-int vc4_queue_seqno_cb(struct drm_device *dev,
-		       struct vc4_seqno_cb *cb, uint64_t seqno,
-		       void (*func)(struct vc4_seqno_cb *cb))
-{
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	unsigned long irqflags;
-
-	cb->func = func;
-	INIT_WORK(&cb->work, vc4_seqno_cb_work);
-
-	spin_lock_irqsave(&vc4->job_lock, irqflags);
-	if (seqno > vc4->finished_seqno) {
-		cb->seqno = seqno;
-		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
-	} else {
-		schedule_work(&cb->work);
-	}
-	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
-
-	return 0;
-}
-
 /* Scheduled when any job has been completed, this walks the list of
  * jobs that had completed and unrefs their BOs and frees their exec
  * structs.
@@ -1078,8 +950,12 @@ int
 vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *file_priv)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_wait_seqno *args = data;
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
+
 	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
 					       &args->timeout_ns);
 }
@@ -1088,25 +964,32 @@ int
 vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	int ret;
 	struct drm_vc4_wait_bo *args = data;
-	struct drm_gem_object *gem_obj;
-	struct vc4_bo *bo;
+	unsigned long timeout_jiffies =
+		usecs_to_jiffies(div_u64(args->timeout_ns, 1000));
+	ktime_t start = ktime_get();
+	u64 delta_ns;
+
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
 
 	if (args->pad != 0)
 		return -EINVAL;
 
-	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
-	if (!gem_obj) {
-		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
-		return -EINVAL;
-	}
-	bo = to_vc4_bo(gem_obj);
+	ret = drm_gem_dma_resv_wait(file_priv, args->handle, true, timeout_jiffies);
 
-	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
-					      &args->timeout_ns);
+	/* Decrement the user's timeout, in case we got interrupted
	 * such that the ioctl will be restarted.
+	 */
+	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
+	if (delta_ns < args->timeout_ns)
+		args->timeout_ns -= delta_ns;
+	else
+		args->timeout_ns = 0;
 
-	drm_gem_object_put(gem_obj);
 	return ret;
 }
@@ -1131,10 +1014,17 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct drm_vc4_submit_cl *args = data;
 	struct drm_syncobj *out_sync = NULL;
 	struct vc4_exec_info *exec;
-	struct ww_acquire_ctx acquire_ctx;
+	struct drm_exec exec_ctx;
 	struct dma_fence *in_fence;
 	int ret = 0;
 
+	trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size,
+				  args->shader_rec_size,
+				  args->bo_handle_count);
+
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
+
 	if (!vc4->v3d) {
 		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
 		return -ENODEV;
@@ -1154,10 +1044,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
-	if (!exec) {
-		DRM_ERROR("malloc failure on exec struct\n");
+	if (!exec)
 		return -ENOMEM;
-	}
+
+	exec->dev = vc4;
 
 	ret = vc4_v3d_pm_get(vc4);
 	if (ret) {
@@ -1217,7 +1107,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
-	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+	ret = vc4_lock_bo_reservations(exec, &exec_ctx);
 	if (ret)
 		goto fail;
 
@@ -1225,7 +1115,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		out_sync = drm_syncobj_find(file_priv, args->out_sync);
 		if (!out_sync) {
 			ret = -EINVAL;
-			goto fail;
+			goto fail_unreserve;
 		}
 
 		/* We replace the fence in out_sync in vc4_queue_submit since
@@ -1240,7 +1130,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	 */
 	exec->args = NULL;
 
-	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
+	ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync);
 
 	/* The syncobj isn't part of the exec data and we need to free our
 	 * reference even if job submission failed.
@@ -1249,13 +1139,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		drm_syncobj_put(out_sync);
 
 	if (ret)
-		goto fail;
+		goto fail_unreserve;
 
 	/* Return the seqno for our job. */
 	args->seqno = vc4->emit_seqno;
 
 	return 0;
 
+fail_unreserve:
+	drm_exec_fini(&exec_ctx);
 fail:
 	vc4_complete_exec(&vc4->base, exec);
 
@@ -1266,13 +1158,16 @@ static void vc4_gem_destroy(struct drm_device *dev, void *unused);
 int vc4_gem_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret;
+
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
 
 	vc4->dma_fence_context = dma_fence_context_alloc(1);
 
 	INIT_LIST_HEAD(&vc4->bin_job_list);
 	INIT_LIST_HEAD(&vc4->render_job_list);
 	INIT_LIST_HEAD(&vc4->job_done_list);
-	INIT_LIST_HEAD(&vc4->seqno_cb_list);
 	spin_lock_init(&vc4->job_lock);
 
 	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
@@ -1280,10 +1175,15 @@ int vc4_gem_init(struct drm_device *dev)
 
 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 
-	mutex_init(&vc4->power_lock);
+	ret = drmm_mutex_init(dev, &vc4->power_lock);
+	if (ret)
+		return ret;
 
 	INIT_LIST_HEAD(&vc4->purgeable.list);
-	mutex_init(&vc4->purgeable.lock);
+
+	ret = drmm_mutex_init(dev, &vc4->purgeable.lock);
+	if (ret)
+		return ret;
 
 	return drmm_add_action_or_reset(dev, vc4_gem_destroy, NULL);
 }
@@ -1312,11 +1212,15 @@ static void vc4_gem_destroy(struct drm_device *dev, void *unused)
 int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_gem_madvise *args = data;
 	struct drm_gem_object *gem_obj;
 	struct vc4_bo *bo;
 	int ret;
 
+	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
+		return -ENODEV;
+
 	switch (args->madv) {
 	case VC4_MADV_DONTNEED:
 	case VC4_MADV_WILLNEED:
