summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/ttm
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2019-11-11 14:42:13 +0100
committerChristian König <christian.koenig@amd.com>2020-02-12 13:03:27 +0100
commit1ec39923ef24f55eaa12cbdbdd9bb7e4ba29f6a7 (patch)
treedb1e8f34b59de4896c52a252fd785cdef0510907 /drivers/gpu/drm/ttm
parentb1df3a2b24a917f8853d43fe9683c0e360d2c33a (diff)
drm/ttm: rework BO delayed delete. v2
This patch reworks the whole delayed deletion of BOs which aren't idle. Instead of having two counters for the BO structure we resurrect the BO when we find that a deleted BO is not idle yet. This has many advantages, especially that we don't need to increment/decrement the BOs reference counter any more when it moves on the LRUs. v2: remove duplicate ttm_tt_destroy, fix holde lock for LRU move Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: xinhui pan <xinhui.pan@amd.com> Link: https://patchwork.freedesktop.org/patch/352912/
Diffstat (limited to 'drivers/gpu/drm/ttm')
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c217
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c1
2 files changed, 94 insertions, 124 deletions
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e12fc2c2d165..28fd91ca6806 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type)
return 1 << (type);
}
-static void ttm_bo_release_list(struct kref *list_kref)
-{
- struct ttm_buffer_object *bo =
- container_of(list_kref, struct ttm_buffer_object, list_kref);
- size_t acc_size = bo->acc_size;
-
- BUG_ON(kref_read(&bo->list_kref));
- BUG_ON(kref_read(&bo->kref));
- BUG_ON(bo->mem.mm_node != NULL);
- BUG_ON(!list_empty(&bo->lru));
- BUG_ON(!list_empty(&bo->ddestroy));
- ttm_tt_destroy(bo->ttm);
- atomic_dec(&ttm_bo_glob.bo_count);
- dma_fence_put(bo->moving);
- if (!ttm_bo_uses_embedded_gem_object(bo))
- dma_resv_fini(&bo->base._resv);
- bo->destroy(bo);
- ttm_mem_global_free(&ttm_mem_glob, acc_size);
-}
-
static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem)
{
@@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
man = &bdev->man[mem->mem_type];
list_add_tail(&bo->lru, &man->lru[bo->priority]);
- kref_get(&bo->list_kref);
if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
!(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
TTM_PAGE_FLAG_SWAPPED))) {
list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]);
- kref_get(&bo->list_kref);
}
}
-static void ttm_bo_ref_bug(struct kref *list_kref)
-{
- BUG();
-}
-
static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
@@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
if (!list_empty(&bo->swap)) {
list_del_init(&bo->swap);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
notify = true;
}
if (!list_empty(&bo->lru)) {
list_del_init(&bo->lru);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
notify = true;
}
@@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
BUG_ON(!dma_resv_trylock(&bo->base._resv));
r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
- if (r)
- dma_resv_unlock(&bo->base._resv);
+ dma_resv_unlock(&bo->base._resv);
return r;
}
@@ -449,68 +419,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
rcu_read_unlock();
}
-static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
-{
- struct ttm_bo_device *bdev = bo->bdev;
- int ret;
-
- ret = ttm_bo_individualize_resv(bo);
- if (ret) {
- /* Last resort, if we fail to allocate memory for the
- * fences block for the BO to become idle
- */
- dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
- 30 * HZ);
- spin_lock(&ttm_bo_glob.lru_lock);
- goto error;
- }
-
- spin_lock(&ttm_bo_glob.lru_lock);
- ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
- if (!ret) {
- if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
- ttm_bo_del_from_lru(bo);
- spin_unlock(&ttm_bo_glob.lru_lock);
- if (bo->base.resv != &bo->base._resv)
- dma_resv_unlock(&bo->base._resv);
-
- ttm_bo_cleanup_memtype_use(bo);
- dma_resv_unlock(bo->base.resv);
- return;
- }
-
- ttm_bo_flush_all_fences(bo);
-
- /*
- * Make NO_EVICT bos immediately available to
- * shrinkers, now that they are queued for
- * destruction.
- */
- if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
- bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
- ttm_bo_move_to_lru_tail(bo, NULL);
- }
-
- dma_resv_unlock(bo->base.resv);
- }
- if (bo->base.resv != &bo->base._resv) {
- ttm_bo_flush_all_fences(bo);
- dma_resv_unlock(&bo->base._resv);
- }
-
-error:
- kref_get(&bo->list_kref);
- list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_bo_glob.lru_lock);
-
- schedule_delayed_work(&bdev->wq,
- ((HZ / 100) < 1) ? 1 : HZ / 100);
-}
-
/**
* function ttm_bo_cleanup_refs
- * If bo idle, remove from delayed- and lru lists, and unref.
- * If not idle, do nothing.
+ * If bo idle, remove from lru lists, and unref.
+ * If not idle, block if possible.
*
* Must be called with lru_lock and reservation held, this function
* will drop the lru lock and optionally the reservation lock before returning.
@@ -572,14 +484,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
-
spin_unlock(&ttm_bo_glob.lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
+ ttm_bo_put(bo);
+
return 0;
}
@@ -601,8 +513,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
ddestroy);
- kref_get(&bo->list_kref);
list_move_tail(&bo->ddestroy, &removed);
+ if (!ttm_bo_get_unless_zero(bo))
+ continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
@@ -617,7 +530,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
spin_unlock(&glob->lru_lock);
}
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
@@ -643,16 +556,68 @@ static void ttm_bo_release(struct kref *kref)
container_of(kref, struct ttm_buffer_object, kref);
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+ size_t acc_size = bo->acc_size;
+ int ret;
- if (bo->bdev->driver->release_notify)
- bo->bdev->driver->release_notify(bo);
+ if (!bo->deleted) {
+ if (bo->bdev->driver->release_notify)
+ bo->bdev->driver->release_notify(bo);
- drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
- ttm_mem_io_lock(man, false);
- ttm_mem_io_free_vm(bo);
- ttm_mem_io_unlock(man);
- ttm_bo_cleanup_refs_or_queue(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
+ ttm_mem_io_lock(man, false);
+ ttm_mem_io_free_vm(bo);
+ ttm_mem_io_unlock(man);
+
+ ret = ttm_bo_individualize_resv(bo);
+ if (ret) {
+ /* Last resort, if we fail to allocate memory for the
+ * fences block for the BO to become idle
+ */
+ dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
+ 30 * HZ);
+ }
+ }
+
+ if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
+ /* The BO is not idle, resurrect it for delayed destroy */
+ ttm_bo_flush_all_fences(bo);
+ bo->deleted = true;
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+
+ /*
+ * Make NO_EVICT bos immediately available to
+ * shrinkers, now that they are queued for
+ * destruction.
+ */
+ if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
+ bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
+ ttm_bo_move_to_lru_tail(bo, NULL);
+ }
+
+ kref_init(&bo->kref);
+ list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ schedule_delayed_work(&bdev->wq,
+ ((HZ / 100) < 1) ? 1 : HZ / 100);
+ return;
+ }
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+ ttm_bo_del_from_lru(bo);
+ list_del(&bo->ddestroy);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ ttm_bo_cleanup_memtype_use(bo);
+
+ BUG_ON(bo->mem.mm_node != NULL);
+ atomic_dec(&ttm_bo_glob.bo_count);
+ dma_fence_put(bo->moving);
+ if (!ttm_bo_uses_embedded_gem_object(bo))
+ dma_resv_fini(&bo->base._resv);
+ bo->destroy(bo);
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
}
void ttm_bo_put(struct ttm_buffer_object *bo)
@@ -755,8 +720,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->base.resv == ctx->resv) {
dma_resv_assert_held(bo->base.resv);
- if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
- || !list_empty(&bo->ddestroy))
+ if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
ret = true;
*locked = false;
if (busy)
@@ -837,6 +801,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
dma_resv_unlock(bo->base.resv);
continue;
}
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ continue;
+ }
break;
}
@@ -848,21 +817,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
}
if (!bo) {
- if (busy_bo)
- kref_get(&busy_bo->list_kref);
+ if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
+ busy_bo = NULL;
spin_unlock(&ttm_bo_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
- kref_put(&busy_bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(busy_bo);
return ret;
}
- kref_get(&bo->list_kref);
-
- if (!list_empty(&bo->ddestroy)) {
+ if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
ctx->no_wait_gpu, locked);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -872,7 +839,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
if (locked)
ttm_bo_unreserve(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -1284,7 +1251,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
kref_init(&bo->kref);
- kref_init(&bo->list_kref);
INIT_LIST_HEAD(&bo->lru);
INIT_LIST_HEAD(&bo->ddestroy);
INIT_LIST_HEAD(&bo->swap);
@@ -1804,11 +1770,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL)) {
- ret = 0;
- break;
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+ NULL))
+ continue;
+
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ continue;
}
+
+ ret = 0;
+ break;
}
if (!ret)
break;
@@ -1819,11 +1792,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
return ret;
}
- kref_get(&bo->list_kref);
-
- if (!list_empty(&bo->ddestroy)) {
+ if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, false, false, locked);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -1877,7 +1848,7 @@ out:
*/
if (locked)
dma_resv_unlock(bo->base.resv);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
EXPORT_SYMBOL(ttm_bo_swapout);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 86d152472f38..c8e359ded1df 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
fbo->base.moving = NULL;
drm_vma_node_reset(&fbo->base.base.vma_node);
- kref_init(&fbo->base.list_kref);
kref_init(&fbo->base.kref);
fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;