author    Jani Nikula <jani.nikula@intel.com>  2024-06-19 11:38:31 +0300
committer Jani Nikula <jani.nikula@intel.com>  2024-06-19 11:38:31 +0300
commit    d754ed2821fd9675d203cb73c4afcd593e28b7d0 (patch)
tree      cd16683cd956a7c334d7e1b3baf02e2e7baa729c /drivers/gpu/drm/panthor
parent    dcaacff03a9fa2838f936e1009b4b7ad56807152 (diff)
parent    1ddaaa244021aba8496536a6627b4ad2bc0f936a (diff)
Merge drm/drm-next into drm-intel-next
Sync to v6.10-rc3.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/panthor')
-rw-r--r--  drivers/gpu/drm/panthor/panthor_device.c  |  8
-rw-r--r--  drivers/gpu/drm/panthor/panthor_device.h  |  1
-rw-r--r--  drivers/gpu/drm/panthor/panthor_fw.c      | 12
-rw-r--r--  drivers/gpu/drm/panthor/panthor_gem.c     |  8
-rw-r--r--  drivers/gpu/drm/panthor/panthor_gem.h     |  8
-rw-r--r--  drivers/gpu/drm/panthor/panthor_heap.c    | 36
-rw-r--r--  drivers/gpu/drm/panthor/panthor_sched.c   | 95
-rw-r--r--  drivers/gpu/drm/panthor/panthor_sched.h   |  2
8 files changed, 117 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index 75276cbeba20..4082c8f2951d 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct *work)
panthor_gpu_l2_power_on(ptdev);
panthor_mmu_post_reset(ptdev);
ret = panthor_fw_post_reset(ptdev);
- if (ret)
- goto out_dev_exit;
-
atomic_set(&ptdev->reset.pending, 0);
- panthor_sched_post_reset(ptdev);
-
-out_dev_exit:
+ panthor_sched_post_reset(ptdev, ret != 0);
drm_dev_exit(cookie);
if (ret) {
@@ -293,6 +288,7 @@ static const struct panthor_exception_info panthor_exception_infos[] = {
PANTHOR_EXCEPTION(ACTIVE),
PANTHOR_EXCEPTION(CS_RES_TERM),
PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
+ PANTHOR_EXCEPTION(CS_UNRECOVERABLE),
PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
PANTHOR_EXCEPTION(CS_BUS_FAULT),
PANTHOR_EXCEPTION(CS_INSTR_INVALID),
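The panthor_device.c hunk above changes the reset path so the scheduler is always told the outcome of the firmware post-reset step, instead of being skipped entirely when that step fails. A minimal, self-contained C sketch of that control-flow change (illustrative only; fw_post_reset() and sched_post_reset() here are stand-ins, not the driver functions):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for panthor_fw_post_reset(); pretend the FW reset failed. */
static int fw_post_reset(void)
{
	return -1;
}

/* Stand-in for panthor_sched_post_reset(): given the failure flag, the
 * scheduler can mark previously running groups as terminated. */
static void sched_post_reset(bool reset_failed)
{
	printf("scheduler post-reset, reset_failed=%d\n", reset_failed);
}

int main(void)
{
	int ret = fw_post_reset();

	/* Old flow: if (ret) goto out_dev_exit; -- the scheduler was
	 * never notified. New flow: always notify, passing the status. */
	sched_post_reset(ret != 0);
	return ret ? 1 : 0;
}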
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 2fdd671b38fd..e388c0472ba7 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -216,6 +216,7 @@ enum drm_panthor_exception_type {
DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
+ DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index 181395e2859a..857f3f11258a 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -453,7 +453,7 @@ panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
ret = panthor_kernel_bo_vmap(mem);
if (ret) {
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem);
+ panthor_kernel_bo_destroy(mem);
return ERR_PTR(ret);
}
@@ -1083,10 +1083,11 @@ int panthor_fw_post_reset(struct panthor_device *ptdev)
if (!ret)
goto out;
- /* Force a disable, so we get a fresh boot on the next
- * panthor_fw_start() call.
+ /* Forcibly reset the MCU and force a slow reset, so we get a
+ * fresh boot on the next panthor_fw_start() call.
*/
- gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
+ panthor_fw_stop(ptdev);
+ ptdev->fw->fast_reset = false;
drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
}
@@ -1133,7 +1134,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
panthor_fw_stop(ptdev);
list_for_each_entry(section, &ptdev->fw->sections, node)
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
+ panthor_kernel_bo_destroy(section->mem);
/* We intentionally don't call panthor_vm_idle() and let
* panthor_mmu_unplug() release the AS we acquired with
@@ -1141,6 +1142,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
* state to keep the active_refcnt balanced.
*/
panthor_vm_put(ptdev->fw->vm);
+ ptdev->fw->vm = NULL;
panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
}
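The panthor_fw.c change makes a failed fast reset fall back to a full slow reset: the MCU is forcibly stopped and the fast_reset flag cleared, so the next boot reloads the firmware sections from scratch. A hedged sketch of that fallback pattern as a standalone C program (the names model the driver's logic; they are not the real API):

#include <stdbool.h>
#include <stdio.h>

struct fw_state {
	bool fast_reset;
};

static int try_fast_reset(void)
{
	return -1; /* simulate a fast-reset failure */
}

static void fw_stop(void)
{
	puts("MCU stopped"); /* models panthor_fw_stop() */
}

static int slow_reset(void)
{
	puts("slow reset: reload FW sections");
	return 0;
}

static int fw_post_reset(struct fw_state *fw)
{
	if (fw->fast_reset && try_fast_reset() == 0)
		return 0;

	/* Forcibly stop the MCU and drop the fast path, so the next
	 * start gets a fresh boot. */
	fw_stop();
	fw->fast_reset = false;
	fprintf(stderr, "FW fast reset failed, trying a slow reset\n");
	return slow_reset();
}

int main(void)
{
	struct fw_state fw = { .fast_reset = true };

	return fw_post_reset(&fw) ? 1 : 0;
}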
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index d6483266d0c2..38f560864879 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -26,18 +26,18 @@ static void panthor_gem_free_object(struct drm_gem_object *obj)
/**
* panthor_kernel_bo_destroy() - Destroy a kernel buffer object
- * @vm: The VM this BO was mapped to.
* @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction
* is skipped.
*/
-void panthor_kernel_bo_destroy(struct panthor_vm *vm,
- struct panthor_kernel_bo *bo)
+void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
{
+ struct panthor_vm *vm;
int ret;
if (IS_ERR_OR_NULL(bo))
return;
+ vm = bo->vm;
panthor_kernel_bo_vunmap(bo);
if (drm_WARN_ON(bo->obj->dev,
@@ -53,6 +53,7 @@ void panthor_kernel_bo_destroy(struct panthor_vm *vm,
drm_gem_object_put(bo->obj);
out_free_bo:
+ panthor_vm_put(vm);
kfree(bo);
}
@@ -106,6 +107,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
if (ret)
goto err_free_va;
+ kbo->vm = panthor_vm_get(vm);
bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
drm_gem_object_get(bo->exclusive_vm_root_gem);
bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
index 3bccba394d00..e43021cf6d45 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.h
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -62,6 +62,11 @@ struct panthor_kernel_bo {
struct drm_gem_object *obj;
/**
+ * @vm: VM this private buffer is attached to.
+ */
+ struct panthor_vm *vm;
+
+ /**
* @va_node: VA space allocated to this GEM.
*/
struct drm_mm_node va_node;
@@ -136,7 +141,6 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
size_t size, u32 bo_flags, u32 vm_map_flags,
u64 gpu_va);
-void panthor_kernel_bo_destroy(struct panthor_vm *vm,
- struct panthor_kernel_bo *bo);
+void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo);
#endif /* __PANTHOR_GEM_H__ */
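The panthor_gem.c/panthor_gem.h pair changes ownership: the kernel BO now takes its own reference on the VM at creation and drops it at destruction, so panthor_kernel_bo_destroy() no longer needs the VM passed in by the caller. A toy refcounting model of that pattern (plain C, illustrative assumptions only):

#include <stdlib.h>

struct vm {
	int refcount;
};

static struct vm *vm_get(struct vm *vm)
{
	vm->refcount++;
	return vm;
}

static void vm_put(struct vm *vm)
{
	if (--vm->refcount == 0)
		free(vm);
}

struct kernel_bo {
	struct vm *vm;
};

static struct kernel_bo *bo_create(struct vm *vm)
{
	struct kernel_bo *bo = calloc(1, sizeof(*bo));

	if (!bo)
		return NULL;
	bo->vm = vm_get(vm); /* BO keeps its VM alive for its lifetime */
	return bo;
}

static void bo_destroy(struct kernel_bo *bo)
{
	if (!bo)
		return;
	vm_put(bo->vm); /* drop the reference taken at creation */
	free(bo);
}

int main(void)
{
	struct vm *vm = calloc(1, sizeof(*vm));
	struct kernel_bo *bo;

	vm->refcount = 1;
	bo = bo_create(vm);
	vm_put(vm);     /* caller's reference gone; BO's keeps vm alive */
	bo_destroy(bo); /* final put frees the VM */
	return 0;
}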
diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index 143fa35f2e74..3796a9eb22af 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -127,7 +127,7 @@ static void panthor_free_heap_chunk(struct panthor_vm *vm,
heap->chunk_count--;
mutex_unlock(&heap->lock);
- panthor_kernel_bo_destroy(vm, chunk->bo);
+ panthor_kernel_bo_destroy(chunk->bo);
kfree(chunk);
}
@@ -183,7 +183,7 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
return 0;
err_destroy_bo:
- panthor_kernel_bo_destroy(vm, chunk->bo);
+ panthor_kernel_bo_destroy(chunk->bo);
err_free_chunk:
kfree(chunk);
@@ -253,8 +253,8 @@ int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
* @pool: Pool to instantiate the heap context from.
* @initial_chunk_count: Number of chunks allocated at initialization time.
* Must be at least 1.
- * @chunk_size: The size of each chunk. Must be a power of two between 256k
- * and 2M.
+ * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
+ * [128k:8M] range.
* @max_chunks: Maximum number of chunks that can be allocated.
* @target_in_flight: Maximum number of in-flight render passes.
* @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
@@ -281,8 +281,11 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
if (initial_chunk_count == 0)
return -EINVAL;
- if (hweight32(chunk_size) != 1 ||
- chunk_size < SZ_256K || chunk_size > SZ_2M)
+ if (initial_chunk_count > max_chunks)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
+ chunk_size < SZ_128K || chunk_size > SZ_8M)
return -EINVAL;
down_read(&pool->lock);
@@ -320,7 +323,8 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
if (!pool->vm) {
ret = -EINVAL;
} else {
- ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL);
+ ret = xa_alloc(&pool->xa, &id, heap,
+ XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
if (!ret) {
void *gpu_ctx = panthor_get_heap_ctx(pool, id);
@@ -391,7 +395,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
mutex_unlock(&heap->lock);
if (removed) {
- panthor_kernel_bo_destroy(pool->vm, chunk->bo);
+ panthor_kernel_bo_destroy(chunk->bo);
kfree(chunk);
ret = 0;
} else {
@@ -410,6 +414,13 @@ out_unlock:
* @renderpasses_in_flight: Number of render passes currently in-flight.
* @pending_frag_count: Number of fragment jobs waiting for execution/completion.
* @new_chunk_gpu_va: Pointer used to return the chunk VA.
+ *
+ * Return:
+ * - 0 if a new heap was allocated
+ * - -ENOMEM if the tiler context reached the maximum number of chunks
+ * or if too many render passes are in-flight
+ * or if the allocation failed
+ * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
*/
int panthor_heap_grow(struct panthor_heap_pool *pool,
u64 heap_gpu_va,
@@ -439,10 +450,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
* handler provided by the userspace driver, if any).
*/
if (renderpasses_in_flight > heap->target_in_flight ||
- (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) {
- ret = -EBUSY;
- goto out_unlock;
- } else if (heap->chunk_count >= heap->max_chunks) {
+ heap->chunk_count >= heap->max_chunks) {
ret = -ENOMEM;
goto out_unlock;
}
@@ -536,7 +544,7 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
pool->vm = vm;
pool->ptdev = ptdev;
init_rwsem(&pool->lock);
- xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
kref_init(&pool->refcount);
pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
@@ -587,7 +595,7 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
if (!IS_ERR_OR_NULL(pool->gpu_contexts))
- panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts);
+ panthor_kernel_bo_destroy(pool->gpu_contexts);
/* Reflects the fact the pool has been destroyed. */
pool->vm = NULL;
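The heap hunks relax chunk-size validation from "power of two in [256K, 2M]" to "page-aligned in [128K, 8M]", and additionally reject initial_chunk_count > max_chunks. A small standalone check mirroring the new size test (constants spelled out here for illustration; the kernel build gets them from its own headers):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define SZ_128K (128u << 10)
#define SZ_8M   (8u << 20)
#define EINVAL  22
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

static int check_chunk_size(uint32_t chunk_size)
{
	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;
	return 0;
}

int main(void)
{
	/* 192K is page-aligned and in range but not a power of two:
	 * rejected by the old check, accepted by the new one. */
	printf("192K -> %d\n", check_chunk_size(192u << 10));
	printf("64K  -> %d\n", check_chunk_size(64u << 10)); /* too small */
	return 0;
}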
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index b3a51a6de523..79ffcbc41d78 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -490,6 +490,18 @@ enum panthor_group_state {
* Can no longer be scheduled. The only allowed action is a destruction.
*/
PANTHOR_CS_GROUP_TERMINATED,
+
+ /**
+ * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
+ *
+ * The FW returned an inconsistent state. The group is flagged unusable
+ * and can no longer be scheduled. The only allowed action is a
+ * destruction.
+ *
+ * When that happens, we also schedule a FW reset, to start from a fresh
+ * state.
+ */
+ PANTHOR_CS_GROUP_UNKNOWN_STATE,
};
/**
@@ -814,8 +826,8 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
panthor_queue_put_syncwait_obj(queue);
- panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
- panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
+ panthor_kernel_bo_destroy(queue->ringbuf);
+ panthor_kernel_bo_destroy(queue->iface.mem);
kfree(queue);
}
@@ -825,15 +837,14 @@ static void group_release_work(struct work_struct *work)
struct panthor_group *group = container_of(work,
struct panthor_group,
release_work);
- struct panthor_device *ptdev = group->ptdev;
u32 i;
for (i = 0; i < group->queue_count; i++)
group_free_queue(group, group->queues[i]);
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
- panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
- panthor_kernel_bo_destroy(group->vm, group->syncobjs);
+ panthor_kernel_bo_destroy(group->suspend_buf);
+ panthor_kernel_bo_destroy(group->protm_suspend_buf);
+ panthor_kernel_bo_destroy(group->syncobjs);
panthor_vm_put(group->vm);
kfree(group);
@@ -1127,6 +1138,7 @@ csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
struct panthor_fw_csg_iface *csg_iface;
struct panthor_group *group;
enum panthor_group_state new_state, old_state;
+ u32 csg_state;
lockdep_assert_held(&ptdev->scheduler->lock);
@@ -1137,7 +1149,8 @@ csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
return;
old_state = group->state;
- switch (csg_iface->output->ack & CSG_STATE_MASK) {
+ csg_state = csg_iface->output->ack & CSG_STATE_MASK;
+ switch (csg_state) {
case CSG_STATE_START:
case CSG_STATE_RESUME:
new_state = PANTHOR_CS_GROUP_ACTIVE;
@@ -1148,11 +1161,28 @@ csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
case CSG_STATE_SUSPEND:
new_state = PANTHOR_CS_GROUP_SUSPENDED;
break;
+ default:
+ /* The unknown state might be caused by a FW state corruption,
+ * which means the group metadata can't be trusted anymore, and
+ * the SUSPEND operation might propagate the corruption to the
+ * suspend buffers. Flag the group state as unknown to make
+ * sure it's unusable after that point.
+ */
+ drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
+ csg_id, csg_state);
+ new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
+ break;
}
if (old_state == new_state)
return;
+ /* The unknown state might be caused by a FW issue, reset the FW to
+ * take a fresh start.
+ */
+ if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
+ panthor_device_schedule_reset(ptdev);
+
if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
csg_slot_sync_queues_state_locked(ptdev, csg_id);
@@ -1250,7 +1280,16 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
if (group)
group->fatal_queues |= BIT(cs_id);
- sched_queue_delayed_work(sched, tick, 0);
+ if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
+ /* If this exception is unrecoverable, queue a reset, and make
+ * sure we stop scheduling groups until the reset has happened.
+ */
+ panthor_device_schedule_reset(ptdev);
+ cancel_delayed_work(&sched->tick_work);
+ } else {
+ sched_queue_delayed_work(sched, tick, 0);
+ }
+
drm_warn(&ptdev->base,
"CSG slot %d CS slot: %d\n"
"CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
@@ -1354,7 +1393,12 @@ static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
pending_frag_count, &new_chunk_va);
}
- if (ret && ret != -EBUSY) {
+ /* If the heap context doesn't have memory for us, we want to let the
+ * FW try to reclaim memory by waiting for fragment jobs to land or by
+ * executing the tiler OOM exception handler, which is supposed to
+ * implement incremental rendering.
+ */
+ if (ret && ret != -ENOMEM) {
drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
group->fatal_queues |= BIT(cs_id);
sched_queue_delayed_work(sched, tick, 0);
@@ -1783,6 +1827,7 @@ static bool
group_can_run(struct panthor_group *group)
{
return group->state != PANTHOR_CS_GROUP_TERMINATED &&
+ group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
!group->destroyed && group->fatal_queues == 0 &&
!group->timedout;
}
@@ -2546,8 +2591,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_csg_slots_upd_ctx upd_ctx;
- u32 suspended_slots, faulty_slots;
struct panthor_group *group;
+ u32 suspended_slots;
u32 i;
mutex_lock(&sched->lock);
@@ -2557,7 +2602,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
if (csg_slot->group) {
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
- CSG_STATE_SUSPEND,
+ group_can_run(csg_slot->group) ?
+ CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
CSG_STATE_MASK);
}
}
@@ -2566,10 +2612,9 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
suspended_slots &= ~upd_ctx.timedout_mask;
- faulty_slots = upd_ctx.timedout_mask;
- if (faulty_slots) {
- u32 slot_mask = faulty_slots;
+ if (upd_ctx.timedout_mask) {
+ u32 slot_mask = upd_ctx.timedout_mask;
drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
csgs_upd_ctx_init(&upd_ctx);
@@ -2620,9 +2665,6 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
slot_mask &= ~BIT(csg_id);
}
-
- if (flush_caches_failed)
- faulty_slots |= suspended_slots;
}
for (i = 0; i < sched->csg_slot_count; i++) {
@@ -2691,15 +2733,22 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev)
mutex_unlock(&sched->reset.lock);
}
-void panthor_sched_post_reset(struct panthor_device *ptdev)
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_group *group, *group_tmp;
mutex_lock(&sched->reset.lock);
- list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
+ list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
+ /* Consider all previously running groups as terminated if the
+ * reset failed.
+ */
+ if (reset_failed)
+ group->state = PANTHOR_CS_GROUP_TERMINATED;
+
panthor_group_start(group);
+ }
/* We're done resetting the GPU, clear the reset.in_progress bit so we can
* kick the scheduler.
@@ -2707,9 +2756,11 @@ void panthor_sched_post_reset(struct panthor_device *ptdev)
atomic_set(&sched->reset.in_progress, false);
mutex_unlock(&sched->reset.lock);
- sched_queue_delayed_work(sched, tick, 0);
-
- sched_queue_work(sched, sync_upd);
+ /* No need to queue a tick and update syncs if the reset failed. */
+ if (!reset_failed) {
+ sched_queue_delayed_work(sched, tick, 0);
+ sched_queue_work(sched, sync_upd);
+ }
}
static void group_sync_upd_work(struct work_struct *work)
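The scheduler hunks introduce PANTHOR_CS_GROUP_UNKNOWN_STATE: an unrecognized CSG state reported by the firmware flags the group unusable and schedules a device reset, and group_can_run() refuses such groups from then on. A toy model of the state mapping and the runnability check (enum values and helpers here are illustrative stand-ins, not the driver's definitions):

#include <stdbool.h>
#include <stdio.h>

enum group_state {
	GROUP_ACTIVE,
	GROUP_SUSPENDED,
	GROUP_TERMINATED,
	GROUP_UNKNOWN_STATE,
};

static bool reset_scheduled;

static enum group_state sync_state(int fw_state)
{
	switch (fw_state) {
	case 0:
		return GROUP_ACTIVE;
	case 1:
		return GROUP_SUSPENDED;
	default:
		/* FW state corruption: the group metadata can't be
		 * trusted anymore, so flag the group unusable and ask
		 * for a FW reset to start from a fresh state. */
		reset_scheduled = true;
		return GROUP_UNKNOWN_STATE;
	}
}

static bool group_can_run(enum group_state s)
{
	return s != GROUP_TERMINATED && s != GROUP_UNKNOWN_STATE;
}

int main(void)
{
	enum group_state s = sync_state(42); /* bogus FW state */

	printf("can_run=%d reset_scheduled=%d\n",
	       group_can_run(s), reset_scheduled);
	return 0;
}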
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 66438b1f331f..3a30d2328b30 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct panthor_file *pfile);
int panthor_sched_init(struct panthor_device *ptdev);
void panthor_sched_unplug(struct panthor_device *ptdev);
void panthor_sched_pre_reset(struct panthor_device *ptdev);
-void panthor_sched_post_reset(struct panthor_device *ptdev);
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed);
void panthor_sched_suspend(struct panthor_device *ptdev);
void panthor_sched_resume(struct panthor_device *ptdev);