summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/panthor/panthor_mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/panthor/panthor_mmu.c')
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.c459
1 files changed, 259 insertions, 200 deletions
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index fa0a002b1016..d4839d282689 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -7,6 +7,7 @@
#include <drm/drm_exec.h>
#include <drm/drm_gpuvm.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>
@@ -29,6 +30,7 @@
#include "panthor_device.h"
#include "panthor_gem.h"
+#include "panthor_gpu.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
@@ -53,26 +55,27 @@ struct panthor_mmu {
/** @irq: The MMU irq. */
struct panthor_irq irq;
- /** @as: Address space related fields.
+ /**
+ * @as: Address space related fields.
*
* The GPU has a limited number of address spaces (AS) slots, forcing
* us to re-assign them to re-assign slots on-demand.
*/
struct {
- /** @slots_lock: Lock protecting access to all other AS fields. */
+ /** @as.slots_lock: Lock protecting access to all other AS fields. */
struct mutex slots_lock;
- /** @alloc_mask: Bitmask encoding the allocated slots. */
+ /** @as.alloc_mask: Bitmask encoding the allocated slots. */
unsigned long alloc_mask;
- /** @faulty_mask: Bitmask encoding the faulty slots. */
+ /** @as.faulty_mask: Bitmask encoding the faulty slots. */
unsigned long faulty_mask;
- /** @slots: VMs currently bound to the AS slots. */
+ /** @as.slots: VMs currently bound to the AS slots. */
struct panthor_as_slot slots[MAX_AS_SLOTS];
/**
- * @lru_list: List of least recently used VMs.
+ * @as.lru_list: List of least recently used VMs.
*
* We use this list to pick a VM to evict when all slots are
* used.
@@ -87,16 +90,16 @@ struct panthor_mmu {
/** @vm: VMs management fields */
struct {
- /** @lock: Lock protecting access to list. */
+ /** @vm.lock: Lock protecting access to list. */
struct mutex lock;
- /** @list: List containing all VMs. */
+ /** @vm.list: List containing all VMs. */
struct list_head list;
- /** @reset_in_progress: True if a reset is in progress. */
+ /** @vm.reset_in_progress: True if a reset is in progress. */
bool reset_in_progress;
- /** @wq: Workqueue used for the VM_BIND queues. */
+ /** @vm.wq: Workqueue used for the VM_BIND queues. */
struct workqueue_struct *wq;
} vm;
};
@@ -143,14 +146,14 @@ struct panthor_vma {
struct panthor_vm_op_ctx {
/** @rsvd_page_tables: Pages reserved for the MMU page table update. */
struct {
- /** @count: Number of pages reserved. */
+ /** @rsvd_page_tables.count: Number of pages reserved. */
u32 count;
- /** @ptr: Point to the first unused page in the @pages table. */
+ /** @rsvd_page_tables.ptr: Index of the first unused page in the @rsvd_page_tables.pages array. */
u32 ptr;
/**
- * @page: Array of pages that can be used for an MMU page table update.
+ * @rsvd_page_tables.pages: Array of pages to be used for an MMU page table update.
*
* After an VM operation, there might be free pages left in this array.
* They should be returned to the pt_cache as part of the op_ctx cleanup.
@@ -172,37 +175,23 @@ struct panthor_vm_op_ctx {
/** @va: Virtual range targeted by the VM operation. */
struct {
- /** @addr: Start address. */
+ /** @va.addr: Start address. */
u64 addr;
- /** @range: Range size. */
+ /** @va.range: Range size. */
u64 range;
} va;
- /**
- * @returned_vmas: List of panthor_vma objects returned after a VM operation.
- *
- * For unmap operations, this will contain all VMAs that were covered by the
- * specified VA range.
- *
- * For map operations, this will contain all VMAs that previously mapped to
- * the specified VA range.
- *
- * Those VMAs, and the resources they point to will be released as part of
- * the op_ctx cleanup operation.
- */
- struct list_head returned_vmas;
-
/** @map: Fields specific to a map operation. */
struct {
- /** @vm_bo: Buffer object to map. */
+ /** @map.vm_bo: Buffer object to map. */
struct drm_gpuvm_bo *vm_bo;
- /** @bo_offset: Offset in the buffer object. */
+ /** @map.bo_offset: Offset in the buffer object. */
u64 bo_offset;
/**
- * @sgt: sg-table pointing to pages backing the GEM object.
+ * @map.sgt: sg-table pointing to pages backing the GEM object.
*
* This is gathered at job creation time, such that we don't have
* to allocate in ::run_job().
@@ -210,7 +199,7 @@ struct panthor_vm_op_ctx {
struct sg_table *sgt;
/**
- * @new_vma: The new VMA object that will be inserted to the VA tree.
+ * @map.new_vma: The new VMA object that will be inserted to the VA tree.
*/
struct panthor_vma *new_vma;
} map;
@@ -304,27 +293,27 @@ struct panthor_vm {
/** @kernel_auto_va: Automatic VA-range for kernel BOs. */
struct {
- /** @start: Start of the automatic VA-range for kernel BOs. */
+ /** @kernel_auto_va.start: Start of the automatic VA-range for kernel BOs. */
u64 start;
- /** @size: Size of the automatic VA-range for kernel BOs. */
+ /** @kernel_auto_va.end: End of the automatic VA-range for kernel BOs. */
u64 end;
} kernel_auto_va;
/** @as: Address space related fields. */
struct {
/**
- * @id: ID of the address space this VM is bound to.
+ * @as.id: ID of the address space this VM is bound to.
*
* A value of -1 means the VM is inactive/not bound.
*/
int id;
- /** @active_cnt: Number of active users of this VM. */
+ /** @as.active_cnt: Number of active users of this VM. */
refcount_t active_cnt;
/**
- * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list.
+ * @as.lru_node: Used to insert the VM in the panthor_mmu::as::lru_list.
*
* Active VMs should not be inserted in the LRU list.
*/
@@ -336,13 +325,13 @@ struct panthor_vm {
*/
struct {
/**
- * @pool: The heap pool attached to this VM.
+ * @heaps.pool: The heap pool attached to this VM.
*
* Will stay NULL until someone creates a heap context on this VM.
*/
struct panthor_heap_pool *pool;
- /** @lock: Lock used to protect access to @pool. */
+ /** @heaps.lock: Lock used to protect access to @heaps.pool. */
struct mutex lock;
} heaps;
@@ -408,7 +397,7 @@ struct panthor_vm_bind_job {
struct panthor_vm_op_ctx ctx;
};
-/**
+/*
* @pt_cache: Cache used to allocate MMU page tables.
*
* The pre-allocation pattern forces us to over-allocate to plan for
@@ -478,7 +467,7 @@ static void *alloc_pt(void *cookie, size_t size, gfp_t gfp)
}
/**
- * @free_pt() - Custom page table free function
+ * free_pt() - Custom page table free function
* @cookie: Cookie passed at page table allocation time.
* @data: Page table to free.
* @size: Size of the page table. This size should be fixed,
@@ -509,9 +498,9 @@ static int wait_ready(struct panthor_device *ptdev, u32 as_nr)
/* Wait for the MMU status to indicate there is no active command, in
* case one is pending.
*/
- ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr),
- val, !(val & AS_STATUS_AS_ACTIVE),
- 10, 100000);
+ ret = gpu_read_relaxed_poll_timeout_atomic(ptdev, AS_STATUS(as_nr), val,
+ !(val & AS_STATUS_AS_ACTIVE),
+ 10, 100000);
if (ret) {
panthor_device_schedule_reset(ptdev);
@@ -563,26 +552,58 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr,
region = region_width | region_start;
/* Lock the region that needs to be updated */
- gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region));
- gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region));
+ gpu_write64(ptdev, AS_LOCKADDR(as_nr), region);
write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
}
static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
u64 iova, u64 size, u32 op)
{
+ const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV;
+ u32 lsc_flush_op;
+ int ret;
+
lockdep_assert_held(&ptdev->mmu->as.slots_lock);
+ switch (op) {
+ case AS_COMMAND_FLUSH_MEM:
+ lsc_flush_op = CACHE_CLEAN | CACHE_INV;
+ break;
+ case AS_COMMAND_FLUSH_PT:
+ lsc_flush_op = 0;
+ break;
+ default:
+ drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op);
+ return -EINVAL;
+ }
+
if (as_nr < 0)
return 0;
- if (op != AS_COMMAND_UNLOCK)
- lock_region(ptdev, as_nr, iova, size);
+ /*
+ * If the AS number is non-negative, then we can be sure
+ * the device is up and running, so we don't need to explicitly
+ * power it up.
+ */
+
+ lock_region(ptdev, as_nr, iova, size);
+
+ ret = wait_ready(ptdev, as_nr);
+ if (ret)
+ return ret;
- /* Run the MMU operation */
- write_cmd(ptdev, as_nr, op);
+ ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0);
+ if (ret)
+ return ret;
- /* Wait for the flush to complete */
+ /*
+ * Explicitly unlock the region as the AS is not unlocked automatically
+ * at the end of the GPU_CONTROL cache flush command, unlike
+ * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT.
+ */
+ write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK);
+
+ /* Wait for the unlock command to complete */
return wait_ready(ptdev, as_nr);
}
@@ -608,14 +629,9 @@ static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
if (ret)
return ret;
- gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab));
- gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab));
-
- gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
- gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
-
- gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg));
- gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg));
+ gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab);
+ gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr);
+ gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg);
return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
}
@@ -628,14 +644,9 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
if (ret)
return ret;
- gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0);
- gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0);
-
- gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0);
- gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0);
-
- gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
- gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0);
+ gpu_write64(ptdev, AS_TRANSTAB(as_nr), 0);
+ gpu_write64(ptdev, AS_MEMATTR(as_nr), 0);
+ gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
}
@@ -691,7 +702,7 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
/**
* panthor_vm_active() - Flag a VM as active
- * @VM: VM to flag as active.
+ * @vm: VM to flag as active.
*
* Assigns an address space to a VM so it can be used by the GPU/MCU.
*
@@ -774,6 +785,7 @@ out_enable_as:
if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
+ ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
}
@@ -795,7 +807,7 @@ out_dev_exit:
/**
* panthor_vm_idle() - Flag a VM idle
- * @VM: VM to flag as idle.
+ * @vm: VM to flag as idle.
*
* When we know the GPU is done with the VM (no more jobs to process),
* we can relinquish the AS slot attached to this VM, if any.
@@ -820,6 +832,14 @@ void panthor_vm_idle(struct panthor_vm *vm)
mutex_unlock(&ptdev->mmu->as.slots_lock);
}
+u32 panthor_vm_page_size(struct panthor_vm *vm)
+{
+ const struct io_pgtable *pgt = io_pgtable_ops_to_pgtable(vm->pgtbl_ops);
+ u32 pg_shift = ffs(pgt->cfg.pgsize_bitmap) - 1;
+
+ return 1u << pg_shift;
+}
+
static void panthor_vm_stop(struct panthor_vm *vm)
{
drm_sched_stop(&vm->sched, NULL);
@@ -827,7 +847,7 @@ static void panthor_vm_stop(struct panthor_vm *vm)
static void panthor_vm_start(struct panthor_vm *vm)
{
- drm_sched_start(&vm->sched, true);
+ drm_sched_start(&vm->sched, 0);
}
/**
@@ -874,9 +894,7 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
if (!drm_dev_enter(&ptdev->base, &cookie))
return 0;
- /* Flush the PTs only if we're already awake */
- if (pm_runtime_active(ptdev->base.dev))
- ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
+ ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
drm_dev_exit(cookie);
return ret;
@@ -886,10 +904,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
{
struct panthor_device *ptdev = vm->ptdev;
struct io_pgtable_ops *ops = vm->pgtbl_ops;
+ u64 start_iova = iova;
u64 offset = 0;
- drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
-
while (offset < size) {
size_t unmapped_sz = 0, pgcount;
size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);
@@ -904,6 +921,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
return -EINVAL;
}
+
+ drm_dbg(&ptdev->base,
+ "unmap: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pgcnt=%zu, pgsz=%zu",
+ vm->as.id, start_iova, size, iova + offset,
+ unmapped_sz / pgsize, pgsize);
+
offset += unmapped_sz;
}
@@ -919,6 +942,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
struct scatterlist *sgl;
struct io_pgtable_ops *ops = vm->pgtbl_ops;
u64 start_iova = iova;
+ u64 start_size = size;
int ret;
if (!size)
@@ -938,15 +962,18 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
len = min_t(size_t, len, size);
size -= len;
- drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx",
- vm->as.id, iova, &paddr, len);
-
while (len) {
size_t pgcount, mapped = 0;
size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
GFP_KERNEL, &mapped);
+
+ drm_dbg(&ptdev->base,
+ "map: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pa=%pad, pgcnt=%zu, pgsz=%zu",
+ vm->as.id, start_iova, start_size, iova, &paddr,
+ mapped / pgsize, pgsize);
+
iova += mapped;
paddr += mapped;
len -= mapped;
@@ -967,6 +994,8 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
if (!size)
break;
+
+ offset = 0;
}
return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
@@ -992,7 +1021,7 @@ static int flags_to_prot(u32 flags)
/**
* panthor_vm_alloc_va() - Allocate a region in the auto-va space
- * @VM: VM to allocate a region on.
+ * @vm: VM to allocate a region on.
* @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user
* wants the VA to be automatically allocated from the auto-VA range.
* @size: size of the VA range.
@@ -1010,12 +1039,13 @@ int
panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
struct drm_mm_node *va_node)
{
+ ssize_t vm_pgsz = panthor_vm_page_size(vm);
int ret;
- if (!size || (size & ~PAGE_MASK))
+ if (!size || !IS_ALIGNED(size, vm_pgsz))
return -EINVAL;
- if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK))
+ if (va != PANTHOR_VM_KERNEL_AUTO_VA && !IS_ALIGNED(va, vm_pgsz))
return -EINVAL;
mutex_lock(&vm->mm_lock);
@@ -1037,7 +1067,7 @@ panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
/**
* panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va()
- * @VM: VM to free the region on.
+ * @vm: VM to free the region on.
* @va_node: Memory node representing the region to free.
*/
void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
@@ -1047,47 +1077,18 @@ void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
mutex_unlock(&vm->mm_lock);
}
-static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
+static void panthor_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
{
struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj);
- struct drm_gpuvm *vm = vm_bo->vm;
- bool unpin;
- /* We must retain the GEM before calling drm_gpuvm_bo_put(),
- * otherwise the mutex might be destroyed while we hold it.
- * Same goes for the VM, since we take the VM resv lock.
- */
- drm_gem_object_get(&bo->base.base);
- drm_gpuvm_get(vm);
-
- /* We take the resv lock to protect against concurrent accesses to the
- * gpuvm evicted/extobj lists that are modified in
- * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put()
- * releases sthe last vm_bo reference.
- * We take the BO GPUVA list lock to protect the vm_bo removal from the
- * GEM vm_bo list.
- */
- dma_resv_lock(drm_gpuvm_resv(vm), NULL);
- mutex_lock(&bo->gpuva_list_lock);
- unpin = drm_gpuvm_bo_put(vm_bo);
- mutex_unlock(&bo->gpuva_list_lock);
- dma_resv_unlock(drm_gpuvm_resv(vm));
-
- /* If the vm_bo object was destroyed, release the pin reference that
- * was hold by this object.
- */
- if (unpin && !bo->base.base.import_attach)
+ if (!drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
-
- drm_gpuvm_put(vm);
- drm_gem_object_put(&bo->base.base);
+ kfree(vm_bo);
}
static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
struct panthor_vm *vm)
{
- struct panthor_vma *vma, *tmp_vma;
-
u32 remaining_pt_count = op_ctx->rsvd_page_tables.count -
op_ctx->rsvd_page_tables.ptr;
@@ -1100,16 +1101,26 @@ static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
kfree(op_ctx->rsvd_page_tables.pages);
if (op_ctx->map.vm_bo)
- panthor_vm_bo_put(op_ctx->map.vm_bo);
+ drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++)
kfree(op_ctx->preallocated_vmas[i]);
- list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) {
- list_del(&vma->node);
- panthor_vm_bo_put(vma->base.vm_bo);
- kfree(vma);
+ drm_gpuvm_bo_deferred_cleanup(&vm->base);
+}
+
+static void
+panthor_vm_op_ctx_return_vma(struct panthor_vm_op_ctx *op_ctx,
+ struct panthor_vma *vma)
+{
+ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
+ if (!op_ctx->preallocated_vmas[i]) {
+ op_ctx->preallocated_vmas[i] = vma;
+ return;
+ }
}
+
+ WARN_ON_ONCE(1);
}
static struct panthor_vma *
@@ -1141,10 +1152,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx)
break;
case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
- /* Partial unmaps might trigger a remap with either a prev or a next VA,
- * but not both.
+ /* Two VMAs can be needed for an unmap, as an unmap can happen
+ * in the middle of a drm_gpuva, requiring a remap with both
+ * prev & next VA. Or an unmap can span more than one drm_gpuva
+ * where the first and last ones are covered partially, requiring
+ * a remap for the first with a prev VA and remap for the last
+ * with a next VA.
*/
- vma_count = 1;
+ vma_count = 2;
break;
default:
@@ -1188,7 +1203,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
(flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP)
return -EINVAL;
- /* Make sure the VA and size are aligned and in-bounds. */
+ /* Make sure the VA and size are in-bounds. */
if (size > bo->base.base.size || offset > bo->base.base.size - size)
return -EINVAL;
@@ -1198,7 +1213,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
return -EINVAL;
memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->returned_vmas);
op_ctx->flags = flags;
op_ctx->va.range = size;
op_ctx->va.addr = va;
@@ -1207,9 +1221,11 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
if (ret)
goto err_cleanup;
- if (!bo->base.base.import_attach) {
+ if (!drm_gem_is_imported(&bo->base.base)) {
/* Pre-reserve the BO pages, so the map operation doesn't have to
- * allocate.
+ * allocate. This pin is dropped in panthor_vm_bo_free(), so
+ * once we have successfully called drm_gpuvm_bo_create(),
+ * GPUVM will take care of dropping the pin for us.
*/
ret = drm_gem_shmem_pin(&bo->base);
if (ret)
@@ -1218,7 +1234,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
if (IS_ERR(sgt)) {
- if (!bo->base.base.import_attach)
+ if (!drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
ret = PTR_ERR(sgt);
@@ -1229,26 +1245,24 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
if (!preallocated_vm_bo) {
- if (!bo->base.base.import_attach)
+ if (!drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
ret = -ENOMEM;
goto err_cleanup;
}
- mutex_lock(&bo->gpuva_list_lock);
- op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
- mutex_unlock(&bo->gpuva_list_lock);
-
- /* If the a vm_bo for this <VM,BO> combination exists, it already
- * retains a pin ref, and we can release the one we took earlier.
- *
- * If our pre-allocated vm_bo is picked, it now retains the pin ref,
- * which will be released in panthor_vm_bo_put().
+ /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our
+ * pre-allocated BO if the <BO,VM> association exists. Given we
+ * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will
+ * be called immediately, and we have to hold the VM resv lock when
+ * calling this function.
*/
- if (preallocated_vm_bo != op_ctx->map.vm_bo &&
- !bo->base.base.import_attach)
- drm_gem_shmem_unpin(&bo->base);
+ dma_resv_lock(panthor_vm_resv(vm), NULL);
+ mutex_lock(&bo->base.base.gpuva.lock);
+ op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
+ mutex_unlock(&bo->base.base.gpuva.lock);
+ dma_resv_unlock(panthor_vm_resv(vm));
op_ctx->map.bo_offset = offset;
@@ -1297,7 +1311,6 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
int ret;
memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->returned_vmas);
op_ctx->va.range = size;
op_ctx->va.addr = va;
op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP;
@@ -1345,7 +1358,6 @@ static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx
struct panthor_vm *vm)
{
memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->returned_vmas);
op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY;
}
@@ -1458,9 +1470,9 @@ panthor_vm_create_check_args(const struct panthor_device *ptdev,
/**
* panthor_vm_pool_create_vm() - Create a VM
+ * @ptdev: The panthor device
* @pool: The VM to create this VM on.
- * @kernel_va_start: Start of the region reserved for kernel objects.
- * @kernel_va_range: Size of the region reserved for kernel objects.
+ * @args: VM creation args.
*
* Return: a positive VM ID on success, a negative error code otherwise.
*/
@@ -1524,6 +1536,8 @@ static void panthor_vm_destroy(struct panthor_vm *vm)
*
* The VM resources are freed when the last reference on the VM object is
* dropped.
+ *
+ * Return: %0 for success, negative errno value for failure
*/
int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle)
{
@@ -1548,7 +1562,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
{
struct panthor_vm *vm;
+ xa_lock(&pool->xa);
vm = panthor_vm_get(xa_load(&pool->xa, handle));
+ xa_unlock(&pool->xa);
return vm;
}
@@ -1641,8 +1657,7 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
u32 source_id;
fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as));
- addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as));
- addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32;
+ addr = gpu_read64(ptdev, AS_FAULTADDRESS(as));
/* decode the fault status */
exception_type = fault_status & 0xFF;
@@ -1670,11 +1685,17 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
access_type, access_type_name(ptdev, fault_status),
source_id);
+ /* We don't handle VM faults at the moment, so let's just clear the
+ * interrupt and let the writer/reader crash.
+ * Note that COMPLETED irqs are never cleared, but this is fine
+ * because they are always masked.
+ */
+ gpu_write(ptdev, MMU_INT_CLEAR, mask);
+
/* Ignore MMU interrupts on this AS until it's been
* re-enabled.
*/
ptdev->mmu->irq.mask = new_int_mask;
- gpu_write(ptdev, MMU_INT_MASK, new_int_mask);
if (ptdev->mmu->as.slots[as].vm)
ptdev->mmu->as.slots[as].vm->unhandled_fault = true;
@@ -1905,7 +1926,34 @@ struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool c
return pool;
}
-static u64 mair_to_memattr(u64 mair)
+/**
+ * panthor_vm_heaps_sizes() - Calculate size of all heap chunks across all
+ * heaps over all the heap pools in a VM
+ * @pfile: File.
+ * @stats: Memory stats to be updated.
+ *
+ * Calculate all heap chunk sizes in all heap pools bound to a VM. If the VM
+ * is active, record the size as active as well.
+ */
+void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats)
+{
+ struct panthor_vm *vm;
+ unsigned long i;
+
+ if (!pfile->vms)
+ return;
+
+ xa_lock(&pfile->vms->xa);
+ xa_for_each(&pfile->vms->xa, i, vm) {
+ size_t size = panthor_heap_pool_size(vm->heaps.pool);
+ stats->resident += size;
+ if (vm->as.id >= 0)
+ stats->active += size;
+ }
+ xa_unlock(&pfile->vms->xa);
+}
+
+static u64 mair_to_memattr(u64 mair, bool coherent)
{
u64 memattr = 0;
u32 i;
@@ -1924,14 +1972,21 @@ static u64 mair_to_memattr(u64 mair)
AS_MEMATTR_AARCH64_SH_MIDGARD_INNER |
AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false);
} else {
- /* Use SH_CPU_INNER mode so SH_IS, which is used when
- * IOMMU_CACHE is set, actually maps to the standard
- * definition of inner-shareable and not Mali's
- * internal-shareable mode.
- */
out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB |
- AS_MEMATTR_AARCH64_SH_CPU_INNER |
AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2);
+ /* Use SH_MIDGARD_INNER mode when device isn't coherent,
+ * so SH_IS, which is used when IOMMU_CACHE is set, maps
+ * to Mali's internal-shareable mode. As per the Mali
+ * Spec, inner and outer-shareable modes aren't allowed
+ * for WB memory when coherency is disabled.
+ * Use SH_CPU_INNER mode when coherency is enabled, so
+ * that SH_IS actually maps to the standard definition of
+ * inner-shareable.
+ */
+ if (!coherent)
+ out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER;
+ else
+ out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER;
}
memattr |= (u64)out_attr << (8 * i);
@@ -1946,28 +2001,15 @@ static void panthor_vma_link(struct panthor_vm *vm,
{
struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
- mutex_lock(&bo->gpuva_list_lock);
+ mutex_lock(&bo->base.base.gpuva.lock);
drm_gpuva_link(&vma->base, vm_bo);
- drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo));
- mutex_unlock(&bo->gpuva_list_lock);
+ mutex_unlock(&bo->base.base.gpuva.lock);
}
-static void panthor_vma_unlink(struct panthor_vm *vm,
- struct panthor_vma *vma)
+static void panthor_vma_unlink(struct panthor_vma *vma)
{
- struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
- struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo);
-
- mutex_lock(&bo->gpuva_list_lock);
- drm_gpuva_unlink(&vma->base);
- mutex_unlock(&bo->gpuva_list_lock);
-
- /* drm_gpuva_unlink() release the vm_bo, but we manually retained it
- * when entering this function, so we can implement deferred VMA
- * destruction. Re-assign it here.
- */
- vma->base.vm_bo = vm_bo;
- list_add_tail(&vma->node, &vm->op_ctx->returned_vmas);
+ drm_gpuva_unlink_defer(&vma->base);
+ kfree(vma);
}
static void panthor_vma_init(struct panthor_vma *vma, u32 flags)
@@ -1996,15 +2038,17 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv)
ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
op_ctx->map.sgt, op->map.gem.offset,
op->map.va.range);
- if (ret)
+ if (ret) {
+ panthor_vm_op_ctx_return_vma(op_ctx, vma);
return ret;
+ }
- /* Ref owned by the mapping now, clear the obj field so we don't release the
- * pinning/obj ref behind GPUVA's back.
- */
drm_gpuva_map(&vm->base, &vma->base, &op->map);
panthor_vma_link(vm, vma, op_ctx->map.vm_bo);
+
+ drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
op_ctx->map.vm_bo = NULL;
+
return 0;
}
@@ -2043,16 +2087,14 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op,
* owned by the old mapping which will be released when this
* mapping is destroyed, we need to grab a ref here.
*/
- panthor_vma_link(vm, prev_vma,
- drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+ panthor_vma_link(vm, prev_vma, op->remap.unmap->va->vm_bo);
}
if (next_vma) {
- panthor_vma_link(vm, next_vma,
- drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+ panthor_vma_link(vm, next_vma, op->remap.unmap->va->vm_bo);
}
- panthor_vma_unlink(vm, unmap_vma);
+ panthor_vma_unlink(unmap_vma);
return 0;
}
@@ -2069,12 +2111,13 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op,
return ret;
drm_gpuva_unmap(&op->unmap);
- panthor_vma_unlink(vm, unmap_vma);
+ panthor_vma_unlink(unmap_vma);
return 0;
}
static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
.vm_free = panthor_vm_free,
+ .vm_bo_free = panthor_vm_bo_free,
.sm_step_map = panthor_gpuva_sm_step_map,
.sm_step_remap = panthor_gpuva_sm_step_remap,
.sm_step_unmap = panthor_gpuva_sm_step_unmap,
@@ -2112,15 +2155,22 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
mutex_lock(&vm->op_lock);
vm->op_ctx = op;
switch (op_type) {
- case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
+ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: {
+ const struct drm_gpuvm_map_req map_req = {
+ .map.va.addr = op->va.addr,
+ .map.va.range = op->va.range,
+ .map.gem.obj = op->map.vm_bo->obj,
+ .map.gem.offset = op->map.bo_offset,
+ };
+
if (vm->unusable) {
ret = -EINVAL;
break;
}
- ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range,
- op->map.vm_bo->obj, op->map.bo_offset);
+ ret = drm_gpuvm_sm_map(&vm->base, vm, &map_req);
break;
+ }
case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range);
@@ -2202,7 +2252,7 @@ static enum drm_gpu_sched_stat
panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job)
{
WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static const struct drm_sched_backend_ops panthor_vm_bind_ops = {
@@ -2232,6 +2282,16 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
u64 full_va_range = 1ull << va_bits;
struct drm_gem_object *dummy_gem;
struct drm_gpu_scheduler *sched;
+ const struct drm_sched_init_args sched_args = {
+ .ops = &panthor_vm_bind_ops,
+ .submit_wq = ptdev->mmu->vm.wq,
+ .num_rqs = 1,
+ .credit_limit = 1,
+ /* Bind operations are synchronous for now, no timeout needed. */
+ .timeout = MAX_SCHEDULE_TIMEOUT,
+ .name = "panthor-vm-bind",
+ .dev = ptdev->base.dev,
+ };
struct io_pgtable_cfg pgtbl_cfg;
u64 mair, min_va, va_range;
struct panthor_vm *vm;
@@ -2289,11 +2349,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
goto err_mm_takedown;
}
- /* Bind operations are synchronous for now, no timeout needed. */
- ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq,
- 1, 1, 0,
- MAX_SCHEDULE_TIMEOUT, NULL, NULL,
- "panthor-vm-bind", ptdev->base.dev);
+ ret = drm_sched_init(&vm->sched, &sched_args);
if (ret)
goto err_free_io_pgtable;
@@ -2303,7 +2359,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
goto err_sched_fini;
mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair;
- vm->memattr = mair_to_memattr(mair);
+ vm->memattr = mair_to_memattr(mair, ptdev->coherent);
mutex_lock(&ptdev->mmu->vm.lock);
list_add_tail(&vm->node, &ptdev->mmu->vm.list);
@@ -2317,8 +2373,9 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
* to be handled the same way user VMAs are.
*/
drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM",
- DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem,
- min_va, va_range, 0, 0, &panthor_gpuvm_ops);
+ DRM_GPUVM_RESV_PROTECTED | DRM_GPUVM_IMMEDIATE_MODE,
+ &ptdev->base, dummy_gem, min_va, va_range, 0, 0,
+ &panthor_gpuvm_ops);
drm_gem_object_put(dummy_gem);
return vm;
@@ -2343,11 +2400,12 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file,
const struct drm_panthor_vm_bind_op *op,
struct panthor_vm_op_ctx *op_ctx)
{
+ ssize_t vm_pgsz = panthor_vm_page_size(vm);
struct drm_gem_object *gem;
int ret;
/* Aligned on page size. */
- if ((op->va | op->size) & ~PAGE_MASK)
+ if (!IS_ALIGNED(op->va | op->size | op->bo_offset, vm_pgsz))
return -EINVAL;
switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
@@ -2436,7 +2494,7 @@ panthor_vm_bind_job_create(struct drm_file *file,
kref_init(&job->refcount);
job->vm = panthor_vm_get(vm);
- ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm);
+ ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm, file->client_id);
if (ret)
goto err_put_job;
@@ -2628,7 +2686,8 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm
*/
void panthor_mmu_unplug(struct panthor_device *ptdev)
{
- panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+ if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
+ panthor_mmu_irq_suspend(&ptdev->mmu->irq);
mutex_lock(&ptdev->mmu->as.slots_lock);
for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
@@ -2693,9 +2752,9 @@ int panthor_mmu_init(struct panthor_device *ptdev)
* which passes iova as an unsigned long. Patch the mmu_features to reflect this
* limitation.
*/
- if (sizeof(unsigned long) * 8 < va_bits) {
+ if (va_bits > BITS_PER_LONG) {
ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0);
- ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8;
+ ptdev->gpu_info.mmu_features |= BITS_PER_LONG;
}
return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq);