Diffstat (limited to 'drivers/gpu/drm/panthor/panthor_mmu.c')
-rw-r--r--  drivers/gpu/drm/panthor/panthor_mmu.c | 428
1 file changed, 225 insertions(+), 203 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index a49132f3778b..d4839d282689 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -7,6 +7,7 @@
 #include <drm/drm_exec.h>
 #include <drm/drm_gpuvm.h>
 #include <drm/drm_managed.h>
+#include <drm/drm_print.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/panthor_drm.h>
@@ -29,6 +30,7 @@
 #include "panthor_device.h"
 #include "panthor_gem.h"
+#include "panthor_gpu.h"
 #include "panthor_heap.h"
 #include "panthor_mmu.h"
 #include "panthor_regs.h"
@@ -53,26 +55,27 @@ struct panthor_mmu {
 	/** @irq: The MMU irq. */
 	struct panthor_irq irq;
 
-	/** @as: Address space related fields.
+	/**
+	 * @as: Address space related fields.
 	 *
 	 * The GPU has a limited number of address spaces (AS) slots, forcing
 	 * us to re-assign them to re-assign slots on-demand.
 	 */
 	struct {
-		/** @slots_lock: Lock protecting access to all other AS fields. */
+		/** @as.slots_lock: Lock protecting access to all other AS fields. */
 		struct mutex slots_lock;
 
-		/** @alloc_mask: Bitmask encoding the allocated slots. */
+		/** @as.alloc_mask: Bitmask encoding the allocated slots. */
 		unsigned long alloc_mask;
 
-		/** @faulty_mask: Bitmask encoding the faulty slots. */
+		/** @as.faulty_mask: Bitmask encoding the faulty slots. */
 		unsigned long faulty_mask;
 
-		/** @slots: VMs currently bound to the AS slots. */
+		/** @as.slots: VMs currently bound to the AS slots. */
 		struct panthor_as_slot slots[MAX_AS_SLOTS];
 
 		/**
-		 * @lru_list: List of least recently used VMs.
+		 * @as.lru_list: List of least recently used VMs.
 		 *
 		 * We use this list to pick a VM to evict when all slots are
 		 * used.
@@ -87,16 +90,16 @@ struct panthor_mmu {
 	/** @vm: VMs management fields */
 	struct {
-		/** @lock: Lock protecting access to list. */
+		/** @vm.lock: Lock protecting access to list. */
 		struct mutex lock;
 
-		/** @list: List containing all VMs. */
+		/** @vm.list: List containing all VMs. */
 		struct list_head list;
 
-		/** @reset_in_progress: True if a reset is in progress. */
+		/** @vm.reset_in_progress: True if a reset is in progress. */
 		bool reset_in_progress;
 
-		/** @wq: Workqueue used for the VM_BIND queues. */
+		/** @vm.wq: Workqueue used for the VM_BIND queues. */
 		struct workqueue_struct *wq;
 	} vm;
 };
@@ -143,14 +146,14 @@ struct panthor_vma {
 struct panthor_vm_op_ctx {
 	/** @rsvd_page_tables: Pages reserved for the MMU page table update. */
 	struct {
-		/** @count: Number of pages reserved. */
+		/** @rsvd_page_tables.count: Number of pages reserved. */
 		u32 count;
 
-		/** @ptr: Point to the first unused page in the @pages table. */
+		/** @rsvd_page_tables.ptr: Points to the first unused page in the @pages table. */
 		u32 ptr;
 
 		/**
-		 * @page: Array of pages that can be used for an MMU page table update.
+		 * @rsvd_page_tables.pages: Array of pages to be used for an MMU page table update.
 		 *
 		 * After an VM operation, there might be free pages left in this array.
 		 * They should be returned to the pt_cache as part of the op_ctx cleanup.
@@ -172,37 +175,23 @@ struct panthor_vm_op_ctx {
 
 	/** @va: Virtual range targeted by the VM operation. */
 	struct {
-		/** @addr: Start address. */
+		/** @va.addr: Start address. */
 		u64 addr;
 
-		/** @range: Range size. */
+		/** @va.range: Range size. */
 		u64 range;
 	} va;
 
-	/**
-	 * @returned_vmas: List of panthor_vma objects returned after a VM operation.
-	 *
-	 * For unmap operations, this will contain all VMAs that were covered by the
-	 * specified VA range.
-	 *
-	 * For map operations, this will contain all VMAs that previously mapped to
-	 * the specified VA range.
-	 *
-	 * Those VMAs, and the resources they point to will be released as part of
-	 * the op_ctx cleanup operation.
-	 */
-	struct list_head returned_vmas;
-
 	/** @map: Fields specific to a map operation. */
 	struct {
-		/** @vm_bo: Buffer object to map. */
+		/** @map.vm_bo: Buffer object to map. */
 		struct drm_gpuvm_bo *vm_bo;
 
-		/** @bo_offset: Offset in the buffer object. */
+		/** @map.bo_offset: Offset in the buffer object. */
 		u64 bo_offset;
 
 		/**
-		 * @sgt: sg-table pointing to pages backing the GEM object.
+		 * @map.sgt: sg-table pointing to pages backing the GEM object.
 		 *
 		 * This is gathered at job creation time, such that we don't have
 		 * to allocate in ::run_job().
@@ -210,7 +199,7 @@ struct panthor_vm_op_ctx {
 		struct sg_table *sgt;
 
 		/**
-		 * @new_vma: The new VMA object that will be inserted to the VA tree.
+		 * @map.new_vma: The new VMA object that will be inserted to the VA tree.
 		 */
 		struct panthor_vma *new_vma;
 	} map;
@@ -304,27 +293,27 @@ struct panthor_vm {
 	/** @kernel_auto_va: Automatic VA-range for kernel BOs. */
 	struct {
-		/** @start: Start of the automatic VA-range for kernel BOs. */
+		/** @kernel_auto_va.start: Start of the automatic VA-range for kernel BOs. */
 		u64 start;
 
-		/** @size: Size of the automatic VA-range for kernel BOs. */
+		/** @kernel_auto_va.size: Size of the automatic VA-range for kernel BOs. */
 		u64 end;
 	} kernel_auto_va;
 
 	/** @as: Address space related fields. */
 	struct {
 		/**
-		 * @id: ID of the address space this VM is bound to.
+		 * @as.id: ID of the address space this VM is bound to.
 		 *
 		 * A value of -1 means the VM is inactive/not bound.
 		 */
 		int id;
 
-		/** @active_cnt: Number of active users of this VM. */
+		/** @as.active_cnt: Number of active users of this VM. */
 		refcount_t active_cnt;
 
 		/**
-		 * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list.
+		 * @as.lru_node: Used to insert the VM in the panthor_mmu::as::lru_list.
 		 *
 		 * Active VMs should not be inserted in the LRU list.
 		 */
@@ -336,13 +325,13 @@ struct panthor_vm {
 	 */
 	struct {
 		/**
-		 * @pool: The heap pool attached to this VM.
+		 * @heaps.pool: The heap pool attached to this VM.
 		 *
 		 * Will stay NULL until someone creates a heap context on this VM.
 		 */
 		struct panthor_heap_pool *pool;
 
-		/** @lock: Lock used to protect access to @pool. */
+		/** @heaps.lock: Lock used to protect access to @pool. */
 		struct mutex lock;
 	} heaps;
@@ -408,7 +397,7 @@ struct panthor_vm_bind_job {
 	struct panthor_vm_op_ctx ctx;
 };
 
-/**
+/*
 * @pt_cache: Cache used to allocate MMU page tables.
 *
 * The pre-allocation pattern forces us to over-allocate to plan for
@@ -478,7 +467,7 @@ static void *alloc_pt(void *cookie, size_t size, gfp_t gfp)
 }
 
 /**
- * @free_pt() - Custom page table free function
+ * free_pt() - Custom page table free function
 * @cookie: Cookie passed at page table allocation time.
 * @data: Page table to free.
 * @size: Size of the page table. This size should be fixed,
@@ -509,9 +498,9 @@ static int wait_ready(struct panthor_device *ptdev, u32 as_nr)
 	/* Wait for the MMU status to indicate there is no active command, in
 	 * case one is pending.
 	 */
-	ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr),
-						val, !(val & AS_STATUS_AS_ACTIVE),
-						10, 100000);
+	ret = gpu_read_relaxed_poll_timeout_atomic(ptdev, AS_STATUS(as_nr), val,
+						   !(val & AS_STATUS_AS_ACTIVE),
+						   10, 100000);
 
 	if (ret) {
 		panthor_device_schedule_reset(ptdev);
@@ -563,16 +552,31 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr,
 	region = region_width | region_start;
 
 	/* Lock the region that needs to be updated */
-	gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region));
-	gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region));
+	gpu_write64(ptdev, AS_LOCKADDR(as_nr), region);
 	write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
 }
 
 static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
 				      u64 iova, u64 size, u32 op)
 {
+	const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV;
+	u32 lsc_flush_op;
+	int ret;
+
 	lockdep_assert_held(&ptdev->mmu->as.slots_lock);
 
+	switch (op) {
+	case AS_COMMAND_FLUSH_MEM:
+		lsc_flush_op = CACHE_CLEAN | CACHE_INV;
+		break;
+	case AS_COMMAND_FLUSH_PT:
+		lsc_flush_op = 0;
+		break;
+	default:
+		drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op);
+		return -EINVAL;
+	}
+
 	if (as_nr < 0)
 		return 0;
@@ -582,13 +586,24 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
 	 * power it up
 	 */
 
-	if (op != AS_COMMAND_UNLOCK)
-		lock_region(ptdev, as_nr, iova, size);
+	lock_region(ptdev, as_nr, iova, size);
+
+	ret = wait_ready(ptdev, as_nr);
+	if (ret)
+		return ret;
+
+	ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0);
+	if (ret)
+		return ret;
 
-	/* Run the MMU operation */
-	write_cmd(ptdev, as_nr, op);
+	/*
+	 * Explicitly unlock the region as the AS is not unlocked automatically
+	 * at the end of the GPU_CONTROL cache flush command, unlike
+	 * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT.
+	 */
+	write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK);
 
-	/* Wait for the flush to complete */
+	/* Wait for the unlock command to complete */
 	return wait_ready(ptdev, as_nr);
 }
@@ -614,14 +629,9 @@ static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
 	if (ret)
 		return ret;
 
-	gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab));
-	gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab));
-
-	gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
-	gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
-
-	gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg));
-	gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg));
+	gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab);
+	gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr);
+	gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg);
 
 	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
 }
@@ -634,14 +644,9 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
 	if (ret)
 		return ret;
 
-	gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0);
-	gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0);
-
-	gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0);
-	gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0);
-
-	gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
-	gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0);
+	gpu_write64(ptdev, AS_TRANSTAB(as_nr), 0);
+	gpu_write64(ptdev, AS_MEMATTR(as_nr), 0);
+	gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
 
 	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
 }
@@ -697,7 +702,7 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
 
 /**
 * panthor_vm_active() - Flag a VM as active
- * @VM: VM to flag as active.
+ * @vm: VM to flag as active.
 *
 * Assigns an address space to a VM so it can be used by the GPU/MCU.
 *
@@ -780,6 +785,7 @@ out_enable_as:
 	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
 		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
 		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
+		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
 		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
 	}
@@ -801,7 +807,7 @@ out_dev_exit:
 
 /**
 * panthor_vm_idle() - Flag a VM idle
- * @VM: VM to flag as idle.
+ * @vm: VM to flag as idle.
 *
 * When we know the GPU is done with the VM (no more jobs to process),
 * we can relinquish the AS slot attached to this VM, if any.
@@ -894,25 +900,13 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
 	return ret;
 }
 
-/**
- * panthor_vm_flush_all() - Flush L2 caches for the entirety of a VM's AS
- * @vm: VM whose cache to flush
- *
- * Return: 0 on success, a negative error code if flush failed.
- */
-int panthor_vm_flush_all(struct panthor_vm *vm)
-{
-	return panthor_vm_flush_range(vm, vm->base.mm_start, vm->base.mm_range);
-}
-
 static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 {
 	struct panthor_device *ptdev = vm->ptdev;
 	struct io_pgtable_ops *ops = vm->pgtbl_ops;
+	u64 start_iova = iova;
 	u64 offset = 0;
 
-	drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
-
 	while (offset < size) {
 		size_t unmapped_sz = 0, pgcount;
 		size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);
@@ -927,6 +921,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 			panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
 			return -EINVAL;
 		}
+
+		drm_dbg(&ptdev->base,
+			"unmap: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pgcnt=%zu, pgsz=%zu",
+			vm->as.id, start_iova, size, iova + offset,
+			unmapped_sz / pgsize, pgsize);
+
 		offset += unmapped_sz;
 	}
@@ -942,6 +942,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
 	struct scatterlist *sgl;
 	struct io_pgtable_ops *ops = vm->pgtbl_ops;
 	u64 start_iova = iova;
+	u64 start_size = size;
 	int ret;
 
 	if (!size)
@@ -961,15 +962,18 @@
 		len = min_t(size_t, len, size);
 		size -= len;
 
-		drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx",
-			vm->as.id, iova, &paddr, len);
-
 		while (len) {
 			size_t pgcount, mapped = 0;
 			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
 
 			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
 					     GFP_KERNEL, &mapped);
+
+			drm_dbg(&ptdev->base,
+				"map: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pa=%pad, pgcnt=%zu, pgsz=%zu",
+				vm->as.id, start_iova, start_size, iova, &paddr,
+				mapped / pgsize, pgsize);
+
 			iova += mapped;
 			paddr += mapped;
 			len -= mapped;
@@ -1017,7 +1021,7 @@ static int flags_to_prot(u32 flags)
 
 /**
 * panthor_vm_alloc_va() - Allocate a region in the auto-va space
- * @VM: VM to allocate a region on.
+ * @vm: VM to allocate a region on.
 * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user
 *      wants the VA to be automatically allocated from the auto-VA range.
 * @size: size of the VA range.
@@ -1063,7 +1067,7 @@
 
 /**
 * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va()
- * @VM: VM to free the region on.
+ * @vm: VM to free the region on.
 * @va_node: Memory node representing the region to free.
 */
 void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
@@ -1073,47 +1077,18 @@ void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
 	mutex_unlock(&vm->mm_lock);
 }
 
-static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
+static void panthor_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
 {
 	struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj);
-	struct drm_gpuvm *vm = vm_bo->vm;
-	bool unpin;
 
-	/* We must retain the GEM before calling drm_gpuvm_bo_put(),
-	 * otherwise the mutex might be destroyed while we hold it.
-	 * Same goes for the VM, since we take the VM resv lock.
-	 */
-	drm_gem_object_get(&bo->base.base);
-	drm_gpuvm_get(vm);
-
-	/* We take the resv lock to protect against concurrent accesses to the
-	 * gpuvm evicted/extobj lists that are modified in
-	 * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put()
-	 * releases the last vm_bo reference.
-	 * We take the BO GPUVA list lock to protect the vm_bo removal from the
-	 * GEM vm_bo list.
-	 */
-	dma_resv_lock(drm_gpuvm_resv(vm), NULL);
-	mutex_lock(&bo->gpuva_list_lock);
-	unpin = drm_gpuvm_bo_put(vm_bo);
-	mutex_unlock(&bo->gpuva_list_lock);
-	dma_resv_unlock(drm_gpuvm_resv(vm));
-
-	/* If the vm_bo object was destroyed, release the pin reference that
-	 * was hold by this object.
-	 */
-	if (unpin && !bo->base.base.import_attach)
+	if (!drm_gem_is_imported(&bo->base.base))
 		drm_gem_shmem_unpin(&bo->base);
 
-	drm_gpuvm_put(vm);
-	drm_gem_object_put(&bo->base.base);
+	kfree(vm_bo);
 }
 
 static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 				      struct panthor_vm *vm)
 {
-	struct panthor_vma *vma, *tmp_vma;
-
 	u32 remaining_pt_count = op_ctx->rsvd_page_tables.count -
 				 op_ctx->rsvd_page_tables.ptr;
@@ -1126,16 +1101,26 @@ static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 	kfree(op_ctx->rsvd_page_tables.pages);
 
 	if (op_ctx->map.vm_bo)
-		panthor_vm_bo_put(op_ctx->map.vm_bo);
+		drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
 
 	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++)
 		kfree(op_ctx->preallocated_vmas[i]);
 
-	list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) {
-		list_del(&vma->node);
-		panthor_vm_bo_put(vma->base.vm_bo);
-		kfree(vma);
+	drm_gpuvm_bo_deferred_cleanup(&vm->base);
+}
+
+static void
+panthor_vm_op_ctx_return_vma(struct panthor_vm_op_ctx *op_ctx,
+			     struct panthor_vma *vma)
+{
+	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
+		if (!op_ctx->preallocated_vmas[i]) {
+			op_ctx->preallocated_vmas[i] = vma;
+			return;
+		}
 	}
+
+	WARN_ON_ONCE(1);
 }
 
 static struct panthor_vma *
@@ -1167,10 +1152,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx)
 		break;
 
 	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
-		/* Partial unmaps might trigger a remap with either a prev or a next VA,
-		 * but not both.
+		/* Two VMAs can be needed for an unmap, as an unmap can happen
+		 * in the middle of a drm_gpuva, requiring a remap with both
+		 * prev & next VA. Or an unmap can span more than one drm_gpuva
+		 * where the first and last ones are covered partially, requiring
+		 * a remap for the first with a prev VA and remap for the last
+		 * with a next VA.
 		 */
-		vma_count = 1;
+		vma_count = 2;
 		break;
 
 	default:
@@ -1214,7 +1203,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 	    (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP)
 		return -EINVAL;
 
-	/* Make sure the VA and size are aligned and in-bounds. */
+	/* Make sure the VA and size are in-bounds. */
 	if (size > bo->base.base.size || offset > bo->base.base.size - size)
 		return -EINVAL;
@@ -1224,7 +1213,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 		return -EINVAL;
 
 	memset(op_ctx, 0, sizeof(*op_ctx));
-	INIT_LIST_HEAD(&op_ctx->returned_vmas);
 	op_ctx->flags = flags;
 	op_ctx->va.range = size;
 	op_ctx->va.addr = va;
@@ -1233,9 +1221,11 @@
 	if (ret)
 		goto err_cleanup;
 
-	if (!bo->base.base.import_attach) {
+	if (!drm_gem_is_imported(&bo->base.base)) {
 		/* Pre-reserve the BO pages, so the map operation doesn't have to
-		 * allocate.
+		 * allocate. This pin is dropped in panthor_vm_bo_free(), so
+		 * once we have successfully called drm_gpuvm_bo_create(),
+		 * GPUVM will take care of dropping the pin for us.
 		 */
 		ret = drm_gem_shmem_pin(&bo->base);
 		if (ret)
@@ -1244,7 +1234,7 @@
 
 	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
 	if (IS_ERR(sgt)) {
-		if (!bo->base.base.import_attach)
+		if (!drm_gem_is_imported(&bo->base.base))
 			drm_gem_shmem_unpin(&bo->base);
 
 		ret = PTR_ERR(sgt);
@@ -1255,7 +1245,7 @@
 
 	preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
 	if (!preallocated_vm_bo) {
-		if (!bo->base.base.import_attach)
+		if (!drm_gem_is_imported(&bo->base.base))
 			drm_gem_shmem_unpin(&bo->base);
 
 		ret = -ENOMEM;
@@ -1269,21 +1259,11 @@
 	 * calling this function.
 	 */
 	dma_resv_lock(panthor_vm_resv(vm), NULL);
-	mutex_lock(&bo->gpuva_list_lock);
+	mutex_lock(&bo->base.base.gpuva.lock);
 	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
-	mutex_unlock(&bo->gpuva_list_lock);
+	mutex_unlock(&bo->base.base.gpuva.lock);
 	dma_resv_unlock(panthor_vm_resv(vm));
 
-	/* If the a vm_bo for this <VM,BO> combination exists, it already
-	 * retains a pin ref, and we can release the one we took earlier.
-	 *
-	 * If our pre-allocated vm_bo is picked, it now retains the pin ref,
-	 * which will be released in panthor_vm_bo_put().
-	 */
-	if (preallocated_vm_bo != op_ctx->map.vm_bo &&
-	    !bo->base.base.import_attach)
-		drm_gem_shmem_unpin(&bo->base);
-
 	op_ctx->map.bo_offset = offset;
 
 	/* L1, L2 and L3 page tables.
@@ -1331,7 +1311,6 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 	int ret;
 
 	memset(op_ctx, 0, sizeof(*op_ctx));
-	INIT_LIST_HEAD(&op_ctx->returned_vmas);
 	op_ctx->va.range = size;
 	op_ctx->va.addr = va;
 	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP;
@@ -1379,7 +1358,6 @@ static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx
 						struct panthor_vm *vm)
 {
 	memset(op_ctx, 0, sizeof(*op_ctx));
-	INIT_LIST_HEAD(&op_ctx->returned_vmas);
 	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY;
 }
@@ -1492,9 +1470,9 @@ panthor_vm_create_check_args(const struct panthor_device *ptdev,
 
 /**
 * panthor_vm_pool_create_vm() - Create a VM
+ * @ptdev: The panthor device
 * @pool: The VM to create this VM on.
- * @kernel_va_start: Start of the region reserved for kernel objects.
- * @kernel_va_range: Size of the region reserved for kernel objects.
+ * @args: VM creation args.
 *
 * Return: a positive VM ID on success, a negative error code otherwise.
 */
@@ -1558,6 +1536,8 @@ static void panthor_vm_destroy(struct panthor_vm *vm)
 *
 * The VM resources are freed when the last reference on the VM object is
 * dropped.
+ *
+ * Return: %0 for success, negative errno value for failure
 */
 int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle)
 {
@@ -1677,8 +1657,7 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 		u32 source_id;
 
 		fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as));
-		addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as));
-		addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32;
+		addr = gpu_read64(ptdev, AS_FAULTADDRESS(as));
 
 		/* decode the fault status */
 		exception_type = fault_status & 0xFF;
@@ -1706,11 +1685,17 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 			 access_type, access_type_name(ptdev, fault_status),
 			 source_id);
 
+		/* We don't handle VM faults at the moment, so let's just clear the
+		 * interrupt and let the writer/reader crash.
+		 * Note that COMPLETED irqs are never cleared, but this is fine
+		 * because they are always masked.
+		 */
+		gpu_write(ptdev, MMU_INT_CLEAR, mask);
+
 		/* Ignore MMU interrupts on this AS until it's been
 		 * re-enabled.
 		 */
 		ptdev->mmu->irq.mask = new_int_mask;
-		gpu_write(ptdev, MMU_INT_MASK, new_int_mask);
 
 		if (ptdev->mmu->as.slots[as].vm)
 			ptdev->mmu->as.slots[as].vm->unhandled_fault = true;
@@ -1941,7 +1926,34 @@ struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool c
 	return pool;
 }
 
-static u64 mair_to_memattr(u64 mair)
+/**
+ * panthor_vm_heaps_sizes() - Calculate size of all heap chunks across all
+ * heaps over all the heap pools in a VM
+ * @pfile: File.
+ * @stats: Memory stats to be updated.
+ *
+ * Calculate all heap chunk sizes in all heap pools bound to a VM. If the VM
+ * is active, record the size as active as well.
+ */
+void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats)
+{
+	struct panthor_vm *vm;
+	unsigned long i;
+
+	if (!pfile->vms)
+		return;
+
+	xa_lock(&pfile->vms->xa);
+	xa_for_each(&pfile->vms->xa, i, vm) {
+		size_t size = panthor_heap_pool_size(vm->heaps.pool);
+
+		stats->resident += size;
+		if (vm->as.id >= 0)
+			stats->active += size;
+	}
+	xa_unlock(&pfile->vms->xa);
+}
+
+static u64 mair_to_memattr(u64 mair, bool coherent)
 {
 	u64 memattr = 0;
 	u32 i;
@@ -1960,14 +1972,21 @@ static u64 mair_to_memattr(u64 mair)
 				   AS_MEMATTR_AARCH64_SH_MIDGARD_INNER |
 				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false);
 		} else {
-			/* Use SH_CPU_INNER mode so SH_IS, which is used when
-			 * IOMMU_CACHE is set, actually maps to the standard
-			 * definition of inner-shareable and not Mali's
-			 * internal-shareable mode.
-			 */
 			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB |
-				   AS_MEMATTR_AARCH64_SH_CPU_INNER |
 				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2);
+			/* Use SH_MIDGARD_INNER mode when device isn't coherent,
+			 * so SH_IS, which is used when IOMMU_CACHE is set, maps
+			 * to Mali's internal-shareable mode. As per the Mali
+			 * Spec, inner and outer-shareable modes aren't allowed
+			 * for WB memory when coherency is disabled.
+			 * Use SH_CPU_INNER mode when coherency is enabled, so
+			 * that SH_IS actually maps to the standard definition of
+			 * inner-shareable.
			 */
+			if (!coherent)
+				out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER;
+			else
+				out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER;
 		}
 
 		memattr |= (u64)out_attr << (8 * i);
@@ -1982,28 +2001,15 @@ static void panthor_vma_link(struct panthor_vm *vm,
 {
 	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
 
-	mutex_lock(&bo->gpuva_list_lock);
+	mutex_lock(&bo->base.base.gpuva.lock);
 	drm_gpuva_link(&vma->base, vm_bo);
-	drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo));
-	mutex_unlock(&bo->gpuva_list_lock);
+	mutex_unlock(&bo->base.base.gpuva.lock);
 }
 
-static void panthor_vma_unlink(struct panthor_vm *vm,
-			       struct panthor_vma *vma)
+static void panthor_vma_unlink(struct panthor_vma *vma)
 {
-	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
-	struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo);
-
-	mutex_lock(&bo->gpuva_list_lock);
-	drm_gpuva_unlink(&vma->base);
-	mutex_unlock(&bo->gpuva_list_lock);
-
-	/* drm_gpuva_unlink() release the vm_bo, but we manually retained it
-	 * when entering this function, so we can implement deferred VMA
-	 * destruction. Re-assign it here.
-	 */
-	vma->base.vm_bo = vm_bo;
-	list_add_tail(&vma->node, &vm->op_ctx->returned_vmas);
+	drm_gpuva_unlink_defer(&vma->base);
+	kfree(vma);
 }
 
 static void panthor_vma_init(struct panthor_vma *vma, u32 flags)
@@ -2032,15 +2038,17 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv)
 	ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
 				   op_ctx->map.sgt, op->map.gem.offset,
 				   op->map.va.range);
-	if (ret)
+	if (ret) {
+		panthor_vm_op_ctx_return_vma(op_ctx, vma);
 		return ret;
+	}
 
-	/* Ref owned by the mapping now, clear the obj field so we don't release the
-	 * pinning/obj ref behind GPUVA's back.
-	 */
 	drm_gpuva_map(&vm->base, &vma->base, &op->map);
 	panthor_vma_link(vm, vma, op_ctx->map.vm_bo);
+
+	drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
 	op_ctx->map.vm_bo = NULL;
+
 	return 0;
 }
@@ -2079,16 +2087,14 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op,
 		 * owned by the old mapping which will be released when this
		 * mapping is destroyed, we need to grab a ref here.
		 */
-		panthor_vma_link(vm, prev_vma,
-				 drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+		panthor_vma_link(vm, prev_vma, op->remap.unmap->va->vm_bo);
 	}
 
 	if (next_vma) {
-		panthor_vma_link(vm, next_vma,
-				 drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+		panthor_vma_link(vm, next_vma, op->remap.unmap->va->vm_bo);
 	}
 
-	panthor_vma_unlink(vm, unmap_vma);
+	panthor_vma_unlink(unmap_vma);
 	return 0;
 }
@@ -2105,12 +2111,13 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op,
 		return ret;
 
 	drm_gpuva_unmap(&op->unmap);
-	panthor_vma_unlink(vm, unmap_vma);
+	panthor_vma_unlink(unmap_vma);
 	return 0;
 }
 
 static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
 	.vm_free = panthor_vm_free,
+	.vm_bo_free = panthor_vm_bo_free,
 	.sm_step_map = panthor_gpuva_sm_step_map,
 	.sm_step_remap = panthor_gpuva_sm_step_remap,
 	.sm_step_unmap = panthor_gpuva_sm_step_unmap,
@@ -2148,15 +2155,22 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
 	mutex_lock(&vm->op_lock);
 	vm->op_ctx = op;
 	switch (op_type) {
-	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: {
+		const struct drm_gpuvm_map_req map_req = {
+			.map.va.addr = op->va.addr,
+			.map.va.range = op->va.range,
+			.map.gem.obj = op->map.vm_bo->obj,
+			.map.gem.offset = op->map.bo_offset,
+		};
+
 		if (vm->unusable) {
 			ret = -EINVAL;
 			break;
 		}
 
-		ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range,
-				       op->map.vm_bo->obj, op->map.bo_offset);
+		ret = drm_gpuvm_sm_map(&vm->base, vm, &map_req);
 		break;
+	}
 
 	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
 		ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range);
@@ -2238,7 +2252,7 @@ static enum drm_gpu_sched_stat
 panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job)
 {
 	WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
-	return DRM_GPU_SCHED_STAT_NOMINAL;
+	return DRM_GPU_SCHED_STAT_RESET;
 }
 
 static const struct drm_sched_backend_ops panthor_vm_bind_ops = {
@@ -2268,6 +2282,16 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
 	u64 full_va_range = 1ull << va_bits;
 	struct drm_gem_object *dummy_gem;
 	struct drm_gpu_scheduler *sched;
+	const struct drm_sched_init_args sched_args = {
+		.ops = &panthor_vm_bind_ops,
+		.submit_wq = ptdev->mmu->vm.wq,
+		.num_rqs = 1,
+		.credit_limit = 1,
+		/* Bind operations are synchronous for now, no timeout needed. */
+		.timeout = MAX_SCHEDULE_TIMEOUT,
+		.name = "panthor-vm-bind",
+		.dev = ptdev->base.dev,
+	};
 	struct io_pgtable_cfg pgtbl_cfg;
 	u64 mair, min_va, va_range;
 	struct panthor_vm *vm;
@@ -2325,11 +2349,7 @@
 		goto err_mm_takedown;
 	}
 
-	/* Bind operations are synchronous for now, no timeout needed. */
-	ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq,
-			     1, 1, 0,
-			     MAX_SCHEDULE_TIMEOUT, NULL, NULL,
-			     "panthor-vm-bind", ptdev->base.dev);
+	ret = drm_sched_init(&vm->sched, &sched_args);
 	if (ret)
 		goto err_free_io_pgtable;
@@ -2339,7 +2359,7 @@
 		goto err_sched_fini;
 
 	mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair;
-	vm->memattr = mair_to_memattr(mair);
+	vm->memattr = mair_to_memattr(mair, ptdev->coherent);
 
 	mutex_lock(&ptdev->mmu->vm.lock);
 	list_add_tail(&vm->node, &ptdev->mmu->vm.list);
@@ -2353,8 +2373,9 @@
 	 * to be handled the same way user VMAs are.
 	 */
 	drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM",
-		       DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem,
-		       min_va, va_range, 0, 0, &panthor_gpuvm_ops);
+		       DRM_GPUVM_RESV_PROTECTED | DRM_GPUVM_IMMEDIATE_MODE,
+		       &ptdev->base, dummy_gem, min_va, va_range, 0, 0,
+		       &panthor_gpuvm_ops);
 	drm_gem_object_put(dummy_gem);
 
 	return vm;
@@ -2384,7 +2405,7 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file,
 	int ret;
 
 	/* Aligned on page size. */
-	if (!IS_ALIGNED(op->va | op->size, vm_pgsz))
+	if (!IS_ALIGNED(op->va | op->size | op->bo_offset, vm_pgsz))
 		return -EINVAL;
 
 	switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
@@ -2473,7 +2494,7 @@ panthor_vm_bind_job_create(struct drm_file *file,
 	kref_init(&job->refcount);
 	job->vm = panthor_vm_get(vm);
 
-	ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm);
+	ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm, file->client_id);
 	if (ret)
 		goto err_put_job;
@@ -2665,7 +2686,8 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm
 */
 void panthor_mmu_unplug(struct panthor_device *ptdev)
 {
-	panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
+		panthor_mmu_irq_suspend(&ptdev->mmu->irq);
 
 	mutex_lock(&ptdev->mmu->as.slots_lock);
 	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
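A note on the wait_ready() hunk: it replaces an open-coded readl_relaxed_poll_timeout_atomic() on ptdev->iomem with a gpu_read_relaxed_poll_timeout_atomic() wrapper. A minimal sketch of how such a wrapper can sit on top of the generic iopoll helper, assuming the MMIO base is still ptdev->iomem as the removed lines show (the real macro lives in the panthor register headers and may differ):

	#include <linux/iopoll.h>

	/* Hypothetical sketch: resolve (ptdev, reg) to an MMIO address and
	 * defer to the generic relaxed atomic poll helper, mirroring the
	 * open-coded call removed from wait_ready().
	 */
	#define gpu_read_relaxed_poll_timeout_atomic(ptdev, reg, val, cond, \
						     delay_us, timeout_us)  \
		readl_relaxed_poll_timeout_atomic((ptdev)->iomem + (reg),   \
						  val, cond, delay_us,      \
						  timeout_us)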
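The AS_LOCKADDR, AS_TRANSTAB, AS_MEMATTR, AS_TRANSCFG and AS_FAULTADDRESS hunks collapse paired *_LO/*_HI accesses into single gpu_write64()/gpu_read64() calls. A plausible sketch of the write helper, assuming each 64-bit register remains two consecutive 32-bit halves at reg and reg + 4, which is what the removed lower_32_bits()/upper_32_bits() pairs imply:

	/* Hypothetical sketch: write the low half at reg and the high half
	 * at reg + 4, like the removed gpu_write() pairs did.
	 */
	static inline void gpu_write64(struct panthor_device *ptdev, u32 reg, u64 data)
	{
		gpu_write(ptdev, reg, lower_32_bits(data));
		gpu_write(ptdev, reg + 4, upper_32_bits(data));
	}

Centralizing the split in one helper keeps the LO/HI access pattern consistent across call sites instead of repeating it at every register update.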
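The mmu_hw_do_operation_locked() rework stops issuing AS_COMMAND_FLUSH_MEM/FLUSH_PT directly and instead brackets a GPU_CONTROL cache flush with an explicit lock/unlock of the VA region. Condensed into comment form from the hunks above:

	/* Flush sequence in the new mmu_hw_do_operation_locked():
	 *
	 * 1. lock_region()                - lock the iova/size VA range
	 * 2. wait_ready()                 - wait for the LOCK command to land
	 * 3. panthor_gpu_flush_caches()   - flush via GPU_CONTROL; the L2 is
	 *                                   always cleaned+invalidated, the LSC
	 *                                   only for FLUSH_MEM (lsc_flush_op is
	 *                                   0 for FLUSH_PT)
	 * 4. write_cmd(AS_COMMAND_UNLOCK) - explicit unlock, since the
	 *                                   GPU_CONTROL flush does not unlock
	 *                                   the AS automatically
	 * 5. wait_ready()                 - wait for the UNLOCK command
	 */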
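Most of the remaining churn converts the driver from its hand-rolled panthor_vm_bo_put(), which had to retake the VM resv lock and the GEM GPUVA list lock just to drop a vm_bo reference safely, to GPUVM's deferred-cleanup interface: drm_gpuvm_bo_put_deferred(), drm_gpuva_unlink_defer(), drm_gpuvm_bo_deferred_cleanup() and the new vm_bo_free hook. A condensed sketch of the resulting lifecycle; example_release_refs() and example_flush_queue() are hypothetical names for illustration only:

	/* Producer side: drop vm_bo references from contexts where the resv
	 * or GPUVA list locks cannot be taken; destruction is queued rather
	 * than performed inline.
	 */
	static void example_release_refs(struct panthor_vm_op_ctx *op_ctx,
					 struct panthor_vma *vma)
	{
		drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
		drm_gpuva_unlink_defer(&vma->base);
	}

	/* Consumer side: once locking is allowed, flush the queue. GPUVM
	 * invokes the ->vm_bo_free() hook (panthor_vm_bo_free() above) for
	 * every vm_bo it tears down, which drops the shmem pin and frees it.
	 */
	static void example_flush_queue(struct panthor_vm *vm)
	{
		drm_gpuvm_bo_deferred_cleanup(&vm->base);
	}

This is also why the unpin after drm_gpuvm_bo_obtain_prealloc() could be dropped: once a vm_bo exists for the <VM,BO> pair, the pin travels with it and is released exactly once, in panthor_vm_bo_free().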
