Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r--   drivers/gpu/drm/xe/xe_vm.c | 460
1 file changed, 378 insertions(+), 82 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 367c84b90e9e..2035604121e6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -732,7 +732,9 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 						    DMA_RESV_USAGE_BOOKKEEP,
 						    false, MAX_SCHEDULE_TIMEOUT);
 
+			down_read(&vm->userptr.notifier_lock);
 			err = xe_vm_invalidate_vma(&uvma->vma);
+			up_read(&vm->userptr.notifier_lock);
 			xe_vm_unlock(vm);
 			if (err)
 				break;
@@ -798,21 +800,47 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
 }
 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
 
+static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
+{
+	struct xe_vma *vma;
+
+	vma = gpuva_to_vma(op->base.prefetch.va);
+
+	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
+		xa_destroy(&op->prefetch_range.range);
+}
+
+static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *op;
+
+	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+		return;
+
+	list_for_each_entry(op, &vops->list, link)
+		xe_vma_svm_prefetch_op_fini(op);
+}
+
 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
 {
 	int i;
 
+	xe_vma_svm_prefetch_ops_fini(vops);
+
 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
 		kfree(vops->pt_update_ops[i].ops);
 }
 
-static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
+static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
 {
 	int i;
 
+	if (!inc_val)
+		return;
+
 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
 		if (BIT(i) & tile_mask)
-			++vops->pt_update_ops[i].num_ops;
+			vops->pt_update_ops[i].num_ops += inc_val;
 }
 
 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
@@ -842,7 +870,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
 
 	xe_vm_populate_rebind(op, vma, tile_mask);
 	list_add_tail(&op->link, &vops->list);
-	xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
+	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
 
 	return 0;
 }
@@ -977,7 +1005,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
 
 	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
 	list_add_tail(&op->link, &vops->list);
-	xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
+	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
 
 	return 0;
 }
@@ -1062,7 +1090,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
 
 	xe_vm_populate_range_unbind(op, range);
 	list_add_tail(&op->link, &vops->list);
-	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present);
+	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
 
 	return 0;
 }
@@ -1678,13 +1706,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	 * scheduler drops all the references of it, hence protecting the VM
 	 * for this case is necessary.
 	 */
-	if (flags & XE_VM_FLAG_LR_MODE)
+	if (flags & XE_VM_FLAG_LR_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
 		xe_pm_runtime_get_noresume(xe);
+	}
+
+	if (flags & XE_VM_FLAG_FAULT_MODE) {
+		err = xe_svm_init(vm);
+		if (err)
+			goto err_no_resv;
+	}
 
 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
 	if (!vm_resv_obj) {
 		err = -ENOMEM;
-		goto err_no_resv;
+		goto err_svm_fini;
 	}
 
 	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
@@ -1724,10 +1760,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		vm->batch_invalidate_tlb = true;
 	}
 
-	if (vm->flags & XE_VM_FLAG_LR_MODE) {
-		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+	if (vm->flags & XE_VM_FLAG_LR_MODE)
 		vm->batch_invalidate_tlb = false;
-	}
 
 	/* Fill pt_root after allocating scratch tables */
 	for_each_tile(tile, xe, id) {
@@ -1757,12 +1791,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		}
 	}
 
-	if (flags & XE_VM_FLAG_FAULT_MODE) {
-		err = xe_svm_init(vm);
-		if (err)
-			goto err_close;
-	}
-
 	if (number_tiles > 1)
 		vm->composite_fence_ctx = dma_fence_context_alloc(1);
 
@@ -1776,6 +1804,11 @@ err_close:
 	xe_vm_close_and_put(vm);
 	return ERR_PTR(err);
 
+err_svm_fini:
+	if (flags & XE_VM_FLAG_FAULT_MODE) {
+		vm->size = 0; /* close the vm */
+		xe_svm_fini(vm);
+	}
 err_no_resv:
 	mutex_destroy(&vm->snap_mutex);
 	for_each_tile(tile, xe, id)
@@ -2049,7 +2082,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
-			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 !xe->info.needs_scratch))
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
@@ -2135,6 +2169,35 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	return err;
 }
 
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
+{
+	if (page_addr > xe_vma_end(vma) - 1 ||
+	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
+		return false;
+
+	return true;
+}
+
+/**
+ * xe_vm_find_vma_by_addr() - Find a VMA by its address
+ *
+ * @vm: the xe_vm the vma belongs to
+ * @page_addr: address to look up
+ */
+struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *vma = NULL;
+
+	if (vm->usm.last_fault_vma) {	/* Fast lookup */
+		if (vma_matches(vm->usm.last_fault_vma, page_addr))
+			vma = vm->usm.last_fault_vma;
+	}
+	if (!vma)
+		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
+
+	return vma;
+}
+
 static const u32 region_to_mem_type[] = {
 	XE_PL_TT,
 	XE_PL_VRAM0,
@@ -2201,13 +2264,39 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 }
 #endif
 
+static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
+{
+	if (!xe_vm_in_fault_mode(vm))
+		return false;
+
+	if (!xe_vm_has_scratch(vm))
+		return false;
+
+	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
+		return false;
+
+	return true;
+}
+
+static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
+{
+	struct drm_gpuva_op *__op;
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		xe_vma_svm_prefetch_op_fini(op);
+	}
+}
+
 /*
  * Create operations list from IOCTL arguments, setup operations fields so parse
  * and commit steps are decoupled from IOCTL arguments. This step can fail.
  */
 static struct drm_gpuva_ops *
-vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
-			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
+			 struct xe_bo *bo, u64 bo_offset_or_userptr,
+			 u64 addr, u64 range,
 			 u32 operation, u32 flags,
 			 u32 prefetch_region, u16 pat_index)
 {
@@ -2215,6 +2304,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	struct drm_gpuva_ops *ops;
 	struct drm_gpuva_op *__op;
 	struct drm_gpuvm_bo *vm_bo;
+	u64 range_end = addr + range;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
@@ -2273,15 +2363,83 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 				DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
 			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
 			op->map.pat_index = pat_index;
+			op->map.invalidate_on_bind =
+				__xe_vm_needs_clear_scratch_pages(vm, flags);
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
-			op->prefetch.region = prefetch_region;
-		}
+			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+			struct xe_svm_range *svm_range;
+			struct drm_gpusvm_ctx ctx = {};
+			struct xe_tile *tile;
+			u8 id, tile_mask = 0;
+			u32 i;
+
+			if (!xe_vma_is_cpu_addr_mirror(vma)) {
+				op->prefetch.region = prefetch_region;
+				break;
+			}
+
+			ctx.read_only = xe_vma_read_only(vma);
+			ctx.devmem_possible = IS_DGFX(vm->xe) &&
+					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
+
+			for_each_tile(tile, vm->xe, id)
+				tile_mask |= 0x1 << id;
+
+			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
+			op->prefetch_range.region = prefetch_region;
+			op->prefetch_range.ranges_count = 0;
+alloc_next_range:
+			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
+
+			if (PTR_ERR(svm_range) == -ENOENT) {
+				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
+
+				addr = ret == ULONG_MAX ? 0 : ret;
+				if (addr)
+					goto alloc_next_range;
+				else
+					goto print_op_label;
+			}
+
+			if (IS_ERR(svm_range)) {
+				err = PTR_ERR(svm_range);
+				goto unwind_prefetch_ops;
+			}
+
+			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) {
+				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
+				goto check_next_range;
+			}
+
+			err = xa_alloc(&op->prefetch_range.range,
+				       &i, svm_range, xa_limit_32b,
+				       GFP_KERNEL);
+
+			if (err)
+				goto unwind_prefetch_ops;
+
+			op->prefetch_range.ranges_count++;
+			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
+			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
+check_next_range:
+			if (range_end > xe_svm_range_end(svm_range) &&
+			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
+				addr = xe_svm_range_end(svm_range);
+				goto alloc_next_range;
+			}
+		}
+print_op_label:
 		print_op(vm->xe, __op);
 	}
 
 	return ops;
+
+unwind_prefetch_ops:
+	xe_svm_prefetch_gpuva_ops_fini(ops);
+	drm_gpuva_ops_free(&vm->gpuvm, ops);
+	return ERR_PTR(err);
 }
+
 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
@@ -2472,10 +2630,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				return PTR_ERR(vma);
 
 			op->map.vma = vma;
-			if ((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
-			    !op->map.is_cpu_addr_mirror)
+			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
+			     !op->map.is_cpu_addr_mirror) ||
+			    op->map.invalidate_on_bind)
 				xe_vma_ops_incr_pt_update_ops(vops,
-							      op->tile_mask);
+							      op->tile_mask, 1);
 			break;
 		}
 		case DRM_GPUVA_OP_REMAP:
@@ -2484,6 +2643,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				gpuva_to_vma(op->base.remap.unmap->va);
 			bool skip = xe_vma_is_cpu_addr_mirror(old);
 			u64 start = xe_vma_start(old), end = xe_vma_end(old);
+			int num_remap_ops = 0;
 
 			if (op->base.remap.prev)
 				start = op->base.remap.prev->va.addr +
@@ -2536,7 +2696,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 					       (ULL)op->remap.start,
 					       (ULL)op->remap.range);
 				} else {
-					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+					num_remap_ops++;
 				}
 			}
 
@@ -2565,11 +2725,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 					       (ULL)op->remap.start,
 					       (ULL)op->remap.range);
 				} else {
-					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+					num_remap_ops++;
 				}
 			}
 
 			if (!skip)
-				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+				num_remap_ops++;
+
+			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
 			break;
 		}
 		case DRM_GPUVA_OP_UNMAP:
@@ -2581,7 +2743,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 				return -EBUSY;
 
 			if (!xe_vma_is_cpu_addr_mirror(vma))
-				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
 			break;
 		case DRM_GPUVA_OP_PREFETCH:
 			vma = gpuva_to_vma(op->base.prefetch.va);
@@ -2592,8 +2754,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 					return err;
 			}
 
-			if (!xe_vma_is_cpu_addr_mirror(vma))
-				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+			if (xe_vma_is_cpu_addr_mirror(vma))
+				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
+							      op->prefetch_range.ranges_count);
+			else
+				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
+
 			break;
 		default:
 			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
@@ -2719,6 +2885,57 @@ static int check_ufence(struct xe_vma *vma)
 	return 0;
 }
 
+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	bool devmem_possible = IS_DGFX(vm->xe) &&
+		IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
+	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+	int err = 0;
+
+	struct xe_svm_range *svm_range;
+	struct drm_gpusvm_ctx ctx = {};
+	struct xe_tile *tile;
+	unsigned long i;
+	u32 region;
+
+	if (!xe_vma_is_cpu_addr_mirror(vma))
+		return 0;
+
+	region = op->prefetch_range.region;
+
+	ctx.read_only = xe_vma_read_only(vma);
+	ctx.devmem_possible = devmem_possible;
+	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
+
+	/* TODO: Threading the migration */
+	xa_for_each(&op->prefetch_range.range, i, svm_range) {
+		if (!region)
+			xe_svm_range_migrate_to_smem(vm, svm_range);
+
+		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
+			tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
+			err = xe_svm_alloc_vram(tile, svm_range, &ctx);
+			if (err) {
+				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+				return -ENODATA;
+			}
+			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
+		}
+
+		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
+		if (err) {
+			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+				err = -ENODATA;
+			return err;
+		}
+		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+	}
+
+	return err;
+}
+
 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 			    struct xe_vma_op *op)
 {
@@ -2726,9 +2943,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		err = vma_lock_and_validate(exec, op->map.vma,
-					    !xe_vm_in_fault_mode(vm) ||
-					    op->map.immediate);
+		if (!op->map.invalidate_on_bind)
+			err = vma_lock_and_validate(exec, op->map.vma,
+						    !xe_vm_in_fault_mode(vm) ||
+						    op->map.immediate);
 		break;
 	case DRM_GPUVA_OP_REMAP:
 		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
@@ -2755,7 +2973,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 	case DRM_GPUVA_OP_PREFETCH:
 	{
 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
-		u32 region = op->prefetch.region;
+		u32 region;
+
+		if (xe_vma_is_cpu_addr_mirror(vma))
+			region = op->prefetch_range.region;
+		else
+			region = op->prefetch.region;
 
 		xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
 
@@ -2774,6 +2997,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 	return err;
 }
 
+static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *op;
+	int err;
+
+	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+		return 0;
+
+	list_for_each_entry(op, &vops->list, link) {
+		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+			err = prefetch_ranges(vm, op);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 					   struct xe_vm *vm,
 					   struct xe_vma_ops *vops)
@@ -3082,9 +3324,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
 	if (!*bind_ops)
 		return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
 
-	err = __copy_from_user(*bind_ops, bind_user,
-			       sizeof(struct drm_xe_vm_bind_op) *
-			       args->num_binds);
+	err = copy_from_user(*bind_ops, bind_user,
+			     sizeof(struct drm_xe_vm_bind_op) *
+			     args->num_binds);
 	if (XE_IOCTL_DBG(xe, err)) {
 		err = -EFAULT;
 		goto free_bind_ops;
@@ -3109,7 +3351,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
 
 		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
 				 (!xe_vm_in_fault_mode(vm) ||
-				 !IS_ENABLED(CONFIG_DRM_GPUSVM)))) {
+				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
@@ -3215,6 +3457,7 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
 	vops->q = q;
 	vops->syncs = syncs;
 	vops->num_syncs = num_syncs;
+	vops->flags = 0;
 }
 
 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
@@ -3223,9 +3466,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 {
 	u16 coh_mode;
 
-	if (XE_IOCTL_DBG(xe, range > bo->size) ||
+	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
 	    XE_IOCTL_DBG(xe, obj_offset >
-			 bo->size - range)) {
+			 xe_bo_size(bo) - range)) {
 		return -EINVAL;
 	}
 
@@ -3243,7 +3486,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 				 XE_64K_PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
-			return  -EINVAL;
+			return -EINVAL;
 		}
 	}
 
@@ -3251,7 +3494,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 	if (bo->cpu_caching) {
 		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
 				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
-			return  -EINVAL;
+			return -EINVAL;
 		}
 	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
 		/*
@@ -3260,7 +3503,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 		 * how it was mapped on the CPU. Just assume is it
 		 * potentially cached on CPU side.
 		 */
-		return  -EINVAL;
+		return -EINVAL;
 	}
 
 	/* If a BO is protected it can only be mapped if the key is still valid */
@@ -3422,7 +3665,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
 		u16 pat_index = bind_ops[i].pat_index;
 
-		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
 						  addr, range, op, flags,
 						  prefetch_region, pat_index);
 		if (IS_ERR(ops[i])) {
@@ -3455,6 +3698,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		goto unwind_ops;
 
+	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
+	if (err)
+		goto unwind_ops;
+
 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 	if (IS_ERR(fence))
 		err = PTR_ERR(fence);
@@ -3524,7 +3771,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
 
 	xe_vma_ops_init(&vops, vm, q, NULL, 0);
 
-	ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
+	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
 				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
 				       vm->xe->pat.idx[cache_lvl]);
 	if (IS_ERR(ops)) {
@@ -3596,6 +3843,68 @@ void xe_vm_unlock(struct xe_vm *vm)
 }
 
 /**
+ * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an
+ * address range
+ * @vm: The VM
+ * @start: start address
+ * @end: end address
+ * @tile_mask: mask for which gt's issue tlb invalidation
+ *
+ * Issue a range based TLB invalidation for gt's in tilemask
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
+					  u64 end, u8 tile_mask)
+{
+	struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+	struct xe_tile *tile;
+	u32 fence_id = 0;
+	u8 id;
+	int err;
+
+	if (!tile_mask)
+		return 0;
+
+	for_each_tile(tile, vm->xe, id) {
+		if (tile_mask & BIT(id)) {
+			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+							  &fence[fence_id], true);
+
+			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
+							   &fence[fence_id],
+							   start,
+							   end,
+							   vm->usm.asid);
+			if (err)
+				goto wait;
+			++fence_id;
+
+			if (!tile->media_gt)
+				continue;
+
+			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+							  &fence[fence_id], true);
+
+			err = xe_gt_tlb_invalidation_range(tile->media_gt,
+							   &fence[fence_id],
+							   start,
+							   end,
+							   vm->usm.asid);
+			if (err)
+				goto wait;
+			++fence_id;
+		}
+	}
+
+wait:
+	for (id = 0; id < fence_id; ++id)
+		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
+	return err;
+}
+
+/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
@@ -3608,28 +3917,34 @@ void xe_vm_unlock(struct xe_vm *vm)
 int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
 	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_tile *tile;
-	struct xe_gt_tlb_invalidation_fence
-		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
-	u8 id;
-	u32 fence_id = 0;
+	u8 tile_mask = 0;
 	int ret = 0;
+	u8 id;
 
 	xe_assert(xe, !xe_vma_is_null(vma));
 	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
 	trace_xe_vma_invalidate(vma);
 
-	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	vm_dbg(&vm->xe->drm,
 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
 		xe_vma_start(vma), xe_vma_size(vma));
 
-	/* Check that we don't race with page-table updates */
+	/*
+	 * Check that we don't race with page-table updates, tile_invalidated
+	 * update is safe
+	 */
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
 		if (xe_vma_is_userptr(vma)) {
+			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
+				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
+					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
+
 			WARN_ON_ONCE(!mmu_interval_check_retry
 				     (&to_userptr_vma(vma)->userptr.notifier,
 				      to_userptr_vma(vma)->userptr.notifier_seq));
-			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
 							     DMA_RESV_USAGE_BOOKKEEP));
 
 		} else {
@@ -3637,39 +3952,17 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		}
 	}
 
-	for_each_tile(tile, xe, id) {
-		if (xe_pt_zap_ptes(tile, vma)) {
-			xe_device_wmb(xe);
-			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
-							  &fence[fence_id],
-							  true);
-
-			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
-							 &fence[fence_id], vma);
-			if (ret)
-				goto wait;
-			++fence_id;
-
-			if (!tile->media_gt)
-				continue;
+	for_each_tile(tile, xe, id)
+		if (xe_pt_zap_ptes(tile, vma))
+			tile_mask |= BIT(id);
 
-			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
-							  &fence[fence_id],
-							  true);
+	xe_device_wmb(xe);
 
-			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
-							 &fence[fence_id], vma);
-			if (ret)
-				goto wait;
-			++fence_id;
-		}
-	}
-
-wait:
-	for (id = 0; id < fence_id; ++id)
-		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+	ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma),
+						    xe_vma_end(vma), tile_mask);
 
-	vma->tile_invalidated = vma->tile_mask;
+	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
+	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
 
 	return ret;
 }
@@ -3846,6 +4139,9 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
 		}
 
 		drm_puts(p, "\n");
+
+		if (drm_coredump_printer_is_full(p))
+			return;
 	}
 }