summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau/nouveau_bo.c
diff options
context:
space:
mode:
authorDanilo Krummrich <dakr@redhat.com>2023-08-04 20:23:51 +0200
committerDanilo Krummrich <dakr@redhat.com>2023-08-04 20:34:41 +0200
commitb88baab828713ce0b49b185444b2ee83bed373a8 (patch)
tree248867f189fd3f2c05c43e501e52fb94beac36c2 /drivers/gpu/drm/nouveau/nouveau_bo.c
parent6b252cf42281045a9f803d2198023500cfa6ebd2 (diff)
drm/nouveau: implement new VM_BIND uAPI
This commit provides the implementation for the new uapi motivated by the Vulkan API. It allows user mode drivers (UMDs) to: 1) Initialize a GPU virtual address (VA) space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA space managed by the kernel and userspace, respectively. 2) Allocate and free a VA space region as well as bind and unbind memory to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. UMDs can request the named operations to be processed either synchronously or asynchronously. It supports DRM syncobjs (incl. timelines) as synchronization mechanism. The management of the GPU VA mappings is implemented with the DRM GPU VA manager. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The execution happens asynchronously. It supports DRM syncobj (incl. timelines) as synchronization mechanism. DRM GEM object locking is handled with drm_exec. Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, use the DRM GPU scheduler for the asynchronous paths. Reviewed-by: Dave Airlie <airlied@redhat.com> Signed-off-by: Danilo Krummrich <dakr@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230804182406.5222-12-dakr@redhat.com
Diffstat (limited to 'drivers/gpu/drm/nouveau/nouveau_bo.c')
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c159
1 files changed, 102 insertions, 57 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index e38e448d9632..19cab37ac69c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -199,7 +199,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, u64 *size)
struct nouveau_bo *
nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
- u32 tile_mode, u32 tile_flags)
+ u32 tile_mode, u32 tile_flags, bool internal)
{
struct nouveau_drm *drm = cli->drm;
struct nouveau_bo *nvbo;
@@ -233,68 +233,103 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
nvbo->force_coherent = true;
}
- if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
- nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
- if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
- kfree(nvbo);
- return ERR_PTR(-EINVAL);
+ nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
+ if (!nouveau_cli_uvmm(cli) || internal) {
+ /* for BO noVM allocs, don't assign kinds */
+ if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
+ nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
+ if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+ kfree(nvbo);
+ return ERR_PTR(-EINVAL);
+ }
+
+ nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
+ } else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+ nvbo->kind = (tile_flags & 0x00007f00) >> 8;
+ nvbo->comp = (tile_flags & 0x00030000) >> 16;
+ if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+ kfree(nvbo);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ nvbo->zeta = (tile_flags & 0x00000007);
}
+ nvbo->mode = tile_mode;
+
+ /* Determine the desirable target GPU page size for the buffer. */
+ for (i = 0; i < vmm->page_nr; i++) {
+ /* Because we cannot currently allow VMM maps to fail
+ * during buffer migration, we need to determine page
+ * size for the buffer up-front, and pre-allocate its
+ * page tables.
+ *
+ * Skip page sizes that can't support needed domains.
+ */
+ if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
+ (domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
+ continue;
+ if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
+ (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
+ continue;
- nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
- } else
- if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
- nvbo->kind = (tile_flags & 0x00007f00) >> 8;
- nvbo->comp = (tile_flags & 0x00030000) >> 16;
- if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+ /* Select this page size if it's the first that supports
+ * the potential memory domains, or when it's compatible
+ * with the requested compression settings.
+ */
+ if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
+ pi = i;
+
+ /* Stop once the buffer is larger than the current page size. */
+ if (*size >= 1ULL << vmm->page[i].shift)
+ break;
+ }
+
+ if (WARN_ON(pi < 0)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
}
- } else {
- nvbo->zeta = (tile_flags & 0x00000007);
- }
- nvbo->mode = tile_mode;
- nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
-
- /* Determine the desirable target GPU page size for the buffer. */
- for (i = 0; i < vmm->page_nr; i++) {
- /* Because we cannot currently allow VMM maps to fail
- * during buffer migration, we need to determine page
- * size for the buffer up-front, and pre-allocate its
- * page tables.
- *
- * Skip page sizes that can't support needed domains.
- */
- if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
- (domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
- continue;
- if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
- (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
- continue;
-
- /* Select this page size if it's the first that supports
- * the potential memory domains, or when it's compatible
- * with the requested compression settings.
- */
- if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
- pi = i;
- /* Stop once the buffer is larger than the current page size. */
- if (*size >= 1ULL << vmm->page[i].shift)
- break;
- }
+ /* Disable compression if suitable settings couldn't be found. */
+ if (nvbo->comp && !vmm->page[pi].comp) {
+ if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
+ nvbo->kind = mmu->kind[nvbo->kind];
+ nvbo->comp = 0;
+ }
+ nvbo->page = vmm->page[pi].shift;
+ } else {
+ /* reject other tile flags when in VM mode. */
+ if (tile_mode)
+ return ERR_PTR(-EINVAL);
+ if (tile_flags & ~NOUVEAU_GEM_TILE_NONCONTIG)
+ return ERR_PTR(-EINVAL);
- if (WARN_ON(pi < 0)) {
- kfree(nvbo);
- return ERR_PTR(-EINVAL);
- }
+ /* Determine the desirable target GPU page size for the buffer. */
+ for (i = 0; i < vmm->page_nr; i++) {
+ /* Because we cannot currently allow VMM maps to fail
+ * during buffer migration, we need to determine page
+ * size for the buffer up-front, and pre-allocate its
+ * page tables.
+ *
+ * Skip page sizes that can't support needed domains.
+ */
+ if ((domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
+ continue;
+ if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
+ (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
+ continue;
- /* Disable compression if suitable settings couldn't be found. */
- if (nvbo->comp && !vmm->page[pi].comp) {
- if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
- nvbo->kind = mmu->kind[nvbo->kind];
- nvbo->comp = 0;
+ if (pi < 0)
+ pi = i;
+ /* Stop once the buffer is larger than the current page size. */
+ if (*size >= 1ULL << vmm->page[i].shift)
+ break;
+ }
+ if (WARN_ON(pi < 0)) {
+ kfree(nvbo);
+ return ERR_PTR(-EINVAL);
+ }
+ nvbo->page = vmm->page[pi].shift;
}
- nvbo->page = vmm->page[pi].shift;
nouveau_bo_fixup_align(nvbo, align, size);
@@ -307,18 +342,26 @@ nouveau_bo_init(struct nouveau_bo *nvbo, u64 size, int align, u32 domain,
{
int type = sg ? ttm_bo_type_sg : ttm_bo_type_device;
int ret;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false,
+ .resv = robj,
+ };
nouveau_bo_placement_set(nvbo, domain, 0);
INIT_LIST_HEAD(&nvbo->io_reserve_lru);
- ret = ttm_bo_init_validate(nvbo->bo.bdev, &nvbo->bo, type,
- &nvbo->placement, align >> PAGE_SHIFT, false,
+ ret = ttm_bo_init_reserved(nvbo->bo.bdev, &nvbo->bo, type,
+ &nvbo->placement, align >> PAGE_SHIFT, &ctx,
sg, robj, nouveau_bo_del_ttm);
if (ret) {
/* ttm will call nouveau_bo_del_ttm if it fails.. */
return ret;
}
+ if (!robj)
+ ttm_bo_unreserve(&nvbo->bo);
+
return 0;
}
@@ -332,7 +375,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
int ret;
nvbo = nouveau_bo_alloc(cli, &size, &align, domain, tile_mode,
- tile_flags);
+ tile_flags, true);
if (IS_ERR(nvbo))
return PTR_ERR(nvbo);
@@ -951,6 +994,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
list_for_each_entry(vma, &nvbo->vma_list, head) {
nouveau_vma_map(vma, mem);
}
+ nouveau_uvmm_bo_map_all(nvbo, mem);
} else {
list_for_each_entry(vma, &nvbo->vma_list, head) {
ret = dma_resv_wait_timeout(bo->base.resv,
@@ -959,6 +1003,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
WARN_ON(ret <= 0);
nouveau_vma_unmap(vma);
}
+ nouveau_uvmm_bo_unmap_all(nvbo);
}
if (new_reg)