summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_bo.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_bo.c')
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c773
1 files changed, 536 insertions, 237 deletions
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4d3b80ec906d..3f5391d416d4 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -13,7 +13,7 @@
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_tt.h>
-#include <drm/xe_drm.h>
+#include <uapi/drm/xe_drm.h>
#include "xe_device.h"
#include "xe_dma_buf.h"
@@ -22,9 +22,10 @@
#include "xe_gt.h"
#include "xe_map.h"
#include "xe_migrate.h"
+#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_res_cursor.h"
-#include "xe_trace.h"
+#include "xe_trace_bo.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
@@ -46,22 +47,26 @@ static const struct ttm_place sys_placement_flags = {
static struct ttm_placement sys_placement = {
.num_placement = 1,
.placement = &sys_placement_flags,
- .num_busy_placement = 1,
- .busy_placement = &sys_placement_flags,
};
-static const struct ttm_place tt_placement_flags = {
- .fpfn = 0,
- .lpfn = 0,
- .mem_type = XE_PL_TT,
- .flags = 0,
+static const struct ttm_place tt_placement_flags[] = {
+ {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = XE_PL_TT,
+ .flags = TTM_PL_FLAG_DESIRED,
+ },
+ {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = XE_PL_SYSTEM,
+ .flags = TTM_PL_FLAG_FALLBACK,
+ }
};
static struct ttm_placement tt_placement = {
- .num_placement = 1,
- .placement = &tt_placement_flags,
- .num_busy_placement = 1,
- .busy_placement = &sys_placement_flags,
+ .num_placement = 2,
+ .placement = tt_placement_flags,
};
bool mem_type_is_vram(u32 mem_type)
@@ -91,6 +96,20 @@ bool xe_bo_is_stolen(struct xe_bo *bo)
}
/**
+ * xe_bo_has_single_placement - check if BO is placed only in one memory location
+ * @bo: The BO
+ *
+ * This function checks whether a given BO is placed in only one memory location.
+ *
+ * Returns: true if the BO is placed in a single memory location, false otherwise.
+ *
+ */
+bool xe_bo_has_single_placement(struct xe_bo *bo)
+{
+ return bo->placement.num_placement == 1;
+}
+
+/**
* xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
* @bo: The BO
*
@@ -107,7 +126,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
static bool xe_bo_is_user(struct xe_bo *bo)
{
- return bo->flags & XE_BO_CREATE_USER_BIT;
+ return bo->flags & XE_BO_FLAG_USER;
}
static struct xe_migrate *
@@ -133,19 +152,25 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+ if (bo_flags & XE_BO_FLAG_SYSTEM) {
xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_TT,
};
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = XE_PL_TT;
}
}
+static bool force_contiguous(u32 bo_flags)
+{
+ /*
+ * For eviction / restore on suspend / resume objects pinned in VRAM
+ * must be contiguous, also only contiguous BOs support xe_bo_vmap.
+ */
+ return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+}
+
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
{
@@ -159,16 +184,11 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
xe_assert(xe, vram && vram->usable_size);
io_size = vram->io_size;
- /*
- * For eviction / restore on suspend / resume objects
- * pinned in VRAM must be contiguous
- */
- if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT))
+ if (force_contiguous(bo_flags))
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
if (io_size < vram->usable_size) {
- if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+ if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
place.fpfn = 0;
place.lpfn = io_size >> PAGE_SHIFT;
} else {
@@ -177,37 +197,26 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
}
places[*c] = place;
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = mem_type;
}
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo->props.preferred_gt == XE_GT1) {
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- } else {
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- }
+ if (bo_flags & XE_BO_FLAG_VRAM0)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+ if (bo_flags & XE_BO_FLAG_VRAM1)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
}
static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+ if (bo_flags & XE_BO_FLAG_STOLEN) {
xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_STOLEN,
- .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT) ?
+ .flags = force_contiguous(bo_flags) ?
TTM_PL_FLAG_CONTIGUOUS : 0,
};
*c += 1;
@@ -219,17 +228,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
{
u32 c = 0;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
-
- /* The order of placements should indicate preferred location */
-
- if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
- try_add_system(xe, bo, bo_flags, &c);
- try_add_vram(xe, bo, bo_flags, &c);
- } else {
- try_add_vram(xe, bo, bo_flags, &c);
- try_add_system(xe, bo, bo_flags, &c);
- }
+ try_add_vram(xe, bo, bo_flags, &c);
+ try_add_system(xe, bo, bo_flags, &c);
try_add_stolen(xe, bo, bo_flags, &c);
if (!c)
@@ -238,8 +238,6 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
bo->placement = (struct ttm_placement) {
.num_placement = c,
.placement = bo->placements,
- .num_busy_placement = c,
- .busy_placement = bo->placements,
};
return 0;
@@ -259,7 +257,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
/* Don't handle scatter gather BOs */
if (tbo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
@@ -289,6 +286,8 @@ struct xe_ttm_tt {
struct device *dev;
struct sg_table sgt;
struct sg_table *sg;
+ /** @purgeable: Whether the content of the pages of @ttm is purgeable. */
+ bool purgeable;
};
static int xe_tt_map_sg(struct ttm_tt *tt)
@@ -322,6 +321,18 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
return 0;
}
+static void xe_tt_unmap_sg(struct ttm_tt *tt)
+{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->sg) {
+ dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(xe_tt->sg);
+ xe_tt->sg = NULL;
+ }
+}
+
struct sg_table *xe_bo_sg(struct xe_bo *bo)
{
struct ttm_tt *tt = bo->ttm.ttm;
@@ -337,7 +348,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
- enum ttm_caching caching;
+ enum ttm_caching caching = ttm_cached;
int err;
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -351,26 +362,44 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
PAGE_SIZE);
- switch (bo->cpu_caching) {
- case DRM_XE_GEM_CPU_CACHING_WC:
- caching = ttm_write_combined;
- break;
- default:
- caching = ttm_cached;
- break;
- }
-
- WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
-
/*
- * Display scanout is always non-coherent with the CPU cache.
- *
- * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
- * require a CPU:WC mapping.
+ * DGFX system memory is always WB / ttm_cached, since
+ * other caching modes are only supported on x86. DGFX
+ * GPU system memory accesses are always coherent with the
+ * CPU.
*/
- if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
- (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
- caching = ttm_write_combined;
+ if (!IS_DGFX(xe)) {
+ switch (bo->cpu_caching) {
+ case DRM_XE_GEM_CPU_CACHING_WC:
+ caching = ttm_write_combined;
+ break;
+ default:
+ caching = ttm_cached;
+ break;
+ }
+
+ WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+ /*
+ * Display scanout is always non-coherent with the CPU cache.
+ *
+ * For Xe_LPG and beyond, PPGTT PTE lookups are also
+ * non-coherent and require a CPU:WC mapping.
+ */
+ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+ (xe->info.graphics_verx100 >= 1270 &&
+ bo->flags & XE_BO_FLAG_PAGETABLE))
+ caching = ttm_write_combined;
+ }
+
+ if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
+ /*
+ * Valid only for internally-created buffers only, for
+ * which cpu_caching is never initialized.
+ */
+ xe_assert(xe, bo->cpu_caching == 0);
+ caching = ttm_uncached;
+ }
err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
if (err) {
@@ -397,27 +426,15 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
if (err)
return err;
- /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */
- err = xe_tt_map_sg(tt);
- if (err)
- ttm_pool_free(&ttm_dev->pool, tt);
-
return err;
}
static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
- struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
-
if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
return;
- if (xe_tt->sg) {
- dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(xe_tt->sg);
- xe_tt->sg = NULL;
- }
+ xe_tt_unmap_sg(tt);
return ttm_pool_free(&ttm_dev->pool, tt);
}
@@ -428,6 +445,14 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
kfree(tt);
}
+static bool xe_ttm_resource_visible(struct ttm_resource *mem)
+{
+ struct xe_ttm_vram_mgr_resource *vres =
+ to_xe_ttm_vram_mgr_resource(mem);
+
+ return vres->used_visible_size == mem->size;
+}
+
static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
@@ -439,11 +464,9 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
return 0;
case XE_PL_VRAM0:
case XE_PL_VRAM1: {
- struct xe_ttm_vram_mgr_resource *vres =
- to_xe_ttm_vram_mgr_resource(mem);
struct xe_mem_region *vram = res_to_mem_region(mem);
- if (vres->used_visible_size < mem->size)
+ if (!xe_ttm_resource_visible(mem))
return -EINVAL;
mem->bus.offset = mem->start << PAGE_SHIFT;
@@ -456,7 +479,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += vram->io_start;
mem->bus.is_iomem = true;
-#if !defined(CONFIG_X86)
+#if !IS_ENABLED(CONFIG_X86)
mem->bus.caching = ttm_write_combined;
#endif
return 0;
@@ -594,6 +617,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
{
struct ttm_buffer_object *ttm_bo = &bo->ttm;
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct ttm_resource *old_mem = ttm_bo->resource;
+ u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
int ret;
/*
@@ -613,6 +638,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
dma_buf_move_notify(ttm_bo->base.dma_buf);
+ /*
+ * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
+ * so if we moved from VRAM make sure to unlink this from the userfault
+ * tracking.
+ */
+ if (mem_type_is_vram(old_mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (!list_empty(&bo->vram_userfault_link))
+ list_del_init(&bo->vram_userfault_link);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
+
return 0;
}
@@ -634,28 +671,38 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
ttm && ttm_tt_is_populated(ttm)) ? true : false;
int ret = 0;
+
/* Bo creation path, moving to system or TT. */
if ((!old_mem && ttm) && !handle_system_ccs) {
- ttm_bo_move_null(ttm_bo, new_mem);
- return 0;
+ if (new_mem->mem_type == XE_PL_TT)
+ ret = xe_tt_map_sg(ttm);
+ if (!ret)
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
}
if (ttm_bo->type == ttm_bo_type_sg) {
ret = xe_bo_move_notify(bo, ctx);
if (!ret)
ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
- goto out;
+ return ret;
}
tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
(ttm->page_flags & TTM_TT_FLAG_SWAPPED));
- move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
- (!mem_type_is_vram(old_mem_type) && !tt_has_data);
+ move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
+ (!mem_type_is_vram(old_mem_type) && !tt_has_data));
needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
(!ttm && ttm_bo->type == ttm_bo_type_device);
+ if (new_mem->mem_type == XE_PL_TT) {
+ ret = xe_tt_map_sg(ttm);
+ if (ret)
+ goto out;
+ }
+
if ((move_lacks_source && !needs_clear)) {
ttm_bo_move_null(ttm_bo, new_mem);
goto out;
@@ -686,7 +733,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
new_mem->mem_type == XE_PL_SYSTEM) {
long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP,
- true,
+ false,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) {
ret = timeout;
@@ -722,7 +769,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
xe_assert(xe, migrate);
trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
- xe_device_mem_access_get(xe);
+ if (xe_rpm_reclaim_safe(xe)) {
+ /*
+ * We might be called through swapout in the validation path of
+ * another TTM device, so acquire rpm here.
+ */
+ xe_pm_runtime_get(xe);
+ } else {
+ drm_WARN_ON(&xe->drm, handle_system_ccs);
+ xe_pm_runtime_get_noresume(xe);
+ }
if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
/*
@@ -730,7 +786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
* / resume, some of the pinned memory is required for the
* device to resume / use the GPU to move other evicted memory
* (user memory) around. This likely could be optimized a bit
- * futher where we find the minimum set of pinned memory
+ * further where we find the minimum set of pinned memory
* required for resume but for simplity doing a memcpy for all
* pinned memory.
*/
@@ -746,7 +802,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
ret = -EINVAL;
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
goto out;
}
@@ -757,14 +813,22 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
}
}
} else {
- if (move_lacks_source)
- fence = xe_migrate_clear(migrate, bo, new_mem);
+ if (move_lacks_source) {
+ u32 flags = 0;
+
+ if (mem_type_is_vram(new_mem->mem_type))
+ flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
+ else if (handle_system_ccs)
+ flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
+
+ fence = xe_migrate_clear(migrate, bo, new_mem, flags);
+ }
else
fence = xe_migrate_copy(migrate, bo, bo, old_mem,
new_mem, handle_system_ccs);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
goto out;
}
if (!move_lacks_source) {
@@ -789,19 +853,29 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
dma_fence_put(fence);
}
- xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
out:
- return ret;
+ if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
+ ttm_bo->ttm) {
+ long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+ DMA_RESV_USAGE_KERNEL,
+ false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ ret = timeout;
+
+ xe_tt_unmap_sg(ttm_bo->ttm);
+ }
+ return ret;
}
/**
* xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
* @bo: The buffer object to move.
*
- * On successful completion, the object memory will be moved to sytem memory.
- * This function blocks until the object has been fully moved.
+ * On successful completion, the object memory will be moved to system memory.
*
* This is needed to for special handling of pinned VRAM object during
* suspend-resume.
@@ -819,6 +893,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
};
struct ttm_operation_ctx ctx = {
.interruptible = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_resource *new_mem;
int ret;
@@ -831,8 +906,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (WARN_ON(!xe_bo_is_pinned(bo)))
return -EINVAL;
- if (WARN_ON(!xe_bo_is_vram(bo)))
- return -EINVAL;
+ if (!xe_bo_is_vram(bo))
+ return 0;
ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
if (ret)
@@ -846,7 +921,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
}
}
- ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ ret = ttm_bo_populate(&bo->ttm, &ctx);
if (ret)
goto err_res_free;
@@ -858,9 +933,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (ret)
goto err_res_free;
- dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
- false, MAX_SCHEDULE_TIMEOUT);
-
return 0;
err_res_free:
@@ -873,7 +945,6 @@ err_res_free:
* @bo: The buffer object to move.
*
* On successful completion, the object memory will be moved back to VRAM.
- * This function blocks until the object has been fully moved.
*
* This is needed to for special handling of pinned VRAM object during
* suspend-resume.
@@ -884,8 +955,10 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
{
struct ttm_operation_ctx ctx = {
.interruptible = false,
+ .gfp_retry_mayfail = false,
};
struct ttm_resource *new_mem;
+ struct ttm_place *place = &bo->placements[0];
int ret;
xe_bo_assert_held(bo);
@@ -896,14 +969,20 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (WARN_ON(!xe_bo_is_pinned(bo)))
return -EINVAL;
- if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
+ if (WARN_ON(xe_bo_is_vram(bo)))
+ return -EINVAL;
+
+ if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
return -EINVAL;
+ if (!mem_type_is_vram(place->mem_type))
+ return 0;
+
ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
if (ret)
return ret;
- ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ ret = ttm_bo_populate(&bo->ttm, &ctx);
if (ret)
goto err_res_free;
@@ -915,9 +994,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
goto err_res_free;
- dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
- false, MAX_SCHEDULE_TIMEOUT);
-
return 0;
err_res_free:
@@ -1034,7 +1110,81 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
}
}
-struct ttm_device_funcs xe_ttm_funcs = {
+static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
+{
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+ if (ttm_bo->ttm) {
+ struct ttm_placement place = {};
+ int ret = ttm_bo_validate(ttm_bo, &place, ctx);
+
+ drm_WARN_ON(&xe->drm, ret);
+ }
+}
+
+static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .gfp_retry_mayfail = false,
+ };
+
+ if (ttm_bo->ttm) {
+ struct xe_ttm_tt *xe_tt =
+ container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->purgeable)
+ xe_ttm_bo_purge(ttm_bo, &ctx);
+ }
+}
+
+static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
+ unsigned long offset, void *buf, int len,
+ int write)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct iosys_map vmap;
+ struct xe_res_cursor cursor;
+ struct xe_mem_region *vram;
+ int bytes_left = len;
+
+ xe_bo_assert_held(bo);
+ xe_device_assert_mem_access(xe);
+
+ if (!mem_type_is_vram(ttm_bo->resource->mem_type))
+ return -EIO;
+
+ /* FIXME: Use GPU for non-visible VRAM */
+ if (!xe_ttm_resource_visible(ttm_bo->resource))
+ return -EIO;
+
+ vram = res_to_mem_region(ttm_bo->resource);
+ xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
+ bo->size - (offset & PAGE_MASK), &cursor);
+
+ do {
+ unsigned long page_offset = (offset & ~PAGE_MASK);
+ int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
+
+ iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
+ cursor.start);
+ if (write)
+ xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
+ else
+ xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
+
+ buf += byte_count;
+ offset += byte_count;
+ bytes_left -= byte_count;
+ if (bytes_left)
+ xe_res_next(&cursor, PAGE_SIZE);
+ } while (bytes_left);
+
+ return len;
+}
+
+const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
@@ -1043,15 +1193,19 @@ struct ttm_device_funcs xe_ttm_funcs = {
.move = xe_bo_move,
.io_mem_reserve = xe_ttm_io_mem_reserve,
.io_mem_pfn = xe_ttm_io_mem_pfn,
+ .access_memory = xe_ttm_access_memory,
.release_notify = xe_ttm_bo_release_notify,
.eviction_valuable = ttm_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+ .swap_notify = xe_ttm_bo_swap_notify,
};
static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
{
struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct xe_tile *tile;
+ u8 id;
if (bo->ttm.base.import_attach)
drm_prime_gem_destroy(&bo->ttm.base, NULL);
@@ -1059,8 +1213,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
- if (bo->ggtt_node.size)
- xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+ for_each_tile(tile, xe, id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
+ xe_ggtt_remove_bo(tile->mem.ggtt, bo);
#ifdef CONFIG_PROC_FS
if (bo->client)
@@ -1070,6 +1225,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
if (bo->vm && xe_bo_is_user(bo))
xe_vm_put(bo->vm);
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (!list_empty(&bo->vram_userfault_link))
+ list_del(&bo->vram_userfault_link);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+
kfree(bo);
}
@@ -1106,48 +1266,89 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
}
}
-static bool should_migrate_to_system(struct xe_bo *bo)
-{
- struct xe_device *xe = xe_bo_device(bo);
-
- return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
-}
-
static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
struct drm_device *ddev = tbo->base.dev;
+ struct xe_device *xe = to_xe_device(ddev);
+ struct xe_bo *bo = ttm_to_xe_bo(tbo);
+ bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
vm_fault_t ret;
- int idx, r = 0;
+ int idx;
+
+ if (needs_rpm)
+ xe_pm_runtime_get(xe);
ret = ttm_bo_vm_reserve(tbo, vmf);
if (ret)
- return ret;
+ goto out;
if (drm_dev_enter(ddev, &idx)) {
- struct xe_bo *bo = ttm_to_xe_bo(tbo);
-
trace_xe_bo_cpu_fault(bo);
- if (should_migrate_to_system(bo)) {
- r = xe_bo_migrate(bo, XE_PL_TT);
- if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
- ret = VM_FAULT_NOPAGE;
- else if (r)
- ret = VM_FAULT_SIGBUS;
- }
- if (!ret)
- ret = ttm_bo_vm_fault_reserved(vmf,
- vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
}
+
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
- return ret;
+ goto out;
+ /*
+ * ttm_bo_vm_reserve() already has dma_resv_lock.
+ */
+ if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (list_empty(&bo->vram_userfault_link))
+ list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
dma_resv_unlock(tbo->base.resv);
+out:
+ if (needs_rpm)
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = xe_bo_device(bo);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = ttm_bo_vm_access(vma, addr, buf, len, write);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+/**
+ * xe_bo_read() - Read from an xe_bo
+ * @bo: The buffer object to read from.
+ * @offset: The byte offset to start reading from.
+ * @dst: Location to store the read.
+ * @size: Size in bytes for the read.
+ *
+ * Read @size bytes from the @bo, starting from @offset, storing into @dst.
+ *
+ * Return: Zero on success, or negative error.
+ */
+int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
+{
+ int ret;
+
+ ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
+ if (ret >= 0 && ret != size)
+ ret = -EIO;
+ else if (ret == size)
+ ret = 0;
+
return ret;
}
@@ -1155,7 +1356,7 @@ static const struct vm_operations_struct xe_gem_vm_ops = {
.fault = xe_gem_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
- .access = ttm_bo_vm_access
+ .access = xe_bo_vm_access,
};
static const struct drm_gem_object_funcs xe_gem_object_funcs = {
@@ -1169,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = {
/**
* xe_bo_alloc - Allocate storage for a struct xe_bo
*
- * This funcition is intended to allocate storage to be used for input
+ * This function is intended to allocate storage to be used for input
* to __xe_bo_create_locked(), in the case a pointer to the bo to be
* created is needed before the call to __xe_bo_create_locked().
* If __xe_bo_create_locked ends up never to be called, then the
@@ -1209,6 +1410,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement *placement;
uint32_t alignment;
@@ -1223,18 +1425,24 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
return ERR_PTR(-EINVAL);
}
- if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
- !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
- xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
- aligned_size = ALIGN(size, SZ_64K);
- if (type != ttm_bo_type_device)
- size = ALIGN(size, SZ_64K);
- flags |= XE_BO_INTERNAL_64K;
- alignment = SZ_64K >> PAGE_SHIFT;
+ /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
+ if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+ return ERR_PTR(-EINVAL);
+
+ if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
+ !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
+ ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
+ (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
+ size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
+ aligned_size = ALIGN(size, align);
+ if (type != ttm_bo_type_device)
+ size = ALIGN(size, align);
+ flags |= XE_BO_FLAG_INTERNAL_64K;
+ alignment = align >> PAGE_SHIFT;
} else {
aligned_size = ALIGN(size, SZ_4K);
- flags &= ~XE_BO_INTERNAL_64K;
+ flags &= ~XE_BO_FLAG_INTERNAL_64K;
alignment = SZ_4K >> PAGE_SHIFT;
}
@@ -1253,23 +1461,21 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
bo->flags = flags;
bo->cpu_caching = cpu_caching;
bo->ttm.base.funcs = &xe_gem_object_funcs;
- bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
- bo->props.preferred_gt = XE_BO_PROPS_INVALID;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
INIT_LIST_HEAD(&bo->pinned_link);
#ifdef CONFIG_PROC_FS
INIT_LIST_HEAD(&bo->client_link);
#endif
+ INIT_LIST_HEAD(&bo->vram_userfault_link);
drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
if (resv) {
- ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
+ ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
ctx.resv = resv;
}
- if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
+ if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
if (WARN_ON(err)) {
xe_ttm_bo_destroy(&bo->ttm);
@@ -1279,7 +1485,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
/* Defer populating type_sg bos */
placement = (type == ttm_bo_type_sg ||
- bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+ bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
&bo->placement;
err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
placement, alignment,
@@ -1334,21 +1540,21 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
{
struct ttm_place *place = bo->placements;
- if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+ if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
return -EINVAL;
place->flags = TTM_PL_FLAG_CONTIGUOUS;
place->fpfn = start >> PAGE_SHIFT;
place->lpfn = end >> PAGE_SHIFT;
- switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
- case XE_BO_CREATE_VRAM0_BIT:
+ switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
+ case XE_BO_FLAG_VRAM0:
place->mem_type = XE_PL_VRAM0;
break;
- case XE_BO_CREATE_VRAM1_BIT:
+ case XE_BO_FLAG_VRAM1:
place->mem_type = XE_PL_VRAM1;
break;
- case XE_BO_CREATE_STOLEN_BIT:
+ case XE_BO_FLAG_STOLEN:
place->mem_type = XE_PL_STOLEN;
break;
@@ -1360,8 +1566,6 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
bo->placement = (struct ttm_placement) {
.num_placement = 1,
.placement = place,
- .num_busy_placement = 1,
- .busy_placement = place,
};
return 0;
@@ -1371,7 +1575,8 @@ static struct xe_bo *
__xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+ u16 cpu_caching, enum ttm_bo_type type, u32 flags,
+ u64 alignment)
{
struct xe_bo *bo = NULL;
int err;
@@ -1384,7 +1589,7 @@ __xe_bo_create_locked(struct xe_device *xe,
if (IS_ERR(bo))
return bo;
- flags |= XE_BO_FIXED_PLACEMENT_BIT;
+ flags |= XE_BO_FLAG_FIXED_PLACEMENT;
err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
if (err) {
xe_bo_free(bo);
@@ -1394,12 +1599,14 @@ __xe_bo_create_locked(struct xe_device *xe,
bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
vm && !xe_vm_in_fault_mode(vm) &&
- flags & XE_BO_CREATE_USER_BIT ?
+ flags & XE_BO_FLAG_USER ?
&vm->lru_bulk_move : NULL, size,
cpu_caching, type, flags);
if (IS_ERR(bo))
return bo;
+ bo->min_align = alignment;
+
/*
* Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
* to ensure the shared resv doesn't disappear under the bo, the bo
@@ -1411,20 +1618,30 @@ __xe_bo_create_locked(struct xe_device *xe,
xe_vm_get(vm);
bo->vm = vm;
- if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
- if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
- tile = xe_device_get_root_tile(xe);
+ if (bo->flags & XE_BO_FLAG_GGTT) {
+ struct xe_tile *t;
+ u8 id;
- xe_assert(xe, tile);
+ if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
+ if (!tile && flags & XE_BO_FLAG_STOLEN)
+ tile = xe_device_get_root_tile(xe);
- if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
- err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
- start + bo->size, U64_MAX);
- } else {
- err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
+ xe_assert(xe, tile);
+ }
+
+ for_each_tile(t, xe, id) {
+ if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
+ continue;
+
+ if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
+ err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
+ start + bo->size, U64_MAX);
+ } else {
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ }
+ if (err)
+ goto err_unlock_put_bo;
}
- if (err)
- goto err_unlock_put_bo;
}
return bo;
@@ -1440,27 +1657,28 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags, u64 alignment)
{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags, alignment);
}
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags)
{
- return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
+ flags, 0);
}
struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
u16 cpu_caching,
- enum ttm_bo_type type,
u32 flags)
{
struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
- cpu_caching, type,
- flags | XE_BO_CREATE_USER_BIT);
+ cpu_caching, ttm_bo_type_device,
+ flags | XE_BO_FLAG_USER, 0);
if (!IS_ERR(bo))
xe_bo_unlock_vm_held(bo);
@@ -1484,17 +1702,29 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
size_t size, u64 offset,
enum ttm_bo_type type, u32 flags)
{
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
+ type, flags, 0);
+}
+
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment)
+{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
u64 end = offset == ~0ull ? offset : start + size;
- if (flags & XE_BO_CREATE_STOLEN_BIT &&
+ if (flags & XE_BO_FLAG_STOLEN &&
xe_ttm_stolen_cpu_access_needs_ggtt(xe))
- flags |= XE_BO_CREATE_GGTT_BIT;
+ flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_NEEDS_CPU_ACCESS);
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
+ alignment);
if (IS_ERR(bo))
return bo;
@@ -1540,7 +1770,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
return bo;
}
-static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
+static void __xe_bo_unpin_map_no_vm(void *arg)
{
xe_bo_unpin_map_no_vm(arg);
}
@@ -1555,7 +1785,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
if (IS_ERR(bo))
return bo;
- ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
+ ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
if (ret)
return ERR_PTR(ret);
@@ -1575,6 +1805,40 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til
return bo;
}
+/**
+ * xe_managed_bo_reinit_in_vram
+ * @xe: xe device
+ * @tile: Tile where the new buffer will be created
+ * @src: Managed buffer object allocated in system memory
+ *
+ * Replace a managed src buffer object allocated in system memory with a new
+ * one allocated in vram, copying the data between them.
+ * Buffer object in VRAM is not going to have the same GGTT address, the caller
+ * is responsible for making sure that any old references to it are updated.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
+{
+ struct xe_bo *bo;
+ u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
+
+ dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
+
+ xe_assert(xe, IS_DGFX(xe));
+ xe_assert(xe, !(*src)->vmap.is_iomem);
+
+ bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
+ (*src)->size, dst_flags);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
+ *src = bo;
+
+ return 0;
+}
+
/*
* XXX: This is in the VM bind data path, likely should calculate this once and
* store, with a recalculation if the BO is moved.
@@ -1633,6 +1897,7 @@ int xe_bo_pin_external(struct xe_bo *bo)
int xe_bo_pin(struct xe_bo *bo)
{
+ struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -1640,8 +1905,8 @@ int xe_bo_pin(struct xe_bo *bo)
xe_assert(xe, !xe_bo_is_user(bo));
/* Pinned object must be in GGTT or have pinned flag */
- xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT));
+ xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_GGTT));
/*
* No reason we can't support pinning imported dma-bufs we just don't
@@ -1662,22 +1927,22 @@ int xe_bo_pin(struct xe_bo *bo)
* during suspend / resume (force restore to same physical address).
*/
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
- bo->flags & XE_BO_INTERNAL_TEST)) {
- struct ttm_place *place = &(bo->placements[0]);
-
+ bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
if (mem_type_is_vram(place->mem_type)) {
xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
-
- spin_lock(&xe->pinned.lock);
- list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
- spin_unlock(&xe->pinned.lock);
}
}
+ if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ spin_unlock(&xe->pinned.lock);
+ }
+
ttm_bo_pin(&bo->ttm);
/*
@@ -1707,11 +1972,10 @@ void xe_bo_unpin_external(struct xe_bo *bo)
xe_assert(xe, xe_bo_is_pinned(bo));
xe_assert(xe, xe_bo_is_user(bo));
- if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
- spin_lock(&xe->pinned.lock);
+ spin_lock(&xe->pinned.lock);
+ if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
list_del_init(&bo->pinned_link);
- spin_unlock(&xe->pinned.lock);
- }
+ spin_unlock(&xe->pinned.lock);
ttm_bo_unpin(&bo->ttm);
@@ -1724,24 +1988,18 @@ void xe_bo_unpin_external(struct xe_bo *bo)
void xe_bo_unpin(struct xe_bo *bo)
{
+ struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
xe_assert(xe, !bo->ttm.base.import_attach);
xe_assert(xe, xe_bo_is_pinned(bo));
- if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
- bo->flags & XE_BO_INTERNAL_TEST)) {
- struct ttm_place *place = &(bo->placements[0]);
-
- if (mem_type_is_vram(place->mem_type)) {
- xe_assert(xe, !list_empty(&bo->pinned_link));
-
- spin_lock(&xe->pinned.lock);
- list_del_init(&bo->pinned_link);
- spin_unlock(&xe->pinned.lock);
- }
+ if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
+ spin_lock(&xe->pinned.lock);
+ xe_assert(xe, !list_empty(&bo->pinned_link));
+ list_del_init(&bo->pinned_link);
+ spin_unlock(&xe->pinned.lock);
}
-
ttm_bo_unpin(&bo->ttm);
}
@@ -1766,6 +2024,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
if (vm) {
@@ -1776,6 +2035,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
ctx.resv = xe_vm_resv(vm);
}
+ trace_xe_bo_validate(bo);
return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
}
@@ -1827,13 +2087,15 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
int xe_bo_vmap(struct xe_bo *bo)
{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
void *virtual;
bool is_iomem;
int ret;
xe_bo_assert_held(bo);
- if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
+ !force_contiguous(bo->flags)))
return -EINVAL;
if (!iosys_map_is_null(&bo->vmap))
@@ -1915,29 +2177,36 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
bo_flags = 0;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
- bo_flags |= XE_BO_DEFER_BACKING;
+ bo_flags |= XE_BO_FLAG_DEFER_BACKING;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
- bo_flags |= XE_BO_SCANOUT_BIT;
+ bo_flags |= XE_BO_FLAG_SCANOUT;
- bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+ bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
+
+ /* CCS formats need physical placement at a 64K alignment in VRAM. */
+ if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
+ (bo_flags & XE_BO_FLAG_SCANOUT) &&
+ !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
+ IS_ALIGNED(args->size, SZ_64K))
+ bo_flags |= XE_BO_FLAG_NEEDS_64K;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
- if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+ if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
return -EINVAL;
- bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+ bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
}
if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+ if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+ if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
return -EINVAL;
@@ -1951,7 +2220,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
}
bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
- ttm_bo_type_device, bo_flags);
+ bo_flags);
if (vm)
xe_vm_unlock(vm);
@@ -2102,6 +2371,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement placement;
struct ttm_place requested;
@@ -2119,9 +2389,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
xe_place_from_ttm_type(mem_type, &requested);
placement.num_placement = 1;
- placement.num_busy_placement = 1;
placement.placement = &requested;
- placement.busy_placement = &requested;
/*
* Stolen needs to be handled like below VRAM handling if we ever need
@@ -2144,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
* @force_alloc: Set force_alloc in ttm_operation_ctx
*
* On successful completion, the object memory will be moved to evict
- * placement. Ths function blocks until the object has been fully moved.
+ * placement. This function blocks until the object has been fully moved.
*
* Return: 0 on success. Negative error code on failure.
*/
@@ -2154,6 +2422,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
.interruptible = false,
.no_wait_gpu = false,
.force_alloc = force_alloc,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement placement;
int ret;
@@ -2180,6 +2449,9 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
+ if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
+ return false;
+
if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
return false;
@@ -2188,7 +2460,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
* can't be used since there's no CCS storage associated with
* non-VRAM addresses.
*/
- if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+ if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
return false;
return true;
@@ -2228,6 +2500,24 @@ void xe_bo_put_commit(struct llist_head *deferred)
drm_gem_object_free(&bo->ttm.base.refcount);
}
+void xe_bo_put(struct xe_bo *bo)
+{
+ struct xe_tile *tile;
+ u8 id;
+
+ might_sleep();
+ if (bo) {
+#ifdef CONFIG_PROC_FS
+ if (bo->client)
+ might_lock(&bo->client->bos_lock);
+#endif
+ for_each_tile(tile, xe_bo_device(bo), id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
+ might_lock(&bo->ggtt_node[id]->ggtt->lock);
+ drm_gem_object_put(&bo->ttm.base);
+ }
+}
+
/**
* xe_bo_dumb_create - Create a dumb bo as backing for a fb
* @file_priv: ...
@@ -2256,10 +2546,9 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
bo = xe_bo_create_user(xe, NULL, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device,
- XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
- XE_BO_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -2271,6 +2560,16 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
return err;
}
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ struct ttm_device *bdev = tbo->bdev;
+
+ drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
+
+ list_del_init(&bo->vram_userfault_link);
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_bo.c"
#endif