summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c881
1 files changed, 599 insertions, 282 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5ef7f7bdc15..2b931e855abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -38,7 +38,6 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
-#include <linux/swiotlb.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>
#include <linux/module.h>
@@ -50,7 +49,6 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/amdgpu_drm.h>
-#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
@@ -63,9 +61,9 @@
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
-#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
+#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_tt *ttm,
@@ -104,23 +102,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
/* Object isn't an AMDGPU object so ignore */
if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
- placement->busy_placement = &placements;
placement->num_placement = 1;
- placement->num_busy_placement = 1;
return;
}
abo = ttm_to_amdgpu_bo(bo);
if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
@@ -128,17 +122,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -150,8 +146,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
AMDGPU_GEM_DOMAIN_CPU);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
- abo->placement.busy_placement = &abo->placements[1];
- abo->placement.num_busy_placement = 1;
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
} else {
/* Move to GTT memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
@@ -184,16 +179,15 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
- unsigned window, struct amdgpu_ring *ring,
+ unsigned int window, struct amdgpu_ring *ring,
bool tmz, uint64_t *size, uint64_t *addr)
{
struct amdgpu_device *adev = ring->adev;
- unsigned offset, num_pages, num_dw, num_bytes;
+ unsigned int offset, num_pages, num_dw, num_bytes;
uint64_t src_addr, dst_addr;
struct amdgpu_job *job;
void *cpu_addr;
uint64_t flags;
- unsigned int i;
int r;
BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
@@ -229,10 +223,11 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER);
if (r)
return r;
@@ -242,7 +237,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -259,16 +254,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
} else {
- dma_addr_t dma_address;
-
- dma_address = mm_cur->start;
- dma_address += adev->vm_manager.vram_base_offset;
+ u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset;
- for (i = 0; i < num_pages; ++i) {
- amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
- flags, cpu_addr);
- dma_address += PAGE_SIZE;
- }
+ amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
}
dma_fence_put(amdgpu_job_submit(job));
@@ -290,20 +278,24 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
* move and different for a BO to BO copy.
*
*/
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f)
+__attribute__((nonnull))
+static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+ struct amdgpu_bo *abo_src, *abo_dst;
if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -312,7 +304,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
mutex_lock(&adev->mman.gtt_window_lock);
while (src_mm.remaining) {
- uint64_t from, to, cur_size;
+ uint64_t from, to, cur_size, tiling_flags;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
struct dma_fence *next;
/* Never copy more than 256MiB at once to avoid a timeout */
@@ -329,8 +322,31 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ abo_src = ttm_to_amdgpu_bo(src->bo);
+ abo_dst = ttm_to_amdgpu_bo(dst->bo);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+ if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+ copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+ if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (dst->mem->mem_type == TTM_PL_VRAM)) {
+ copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+ amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+ max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+ num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+ data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
+ copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+ AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
+ }
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
@@ -342,9 +358,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
}
error:
mutex_unlock(&adev->mman.gtt_window_lock);
- if (f)
- *f = dma_fence_get(fence);
- dma_fence_put(fence);
+ *f = fence;
return r;
}
@@ -384,10 +398,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
@@ -408,40 +424,56 @@ error:
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
- u64 end;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
- end = cursor.start + cursor.size;
+ amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
+ if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+ return false;
amdgpu_res_next(&cursor, cursor.size);
+ }
- if (!cursor.remaining)
- break;
+ return true;
+}
- /* ttm_resource_ioremap only supports contiguous memory */
- if (end != cursor.start)
- return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
- end = cursor.start + cursor.size;
- }
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
- return end <= adev->gmc.visible_vram_size;
+ return true;
}
/*
@@ -466,23 +498,21 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
- /* Can't move a pinned BO */
abo = ttm_to_amdgpu_bo(bo);
- if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
- return -EINVAL;
-
adev = amdgpu_ttm_adev(bo->bdev);
if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
bo->ttm == NULL)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == TTM_PL_SYSTEM &&
(new_mem->mem_type == TTM_PL_TT ||
new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if ((old_mem->mem_type == TTM_PL_TT ||
old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
@@ -492,20 +522,26 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
+ old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
- new_mem->mem_type == AMDGPU_PL_OA) {
+ new_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
/* Nothing to save here */
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (bo->type == ttm_bo_type_device &&
@@ -517,27 +553,28 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
}
- if (adev->mman.buffer_funcs_enabled) {
- if (((old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) ||
- (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM))) {
- hop->fpfn = 0;
- hop->lpfn = 0;
- hop->mem_type = TTM_PL_TT;
- hop->flags = TTM_PL_FLAG_TEMPORARY;
- return -EMULTIHOP;
- }
+ if (adev->mman.buffer_funcs_enabled &&
+ ((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ if (adev->mman.buffer_funcs_enabled)
r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
- } else {
+ else
r = -ENODEV;
- }
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
@@ -547,10 +584,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
-out:
- /* update statistics */
+ /* update statistics after the move */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
atomic64_add(bo->base.size, &adev->num_bytes_moved);
- amdgpu_bo_move_notify(bo, evict, new_mem);
return 0;
}
@@ -563,7 +600,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->size;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
@@ -574,9 +610,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
@@ -586,6 +619,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += adev->gmc.aper_base;
mem->bus.is_iomem = true;
break;
+ case AMDGPU_PL_DOORBELL:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->doorbell.base;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
default:
return -EINVAL;
}
@@ -600,6 +645,12 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
&cursor);
+
+ if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
+ return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
+
return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}
@@ -635,6 +686,7 @@ struct amdgpu_ttm_tt {
struct task_struct *usertask;
uint32_t userflags;
bool bound;
+ int32_t pool_id;
};
#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
@@ -645,10 +697,11 @@ struct amdgpu_ttm_tt {
* memory and start HMM tracking CPU page table update
*
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
- * once afterwards to stop HMM tracking
+ * once afterwards to stop HMM tracking. Its the caller responsibility to ensure
+ * that range is a valid memory and it is freed too.
*/
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
- struct hmm_range **range)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
@@ -658,9 +711,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
bool readonly;
int r = 0;
- /* Make sure get_user_pages_done() can cleanup gracefully */
- *range = NULL;
-
mm = bo->notifier.mm;
if (unlikely(!mm)) {
DRM_DEBUG_DRIVER("BO is not registered?\n");
@@ -684,7 +734,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
readonly = amdgpu_ttm_tt_is_readonly(ttm);
r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
- readonly, NULL, pages, range);
+ readonly, NULL, range);
out_unlock:
mmap_read_unlock(mm);
if (r)
@@ -695,38 +745,6 @@ out_unlock:
return r;
}
-/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
- */
-void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
- if (gtt && gtt->userptr && range)
- amdgpu_hmm_range_get_pages_done(range);
-}
-
-/*
- * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
- * Check if the pages backing this ttm range have been invalidated
- *
- * Returns: true if pages are still valid
- */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
-
- if (!gtt || !gtt->userptr || !range)
- return false;
-
- DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
- gtt->userptr, ttm->num_pages);
-
- WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
-
- return !amdgpu_hmm_range_get_pages_done(range);
-}
#endif
/*
@@ -736,12 +754,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = pages ? pages[i] : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL;
}
/*
@@ -769,7 +787,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
/* Map SG to device */
r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
if (r)
- goto release_sg;
+ goto release_sg_table;
/* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
@@ -777,6 +795,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
return 0;
+release_sg_table:
+ sg_free_table(ttm->sg);
release_sg:
kfree(ttm->sg);
ttm->sg = NULL;
@@ -804,6 +824,43 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
sg_free_table(ttm->sg);
}
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+ struct ttm_tt *ttm, uint64_t flags)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint64_t total_pages = ttm->num_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ int i;
+ uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ /* MQD page: use default flags */
+ amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ 1, &gtt->ttm.dma_address[page_idx], flags);
+ /*
+ * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+ * the second page of the BO onward.
+ */
+ amdgpu_gart_bind(adev,
+ gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+ pages_per_xcc - 1,
+ &gtt->ttm.dma_address[page_idx + 1],
+ ctrl_flags);
+ }
+}
+
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
@@ -816,25 +873,12 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
- uint64_t page_idx = 1;
-
- amdgpu_gart_bind(adev, gtt->offset, page_idx,
- gtt->ttm.dma_address, flags);
-
- /* The memory type of the first page defaults to UC. Now
- * modify the memory type to NC from the second page of
- * the BO onward.
- */
- flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
- amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
- ttm->num_pages - page_idx,
- &(gtt->ttm.dma_address[page_idx]), flags);
+ amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
} else {
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags);
}
+ gtt->bound = true;
}
/*
@@ -861,7 +905,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
if (gtt->userptr) {
r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
if (r) {
- DRM_ERROR("failed to pin userptr\n");
+ dev_err(adev->dev, "failed to pin userptr\n");
return r;
}
} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
@@ -926,16 +970,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
addr = amdgpu_gmc_agp_addr(bo);
- if (addr != AMDGPU_BO_INVALID_OFFSET) {
- bo->resource->start = addr >> PAGE_SHIFT;
+ if (addr != AMDGPU_BO_INVALID_OFFSET)
return 0;
- }
/* allocate GART space */
placement.num_placement = 1;
placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.mem_type = TTM_PL_TT;
@@ -991,7 +1031,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr) {
amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
- } else if (ttm->sg && gtt->gobj->import_attach) {
+ } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) {
struct dma_buf_attachment *attach;
attach = gtt->gobj->import_attach;
@@ -1033,15 +1073,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_ttm_tt *gtt;
enum ttm_caching caching;
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
- if (gtt == NULL) {
+ if (!gtt)
return NULL;
- }
+
gtt->gobj = &bo->base;
+ if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+ gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+ else
+ gtt->pool_id = abo->xcp_id;
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
caching = ttm_write_combined;
@@ -1068,6 +1113,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct ttm_pool *pool;
pgoff_t i;
int ret;
@@ -1082,7 +1128,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
return 0;
- ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+ ret = ttm_pool_alloc(pool, ttm, ctx);
if (ret)
return ret;
@@ -1103,6 +1153,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
{
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
+ struct ttm_pool *pool;
pgoff_t i;
amdgpu_ttm_backend_unbind(bdev, ttm);
@@ -1121,7 +1172,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
ttm->pages[i]->mapping = NULL;
adev = amdgpu_ttm_adev(bdev);
- return ttm_pool_free(&adev->mman.bdev.pool, ttm);
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+
+ return ttm_pool_free(pool, ttm);
}
/**
@@ -1267,10 +1324,12 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
flags |= AMDGPU_PTE_VALID;
if (mem && (mem->mem_type == TTM_PL_TT ||
- mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ mem->mem_type == AMDGPU_PL_DOORBELL ||
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
- if (ttm->caching == ttm_cached)
+ if (ttm && ttm->caching == ttm_cached)
flags |= AMDGPU_PTE_SNOOPED;
}
@@ -1336,7 +1395,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
*/
dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP, f) {
- if (amdkfd_fence_check_mm(f, current->mm))
+ if (amdkfd_fence_check_mm(f, current->mm) &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
return false;
}
@@ -1418,13 +1478,15 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
memcpy(adev->mman.sdma_access_ptr, buf, len);
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
- &job);
+ &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA);
if (r)
goto out;
+ mutex_lock(&adev->mman.gtt_window_lock);
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
src_mm.start;
@@ -1433,12 +1495,13 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
swap(src_addr, dst_addr);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- PAGE_SIZE, false);
+ PAGE_SIZE, 0);
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
r = -ETIMEDOUT;
@@ -1627,14 +1690,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
{
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
memset(ctx, 0, sizeof(*ctx));
ctx->c2p_train_data_offset =
- ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
@@ -1652,11 +1716,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
- int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;
- if (!amdgpu_sriov_vf(adev)) {
+ if (adev->bios && !amdgpu_sriov_vf(adev)) {
if (amdgpu_atomfirmware_mem_training_supported(adev))
mem_train_support = true;
else
@@ -1670,41 +1735,135 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip
* discovery data and G6 memory training data respectively
*/
- adev->mman.discovery_tmr_size =
- amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
- if (!adev->mman.discovery_tmr_size)
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+ if (!adev->bios &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
+ reserve_size = max(reserve_size, (uint32_t)280 << 20);
+ else if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
- amdgpu_ttm_training_data_block_init(adev);
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
&ctx->c2p_bo,
NULL);
if (ret) {
- DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
return ret;
}
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
- ret = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
- adev->mman.discovery_tmr_size,
- &adev->mman.discovery_memory,
- NULL);
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size, reserve_size,
+ &adev->mman.fw_reserved_memory, NULL);
if (ret) {
- DRM_ERROR("alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
return ret;
}
return 0;
}
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+ return 0;
+
+ adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+ sizeof(*adev->mman.ttm_pools),
+ GFP_KERNEL);
+ if (!adev->mman.ttm_pools)
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+ ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
+ }
+ return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+ return;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+ ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+ kfree(adev->mman.ttm_pools);
+ adev->mman.ttm_pools = NULL;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1721,24 +1880,37 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
+ dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
adev_to_drm(adev)->vma_offset_manager,
- adev->need_swiotlb,
- dma_addressing_limited(adev->dev));
+ (adev->need_swiotlb ?
+ TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) |
+ (dma_addressing_limited(adev->dev) ?
+ TTM_ALLOCATION_POOL_USE_DMA32 : 0) |
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (r) {
- DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ dev_err(adev->dev,
+ "failed initializing buffer object driver(%d).\n", r);
return r;
}
- adev->mman.initialized = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */
- r = amdgpu_vram_mgr_init(adev);
+ r = amdgpu_ttm_pools_init(adev);
if (r) {
- DRM_ERROR("Failed initializing VRAM heap.\n");
+ dev_err(adev->dev, "failed to init ttm pools(%d).\n", r);
return r;
}
+ adev->mman.initialized = true;
+
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
+ }
/* Change the size here instead of the init above so only lpfn is affected */
amdgpu_ttm_set_buffer_funcs_status(adev, false);
@@ -1748,6 +1920,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
+ else if (adev->gmc.is_app_apu)
+ DRM_DEBUG_DRIVER(
+ "No need to ioremap when real vram size is 0\n");
else
#endif
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
@@ -1759,24 +1934,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*place on the VRAM, so reserve it early.
*/
r = amdgpu_ttm_fw_reserve_vram_init(adev);
- if (r) {
+ if (r)
return r;
- }
/*
- *The reserved vram for driver must be pinned to the specified
- *place on the VRAM, so reserve it early.
+ * The reserved VRAM for the driver must be pinned to a specific
+ * location in VRAM, so reserve it early.
*/
r = amdgpu_ttm_drv_reserve_vram_init(adev);
if (r)
return r;
/*
- * only NAVI10 and onwards ASIC support for IP discovery.
- * If IP discovery enabled, a block of memory should be
- * reserved for IP discovey.
+ * only NAVI10 and later ASICs support IP discovery.
+ * If IP discovery is enabled, a block of memory should be
+ * reserved for it.
*/
- if (adev->mman.discovery_bin) {
+ if (adev->discovery.reserve_tmr) {
r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
@@ -1785,84 +1959,120 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
- * and driver. */
- r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
- &adev->mman.stolen_vga_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
- adev->mman.stolen_extended_size,
- &adev->mman.stolen_extended_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
- adev->mman.stolen_reserved_size,
- &adev->mman.stolen_reserved_memory,
- NULL);
- if (r)
- return r;
+ * and driver.
+ */
+ if (!adev->gmc.is_app_apu) {
+ r = amdgpu_bo_create_kernel_at(adev, 0,
+ adev->mman.stolen_vga_size,
+ &adev->mman.stolen_vga_memory,
+ NULL);
+ if (r)
+ return r;
- DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
- (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
-
- /* Compute GTT size, either based on 1/2 the size of RAM size
- * or whatever the user passed on module init */
- if (amdgpu_gtt_size == -1) {
- struct sysinfo si;
-
- si_meminfo(&si);
- /* Certain GL unit tests for large textures can cause problems
- * with the OOM killer since there is no way to link this memory
- * to a process. This was originally mitigated (but not necessarily
- * eliminated) by limiting the GTT size. The problem is this limit
- * is often too low for many modern games so just make the limit 1/2
- * of system memory which aligns with TTM. The OOM accounting needs
- * to be addressed, but we shouldn't prevent common 3D applications
- * from being usable just to potentially mitigate that corner case.
- */
- gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- (u64)si.totalram * si.mem_unit / 2);
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+ adev->mman.stolen_extended_size,
+ &adev->mman.stolen_extended_memory,
+ NULL);
+
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->mman.stolen_reserved_offset,
+ adev->mman.stolen_reserved_size,
+ &adev->mman.stolen_reserved_memory,
+ NULL);
+ if (r)
+ return r;
} else {
- gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+ DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+ }
+
+ dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
+ (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
+
+ /* Compute GTT size, either based on TTM limit
+ * or whatever the user passed on module init.
+ */
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ if (amdgpu_gtt_size != -1) {
+ uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ drm_warn(&adev->ddev,
+ "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n");
+ if (gtt_size != configured_size)
+ drm_warn(&adev->ddev,
+ "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n",
+ configured_size, gtt_size);
+
+ gtt_size = configured_size;
}
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
if (r) {
- DRM_ERROR("Failed initializing GTT heap.\n");
+ dev_err(adev->dev, "Failed initializing GTT heap.\n");
+ return r;
+ }
+ dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
+ (unsigned int)(gtt_size / (1024 * 1024)));
+
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.real_vram_size < gtt_size)
+ adev->apu_prefer_gtt = true;
+ }
+
+ /* Initialize doorbell pool on PCI BAR */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing doorbell heap.\n");
+ return r;
+ }
+
+ /* Create a boorbell page for kernel usages */
+ r = amdgpu_doorbell_create_kernel_doorbells(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize kernel doorbells.\n");
+ return r;
+ }
+
+ /* Initialize MMIO-remap pool (single page 4K) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
return r;
}
- DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
- (unsigned)(gtt_size / (1024 * 1024)));
+
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
/* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev);
if (r) {
- DRM_ERROR("Failed initializing PREEMPT heap.\n");
+ dev_err(adev->dev, "Failed initializing PREEMPT heap.\n");
return r;
}
/* Initialize various on-chip memory pools */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
if (r) {
- DRM_ERROR("Failed initializing GDS heap.\n");
+ dev_err(adev->dev, "Failed initializing GDS heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
if (r) {
- DRM_ERROR("Failed initializing gws heap.\n");
+ dev_err(adev->dev, "Failed initializing gws heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
if (r) {
- DRM_ERROR("Failed initializing oa heap.\n");
+ dev_err(adev->dev, "Failed initializing oa heap.\n");
return r;
}
-
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL,
@@ -1878,20 +2088,30 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
int idx;
+
if (!adev->mman.initialized)
return;
+ amdgpu_ttm_pools_fini(adev);
+
amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the stolen vga memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
- /* return the IP Discovery TMR memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
- if (adev->mman.stolen_reserved_size)
- amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
- NULL, NULL);
+ if (!adev->gmc.is_app_apu) {
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL,
+ NULL);
+ if (adev->mman.stolen_reserved_size)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+ NULL, NULL);
+ }
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
amdgpu_ttm_fw_reserve_vram_fini(adev);
amdgpu_ttm_drv_reserve_vram_fini(adev);
@@ -1904,15 +2124,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
drm_dev_exit(idx);
}
- amdgpu_vram_mgr_fini(adev);
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
+ amdgpu_doorbell_fini(adev);
+
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
- DRM_INFO("amdgpu: ttm finalized\n");
+ dev_info(adev->dev, "amdgpu: ttm finalized\n");
}
/**
@@ -1931,7 +2156,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
int r;
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
- adev->mman.buffer_funcs_enabled == enable)
+ adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
return;
if (enable) {
@@ -1940,18 +2165,32 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
- r = drm_sched_entity_init(&adev->mman.entity,
+ r = drm_sched_entity_init(&adev->mman.high_pr,
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
- DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
- r);
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
return;
}
+
+ r = drm_sched_entity_init(&adev->mman.low_pr,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
} else {
- drm_sched_entity_destroy(&adev->mman.entity);
- dma_fence_put(man->move);
- man->move = NULL;
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.low_pr);
+ /* Drop all the old fences since re-creating the scheduler entities
+ * will allocate new contexts.
+ */
+ ttm_resource_manager_cleanup(man);
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -1961,6 +2200,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
size = adev->gmc.visible_vram_size;
man->size = size;
adev->mman.buffer_funcs_enabled = enable;
+
+ return;
+
+error_free_entity:
+ drm_sched_entity_destroy(&adev->mman.high_pr);
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -1968,16 +2212,18 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
unsigned int num_dw,
struct dma_resv *resv,
bool vm_needs_flush,
- struct amdgpu_job **job)
+ struct amdgpu_job **job,
+ bool delayed, u64 k_job_id)
{
enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED;
int r;
-
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+ struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+ &adev->mman.high_pr;
+ r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
- num_dw * 4, pool, job);
+ num_dw * 4, pool, job, k_job_id);
if (r)
return r;
@@ -1998,17 +2244,18 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
- unsigned num_loops, num_dw;
+ unsigned int num_loops, num_dw;
struct amdgpu_job *job;
uint32_t max_bytes;
- unsigned i;
+ unsigned int i;
int r;
if (!direct_submit && !ring->sched.ready) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -2016,7 +2263,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
- resv, vm_needs_flush, &job);
+ resv, vm_needs_flush, &job, false,
+ AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
@@ -2024,8 +2272,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
@@ -2044,7 +2291,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
error_free:
amdgpu_job_free(job);
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev, "Error scheduling IBs (%d)\n", r);
return r;
}
@@ -2052,7 +2299,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush)
+ bool vm_needs_flush, bool delayed,
+ u64 k_job_id)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
@@ -2065,7 +2313,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
- &job);
+ &job, delayed, k_job_id);
if (r)
return r;
@@ -2085,10 +2333,78 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
return 0;
}
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true,
+ AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **f)
+ struct dma_fence **f,
+ bool delayed,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2097,7 +2413,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
int r;
if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to clear memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to clear memory with ring turned off.\n");
return -EINVAL;
}
@@ -2117,7 +2434,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
goto error;
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true);
+ &next, true, delayed, k_job_id);
if (r)
goto error;
@@ -2157,7 +2474,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
man = ttm_manager_type(&adev->mman.bdev, mem_type);
break;
default:
- DRM_ERROR("Trying to evict invalid memory type\n");
+ dev_err(adev->dev, "Trying to evict invalid memory type\n");
return -EINVAL;
}
@@ -2168,7 +2485,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}
@@ -2282,7 +2599,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
struct page *p;
void *ptr;
- bytes = bytes < size ? bytes : size;
+ bytes = min(bytes, size);
/* Translate the bus address to a physical address. If
* the domain is NULL it means there is no IOMMU active
@@ -2337,7 +2654,7 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
struct page *p;
void *ptr;
- bytes = bytes < size ? bytes : size;
+ bytes = min(bytes, size);
addr = dom ? iommu_iova_to_phys(dom, addr) : addr;