summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c742
1 files changed, 484 insertions, 258 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7d6daf8d2bfa..b1c24c8fa686 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -44,6 +44,7 @@
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
/*
* Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
@@ -117,11 +118,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
return;
si_meminfo(&si);
- mem = si.freeram - si.freehigh;
+ mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
+ if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT)
+ kfd_mem_limit.max_system_mem_limit >>= 1;
+ else
+ kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT;
+
kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
@@ -165,6 +171,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
uint64_t vram_size = 0;
@@ -189,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
return -EINVAL;
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
system_mem_needed = size;
ttm_mem_needed = size;
}
@@ -205,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit)
+ kfd_mem_limit.max_system_mem_limit) {
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+ if (!no_system_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
- if ((kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
- kfd_mem_limit.max_ttm_mem_limit) ||
- (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
- vram_size - reserved_for_pt)) {
+ if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) {
ret = -ENOMEM;
goto release;
}
+ /*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
+ * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
+ * VRAM check since ttm_mem_limit check already cover this allocation
+ */
+
+ if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+ uint64_t vram_available =
+ vram_size - reserved_for_pt - reserved_for_ras -
+ atomic64_read(&adev->vram_pin_size);
+ if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
+
/* Update memory accounting by decreasing available system
* memory, TTM memory and GPU memory as computed above
*/
@@ -225,7 +249,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
"adev reference can't be null when vram is used");
if (adev && xcp_id >= 0) {
adev->kfd.vram_used[xcp_id] += vram_needed;
- adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+ adev->kfd.vram_used_aligned[xcp_id] +=
+ adev->apu_prefer_gtt ?
vram_needed :
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
}
@@ -253,7 +278,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
if (adev) {
adev->kfd.vram_used[xcp_id] -= size;
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
adev->kfd.vram_used_aligned[xcp_id] -= size;
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
@@ -361,40 +386,32 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
return 0;
}
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+/**
+ * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
+ * @bo: the BO where to remove the evictions fences from.
+ *
+ * This functions should only be used on release when all references to the BO
+ * are already dropped. We remove the eviction fence from the private copy of
+ * the dma_resv object here since that is what is used during release to
+ * determine of the BO is idle or not.
+ */
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- struct amdgpu_bo *root = bo;
- struct amdgpu_vm_bo_base *vm_bo;
- struct amdgpu_vm *vm;
- struct amdkfd_process_info *info;
- struct amdgpu_amdkfd_fence *ef;
- int ret;
-
- /* we can always get vm_bo from root PD bo.*/
- while (root->parent)
- root = root->parent;
-
- vm_bo = root->vm_bo;
- if (!vm_bo)
- return 0;
-
- vm = vm_bo->vm;
- if (!vm)
- return 0;
-
- info = vm->process_info;
- if (!info || !info->eviction_fence)
- return 0;
+ struct dma_resv *resv = &bo->tbo.base._resv;
+ struct dma_fence *fence, *stub;
+ struct dma_resv_iter cursor;
- ef = container_of(dma_fence_get(&info->eviction_fence->base),
- struct amdgpu_amdkfd_fence, base);
+ dma_resv_assert_held(resv);
- BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
- dma_resv_unlock(bo->tbo.base.resv);
+ stub = dma_fence_get_stub();
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (!to_amdgpu_amdkfd_fence(fence))
+ continue;
- dma_fence_put(&ef->base);
- return ret;
+ dma_resv_replace_fences(resv, fence->context, stub,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ dma_fence_put(stub);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -407,6 +424,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
+ /* bo has been pinned, not need validate it */
+ if (bo->tbo.pin_count)
+ return 0;
+
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -419,6 +440,32 @@ validate_fail:
return ret;
}
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ int ret = amdgpu_bo_reserve(bo, false);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret)
+ goto unreserve_out;
+
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
{
return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -431,13 +478,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
* again. Page directories are only updated after updating page
* tables.
*/
-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo *pd = vm->root.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
+ ret = amdgpu_vm_validate(adev, vm, ticket,
+ amdgpu_amdkfd_validate_vm_bo, NULL);
if (ret) {
pr_err("failed to validate PT BOs\n");
return ret;
@@ -458,10 +507,11 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}
-static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct kgd_mem *mem)
{
uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
AMDGPU_VM_MTYPE_DEFAULT;
@@ -471,7 +521,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- return amdgpu_gem_va_map_flags(adev, mapping_flags);
+ return mapping_flags;
}
/**
@@ -562,12 +612,6 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
- int ret;
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (ret)
- return ret;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -689,7 +733,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
return;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
@@ -733,12 +777,12 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
enum dma_data_direction dir;
if (unlikely(!ttm->sg)) {
- pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+ pr_debug("SG Table of BO is NULL");
return;
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -774,13 +818,17 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
{
if (!mem->dmabuf) {
- struct dma_buf *ret = amdgpu_gem_prime_export(
- &mem->bo->tbo.base,
+ struct amdgpu_device *bo_adev;
+ struct dma_buf *dmabuf;
+
+ bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
+ mem->gem_handle,
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
- DRM_RDWR : 0);
- if (IS_ERR(ret))
- return PTR_ERR(ret);
- mem->dmabuf = ret;
+ DRM_RDWR : 0);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+ mem->dmabuf = dmabuf;
}
return 0;
@@ -828,6 +876,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+ struct amdgpu_bo_va *bo_va;
bool same_hive = false;
int i, ret;
@@ -844,7 +893,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
* if peer device has large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of peer device
*/
- if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
+ if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -866,9 +915,10 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
- same_hive) {
+ (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
/* Mappings on the local GPU, or VRAM mappings in the
- * local hive, or userptr mapping can reuse dma map
+ * local hive, or userptr, or GTT mapping can reuse dma map
* address space share the original BO
*/
attachment[i]->type = KFD_MEM_ATT_SHARED;
@@ -914,7 +964,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
- attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ else
+ ++bo_va->ref_count;
+ attachment[i]->bo_va = bo_va;
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
@@ -923,7 +978,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
goto unwind;
}
attachment[i]->va = va;
- attachment[i]->pte_flags = get_pte_flags(adev, mem);
+ attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
attachment[i]->adev = adev;
list_add(&attachment[i]->list, &mem->attachments);
@@ -937,8 +992,9 @@ unwind:
if (!attachment[i])
continue;
if (attachment[i]->bo_va) {
- amdgpu_bo_reserve(bo[i], true);
- amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+ (void)amdgpu_bo_reserve(bo[i], true);
+ if (--attachment[i]->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del(&attachment[i]->list);
}
@@ -955,7 +1011,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
- amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+ if (--attachment->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);
@@ -1000,7 +1057,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
int ret = 0;
mutex_lock(&process_info->lock);
@@ -1032,9 +1089,19 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
return 0;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range)) {
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
if (ret) {
- pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ amdgpu_hmm_range_free(range);
+ if (ret == -EAGAIN)
+ pr_debug("Failed to get user pages, try again\n");
+ else
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
}
@@ -1043,6 +1110,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -1050,7 +1120,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
amdgpu_bo_unreserve(bo);
release_out:
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+ amdgpu_hmm_range_free(range);
unregister_out:
if (ret)
amdgpu_hmm_unregister(bo);
@@ -1096,14 +1166,14 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->n_vms = 1;
ctx->sync = &mem->sync;
- drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&ctx->exec) {
ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
drm_exec_retry_on_contention(&ctx->exec);
if (unlikely(ret))
goto error;
- ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base);
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&ctx->exec);
if (unlikely(ret))
goto error;
@@ -1135,7 +1205,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
int ret;
ctx->sync = &mem->sync;
- drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&ctx->exec) {
ctx->n_vms = 0;
list_for_each_entry(entry, &mem->attachments, list) {
@@ -1188,7 +1259,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
return ret;
}
-static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
+static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync)
{
@@ -1196,13 +1267,22 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct amdgpu_device *adev = entry->adev;
struct amdgpu_vm *vm = bo_va->base.vm;
- amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+ if (bo_va->queue_refcount) {
+ pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount);
+ return -EBUSY;
+ }
+
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
- amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+ /* VM entity stopped if process killed, don't clear freed pt bo */
+ if (!amdgpu_vm_ready(vm))
+ return 0;
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- kfd_mem_dmaunmap_attachment(mem, entry);
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
+
+ return 0;
}
static int update_gpuvm_pte(struct kgd_mem *mem,
@@ -1224,7 +1304,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}
static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -1257,17 +1337,19 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
update_gpuvm_pte_failed:
unmap_bo_from_gpuvm(mem, entry, sync);
+ kfd_mem_dmaunmap_attachment(mem, entry);
return ret;
}
-static int process_validate_vms(struct amdkfd_process_info *process_info)
+static int process_validate_vms(struct amdkfd_process_info *process_info,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
- ret = vm_validate_pt_pd_bos(peer_vm);
+ ret = vm_validate_pt_pd_bos(peer_vm, ticket);
if (ret)
return ret;
}
@@ -1344,7 +1426,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
amdgpu_amdkfd_restore_userptr_worker);
*process_info = info;
- *ef = dma_fence_get(&info->eviction_fence->base);
}
vm->process_info = *process_info;
@@ -1353,7 +1434,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
ret = amdgpu_bo_reserve(vm->root.bo, true);
if (ret)
goto reserve_pd_fail;
- ret = vm_validate_pt_pd_bos(vm);
+ ret = vm_validate_pt_pd_bos(vm, NULL);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
@@ -1375,6 +1456,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
list_add_tail(&vm->vm_list_node,
&(vm->process_info->vm_list_head));
vm->process_info->n_vms++;
+ if (ef)
+ *ef = dma_fence_get(&vm->process_info->eviction_fence->base);
mutex_unlock(&vm->process_info->lock);
return 0;
@@ -1386,10 +1469,7 @@ validate_pd_fail:
reserve_pd_fail:
vm->process_info = NULL;
if (info) {
- /* Two fence references: one in info and one in *ef */
dma_fence_put(&info->eviction_fence->base);
- dma_fence_put(*ef);
- *ef = NULL;
*process_info = NULL;
put_pid(info->pid);
create_evict_fence_fail:
@@ -1419,13 +1499,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
if (unlikely(ret))
return ret;
- ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ /*
+ * If bo is not contiguous on VRAM, move to system memory first to ensure
+ * we can get contiguous VRAM space after evicting other BOs.
+ */
+ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+ struct ttm_operation_ctx ctx = { true, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret)) {
+ pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+ goto out;
+ }
+ }
+ }
+
+ ret = amdgpu_bo_pin(bo, domain);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
amdgpu_bo_unreserve(bo);
-
return ret;
}
@@ -1449,27 +1546,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
amdgpu_bo_unreserve(bo);
}
-int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct amdgpu_vm *avm, u32 pasid)
-
-{
- int ret;
-
- /* Free the original amdgpu allocated pasid,
- * will be replaced with kfd allocated pasid.
- */
- if (avm->pasid) {
- amdgpu_pasid_free(avm->pasid);
- amdgpu_vm_set_pasid(adev, avm, 0);
- }
-
- ret = amdgpu_vm_set_pasid(adev, avm, pasid);
- if (ret)
- return ret;
-
- return 0;
-}
-
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct amdgpu_vm *avm,
void **process_info,
@@ -1527,27 +1603,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
}
}
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
- void *drm_priv)
-{
- struct amdgpu_vm *avm;
-
- if (WARN_ON(!adev || !drm_priv))
- return;
-
- avm = drm_priv_to_vm(drm_priv);
-
- pr_debug("Releasing process vm %p\n", avm);
-
- /* The original pasid of amdgpu vm has already been
- * released during making a amdgpu vm to a compute vm
- * The current pasid is managed by kfd and will be
- * released on kfd process destroy. Set amdgpu pasid
- * to 0 to avoid duplicate release.
- */
- amdgpu_vm_release_compute(adev, avm);
-}
-
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1583,7 +1638,8 @@ int amdgpu_amdkfd_criu_resume(void *p)
goto out_unlock;
}
WRITE_ONCE(pinfo->block_mmu_notifications, false);
- schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+ queue_delayed_work(system_freezable_wq,
+ &pinfo->restore_userptr_work, 0);
out_unlock:
mutex_unlock(&pinfo->lock);
@@ -1595,16 +1651,23 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
ssize_t available;
uint64_t vram_available, system_mem_available, ttm_mem_available;
spin_lock(&kfd_mem_limit.mem_limit_lock);
- vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
- - adev->kfd.vram_used_aligned[xcp_id]
- - atomic64_read(&adev->vram_pin_size)
- - reserved_for_pt;
+ if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id];
+ else
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt
+ - reserved_for_ras;
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
system_mem_available = no_system_mem_limit ?
kfd_mem_limit.max_system_mem_limit :
kfd_mem_limit.max_system_mem_limit -
@@ -1652,7 +1715,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
@@ -1660,6 +1723,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+ /* For contiguous VRAM allocation */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+ alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
}
xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
0 : fpriv->xcp_id;
@@ -1690,6 +1757,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
@@ -1737,6 +1806,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
pr_debug("Failed to allow vma node access. ret %d\n", ret);
goto err_node_allow;
}
+ ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+ if (ret)
+ goto err_gem_handle_create;
bo = gem_to_amdgpu_bo(gobj);
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
@@ -1768,6 +1840,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
}
if (offset)
@@ -1777,7 +1858,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
/* Don't unreserve system mem limit twice */
@@ -1785,6 +1869,7 @@ err_node_allow:
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);
@@ -1842,7 +1927,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
amdgpu_hmm_unregister(mem->bo);
mutex_lock(&process_info->notifier_lock);
- amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+ amdgpu_hmm_range_free(mem->range);
mutex_unlock(&process_info->notifier_lock);
}
@@ -1850,18 +1935,16 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (unlikely(ret))
return ret;
- /* The eviction fence should be removed by the last unmap.
- * TODO: Log an error condition if the bo still has the eviction fence
- * attached
- */
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
/* Remove from VM internal data structures */
- list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
+ kfd_mem_dmaunmap_attachment(mem, entry);
kfd_mem_detach(entry);
+ }
ret = unreserve_bo_and_vms(&ctx, false, false);
@@ -1882,9 +1965,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
*/
if (size) {
if (!is_imported &&
- (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
- (adev->gmc.is_app_apu &&
- mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
*size = bo_size;
else
*size = 0;
@@ -1892,8 +1973,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the BO*/
drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
- if (mem->dmabuf)
+ drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+ if (mem->dmabuf) {
dma_buf_put(mem->dmabuf);
+ mem->dmabuf = NULL;
+ }
mutex_destroy(&mem->lock);
/* If this releases the last reference, it will end up calling
@@ -1976,23 +2060,10 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
- ret = vm_validate_pt_pd_bos(avm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto out_unreserve;
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- /* Validate BO only once. The eviction fence gets added to BO
- * the first time it is mapped. Validate will wait for all
- * background evictions to complete.
- */
- ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
- if (ret) {
- pr_debug("Validate failed\n");
- goto out_unreserve;
- }
- }
-
list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm != avm || entry->is_mapped)
continue;
@@ -2019,10 +2090,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
mem->mapped_to_gpu_memory);
}
- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- dma_resv_add_fence(bo->tbo.base.resv,
- &avm->process_info->eviction_fence->base,
- DMA_RESV_USAGE_BOOKKEEP);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
@@ -2035,11 +2102,41 @@ out:
return ret;
}
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_vm *vm;
+ int ret;
+
+ vm = drm_priv_to_vm(drm_priv);
+
+ mutex_lock(&mem->lock);
+
+ ret = amdgpu_bo_reserve(mem->bo, true);
+ if (ret)
+ goto out;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != vm)
+ continue;
+ if (entry->bo_va->base.bo->tbo.ttm &&
+ !entry->bo_va->base.bo->tbo.ttm->sg)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ }
+
+ amdgpu_bo_unreserve(mem->bo);
+out:
+ mutex_unlock(&mem->lock);
+
+ return ret;
+}
+
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
@@ -2056,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
- ret = vm_validate_pt_pd_bos(avm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto unreserve_out;
@@ -2072,7 +2169,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
entry->va, entry->va + bo_size, entry);
- unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ if (ret)
+ goto unreserve_out;
+
entry->is_mapped = false;
mem->mapped_to_gpu_memory--;
@@ -2080,15 +2180,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
mem->mapped_to_gpu_memory);
}
- /* If BO is unmapped from all VMs, unfence it. It can be evicted if
- * required.
- */
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
- !mem->bo->tbo.pin_count)
- amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence);
-
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
out:
@@ -2115,13 +2206,13 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
/**
* amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
- * @adev: Device to which allocated BO belongs
* @bo: Buffer object to be mapped
+ * @bo_gart: Return bo reference
*
* Before return, bo reference count is incremented. To release the reference and unpin/
* unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
*/
-int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
{
int ret;
@@ -2148,7 +2239,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
amdgpu_bo_unreserve(bo);
- bo = amdgpu_bo_ref(bo);
+ *bo_gart = amdgpu_bo_ref(bo);
return 0;
@@ -2238,7 +2329,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
- amdgpu_bo_reserve(bo, true);
+ (void)amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
@@ -2247,42 +2338,33 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *mem)
{
- if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
- mb(); /* make sure read happened */
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
}
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
- struct dma_buf *dma_buf,
- uint64_t va, void *drm_priv,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf,
+ struct drm_gem_object *obj,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct drm_gem_object *obj;
struct amdgpu_bo *bo;
int ret;
- obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT))) {
+ AMDGPU_GEM_DOMAIN_GTT)))
/* Only VRAM and GTT BOs are supported */
- ret = -EINVAL;
- goto err_put_obj;
- }
+ return -EINVAL;
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
- if (!*mem) {
- ret = -ENOMEM;
- goto err_put_obj;
- }
+ if (!*mem)
+ return -ENOMEM;
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
if (ret)
@@ -2307,8 +2389,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
(*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
- (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
- AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
+ !adev->apu_prefer_gtt ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
@@ -2316,12 +2399,57 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
return 0;
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
err_free_mem:
kfree(*mem);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct drm_gem_object *obj;
+ uint32_t handle;
+ int ret;
+
+ ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+ &handle);
+ if (ret)
+ return ret;
+ obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+ if (!obj) {
+ ret = -EINVAL;
+ goto err_release_handle;
+ }
+
+ ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+ mmap_offset);
+ if (ret)
+ goto err_put_obj;
+
+ (*mem)->gem_handle = handle;
+
+ return 0;
+
err_put_obj:
drm_gem_object_put(obj);
+err_release_handle:
+ drm_gem_handle_delete(adev->kfd.client.file, handle);
return ret;
}
@@ -2372,10 +2500,14 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
/* First eviction, stop the queues */
r = kgd2kfd_quiesce_mm(mni->mm,
KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
- if (r)
+
+ if (r && r != -ESRCH)
pr_err("Failed to quiesce KFD\n");
- schedule_delayed_work(&process_info->restore_userptr_work,
- msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ if (r != -ESRCH)
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
mutex_unlock(&process_info->notifier_lock);
@@ -2419,7 +2551,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+ amdgpu_hmm_range_free(mem->range);
mem->range = NULL;
/* BO reservations and getting user pages (hmm_range_fault)
@@ -2443,10 +2575,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
}
}
+ mem->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!mem->range))
+ return -ENOMEM;
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
- &mem->range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range);
if (ret) {
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
pr_debug("Failed %d to get user pages\n", ret);
/* Return -EFAULT bad address error as success. It will
@@ -2459,9 +2595,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
if (ret != -EFAULT)
return ret;
+ /* If applications unmap memory before destroying the userptr
+ * from the KFD, trigger a segmentation fault in VM debug mode.
+ */
+ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
+ pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
+ pid_nr(process_info->pid), mem->va);
+
+ // Send GPU VM fault to user space
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
+ }
+
ret = 0;
}
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
mutex_lock(&process_info->notifier_lock);
/* Mark the BO as valid unless it was invalidated
@@ -2500,7 +2655,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
amdgpu_sync_create(&sync);
- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
/* Reserve all BOs and page tables for validation */
drm_exec_until_all_locked(&exec) {
/* Reserve all the page directories */
@@ -2525,7 +2680,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- ret = process_validate_vms(process_info);
+ ret = process_validate_vms(process_info, NULL);
if (ret)
goto unreserve_out;
@@ -2597,11 +2752,11 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
/* keep mem without hmm range at userptr_inval_list */
if (!mem->range)
- continue;
+ continue;
/* Only check mem with hmm range associated */
- valid = amdgpu_ttm_tt_get_user_pages_done(
- mem->bo->tbo.ttm, mem->range);
+ valid = amdgpu_hmm_range_valid(mem->range);
+ amdgpu_hmm_range_free(mem->range);
mem->range = NULL;
if (!valid) {
@@ -2697,7 +2852,8 @@ unlock_out:
/* If validation failed, reschedule another attempt */
if (evicted_bos) {
- schedule_delayed_work(&process_info->restore_userptr_work,
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
kfd_smi_event_queue_restore_rescheduled(mm);
@@ -2706,6 +2862,23 @@ unlock_out:
put_task_struct(usertask);
}
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
+ struct dma_fence *new_ef)
+{
+ struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+ /* protected by process_info->lock */);
+
+ /* If we're replacing an unsignaled eviction fence, that fence will
+ * never be signaled, and if anyone is still waiting on that fence,
+ * they will hang forever. This should never happen. We should only
+ * replace the fence in restore_work that only gets scheduled after
+ * eviction work signaled the fence.
+ */
+ WARN_ONCE(!dma_fence_is_signaled(old_ef),
+ "Replacing unsignaled eviction fence");
+ dma_fence_put(old_ef);
+}
+
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
@@ -2724,12 +2897,11 @@ unlock_out:
* 7. Add fence to all PD and PT BOs.
* 8. Unreserve all BOs
*/
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
{
struct amdkfd_process_info *process_info = info;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem;
- struct amdgpu_amdkfd_fence *new_fence;
struct list_head duplicate_save;
struct amdgpu_sync sync_obj;
unsigned long failed_size = 0;
@@ -2741,14 +2913,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
mutex_lock(&process_info->lock);
- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
drm_exec_retry_on_contention(&exec);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ pr_err("Locking VM PD failed, ret: %d\n", ret);
goto ttm_reserve_fail;
+ }
}
/* Reserve all BOs and page tables/directory. Add all BOs from
@@ -2761,31 +2935,21 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
gobj = &mem->bo->tbo.base;
ret = drm_exec_prepare_obj(&exec, gobj, 1);
drm_exec_retry_on_contention(&exec);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
goto ttm_reserve_fail;
+ }
}
}
amdgpu_sync_create(&sync_obj);
- /* Validate PDs and PTs */
- ret = process_validate_vms(process_info);
- if (ret)
- goto validate_map_fail;
-
- ret = process_sync_pds_resv(process_info, &sync_obj);
- if (ret) {
- pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
- goto validate_map_fail;
- }
-
- /* Validate BOs and map them to GPUVM (update VM page tables). */
+ /* Validate BOs managed by KFD */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
- struct kfd_mem_attachment *attachment;
struct dma_resv_iter cursor;
struct dma_fence *fence;
@@ -2804,19 +2968,35 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
DMA_RESV_USAGE_KERNEL, fence) {
- ret = amdgpu_sync_fence(&sync_obj, fence);
+ ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
}
}
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
list_for_each_entry(attachment, &mem->attachments, list) {
if (!attachment->is_mapped)
continue;
- if (attachment->bo_va->base.bo->tbo.pin_count)
- continue;
-
kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {
@@ -2826,8 +3006,31 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
}
- if (failed_size)
- pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+ /* Update mappings not managed by KFD */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(
+ peer_vm->root.bo->tbo.bdev);
+
+ struct amdgpu_fpriv *fpriv =
+ container_of(peer_vm, struct amdgpu_fpriv, vm);
+
+ ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+ }
/* Update page directories */
ret = process_update_pds(process_info, &sync_obj);
@@ -2836,25 +3039,47 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
+ /* Sync with fences on all the page tables. They implicitly depend on any
+ * move fences from amdgpu_vm_handle_moved above.
+ */
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
+ }
+
/* Wait for validate and PT updates to finish */
amdgpu_sync_wait(&sync_obj, false);
- /* Release old eviction fence and create new one, because fence only
- * goes from unsignaled to signaled, fence cannot be reused.
- * Use context and mm from the old fence.
+ /* The old eviction fence may be unsignaled if restore happens
+ * after a GPU reset or suspend/resume. Keep the old fence in that
+ * case. Otherwise release the old eviction fence and create new
+ * one, because fence only goes from unsignaled to signaled once
+ * and cannot be reused. Use context and mm from the old fence.
+ *
+ * If an old eviction fence signals after this check, that's OK.
+ * Anyone signaling an eviction fence must stop the queues first
+ * and schedule another restore worker.
*/
- new_fence = amdgpu_amdkfd_fence_create(
+ if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+ struct amdgpu_amdkfd_fence *new_fence =
+ amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
process_info->eviction_fence->mm,
NULL);
- if (!new_fence) {
- pr_err("Failed to create eviction fence\n");
- ret = -ENOMEM;
- goto validate_map_fail;
+
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+ } else {
+ WARN_ONCE(*ef != &process_info->eviction_fence->base,
+ "KFD eviction fence doesn't match KGD process_info");
}
- dma_fence_put(&process_info->eviction_fence->base);
- process_info->eviction_fence = new_fence;
- *ef = dma_fence_get(&new_fence->base);
/* Attach new eviction fence to all BOs except pinned ones */
list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
@@ -2865,7 +3090,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
&process_info->eviction_fence->base,
DMA_RESV_USAGE_BOOKKEEP);
}
- /* Attach eviction fence to PD / PT BOs */
+ /* Attach eviction fence to PD / PT BOs and DMABuf imports */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->root.bo;
@@ -2995,12 +3220,13 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
return 0;
}
-bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem)
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem)
{
+ struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv);
struct kfd_mem_attachment *entry;
list_for_each_entry(entry, &mem->attachments, list) {
- if (entry->is_mapped && entry->adev == adev)
+ if (entry->is_mapped && entry->bo_va->base.vm == vm)
return true;
}
return false;