summary refs log tree commit diff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c 303
1 files changed, 186 insertions, 117 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 96a138a39515..5ff1a5a89d96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -170,12 +170,11 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
page = hmm_pfn_to_page(hmm_pfns[i]);
if (is_zone_device_page(page)) {
- struct amdgpu_device *bo_adev =
- amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
bo_adev->vm_manager.vram_base_offset -
- bo_adev->kfd.dev->pgmap.range.start;
+ bo_adev->kfd.pgmap.range.start;
addr[i] |= SVM_RANGE_VRAM_DOMAIN;
pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
continue;
@@ -281,7 +280,7 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
if (update_mem_usage && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
}
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
@@ -314,7 +313,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
p = container_of(svms, struct kfd_process, svms);
if (!p->xnack_enabled && update_mem_usage &&
amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
pr_info("SVM mapping failed, exceeds resident system memory limit\n");
kfree(prange);
return NULL;
@@ -424,10 +423,8 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
}
static bool
-svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
+svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
{
- struct amdgpu_device *bo_adev;
-
mutex_lock(&prange->lock);
if (!prange->svm_bo) {
mutex_unlock(&prange->lock);
@@ -440,12 +437,11 @@ svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
}
if (svm_bo_ref_unless_zero(prange->svm_bo)) {
/*
- * Migrate from GPU to GPU, remove range from source bo_adev
- * svm_bo range list, and return false to allocate svm_bo from
- * destination adev.
+ * Migrate from GPU to GPU, remove range from source svm_bo->node
+ * range list, and return false to allocate svm_bo from destination
+ * node.
*/
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- if (bo_adev != adev) {
+ if (prange->svm_bo->node != node) {
mutex_unlock(&prange->lock);
spin_lock(&prange->svm_bo->list_lock);
@@ -513,7 +509,7 @@ static struct svm_range_bo *svm_range_bo_new(void)
}
int
-svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
+svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
struct amdgpu_bo_param bp;
@@ -528,7 +524,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
prange->start, prange->last);
- if (svm_range_validate_svm_bo(adev, prange))
+ if (svm_range_validate_svm_bo(node, prange))
return 0;
svm_bo = svm_range_bo_new();
@@ -542,6 +538,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
kfree(svm_bo);
return -ESRCH;
}
+ svm_bo->node = node;
svm_bo->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
mm,
@@ -558,13 +555,20 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
+ if (node->xcp)
+ bp.xcp_id_plus1 = node->xcp->id + 1;
- r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ r = amdgpu_bo_create_user(node->adev, &bp, &ubo);
if (r) {
pr_debug("failed %d to create bo\n", r);
goto create_bo_failed;
}
bo = &ubo->bo;
+
+ pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n",
+ bo->tbo.resource->start << PAGE_SHIFT, bp.size,
+ bp.xcp_id_plus1 - 1);
+
r = amdgpu_bo_reserve(bo, true);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
@@ -617,45 +621,30 @@ void svm_range_vram_node_free(struct svm_range *prange)
prange->ttm_res = NULL;
}
-struct amdgpu_device *
-svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
+struct kfd_node *
+svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id)
{
- struct kfd_process_device *pdd;
struct kfd_process *p;
- int32_t gpu_idx;
+ struct kfd_process_device *pdd;
p = container_of(prange->svms, struct kfd_process, svms);
-
- gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
- if (gpu_idx < 0) {
- pr_debug("failed to get device by id 0x%x\n", gpu_id);
- return NULL;
- }
- pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
+ pdd = kfd_process_device_data_by_id(p, gpu_id);
if (!pdd) {
- pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
+ pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id);
return NULL;
}
- return pdd->dev->adev;
+ return pdd->dev;
}
struct kfd_process_device *
-svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
+svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node)
{
struct kfd_process *p;
- int32_t gpu_idx, gpuid;
- int r;
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx);
- if (r) {
- pr_debug("failed to get device id by adev %p\n", adev);
- return NULL;
- }
-
- return kfd_process_device_from_gpuidx(p, gpu_idx);
+ return kfd_get_process_device_data(node, p);
}
static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
@@ -735,7 +724,9 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
case KFD_IOCTL_SVM_ATTR_ACCESS:
case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
- *update_mapping = true;
+ if (!p->xnack_enabled)
+ *update_mapping = true;
+
gpuidx = kfd_process_gpuidx_from_gpuid(p,
attrs[i].value);
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -818,7 +809,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
}
}
- return true;
+ return !prange->is_error_flag;
}
/**
@@ -1146,31 +1137,39 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
}
return 0;
}
+static bool
+svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
+{
+ return (node_a->adev == node_b->adev ||
+ amdgpu_xgmi_same_hive(node_a->adev, node_b->adev));
+}
static uint64_t
-svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
- int domain)
+svm_range_get_pte_flags(struct kfd_node *node,
+ struct svm_range *prange, int domain)
{
- struct amdgpu_device *bo_adev;
+ struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+ bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
+ unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ bo_node = prange->svm_bo->node;
- switch (KFD_GC_VERSION(adev->kfd.dev)) {
+ switch (node->adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_adev == adev) {
+ if (bo_node == node) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ if (svm_nodes_in_same_hive(node, bo_node))
snoop = true;
}
} else {
@@ -1180,15 +1179,15 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
break;
case IP_VERSION(9, 4, 2):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_adev == adev) {
+ if (bo_node == node) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (node->adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ if (svm_nodes_in_same_hive(node, bo_node))
snoop = true;
}
} else {
@@ -1196,6 +1195,37 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
+ case IP_VERSION(9, 4, 3):
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
+ snoop = true;
+ if (uncached) {
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
+ /* local HBM region close to partition */
+ if (bo_node->adev == node->adev &&
+ (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
+ mapping_flags |= mtype_local;
+ /* local HBM region far from partition or remote XGMI GPU */
+ else if (svm_nodes_in_same_hive(bo_node, node))
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ /* PCIe P2P */
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* system memory accessed by the APU */
+ } else if (node->adev->flags & AMD_IS_APU) {
+ /* On NUMA systems, locality is determined per-page
+ * in amdgpu_gmc_override_vm_pte_flags
+ */
+ if (num_possible_nodes() <= 1)
+ mapping_flags |= mtype_local;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ /* system memory accessed by the dGPU */
+ } else {
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ }
+ break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
@@ -1212,7 +1242,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
- pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+ pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
return pte_flags;
}
@@ -1319,7 +1349,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
last_start, prange->start + i, last_domain ? "GPU" : "CPU");
- pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
+ pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
if (readonly)
pte_flags &= ~AMDGPU_PTE_WRITEABLE;
@@ -1328,6 +1358,10 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
+ /* In dGPU mode, VRAM for the different memory partitions is
+ * allocated from the same vm_manager based on fpfn/lpfn, so use the
+ * same vm_manager.vram_base_offset regardless of memory partition.
+ */
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
last_start, prange->start + i,
pte_flags,
@@ -1365,16 +1399,14 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
unsigned long *bitmap, bool wait, bool flush_tlb)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *bo_adev;
+ struct amdgpu_device *bo_adev = NULL;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
int r = 0;
if (prange->svm_bo && prange->ttm_res)
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- else
- bo_adev = NULL;
+ bo_adev = prange->svm_bo->node->adev;
p = container_of(prange->svms, struct kfd_process, svms);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
@@ -1522,48 +1554,54 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
struct svm_range *prange, int32_t gpuidx,
bool intr, bool wait, bool flush_tlb)
{
- struct svm_validate_context ctx;
+ struct svm_validate_context *ctx;
unsigned long start, end, addr;
struct kfd_process *p;
void *owner;
int32_t idx;
int r = 0;
- ctx.process = container_of(prange->svms, struct kfd_process, svms);
- ctx.prange = prange;
- ctx.intr = intr;
+ ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->process = container_of(prange->svms, struct kfd_process, svms);
+ ctx->prange = prange;
+ ctx->intr = intr;
if (gpuidx < MAX_GPU_INSTANCE) {
- bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
- bitmap_set(ctx.bitmap, gpuidx, 1);
- } else if (ctx.process->xnack_enabled) {
- bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+ bitmap_zero(ctx->bitmap, MAX_GPU_INSTANCE);
+ bitmap_set(ctx->bitmap, gpuidx, 1);
+ } else if (ctx->process->xnack_enabled) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
/* If prefetch range to GPU, or GPU retry fault migrate range to
* GPU, which has ACCESS attribute to the range, create mapping
* on that GPU.
*/
if (prange->actual_loc) {
- gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
+ gpuidx = kfd_process_gpuidx_from_gpuid(ctx->process,
prange->actual_loc);
if (gpuidx < 0) {
WARN_ONCE(1, "failed get device by id 0x%x\n",
prange->actual_loc);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_ctx;
}
if (test_bit(gpuidx, prange->bitmap_access))
- bitmap_set(ctx.bitmap, gpuidx, 1);
+ bitmap_set(ctx->bitmap, gpuidx, 1);
}
} else {
- bitmap_or(ctx.bitmap, prange->bitmap_access,
+ bitmap_or(ctx->bitmap, prange->bitmap_access,
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
- if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
- if (!prange->mapped_to_gpu)
- return 0;
+ if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+ if (!prange->mapped_to_gpu) {
+ r = 0;
+ goto free_ctx;
+ }
- bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
}
if (prange->actual_loc && !prange->ttm_res) {
@@ -1571,15 +1609,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
* svm_migrate_ram_to_vram after allocating a BO.
*/
WARN_ONCE(1, "VRAM BO missing during validation\n");
- return -EINVAL;
+ r = -EINVAL;
+ goto free_ctx;
}
- svm_range_reserve_bos(&ctx);
+ svm_range_reserve_bos(ctx);
p = container_of(prange->svms, struct kfd_process, svms);
- owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+ owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
MAX_GPU_INSTANCE));
- for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+ for_each_set_bit(idx, ctx->bitmap, MAX_GPU_INSTANCE) {
if (kfd_svm_page_owner(p, idx) != owner) {
owner = NULL;
break;
@@ -1616,7 +1655,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
offset = (addr - start) >> PAGE_SHIFT;
- r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
+ r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
hmm_range->hmm_pfns);
if (r) {
pr_debug("failed %d to dma map range\n", r);
@@ -1636,7 +1675,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait, flush_tlb);
+ ctx->bitmap, wait, flush_tlb);
unlock_out:
svm_range_unlock(prange);
@@ -1650,11 +1689,15 @@ unlock_out:
}
unreserve_out:
- svm_range_unreserve_bos(&ctx);
+ svm_range_unreserve_bos(ctx);
+ prange->is_error_flag = !!r;
if (!r)
prange->validate_timestamp = ktime_get_boottime();
+free_ctx:
+ kfree(ctx);
+
return r;
}
@@ -1783,6 +1826,7 @@ out_reschedule:
* @mm: current process mm_struct
* @start: starting process queue number
* @last: last process queue number
+ * @event: mmu notifier event when range is evicted or migrated
*
* Stop all queues of the process to ensure GPU doesn't access the memory, then
* return to let CPU evict the buffer and proceed CPU pagetable update.
@@ -1906,14 +1950,23 @@ void svm_range_set_max_pages(struct amdgpu_device *adev)
{
uint64_t max_pages;
uint64_t pages, _pages;
+ uint64_t min_pages = 0;
+ int i, id;
+
+ for (i = 0; i < adev->kfd.dev->num_nodes; i++) {
+ if (adev->kfd.dev->nodes[i]->xcp)
+ id = adev->kfd.dev->nodes[i]->xcp->id;
+ else
+ id = -1;
+ pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17;
+ pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+ pages = rounddown_pow_of_two(pages);
+ min_pages = min_not_zero(min_pages, pages);
+ }
- /* 1/32 VRAM size in pages */
- pages = adev->gmc.real_vram_size >> 17;
- pages = clamp(pages, 1ULL << 9, 1ULL << 18);
- pages = rounddown_pow_of_two(pages);
do {
max_pages = READ_ONCE(max_svm_range_pages);
- _pages = min_not_zero(max_pages, pages);
+ _pages = min_not_zero(max_pages, min_pages);
} while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
}
@@ -2507,29 +2560,31 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
*/
static int32_t
svm_range_best_restore_location(struct svm_range *prange,
- struct amdgpu_device *adev,
+ struct kfd_node *node,
int32_t *gpuidx)
{
- struct amdgpu_device *bo_adev, *preferred_adev;
+ struct kfd_node *bo_node, *preferred_node;
struct kfd_process *p;
uint32_t gpuid;
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx);
+ r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx);
if (r < 0) {
pr_debug("failed to get gpuid from kgd\n");
return -1;
}
+ if (node->adev->gmc.is_app_apu)
+ return 0;
+
if (prange->preferred_loc == gpuid ||
prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
return prange->preferred_loc;
} else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
- preferred_adev = svm_range_get_adev_by_id(prange,
- prange->preferred_loc);
- if (amdgpu_xgmi_same_hive(adev, preferred_adev))
+ preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc);
+ if (preferred_node && svm_nodes_in_same_hive(node, preferred_node))
return prange->preferred_loc;
/* fall through */
}
@@ -2541,8 +2596,8 @@ svm_range_best_restore_location(struct svm_range *prange,
if (!prange->actual_loc)
return 0;
- bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ bo_node = svm_range_get_node_by_id(prange, prange->actual_loc);
+ if (bo_node && svm_nodes_in_same_hive(node, bo_node))
return prange->actual_loc;
else
return 0;
@@ -2659,7 +2714,7 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
}
static struct
-svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
struct kfd_process *p,
struct mm_struct *mm,
int64_t addr)
@@ -2694,7 +2749,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
- if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
+ if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
pr_debug("failed to get gpuid from kgd\n");
svm_range_free(prange, true);
return NULL;
@@ -2748,7 +2803,7 @@ static bool svm_range_skip_recover(struct svm_range *prange)
}
static void
-svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
+svm_range_count_fault(struct kfd_node *node, struct kfd_process *p,
int32_t gpuidx)
{
struct kfd_process_device *pdd;
@@ -2761,7 +2816,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
uint32_t gpuid;
int r;
- r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx);
+ r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx);
if (r < 0)
return;
}
@@ -2789,6 +2844,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+ uint32_t vmid, uint32_t node_id,
uint64_t addr, bool write_fault)
{
struct mm_struct *mm = NULL;
@@ -2796,6 +2852,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range *prange;
struct kfd_process *p;
ktime_t timestamp = ktime_get_boottime();
+ struct kfd_node *node;
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
@@ -2803,7 +2860,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
bool migration = false;
int r = 0;
- if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
+ if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
pr_debug("device does not support SVM\n");
return -EFAULT;
}
@@ -2839,6 +2896,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
+ node = kfd_node_by_irq_ids(adev, node_id, vmid);
+ if (!node) {
+ pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+ vmid);
+ r = -EFAULT;
+ goto out;
+ }
mmap_read_lock(mm);
retry_write_locked:
mutex_lock(&svms->lock);
@@ -2857,7 +2921,7 @@ retry_write_locked:
write_locked = true;
goto retry_write_locked;
}
- prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+ prange = svm_range_create_unregistered_range(node, p, mm, addr);
if (!prange) {
pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
svms, addr);
@@ -2872,7 +2936,7 @@ retry_write_locked:
mutex_lock(&prange->migrate_mutex);
if (svm_range_skip_recover(prange)) {
- amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
goto out_unlock_range;
}
@@ -2903,7 +2967,7 @@ retry_write_locked:
goto out_unlock_range;
}
- best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
+ best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
if (best_loc == -1) {
pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
svms, prange->start, prange->last);
@@ -2915,7 +2979,7 @@ retry_write_locked:
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
- kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+ kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
write_fault, timestamp);
if (prange->actual_loc != best_loc) {
@@ -2953,7 +3017,7 @@ retry_write_locked:
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
- kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+ kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
migration);
out_unlock_range:
@@ -2962,7 +3026,7 @@ out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
- svm_range_count_fault(adev, p, gpuidx);
+ svm_range_count_fault(node, p, gpuidx);
mmput(mm);
out:
@@ -2970,7 +3034,7 @@ out:
if (r == -EAGAIN) {
pr_debug("recover vm fault later\n");
- amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
}
return r;
@@ -2994,10 +3058,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
} else {
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
if (r)
goto out_unlock;
reserved_size += size;
@@ -3007,10 +3071,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
size = (prange->last - prange->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
} else {
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
if (r)
goto out_unlock;
reserved_size += size;
@@ -3023,7 +3087,7 @@ out_unlock:
if (r)
amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
else
/* Change xnack mode must be inside svms lock, to avoid race with
* svm_range_deferred_list_work unreserve memory in parallel.
@@ -3081,7 +3145,7 @@ int svm_range_list_init(struct kfd_process *p)
spin_lock_init(&svms->deferred_list_lock);
for (i = 0; i < p->n_pdds; i++)
- if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
+ if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
return 0;
@@ -3212,7 +3276,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
uint32_t best_loc = prange->prefetch_loc;
struct kfd_process_device *pdd;
- struct amdgpu_device *bo_adev;
+ struct kfd_node *bo_node;
struct kfd_process *p;
uint32_t gpuidx;
@@ -3221,9 +3285,14 @@ svm_range_best_prefetch_location(struct svm_range *prange)
if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
goto out;
- bo_adev = svm_range_get_adev_by_id(prange, best_loc);
- if (!bo_adev) {
- WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
+ bo_node = svm_range_get_node_by_id(prange, best_loc);
+ if (!bo_node) {
+ WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc);
+ best_loc = 0;
+ goto out;
+ }
+
+ if (bo_node->adev->gmc.is_app_apu) {
best_loc = 0;
goto out;
}
@@ -3241,10 +3310,10 @@ svm_range_best_prefetch_location(struct svm_range *prange)
continue;
}
- if (pdd->dev->adev == bo_adev)
+ if (pdd->dev->adev == bo_node->adev)
continue;
- if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
+ if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) {
best_loc = 0;
break;
}