diff options
author | Dave Airlie <airlied@redhat.com> | 2022-05-06 15:05:27 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-05-06 15:05:27 +1000 |
commit | b900352f9ddebc5c8dd30dc16218c4ff1b8c9147 (patch) | |
tree | b4b376bfa21a1dd65164b295f048eadc91f9ccec /drivers/gpu/drm/amd/amdkfd | |
parent | 8d62a974ac5fa1609e57a54622eef71e87bace78 (diff) | |
parent | 3da2c38231a4c62dafdbd762a199cfacaccd0533 (diff) |
Merge tag 'amd-drm-next-5.19-2022-04-29' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.19-2022-04-29:
amdgpu
- RAS updates
- SI dpm deadlock fix
- Misc code cleanups
- HDCP fixes
- PSR fixes
- DSC fixes
- SDMA doorbell cleanups
- S0ix fix
- DC FP fix
- Zen dom0 regression fix for APUs
- IP discovery updates
- Initial SoC21 support
- Support for new vbios tables
- Runtime PM fixes
- Add PSP TA debugfs interface
amdkfd:
- Misc code cleanups
- Ignore bogus MEC signals more efficiently
- SVM fixes
- Use bitmap helpers
radeon:
- Misc code cleanups
- Spelling/grammar fixes
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220429144853.5742-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 33 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 66 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 17 |
11 files changed, 138 insertions, 68 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ee8b288dd8cc..1c7016958d6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -944,8 +944,6 @@ err_drm_file: bool kfd_dev_is_large_bar(struct kfd_dev *dev) { - struct kfd_local_mem_info mem_info; - if (debug_largebar) { pr_debug("Simulate large-bar allocation on non large-bar machine\n"); return true; @@ -954,9 +952,8 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) if (dev->use_iommu_v2) return false; - amdgpu_amdkfd_get_local_mem_info(dev->adev, &mem_info); - if (mem_info.local_mem_size_private == 0 && - mem_info.local_mem_size_public > 0) + if (dev->local_mem_info.local_mem_size_private == 0 && + dev->local_mem_info.local_mem_size_public > 0) return true; return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index afc8a7fcdad8..90c70adc946c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2152,7 +2152,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. 
*/ - amdgpu_amdkfd_get_local_mem_info(kdev->adev, &local_mem_info); + local_mem_info = kdev->local_mem_info; sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 62aa6c9d5123..ed33e95c03e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -575,6 +575,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd_resume(kfd)) goto kfd_resume_error; + amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); + if (kfd_topology_add_device(kfd)) { dev_err(kfd_device, "Error adding device to topology\n"); goto kfd_topology_add_device_error; @@ -873,8 +875,6 @@ out: static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { - unsigned int num_of_longs; - if (WARN_ON(buf_size < chunk_size)) return -EINVAL; if (WARN_ON(buf_size == 0)) @@ -885,11 +885,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, kfd->gtt_sa_chunk_size = chunk_size; kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; - num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / - BITS_PER_LONG; - - kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); - + kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks, + GFP_KERNEL); if (!kfd->gtt_sa_bitmap) return -ENOMEM; @@ -899,13 +896,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, mutex_init(&kfd->gtt_sa_lock); return 0; - } static void kfd_gtt_sa_fini(struct kfd_dev *kfd) { mutex_destroy(&kfd->gtt_sa_lock); - kfree(kfd->gtt_sa_bitmap); + bitmap_free(kfd->gtt_sa_bitmap); } static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, @@ -973,7 +969,7 @@ kfd_gtt_restart_search: /* If we need only one chunk, mark it as allocated and get out */ if (size <= kfd->gtt_sa_chunk_size) { pr_debug("Single bit\n"); - set_bit(found, kfd->gtt_sa_bitmap); + 
__set_bit(found, kfd->gtt_sa_bitmap); goto kfd_gtt_out; } @@ -1011,10 +1007,8 @@ kfd_gtt_restart_search: (*mem_obj)->range_start, (*mem_obj)->range_end); /* Mark the chunks as allocated */ - for (found = (*mem_obj)->range_start; - found <= (*mem_obj)->range_end; - found++) - set_bit(found, kfd->gtt_sa_bitmap); + bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start, + (*mem_obj)->range_end - (*mem_obj)->range_start + 1); kfd_gtt_out: mutex_unlock(&kfd->gtt_sa_lock); @@ -1029,8 +1023,6 @@ kfd_gtt_no_free_chunk: int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) { - unsigned int bit; - /* Act like kfree when trying to free a NULL object */ if (!mem_obj) return 0; @@ -1041,10 +1033,8 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) mutex_lock(&kfd->gtt_sa_lock); /* Mark the chunks as free */ - for (bit = mem_obj->range_start; - bit <= mem_obj->range_end; - bit++) - clear_bit(bit, kfd->gtt_sa_bitmap); + bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start, + mem_obj->range_end - mem_obj->range_start + 1); mutex_unlock(&kfd->gtt_sa_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6e5e8d637f48..4df9c36146ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -238,12 +238,24 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const return 0; } -void kfd_event_init_process(struct kfd_process *p) +int kfd_event_init_process(struct kfd_process *p) { + int id; + mutex_init(&p->event_mutex); idr_init(&p->event_idr); p->signal_page = NULL; - p->signal_event_count = 0; + p->signal_event_count = 1; + /* Allocate event ID 0. 
It is used for a fast path to ignore bogus events + * that are sent by the CP without a context ID + */ + id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL); + if (id < 0) { + idr_destroy(&p->event_idr); + mutex_destroy(&p->event_mutex); + return id; + } + return 0; } static void destroy_event(struct kfd_process *p, struct kfd_event *ev) @@ -271,8 +283,10 @@ static void destroy_events(struct kfd_process *p) uint32_t id; idr_for_each_entry(&p->event_idr, ev, id) - destroy_event(p, ev); + if (ev) + destroy_event(p, ev); idr_destroy(&p->event_idr); + mutex_destroy(&p->event_mutex); } /* @@ -749,7 +763,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, * iterate over the signal slots and lookup * only signaled events from the IDR. */ - for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) + for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++) if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) { ev = lookup_event_by_id(p, id); set_event_from_interrupt(p, ev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 03c29bdd89a1..f27fe022ef6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -141,6 +141,25 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev, } } +static bool context_id_expected(struct kfd_dev *dev) +{ + switch (KFD_GC_VERSION(dev)) { + case IP_VERSION(9, 0, 1): + return dev->mec_fw_version >= 0x817a; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 3, 0): + case IP_VERSION(9, 4, 0): + return dev->mec_fw_version >= 0x17a; + default: + /* Other GFXv9 and later GPUs always sent valid context IDs + * on legitimate events + */ + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 1); + } +} + static bool event_interrupt_isr_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -206,6 +225,20 @@ static bool 
event_interrupt_isr_v9(struct kfd_dev *dev, if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) return false; + /* Workaround CP firmware sending bogus signals with 0 context_id. + * Those can be safely ignored on hardware and firmware versions that + * include a valid context_id on legitimate signals. This avoids the + * slow path in kfd_signal_event_interrupt that scans all event slots + * for signaled events. + */ + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) { + uint32_t context_id = + SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if (context_id == 0 && context_id_expected(dev)) + return false; + } + /* Interrupt types we care about: various signals and faults. * They will be forwarded to a work queue (see below). */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7e3a7fcb9fe6..997650d597ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -410,7 +410,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma migrate; unsigned long cpages = 0; dma_addr_t *scratch; - size_t size; void *buf; int r = -ENOMEM; @@ -421,9 +420,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); - size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); - size *= npages; - buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + buf = kvcalloc(npages, + 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + GFP_KERNEL); if (!buf) goto out; @@ -665,7 +664,6 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct dma_fence *mfence = NULL; struct migrate_vma migrate; dma_addr_t *scratch; - size_t size; void *buf; int r = -ENOMEM; @@ -676,9 +674,10 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.flags = 
MIGRATE_VMA_SELECT_DEVICE_PRIVATE; migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); - size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); - size *= npages; - buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + buf = kvcalloc(npages, + 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + GFP_KERNEL); + if (!buf) goto out; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bfb3b5c288cb..10bb3bb46246 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -272,6 +272,7 @@ struct kfd_dev { struct kgd2kfd_shared_resources shared_resources; struct kfd_vmid_info vm_info; + struct kfd_local_mem_info local_mem_info; const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; @@ -1295,7 +1296,7 @@ extern const struct kfd_event_interrupt_class event_interrupt_class_v9; extern const struct kfd_device_global_init_class device_global_init_class_cik; -void kfd_event_init_process(struct kfd_process *p); +int kfd_event_init_process(struct kfd_process *p); void kfd_event_free_process(struct kfd_process *p); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_wait_on_events(struct kfd_process *p, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9e82d7aa67fa..cb8f4a459add 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1370,12 +1370,16 @@ static struct kfd_process *create_process(const struct task_struct *thread) INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); process->last_restore_timestamp = get_jiffies_64(); - kfd_event_init_process(process); + err = kfd_event_init_process(process); + if (err) + goto err_event_init; process->is_32bit_user_mode = in_compat_syscall(); process->pasid = kfd_pasid_alloc(); - if (process->pasid == 0) + 
if (process->pasid == 0) { + err = -ENOSPC; goto err_alloc_pasid; + } err = pqm_init(&process->pqm, process); if (err != 0) @@ -1424,6 +1428,8 @@ err_init_apertures: err_process_pqm_init: kfd_pasid_free(process->pasid); err_alloc_pasid: + kfd_event_free_process(process); +err_event_init: mutex_destroy(&process->mutex); kfree(process); err_alloc_process: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 11b395b90a3d..29e9ebf6d8d5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -149,8 +149,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, int i, r; if (!addr) { - addr = kvmalloc_array(prange->npages, sizeof(*addr), - GFP_KERNEL | __GFP_ZERO); + addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL); if (!addr) return -ENOMEM; prange->dma_addr[gpuidx] = addr; @@ -686,7 +685,8 @@ svm_range_check_attr(struct kfd_process *p, static void svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, - uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, + bool *update_mapping) { uint32_t i; int gpuidx; @@ -702,6 +702,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, case KFD_IOCTL_SVM_ATTR_ACCESS: case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + *update_mapping = true; gpuidx = kfd_process_gpuidx_from_gpuid(p, attrs[i].value); if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { @@ -716,9 +717,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, } break; case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + *update_mapping = true; prange->flags |= attrs[i].value; break; case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + *update_mapping = true; prange->flags &= ~attrs[i].value; break; case KFD_IOCTL_SVM_ATTR_GRANULARITY: @@ -951,6 +954,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->prefetch_loc = 
old->prefetch_loc; new->actual_loc = old->actual_loc; new->granularity = old->granularity; + new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -1204,6 +1208,17 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, uint32_t gpuidx; int r = 0; + if (!prange->mapped_to_gpu) { + pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n", + prange, prange->start, prange->last); + return 0; + } + + if (prange->start == start && prange->last == last) { + pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange); + prange->mapped_to_gpu = false; + } + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); p = container_of(prange->svms, struct kfd_process, svms); @@ -1239,7 +1254,7 @@ static int svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, unsigned long offset, unsigned long npages, bool readonly, dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, - struct dma_fence **fence) + struct dma_fence **fence, bool flush_tlb) { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); @@ -1277,7 +1292,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 
1 : 0, pte_flags); - r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL, + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, last_start, prange->start + i, pte_flags, last_start - prange->start, @@ -1311,7 +1326,7 @@ out: static int svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, unsigned long npages, bool readonly, - unsigned long *bitmap, bool wait) + unsigned long *bitmap, bool wait, bool flush_tlb) { struct kfd_process_device *pdd; struct amdgpu_device *bo_adev; @@ -1346,7 +1361,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly, prange->dma_addr[gpuidx], - bo_adev, wait ? &fence : NULL); + bo_adev, wait ? &fence : NULL, + flush_tlb); if (r) break; @@ -1467,8 +1483,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) * 5. Release page table (and SVM BO) reservation */ static int svm_range_validate_and_map(struct mm_struct *mm, - struct svm_range *prange, - int32_t gpuidx, bool intr, bool wait) + struct svm_range *prange, int32_t gpuidx, + bool intr, bool wait, bool flush_tlb) { struct svm_validate_context ctx; unsigned long start, end, addr; @@ -1507,8 +1523,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm, prange->bitmap_aip, MAX_GPU_INSTANCE); } - if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) - return 0; + if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) { + if (!prange->mapped_to_gpu) + return 0; + + bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); + } if (prange->actual_loc && !prange->ttm_res) { /* This should never happen. 
actual_loc gets set by @@ -1580,7 +1600,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx.bitmap, wait); + ctx.bitmap, wait, flush_tlb); unlock_out: svm_range_unlock(prange); @@ -1588,8 +1608,10 @@ unlock_out: addr = next; } - if (addr == end) + if (addr == end) { prange->validated_once = true; + prange->mapped_to_gpu = true; + } unreserve_out: svm_range_unreserve_bos(&ctx); @@ -1674,7 +1696,7 @@ static void svm_range_restore_work(struct work_struct *work) mutex_lock(&prange->migrate_mutex); r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - false, true); + false, true, false); if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); @@ -1820,6 +1842,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->prefetch_loc = old->prefetch_loc; new->actual_loc = old->actual_loc; new->granularity = old->granularity; + new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -2811,7 +2834,7 @@ retry_write_locked: } } - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false); + r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); if (r) pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", r, svms, prange->start, prange->last); @@ -3224,6 +3247,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, struct svm_range_list *svms; struct svm_range *prange; struct svm_range *next; + bool update_mapping = false; + bool flush_tlb; int r = 0; pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", @@ -3262,7 +3287,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, svm_range_add_notifier_locked(mm, prange); } list_for_each_entry(prange, &update_list, update_list) { - svm_range_apply_attrs(p, prange, nattr, attrs); + svm_range_apply_attrs(p, prange, nattr, 
attrs, &update_mapping); /* TODO: unmap ranges from GPU that lost access */ } list_for_each_entry_safe(prange, next, &remove_list, update_list) { @@ -3295,8 +3320,15 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, continue; } + if (!migrated && !update_mapping) { + mutex_unlock(&prange->migrate_mutex); + continue; + } + + flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - true, true); + true, true, flush_tlb); if (r) pr_debug("failed %d to map svm range\n", r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 66c77f00ac3e..2d54147b4dda 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -133,6 +133,7 @@ struct svm_range { DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); bool validated_once; + bool mapped_to_gpu; }; static inline void svm_range_lock(struct svm_range *prange) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8b7710b4d3ed..05089f1de4e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1112,15 +1112,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) uint32_t buf[7]; uint64_t local_mem_size; int i; - struct kfd_local_mem_info local_mem_info; if (!gpu) return 0; - amdgpu_amdkfd_get_local_mem_info(gpu->adev, &local_mem_info); - - local_mem_size = local_mem_info.local_mem_size_private + - local_mem_info.local_mem_size_public; + local_mem_size = gpu->local_mem_info.local_mem_size_private + + gpu->local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; buf[1] = gpu->pdev->subsystem_vendor | @@ -1534,13 +1531,13 @@ static void kfd_topology_update_io_links(int proximity_domain) list_del(&iolink->list); dev->io_link_count--; dev->node_props.io_links_count--; - } else if (iolink->node_from > 
proximity_domain) { - iolink->node_from--; - } else if (iolink->node_to > proximity_domain) { - iolink->node_to--; + } else { + if (iolink->node_from > proximity_domain) + iolink->node_from--; + if (iolink->node_to > proximity_domain) + iolink->node_to--; } } - } } |