diff options
Diffstat (limited to 'drivers/gpu/drm/amd')
21 files changed, 402 insertions, 655 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6003f9454354..81a531b652aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -974,6 +974,8 @@ struct amdgpu_device { struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + struct mutex notifier_lock; + int asic_reset_res; struct work_struct xgmi_reset_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d8db5ecdf9c1..b2487f4f271b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -518,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, - uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -1212,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); + ret = init_user_pages(*mem, user_addr); if (ret) goto allocate_init_user_pages_failed; } @@ -1757,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return ret; } + /* + * FIXME: Cannot ignore the return code, must hold + * notifier_lock + */ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 06fb535a504f..5b330f69194b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -538,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -1212,11 +1210,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock. - * p->mn is hold until amdgpu_cs_submit is finished and fence is added - * to BOs. + /* No memory allocation is allowed while holding the notifier lock. + * The lock is held until amdgpu_cs_submit is finished and fence is + * added to BOs. */ - amdgpu_mn_lock(p->mn); + mutex_lock(&p->adev->notifier_lock); /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. @@ -1259,13 +1257,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: drm_sched_job_cleanup(&job->base); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e2eec7b66334..a59cd47aa6c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -360,10 +360,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, return ERR_PTR(-EPERM); buf = drm_gem_prime_export(gobj, flags); - if (!IS_ERR(buf)) { - buf->file->f_mapping = gobj->dev->anon_inode->i_mapping; + if (!IS_ERR(buf)) buf->ops = &amdgpu_dmabuf_ops; - } return buf; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 143753d237e7..2672dc64a310 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user) return 0; } -static struct fb_ops amdgpufb_ops = { +static const struct fb_ops amdgpufb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, .fb_open = amdgpufb_open, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 392300f77b13..828b5167ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -51,439 +51,107 @@ #include "amdgpu_amdkfd.h" /** - * struct amdgpu_mn_node + * amdgpu_mn_invalidate_gfx - callback to notify about mm change * - * @it: interval node defining start-last of the affected address range - * @bos: list of all BOs in the affected address range - * - * Manages all BOs which are affected of a certain range of address space. - */ -struct amdgpu_mn_node { - struct interval_tree_node it; - struct list_head bos; -}; - -/** - * amdgpu_mn_destroy - destroy the HMM mirror - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void amdgpu_mn_destroy(struct work_struct *work) -{ - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = amn->adev; - struct amdgpu_mn_node *node, *next_node; - struct amdgpu_bo *bo, *next_bo; - - mutex_lock(&adev->mn_lock); - down_write(&amn->lock); - hash_del(&amn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &amn->objects.rb_root, it.rb) { - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); - - hmm_mirror_unregister(&amn->mirror); - kfree(amn); -} - -/** - * amdgpu_hmm_mirror_release - callback to notify about mm destruction - * - * @mirror: the HMM mirror (mm) this callback is about - * - * Shedule a work item to lazy destroy HMM mirror. - */ -static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - - INIT_WORK(&amn->work, amdgpu_mn_destroy); - schedule_work(&amn->work); -} - -/** - * amdgpu_mn_lock - take the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_lock(struct amdgpu_mn *mn) -{ - if (mn) - down_write(&mn->lock); -} - -/** - * amdgpu_mn_unlock - drop the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_unlock(struct amdgpu_mn *mn) -{ - if (mn) - up_write(&mn->lock); -} - -/** - * amdgpu_mn_read_lock - take the read side lock for this notifier - * - * @amn: our notifier - * @blockable: is the notifier blockable - */ -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) -{ - if (blockable) - down_read(&amn->lock); - else if (!down_read_trylock(&amn->lock)) - return -EAGAIN; - - return 0; -} - -/** - * amdgpu_mn_read_unlock - drop the read side lock for this notifier - * - * @amn: our notifier - */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) -{ - up_read(&amn->lock); -} - -/** - * amdgpu_mn_invalidate_node - unmap all BOs of a node - * - * @node: the node with the BOs to unmap - * @start: start of address range affected - * @end: end of address range affected + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, - unsigned long start, - unsigned long end) +static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; - list_for_each_entry(bo, &node->bos, mn_list) { - - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) - continue; - - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, - true, false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - DRM_ERROR("(%ld) failed to wait for user bo\n", r); - } -} - -/** - * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * Block for operations on BOs to finish and mark pages as accessed and - * potentially dirty. - */ -static int -amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = mmu_notifier_range_blockable(update); - struct interval_tree_node *it; + if (!mmu_notifier_range_blockable(range)) + return false; - /* notification is exclusive, but interval is inclusive */ - end -= 1; + mutex_lock(&adev->notifier_lock); - /* TODO we should be able to split locking for interval tree and - * amdgpu_mn_invalidate_node - */ - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; + mmu_interval_set_seq(mni, cur_seq); - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - amdgpu_mn_invalidate_node(node, start, end); - } - - amdgpu_mn_read_unlock(amn); - - return 0; -} - -/** - * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * We temporarily evict all BOs between start and end. This - * necessitates evicting all user-mode queues of the process. The BOs - * are restorted in amdgpu_mn_invalidate_range_end_hsa. - */ -static int -amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = mmu_notifier_range_blockable(update); - struct interval_tree_node *it; - - /* notification is exclusive, but interval is inclusive */ - end -= 1; - - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - struct amdgpu_bo *bo; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - list_for_each_entry(bo, &node->bos, mn_list) { - struct kgd_mem *mem = bo->kfd_bo; - - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) - amdgpu_amdkfd_evict_userptr(mem, amn->mm); - } - } - - amdgpu_mn_read_unlock(amn); - - return 0; + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&adev->notifier_lock); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); + return true; } -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - -static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { - [AMDGPU_MN_TYPE_GFX] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, - .release = amdgpu_hmm_mirror_release - }, - [AMDGPU_MN_TYPE_HSA] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, - .release = amdgpu_hmm_mirror_release - }, +static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { + .invalidate = amdgpu_mn_invalidate_gfx, }; /** - * amdgpu_mn_get - create HMM mirror context + * amdgpu_mn_invalidate_hsa - callback to notify about mm change * - * @adev: amdgpu device pointer - * @type: type of MMU notifier context + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * - * Creates a HMM mirror context for current->mm. + * We temporarily evict the BO attached to this range. This necessitates + * evicting all user-mode queues of the process. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) +static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct mm_struct *mm = current->mm; - struct amdgpu_mn *amn; - unsigned long key = AMDGPU_MN_KEY(mm, type); - int r; - - mutex_lock(&adev->mn_lock); - if (down_write_killable(&mm->mmap_sem)) { - mutex_unlock(&adev->mn_lock); - return ERR_PTR(-EINTR); - } - - hash_for_each_possible(adev->mn_hash, amn, node, key) - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) - goto release_locks; - - amn = kzalloc(sizeof(*amn), GFP_KERNEL); - if (!amn) { - amn = ERR_PTR(-ENOMEM); - goto release_locks; - } - - amn->adev = adev; - amn->mm = mm; - init_rwsem(&amn->lock); - amn->type = type; - amn->objects = RB_ROOT_CACHED; - - amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; - r = hmm_mirror_register(&amn->mirror, mm); - if (r) - goto free_amn; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); + if (!mmu_notifier_range_blockable(range)) + return false; -release_locks: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); + mutex_lock(&adev->notifier_lock); - return amn; + mmu_interval_set_seq(mni, cur_seq); -free_amn: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - kfree(amn); + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); + mutex_unlock(&adev->notifier_lock); - return ERR_PTR(r); + return true; } +static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { + .invalidate = amdgpu_mn_invalidate_hsa, +}; + /** * amdgpu_mn_register - register a BO for notifier updates * * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an HMM mirror for the given BO at the specified address. + * Registers a mmu_notifier for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { - unsigned long end = addr + amdgpu_bo_size(bo) - 1; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - enum amdgpu_mn_type type = - bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *amn; - struct amdgpu_mn_node *node = NULL, *new_node; - struct list_head bos; - struct interval_tree_node *it; - - amn = amdgpu_mn_get(adev, type); - if (IS_ERR(amn)) - return PTR_ERR(amn); - - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); - if (!new_node) - return -ENOMEM; - - INIT_LIST_HEAD(&bos); - - down_write(&amn->lock); - - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { - kfree(node); - node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &amn->objects); - addr = min(it->start, addr); - end = max(it->last, end); - list_splice(&node->bos, &bos); - } - - if (!node) - node = new_node; - else - kfree(new_node); - - bo->mn = amn; - - node->it.start = addr; - node->it.last = end; - INIT_LIST_HEAD(&node->bos); - list_splice(&bos, &node->bos); - list_add(&bo->mn_list, &node->bos); - - interval_tree_insert(&node->it, &amn->objects); - - up_write(&amn->lock); - - return 0; + if (bo->kfd_bo) + return mmu_interval_notifier_insert(&bo->notifier, current->mm, + addr, amdgpu_bo_size(bo), + &amdgpu_mn_hsa_ops); + return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + amdgpu_bo_size(bo), + &amdgpu_mn_gfx_ops); } /** - * amdgpu_mn_unregister - unregister a BO for HMM mirror updates + * amdgpu_mn_unregister - unregister a BO for notifier updates * * @bo: amdgpu buffer object * - * Remove any registration of HMM mirror updates from the buffer object. + * Remove any registration of mmu notifier updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *amn; - struct list_head *head; - - mutex_lock(&adev->mn_lock); - - amn = bo->mn; - if (amn == NULL) { - mutex_unlock(&adev->mn_lock); + if (!bo->notifier.mm) return; - } - - down_write(&amn->lock); - - /* save the next list entry for later */ - head = bo->mn_list.next; - - bo->mn = NULL; - list_del_init(&bo->mn_list); - - if (list_empty(head)) { - struct amdgpu_mn_node *node; - - node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &amn->objects); - kfree(node); - } - - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); -} - -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { - (1 << 0), /* HMM_PFN_VALID */ - (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ -}; - -static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ - 0, /* HMM_PFN_NONE */ - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ -}; - -void amdgpu_hmm_init_range(struct hmm_range *range) -{ - if (range) { - range->flags = hmm_range_flags; - range->values = hmm_range_values; - range->pfn_shift = PAGE_SHIFT; - } + mmu_interval_notifier_remove(&bo->notifier); + bo->notifier.mm = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index b8ed68943625..a292238f75eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,63 +30,10 @@ #include <linux/workqueue.h> #include <linux/interval_tree.h> -enum amdgpu_mn_type { - AMDGPU_MN_TYPE_GFX, - AMDGPU_MN_TYPE_HSA, -}; - -/** - * struct amdgpu_mn - * - * @adev: amdgpu device pointer - * @mm: process address space - * @type: type of MMU notifier - * @work: destruction work item - * @node: hash table node to find structure by adev and mn - * @lock: rw semaphore protecting the notifier nodes - * @objects: interval tree containing amdgpu_mn_nodes - * @mirror: HMM mirror function support - * - * Data for each amdgpu device and process address space. - */ -struct amdgpu_mn { - /* constant after initialisation */ - struct amdgpu_device *adev; - struct mm_struct *mm; - enum amdgpu_mn_type type; - - /* only used on destruction */ - struct work_struct work; - - /* protected by adev->mn_lock */ - struct hlist_node node; - - /* objects protected by lock */ - struct rw_semaphore lock; - struct rb_root_cached objects; - -#ifdef CONFIG_HMM_MIRROR - /* HMM mirror */ - struct hmm_mirror mirror; -#endif -}; - #if defined(CONFIG_HMM_MIRROR) -void amdgpu_mn_lock(struct amdgpu_mn *mn); -void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); -void amdgpu_hmm_init_range(struct hmm_range *range); #else -static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} -static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - return NULL; -} static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7e99f6c58c48..36dec51d1ef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -30,6 +30,9 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#ifdef CONFIG_MMU_NOTIFIER +#include <linux/mmu_notifier.h> +#endif #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 @@ -101,10 +104,12 @@ struct amdgpu_bo { struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; - union { - struct list_head mn_list; - struct list_head shadow_list; - }; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_interval_notifier notifier; +#endif + + struct list_head shadow_list; struct kgd_mem *kfd_bo; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 80fc3d8ee1cf..445de594c214 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -35,6 +35,7 @@ #include <linux/hmm.h> #include <linux/pagemap.h> #include <linux/sched/task.h> +#include <linux/sched/mm.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swap.h> @@ -769,6 +770,20 @@ struct amdgpu_ttm_tt { #endif }; +#ifdef CONFIG_DRM_AMDGPU_USERPTR +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + /** * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update @@ -776,85 +791,89 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - -#define MAX_RETRY_HMM_RANGE_FAULT 16 - int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { - struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct mm_struct *mm = gtt->usertask->mm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct hmm_range *range; + unsigned long timeout; + struct mm_struct *mm; unsigned long i; - uint64_t *pfns; int r = 0; - if (!mm) /* Happens during process shutdown */ - return -ESRCH; - - if (unlikely(!mirror)) { - DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); - r = -EFAULT; - goto out; + mm = bo->notifier.mm; + if (unlikely(!mm)) { + DRM_DEBUG_DRIVER("BO is not registered?\n"); + return -EFAULT; } - vma = find_vma(mm, start); - if (unlikely(!vma || start < vma->vm_start)) { - r = -EFAULT; - goto out; - } - if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - vma->vm_file)) { - r = -EPERM; - goto out; - } + /* Another get_user_pages is running at the same time?? */ + if (WARN_ON(gtt->range)) + return -EFAULT; + + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ + return -ESRCH; range = kzalloc(sizeof(*range), GFP_KERNEL); if (unlikely(!range)) { r = -ENOMEM; goto out; } + range->notifier = &bo->notifier; + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->start = bo->notifier.interval_tree.start; + range->end = bo->notifier.interval_tree.last + 1; + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!amdgpu_ttm_tt_is_readonly(ttm)) + range->default_flags |= range->flags[HMM_PFN_WRITE]; - pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) { + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), + GFP_KERNEL); + if (unlikely(!range->pfns)) { r = -ENOMEM; goto out_free_ranges; } - amdgpu_hmm_init_range(range); - range->default_flags = range->flags[HMM_PFN_VALID]; - range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - range->pfn_flags_mask = 0; - range->pfns = pfns; - range->start = start; - range->end = start + ttm->num_pages * PAGE_SIZE; - - hmm_range_register(range, mirror); + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out_unlock; + } + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vma->vm_file)) { + r = -EPERM; + goto out_unlock; + } + up_read(&mm->mmap_sem); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - /* - * Just wait for range to be valid, safe to ignore return value as we - * will use the return value of hmm_range_fault() below under the - * mmap_sem to ascertain the validity of the range. - */ - hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); +retry: + range->notifier_seq = mmu_interval_read_begin(&bo->notifier); down_read(&mm->mmap_sem); r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); - - if (unlikely(r < 0)) + if (unlikely(r <= 0)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) + goto retry; goto out_free_pfns; + } for (i = 0; i < ttm->num_pages; i++) { - pages[i] = hmm_device_entry_to_page(range, pfns[i]); + /* FIXME: The pages cannot be touched outside the notifier_lock */ + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", - i, pfns[i]); + i, range->pfns[i]); r = -ENOMEM; goto out_free_pfns; @@ -862,15 +881,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) } gtt->range = range; + mmput(mm); return 0; +out_unlock: + up_read(&mm->mmap_sem); out_free_pfns: - hmm_range_unregister(range); - kvfree(pfns); + kvfree(range->pfns); out_free_ranges: kfree(range); out: + mmput(mm); return r; } @@ -895,15 +917,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) "No user pages to check\n"); if (gtt->range) { - r = hmm_range_valid(gtt->range); - hmm_range_unregister(gtt->range); - + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + r = mmu_interval_read_retry(gtt->range->notifier, + gtt->range->notifier_seq); kvfree(gtt->range->pfns); kfree(gtt->range); gtt->range = NULL; } - return r; + return !r; } #endif @@ -984,10 +1009,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range && - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, - gtt->range->pfns[0])) - WARN_ONCE(1, "Missing get_user_page_done\n"); + if (gtt->range) { + unsigned long i; + + for (i = 0; i < ttm->num_pages; i++) { + if (ttm->pages[i] != + hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[i])) + break; + } + + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); + } #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 1dfe4a1337cf..e9822ea8bb19 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1461,7 +1461,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) static void cik_pcie_gen3_enable(struct amdgpu_device *adev) { struct pci_dev *root = adev->pdev->bus->self; - int bridge_pos, gpu_pos; u32 speed_cntl, current_data_rate; int i; u16 tmp16; @@ -1496,12 +1495,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } - bridge_pos = pci_pcie_cap(root); - if (!bridge_pos) - return; - - gpu_pos = pci_pcie_cap(adev->pdev); - if (!gpu_pos) + if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) return; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { @@ -1511,14 +1505,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, + &gpu_cfg); tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, + tmp16); tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> @@ -1542,15 +1539,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) for (i = 0; i < 10; i++) { /* check status */ - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_DEVSTA, + &tmp16); if (tmp16 & PCI_EXP_DEVSTA_TRPND) break; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &gpu_cfg); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &bridge_cfg2); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &gpu_cfg2); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1563,26 +1568,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) msleep(100); /* linkctl */ - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, + tmp16); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL, + tmp16); /* linkctl2 */ - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16); - - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(root, + PCI_EXP_LNKCTL2, + tmp16); + + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL2, + tmp16); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1597,15 +1621,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~0xf; + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL2_TLS; + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - tmp16 |= 3; /* gen3 */ + tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - tmp16 |= 2; /* gen2 */ + tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else - tmp16 |= 1; /* gen1 */ - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL); speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 232469507446..f5725336a5f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -219,6 +219,21 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, return req; } +/** + * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!amdgpu_sriov_vf(adev))); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -229,6 +244,7 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int vmhub, uint32_t flush_type) { + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); /* Use register 17 for GART */ @@ -244,8 +260,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) { + if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); @@ -278,8 +293,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, } /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release @@ -369,6 +383,7 @@ error_alloc: static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -381,8 +396,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_sem + eng, 0x1, 0x1); @@ -398,8 +412,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 53dfc82ca171..fa025ceeea0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -416,6 +416,24 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, return req; } +/** + * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!amdgpu_sriov_vf(adev)) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -435,6 +453,7 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); const unsigned eng = 17; u32 j, tmp; struct amdgpu_vmhub *hub; @@ -468,11 +487,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && - (!(adev->asic_type == CHIP_RAVEN && - adev->rev_id < 0x8 && - adev->pdev->device == 0x15d8))) { + if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acuqire */ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); @@ -502,11 +517,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && - (!(adev->asic_type == CHIP_RAVEN && - adev->rev_id < 0x8 && - adev->pdev->device == 0x15d8))) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release @@ -524,6 +535,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); @@ -537,11 +549,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if ((ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) && - (!(adev->asic_type == CHIP_RAVEN && - adev->rev_id < 0x8 && - adev->pdev->device == 0x15d8))) + if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_sem + eng, 0x1, 0x1); @@ -557,11 +565,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if ((ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) && - (!(adev->asic_type == CHIP_RAVEN && - adev->rev_id < 0x8 && - adev->pdev->device == 0x15d8))) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index cb682d44737a..4d415bfdb42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1650,7 +1650,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev) static void si_pcie_gen3_enable(struct amdgpu_device *adev) { struct pci_dev *root = adev->pdev->bus->self; - int bridge_pos, gpu_pos; u32 speed_cntl, current_data_rate; int i; u16 tmp16; @@ -1685,12 +1684,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } - bridge_pos = pci_pcie_cap(root); - if (!bridge_pos) - return; - - gpu_pos = pci_pcie_cap(adev->pdev); - if (!gpu_pos) + if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) return; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { @@ -1699,14 +1693,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, + &gpu_cfg); tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, + tmp16); tmp = RREG32_PCIE(PCIE_LC_STATUS1); max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; @@ -1723,15 +1720,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) } for (i = 0; i < 10; i++) { - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_DEVSTA, + &tmp16); if (tmp16 & PCI_EXP_DEVSTA_TRPND) break; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &gpu_cfg); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &bridge_cfg2); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &gpu_cfg2); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp |= LC_SET_QUIESCE; @@ -1743,25 +1748,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) mdelay(100); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, + tmp16); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &tmp16); tmp16 &= ~PCI_EXP_LNKCTL_HAWD; tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); - - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16); - - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~((1 << 4) | (7 << 9)); - tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL, + tmp16); + + pcie_capability_read_word(root, PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(root, + PCI_EXP_LNKCTL2, + tmp16); + + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL2, + &tmp16); + tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN); + tmp16 |= (gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_write_word(adev->pdev, + PCI_EXP_LNKCTL2, + tmp16); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp &= ~LC_SET_QUIESCE; @@ -1774,15 +1798,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~0xf; + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL2_TLS; + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - tmp16 |= 3; + tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - tmp16 |= 2; + tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else - tmp16 |= 1; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index c44723c267c9..c902f26cf50d 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -234,7 +234,7 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", (uint16_t)address, numbytes); - if (drm_debug & DRM_UT_DRIVER) { + if (drm_debug_enabled(DRM_UT_DRIVER)) { print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, 16, 1, data, numbytes, false); } @@ -388,7 +388,7 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :", (uint16_t)address, bytes_received); - if (drm_debug & DRM_UT_DRIVER) { + if (drm_debug_enabled(DRM_UT_DRIVER)) { print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, 16, 1, data, bytes_received, false); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1041f4d627a6..b6ba0697c531 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd"; static const struct file_operations kfd_fops = { .owner = THIS_MODULE, .unlocked_ioctl = kfd_ioctl, - .compat_ioctl = kfd_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = kfd_open, .mmap = kfd_mmap, }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4d4bf4fd7b6c..f2db400a3920 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5691,11 +5691,12 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, connector_type = to_drm_connector_type(link->connector_signal); - res = drm_connector_init( + res = drm_connector_init_with_ddc( dm->ddev, &aconnector->base, &amdgpu_dm_connector_funcs, - connector_type); + connector_type, + &i2c->base); if (res) { DRM_ERROR("connector_init failed\n"); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index e54f2031b617..d0714a3d63c8 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -24,15 +24,20 @@ # It calculates Bandwidth and Watermarks values for HW programming # -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +calcs_ccflags := -mhard-float -msse -calcs_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +calcs_ccflags += -mpreferred-stack-boundary=4 +else calcs_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index bb8a3b92d024..fd52862d6624 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -9,15 +9,20 @@ DCN20 = dcn20_resource.o dcn20_init.o dcn20_hwseq.o dcn20_dpp.o dcn20_dpp_cm.o d DCN20 += dcn20_dsc.o -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index f8fb37278717..4763721fb1c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -5,15 +5,20 @@ DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \ dcn21_hwseq.o dcn21_link_encoder.o -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 32c1eedfa5e3..fb6358036be8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,15 +24,20 @@ # It provides the general basic services required by other DAL # subcomponents. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dml_ccflags := -mhard-float -msse -dml_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dml_ccflags += -mpreferred-stack-boundary=4 +else dml_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index afc76002eacc..641ffb7cfaed 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -2,15 +2,20 @@ # # Makefile for the 'dsc' sub-component of DAL. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dsc_ccflags := -mhard-float -msse -dsc_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dsc_ccflags += -mpreferred-stack-boundary=4 +else dsc_ccflags += -msse2 endif |