summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 444
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 53
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 13
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 145
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik.c | 95
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 29
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 44
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/si.c | 97
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2
-rw-r--r-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5
-rw-r--r-- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 19
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 19
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 19
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/Makefile | 19
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 19
21 files changed, 402 insertions, 655 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6003f9454354..81a531b652aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -974,6 +974,8 @@ struct amdgpu_device {
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
+ struct mutex notifier_lock;
+
int asic_reset_res;
struct work_struct xgmi_reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d8db5ecdf9c1..b2487f4f271b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -518,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -1212,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, current->mm, user_addr);
+ ret = init_user_pages(*mem, user_addr);
if (ret)
goto allocate_init_user_pages_failed;
}
@@ -1757,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
return ret;
}
+ /*
+ * FIXME: Cannot ignore the return code, must hold
+ * notifier_lock
+ */
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Mark the BO as valid unless it was invalidated
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 06fb535a504f..5b330f69194b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -538,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -1212,11 +1210,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock.
- * p->mn is hold until amdgpu_cs_submit is finished and fence is added
- * to BOs.
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
*/
- amdgpu_mn_lock(p->mn);
+ mutex_lock(&p->adev->notifier_lock);
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1259,13 +1257,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
return 0;
error_abort:
drm_sched_job_cleanup(&job->base);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
error_unlock:
amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e2eec7b66334..a59cd47aa6c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -360,10 +360,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(gobj, flags);
- if (!IS_ERR(buf)) {
- buf->file->f_mapping = gobj->dev->anon_inode->i_mapping;
+ if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
- }
return buf;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 143753d237e7..2672dc64a310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user)
return 0;
}
-static struct fb_ops amdgpufb_ops = {
+static const struct fb_ops amdgpufb_ops = {
.owner = THIS_MODULE,
DRM_FB_HELPER_DEFAULT_OPS,
.fb_open = amdgpufb_open,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 392300f77b13..828b5167ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,439 +51,107 @@
#include "amdgpu_amdkfd.h"
/**
- * struct amdgpu_mn_node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
*
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
- struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = amn->adev;
- struct amdgpu_mn_node *node, *next_node;
- struct amdgpu_bo *bo, *next_bo;
-
- mutex_lock(&adev->mn_lock);
- down_write(&amn->lock);
- hash_del(&amn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node,
- &amn->objects.rb_root, it.rb) {
- list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- }
- kfree(node);
- }
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-
- hmm_mirror_unregister(&amn->mirror);
- kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
- INIT_WORK(&amn->work, amdgpu_mn_destroy);
- schedule_work(&amn->work);
-}
-
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
- if (mn)
- down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
- if (mn)
- up_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- * @blockable: is the notifier blockable
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
- if (blockable)
- down_read(&amn->lock);
- else if (!down_read_trylock(&amn->lock))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
- up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
- *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
- unsigned long start,
- unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
-
- if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
- continue;
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- }
-}
-
-/**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static int
-amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
+ mutex_lock(&adev->notifier_lock);
- /* TODO we should be able to split locking for interval tree and
- * amdgpu_mn_invalidate_node
- */
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
+ mmu_interval_set_seq(mni, cur_seq);
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- amdgpu_mn_invalidate_node(node, start, end);
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
-}
-
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int
-amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
-
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
-
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
- struct amdgpu_bo *bo;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- list_for_each_entry(bo, &node->bos, mn_list) {
- struct kgd_mem *mem = bo->kfd_bo;
-
- if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- start, end))
- amdgpu_amdkfd_evict_userptr(mem, amn->mm);
- }
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
}
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
-static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
- [AMDGPU_MN_TYPE_GFX] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
- .release = amdgpu_hmm_mirror_release
- },
- [AMDGPU_MN_TYPE_HSA] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
- .release = amdgpu_hmm_mirror_release
- },
+static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
+ .invalidate = amdgpu_mn_invalidate_gfx,
};
/**
- * amdgpu_mn_get - create HMM mirror context
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
*
- * @adev: amdgpu device pointer
- * @type: type of MMU notifier context
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
- * Creates a HMM mirror context for current->mm.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct mm_struct *mm = current->mm;
- struct amdgpu_mn *amn;
- unsigned long key = AMDGPU_MN_KEY(mm, type);
- int r;
-
- mutex_lock(&adev->mn_lock);
- if (down_write_killable(&mm->mmap_sem)) {
- mutex_unlock(&adev->mn_lock);
- return ERR_PTR(-EINTR);
- }
-
- hash_for_each_possible(adev->mn_hash, amn, node, key)
- if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
- goto release_locks;
-
- amn = kzalloc(sizeof(*amn), GFP_KERNEL);
- if (!amn) {
- amn = ERR_PTR(-ENOMEM);
- goto release_locks;
- }
-
- amn->adev = adev;
- amn->mm = mm;
- init_rwsem(&amn->lock);
- amn->type = type;
- amn->objects = RB_ROOT_CACHED;
-
- amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
- r = hmm_mirror_register(&amn->mirror, mm);
- if (r)
- goto free_amn;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
+ if (!mmu_notifier_range_blockable(range))
+ return false;
-release_locks:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
+ mutex_lock(&adev->notifier_lock);
- return amn;
+ mmu_interval_set_seq(mni, cur_seq);
-free_amn:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
- kfree(amn);
+ amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+ mutex_unlock(&adev->notifier_lock);
- return ERR_PTR(r);
+ return true;
}
+static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
+ .invalidate = amdgpu_mn_invalidate_hsa,
+};
+
/**
* amdgpu_mn_register - register a BO for notifier updates
*
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
- unsigned long end = addr + amdgpu_bo_size(bo) - 1;
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- enum amdgpu_mn_type type =
- bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
- struct amdgpu_mn *amn;
- struct amdgpu_mn_node *node = NULL, *new_node;
- struct list_head bos;
- struct interval_tree_node *it;
-
- amn = amdgpu_mn_get(adev, type);
- if (IS_ERR(amn))
- return PTR_ERR(amn);
-
- new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
- if (!new_node)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bos);
-
- down_write(&amn->lock);
-
- while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &amn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node)
- node = new_node;
- else
- kfree(new_node);
-
- bo->mn = amn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &amn->objects);
-
- up_write(&amn->lock);
-
- return 0;
+ if (bo->kfd_bo)
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_mn_hsa_ops);
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_mn_gfx_ops);
}
/**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *amn;
- struct list_head *head;
-
- mutex_lock(&adev->mn_lock);
-
- amn = bo->mn;
- if (amn == NULL) {
- mutex_unlock(&adev->mn_lock);
+ if (!bo->notifier.mm)
return;
- }
-
- down_write(&amn->lock);
-
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
-
- if (list_empty(head)) {
- struct amdgpu_mn_node *node;
-
- node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &amn->objects);
- kfree(node);
- }
-
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-}
-
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
- (1 << 0), /* HMM_PFN_VALID */
- (1 << 1), /* HMM_PFN_WRITE */
- 0 /* HMM_PFN_DEVICE_PRIVATE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
- 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
- 0, /* HMM_PFN_NONE */
- 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
-void amdgpu_hmm_init_range(struct hmm_range *range)
-{
- if (range) {
- range->flags = hmm_range_flags;
- range->values = hmm_range_values;
- range->pfn_shift = PAGE_SHIFT;
- }
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625..a292238f75eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,63 +30,10 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
-enum amdgpu_mn_type {
- AMDGPU_MN_TYPE_GFX,
- AMDGPU_MN_TYPE_HSA,
-};
-
-/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @mirror: HMM mirror function support
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
- /* constant after initialisation */
- struct amdgpu_device *adev;
- struct mm_struct *mm;
- enum amdgpu_mn_type type;
-
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by adev->mn_lock */
- struct hlist_node node;
-
- /* objects protected by lock */
- struct rw_semaphore lock;
- struct rb_root_cached objects;
-
-#ifdef CONFIG_HMM_MIRROR
- /* HMM mirror */
- struct hmm_mirror mirror;
-#endif
-};
-
#if defined(CONFIG_HMM_MIRROR)
-void amdgpu_mn_lock(struct amdgpu_mn *mn);
-void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-void amdgpu_hmm_init_range(struct hmm_range *range);
#else
-static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
-static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
-{
- return NULL;
-}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 7e99f6c58c48..36dec51d1ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
@@ -101,10 +104,12 @@ struct amdgpu_bo {
struct ttm_bo_kmap_obj dma_buf_vmap;
struct amdgpu_mn *mn;
- union {
- struct list_head mn_list;
- struct list_head shadow_list;
- };
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+
+ struct list_head shadow_list;
struct kgd_mem *kfd_bo;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 80fc3d8ee1cf..445de594c214 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,6 +35,7 @@
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
@@ -769,6 +770,20 @@ struct amdgpu_ttm_tt {
#endif
};
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
/**
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
@@ -776,85 +791,89 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-
-#define MAX_RETRY_HMM_RANGE_FAULT 16
-
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
- struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
+ unsigned long timeout;
+ struct mm_struct *mm;
unsigned long i;
- uint64_t *pfns;
int r = 0;
- if (!mm) /* Happens during process shutdown */
- return -ESRCH;
-
- if (unlikely(!mirror)) {
- DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
- r = -EFAULT;
- goto out;
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
}
- vma = find_vma(mm, start);
- if (unlikely(!vma || start < vma->vm_start)) {
- r = -EFAULT;
- goto out;
- }
- if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
- vma->vm_file)) {
- r = -EPERM;
- goto out;
- }
+ /* Another get_user_pages is running at the same time?? */
+ if (WARN_ON(gtt->range))
+ return -EFAULT;
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
+ range->notifier = &bo->notifier;
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->start = bo->notifier.interval_tree.start;
+ range->end = bo->notifier.interval_tree.last + 1;
+ range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ range->default_flags |= range->flags[HMM_PFN_WRITE];
- pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns)) {
+ range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
+ GFP_KERNEL);
+ if (unlikely(!range->pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
- amdgpu_hmm_init_range(range);
- range->default_flags = range->flags[HMM_PFN_VALID];
- range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
- 0 : range->flags[HMM_PFN_WRITE];
- range->pfn_flags_mask = 0;
- range->pfns = pfns;
- range->start = start;
- range->end = start + ttm->num_pages * PAGE_SIZE;
-
- hmm_range_register(range, mirror);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+ up_read(&mm->mmap_sem);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- /*
- * Just wait for range to be valid, safe to ignore return value as we
- * will use the return value of hmm_range_fault() below under the
- * mmap_sem to ascertain the validity of the range.
- */
- hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+retry:
+ range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
down_read(&mm->mmap_sem);
r = hmm_range_fault(range, 0);
up_read(&mm->mmap_sem);
-
- if (unlikely(r < 0))
+ if (unlikely(r <= 0)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+ goto retry;
goto out_free_pfns;
+ }
for (i = 0; i < ttm->num_pages; i++) {
- pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ /* FIXME: The pages cannot be touched outside the notifier_lock */
+ pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
if (unlikely(!pages[i])) {
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
- i, pfns[i]);
+ i, range->pfns[i]);
r = -ENOMEM;
goto out_free_pfns;
@@ -862,15 +881,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
}
gtt->range = range;
+ mmput(mm);
return 0;
+out_unlock:
+ up_read(&mm->mmap_sem);
out_free_pfns:
- hmm_range_unregister(range);
- kvfree(pfns);
+ kvfree(range->pfns);
out_free_ranges:
kfree(range);
out:
+ mmput(mm);
return r;
}
@@ -895,15 +917,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
"No user pages to check\n");
if (gtt->range) {
- r = hmm_range_valid(gtt->range);
- hmm_range_unregister(gtt->range);
-
+ /*
+ * FIXME: Must always hold notifier_lock for this, and must
+ * not ignore the return code.
+ */
+ r = mmu_interval_read_retry(gtt->range->notifier,
+ gtt->range->notifier_seq);
kvfree(gtt->range->pfns);
kfree(gtt->range);
gtt->range = NULL;
}
- return r;
+ return !r;
}
#endif
@@ -984,10 +1009,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range &&
- ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
- gtt->range->pfns[0]))
- WARN_ONCE(1, "Missing get_user_page_done\n");
+ if (gtt->range) {
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->pages[i] !=
+ hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[i]))
+ break;
+ }
+
+ WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ }
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 1dfe4a1337cf..e9822ea8bb19 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1461,7 +1461,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1496,12 +1495,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1511,14 +1505,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1542,15 +1539,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1563,26 +1568,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1597,15 +1621,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 232469507446..f5725336a5f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -219,6 +219,21 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ * Return: true if @vmhub is AMDGPU_MMHUB_0/_1 and amdgpu_sriov_vf(adev) is false.
+ */
+static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -229,6 +244,7 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
/* Use register 17 for GART */
@@ -244,8 +260,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) {
+ if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
@@ -278,8 +293,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
}
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
@@ -369,6 +383,7 @@ error_alloc:
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
@@ -381,8 +396,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
@@ -398,8 +412,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 53dfc82ca171..fa025ceeea0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -416,6 +416,24 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ * Return: true if @vmhub is AMDGPU_MMHUB_0/_1, amdgpu_sriov_vf(adev) is false,
+ * and the device is not CHIP_RAVEN with rev_id < 0x8 and pdev->device 0x15d8.
+ */
+static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)) &&
+ (!(adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id < 0x8 &&
+ adev->pdev->device == 0x15d8)));
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -435,6 +453,7 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
const unsigned eng = 17;
u32 j, tmp;
struct amdgpu_vmhub *hub;
@@ -468,11 +487,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
- (!(adev->asic_type == CHIP_RAVEN &&
- adev->rev_id < 0x8 &&
- adev->pdev->device == 0x15d8))) {
+ if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
@@ -502,11 +517,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
- (!(adev->asic_type == CHIP_RAVEN &&
- adev->rev_id < 0x8 &&
- adev->pdev->device == 0x15d8)))
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
@@ -524,6 +535,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
@@ -537,11 +549,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if ((ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1) &&
- (!(adev->asic_type == CHIP_RAVEN &&
- adev->rev_id < 0x8 &&
- adev->pdev->device == 0x15d8)))
+ if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
@@ -557,11 +565,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if ((ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1) &&
- (!(adev->asic_type == CHIP_RAVEN &&
- adev->rev_id < 0x8 &&
- adev->pdev->device == 0x15d8)))
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index cb682d44737a..4d415bfdb42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1650,7 +1650,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1685,12 +1684,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1699,14 +1693,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -1723,15 +1720,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
}
for (i = 0; i < 10; i++) {
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -1743,25 +1748,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
mdelay(100);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
-
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -1774,15 +1798,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index c44723c267c9..c902f26cf50d 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -234,7 +234,7 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
(uint16_t)address, numbytes);
- if (drm_debug & DRM_UT_DRIVER) {
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
16, 1, data, numbytes, false);
}
@@ -388,7 +388,7 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :",
(uint16_t)address, bytes_received);
- if (drm_debug & DRM_UT_DRIVER) {
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
16, 1, data, bytes_received, false);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1041f4d627a6..b6ba0697c531 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
- .compat_ioctl = kfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4d4bf4fd7b6c..f2db400a3920 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5691,11 +5691,12 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
connector_type = to_drm_connector_type(link->connector_signal);
- res = drm_connector_init(
+ res = drm_connector_init_with_ddc(
dm->ddev,
&aconnector->base,
&amdgpu_dm_connector_funcs,
- connector_type);
+ connector_type,
+ &i2c->base);
if (res) {
DRM_ERROR("connector_init failed\n");
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index e54f2031b617..d0714a3d63c8 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -24,15 +24,20 @@
# It calculates Bandwidth and Watermarks values for HW programming
#
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+calcs_ccflags := -mhard-float -msse
-calcs_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+calcs_ccflags += -mpreferred-stack-boundary=4
+else
calcs_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index bb8a3b92d024..fd52862d6624 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -9,15 +9,20 @@ DCN20 = dcn20_resource.o dcn20_init.o dcn20_hwseq.o dcn20_dpp.o dcn20_dpp_cm.o d
DCN20 += dcn20_dsc.o
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4
+else
CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index f8fb37278717..4763721fb1c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -5,15 +5,20 @@
DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
dcn21_hwseq.o dcn21_link_encoder.o
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4
+else
CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 32c1eedfa5e3..fb6358036be8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -24,15 +24,20 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+dml_ccflags := -mhard-float -msse
-dml_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dml_ccflags += -mpreferred-stack-boundary=4
+else
dml_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index afc76002eacc..641ffb7cfaed 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -2,15 +2,20 @@
#
# Makefile for the 'dsc' sub-component of DAL.
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+dsc_ccflags := -mhard-float -msse
-dsc_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dsc_ccflags += -mpreferred-stack-boundary=4
+else
dsc_ccflags += -msse2
endif