summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
author: Mukul Joshi <mukul.joshi@amd.com> 2022-05-31 14:39:36 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2023-06-09 09:45:35 -0400
commit: fe1f05df5919c67c3add49efb55e251a8d78ee4e (patch)
tree: 1653079f117524d35dbe49328a94cbb3e933bd5d /drivers/gpu/drm/amd/amdkfd
parent: 6b22ef25748fb77030bac02e2147f6d738212ac3 (diff)
drm/amdkfd: Rework kfd_locked handling
Currently, even if kfd_locked is set, a process is first created and then removed to work around a race condition in updating the kfd_locked flag. Rework kfd_locked handling to ensure no process is created if kfd_locked is set. This is achieved by updating kfd_locked under kfd_processes_mutex. With this there is no need for kfd_locked to be an atomic counter. Instead, it can be a regular integer.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 21
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8
4 files changed, 25 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 45e8da125f70..8b9accecf49b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -146,13 +146,6 @@ static int kfd_open(struct inode *inode, struct file *filep)
if (IS_ERR(process))
return PTR_ERR(process);
- if (kfd_is_locked()) {
- dev_dbg(kfd_device, "kfd is locked!\n"
- "process %d unreferenced", process->pasid);
- kfd_unref_process(process);
- return -EAGAIN;
- }
-
/* filep now owns the reference returned by kfd_create_process */
filep->private_data = process;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index df96c4c508a0..eb2b44fddf74 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -42,7 +42,7 @@
* once locked, kfd driver will stop any further GPU execution.
* create process (open) will return -EAGAIN.
*/
-static atomic_t kfd_locked = ATOMIC_INIT(0);
+static int kfd_locked;
#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
@@ -880,7 +880,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
return ret;
}
- atomic_dec(&kfd_locked);
+ mutex_lock(&kfd_processes_mutex);
+ --kfd_locked;
+ mutex_unlock(&kfd_processes_mutex);
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
@@ -893,21 +895,27 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
bool kfd_is_locked(void)
{
- return (atomic_read(&kfd_locked) > 0);
+ lockdep_assert_held(&kfd_processes_mutex);
+ return (kfd_locked > 0);
}
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
+ int count;
if (!kfd->init_complete)
return;
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
+ mutex_lock(&kfd_processes_mutex);
+ count = ++kfd_locked;
+ mutex_unlock(&kfd_processes_mutex);
+
/* For first KFD device suspend all the KFD processes */
- if (atomic_inc_return(&kfd_locked) == 1)
+ if (count == 1)
kfd_suspend_all_processes();
}
@@ -933,7 +941,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
- count = atomic_dec_return(&kfd_locked);
+ mutex_lock(&kfd_processes_mutex);
+ count = --kfd_locked;
+ mutex_unlock(&kfd_processes_mutex);
+
WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
if (count == 0)
ret = kfd_resume_all_processes();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 5cfebcc8b305..400b4dcbdf05 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -201,6 +201,8 @@ extern int amdgpu_no_queue_eviction_on_vm_fault;
/* Enable eviction debug messages */
extern bool debug_evictions;
+extern struct mutex kfd_processes_mutex;
+
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 9b1e84d33cdc..c3d43e6e5236 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -50,7 +50,7 @@ struct mm_struct;
* Unique/indexed by mm_struct*
*/
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
-static DEFINE_MUTEX(kfd_processes_mutex);
+DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_SRCU(kfd_processes_srcu);
@@ -818,6 +818,12 @@ struct kfd_process *kfd_create_process(struct file *filep)
*/
mutex_lock(&kfd_processes_mutex);
+ if (kfd_is_locked()) {
+ mutex_unlock(&kfd_processes_mutex);
+ pr_debug("KFD is locked! Cannot create process");
+ return ERR_PTR(-EINVAL);
+ }
+
/* A prior open of /dev/kfd could have already created the process. */
process = find_process(thread, false);
if (process) {