summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/Kconfig | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 123
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 8
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 45
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6
12 files changed, 141 insertions, 67 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 62e88e5362e9..16e12c9913f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT))
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT) || (LOONGARCH && 64BIT))
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a2149afa5803..828a9ceef1e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -22,7 +22,6 @@
*/
#include <linux/device.h>
-#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index bf0854bd5555..7e749f9b6d69 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -971,7 +971,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd,
kfd_smi_event_update_gpu_reset(node, false, reset_context);
}
- kgd2kfd_suspend(kfd, false);
+ kgd2kfd_suspend(kfd, true);
for (i = 0; i < kfd->num_nodes; i++)
kfd_signal_reset_event(kfd->nodes[i]);
@@ -1013,13 +1013,33 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
return 0;
}
-bool kfd_is_locked(void)
+bool kfd_is_locked(struct kfd_dev *kfd)
{
+ uint8_t id = 0;
+ struct kfd_node *dev;
+
lockdep_assert_held(&kfd_processes_mutex);
- return (kfd_locked > 0);
+
+ /* check reset/suspend lock */
+ if (kfd_locked > 0)
+ return true;
+
+ if (kfd)
+ return kfd->kfd_dev_lock > 0;
+
+ /* check lock on all cgroup accessible devices */
+ while (kfd_topology_enum_kfd_devices(id++, &dev) == 0) {
+ if (!dev || kfd_devcgroup_check_permission(dev))
+ continue;
+
+ if (dev->kfd->kfd_dev_lock > 0)
+ return true;
+ }
+
+ return false;
}
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
{
struct kfd_node *node;
int i;
@@ -1027,14 +1047,8 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
if (!kfd->init_complete)
return;
- /* for runtime suspend, skip locking kfd */
- if (!run_pm) {
- mutex_lock(&kfd_processes_mutex);
- /* For first KFD device suspend all the KFD processes */
- if (++kfd_locked == 1)
- kfd_suspend_all_processes();
- mutex_unlock(&kfd_processes_mutex);
- }
+ if (suspend_proc)
+ kgd2kfd_suspend_process(kfd);
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
@@ -1042,7 +1056,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
}
}
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
{
int ret, i;
@@ -1055,14 +1069,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return ret;
}
- /* for runtime resume, skip unlocking kfd */
- if (!run_pm) {
- mutex_lock(&kfd_processes_mutex);
- if (--kfd_locked == 0)
- ret = kfd_resume_all_processes();
- WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
- mutex_unlock(&kfd_processes_mutex);
- }
+ if (resume_proc)
+ ret = kgd2kfd_resume_process(kfd);
+
+ return ret;
+}
+
+void kgd2kfd_suspend_process(struct kfd_dev *kfd)
+{
+ if (!kfd->init_complete)
+ return;
+
+ mutex_lock(&kfd_processes_mutex);
+ /* For first KFD device suspend all the KFD processes */
+ if (++kfd_locked == 1)
+ kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
+}
+
+int kgd2kfd_resume_process(struct kfd_dev *kfd)
+{
+ int ret = 0;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ mutex_lock(&kfd_processes_mutex);
+ if (--kfd_locked == 0)
+ ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
return ret;
}
@@ -1442,24 +1478,53 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
kfd_get_num_sdma_engines(node);
}
-int kgd2kfd_check_and_lock_kfd(void)
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
{
+ struct kfd_process *p;
+ int r = 0, temp, idx;
+
mutex_lock(&kfd_processes_mutex);
- if (!hash_empty(kfd_processes_table) || kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
- return -EBUSY;
+
+ if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
+ goto out;
+
+ /* fail under system reset/resume or kfd device is partition switching. */
+ if (kfd_is_locked(kfd)) {
+ r = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * ensure all running processes are cgroup excluded from device before mode switch.
+ * i.e. no pdd was created on the process socket.
+ */
+ idx = srcu_read_lock(&kfd_processes_srcu);
+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->dev->kfd != kfd)
+ continue;
+
+ r = -EBUSY;
+ goto proc_check_unlock;
+ }
}
- ++kfd_locked;
+proc_check_unlock:
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+out:
+ if (!r)
+ ++kfd->kfd_dev_lock;
mutex_unlock(&kfd_processes_mutex);
- return 0;
+ return r;
}
-void kgd2kfd_unlock_kfd(void)
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
{
mutex_lock(&kfd_processes_mutex);
- --kfd_locked;
+ --kfd->kfd_dev_lock;
mutex_unlock(&kfd_processes_mutex);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 76359c6a3f3a..2d91027e2a74 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2312,7 +2312,7 @@ static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
continue;
/* Reset engine and check. */
- if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) ||
+ if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) ||
dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
!set_sdma_queue_as_reset(dqm, doorbell_off)) {
r = -ENOTRECOVERABLE;
@@ -2339,9 +2339,18 @@ reset_fail:
static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma)
{
+ struct amdgpu_device *adev = dqm->dev->adev;
+
while (halt_if_hws_hang)
schedule();
+ if (adev->debug_disable_gpu_ring_reset) {
+ dev_info_once(adev->dev,
+ "%s queue hung, but ring reset disabled",
+ is_sdma ? "sdma" : "compute");
+
+ return -EPERM;
+ }
if (!amdgpu_gpu_recovery)
return -ENOTRECOVERABLE;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 2b294ada3ec0..82905f3e54dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1302,7 +1302,7 @@ void kfd_signal_reset_event(struct kfd_node *dev)
if (ti) {
dev_err(dev->adev->dev,
"Queues reset on process %s tid %d thread %s pid %d\n",
- ti->process_name, ti->tgid, ti->task_name, ti->pid);
+ ti->process_name, ti->tgid, ti->task.comm, ti->task.pid);
amdgpu_vm_put_task_info(ti);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index dbcb60eb54b2..1d170dc50df3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -23,7 +23,6 @@
*/
#include <linux/device.h>
-#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/sched.h>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 8fa6489b6f5d..505036968a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -240,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+ packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
packet->bitfields2.extended_engine_sel =
extended_engine_sel__mes_map_queues__legacy_engine_sel;
packet->bitfields2.queue_type =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d221c58dccc3..67694bcd9464 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -372,6 +372,9 @@ struct kfd_dev {
/* bitmap for dynamic doorbell allocation from doorbell object */
unsigned long *doorbell_bitmap;
+
+ /* for dynamic partitioning */
+ int kfd_dev_lock;
};
enum kfd_mempool {
@@ -1536,7 +1539,7 @@ static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
int kfd_send_exception_to_runtime(struct kfd_process *p,
unsigned int queue_id,
uint64_t error_reason);
-bool kfd_is_locked(void);
+bool kfd_is_locked(struct kfd_dev *kfd);
/* Compute profile */
void kfd_inc_compute_active(struct kfd_node *dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 722ac1662bdc..5be28c6c4f6a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -854,7 +854,7 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
*/
mutex_lock(&kfd_processes_mutex);
- if (kfd_is_locked()) {
+ if (kfd_is_locked(NULL)) {
pr_debug("KFD is locked! Cannot create process");
process = ERR_PTR(-EINVAL);
goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 83d9384ac815..a499449fcb06 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -253,9 +253,9 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
- if (task_info->pid)
+ if (task_info->task.pid)
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
- task_info->pid, task_info->task_name));
+ task_info->task.pid, task_info->task.comm));
amdgpu_vm_put_task_info(task_info);
}
}
@@ -359,8 +359,8 @@ void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
kfd_smi_event_add(0, pdd->dev,
start ? KFD_SMI_EVENT_PROCESS_START :
KFD_SMI_EVENT_PROCESS_END,
- KFD_EVENT_FMT_PROCESS(task_info->pid,
- task_info->task_name));
+ KFD_EVENT_FMT_PROCESS(task_info->task.pid,
+ task_info->task.comm));
amdgpu_vm_put_task_info(task_info);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 865dca2547de..a0f22ea6d15a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start,
}
static void
-svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
- struct svm_range *pchild, enum svm_work_list_ops op)
+svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
{
pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
pchild, pchild->start, pchild->last, prange, op);
- pchild->work_item.mm = mm;
+ pchild->work_item.mm = NULL;
pchild->work_item.op = op;
list_add_tail(&pchild->child_list, &prange->child_list);
}
@@ -1278,7 +1277,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
- if (gc_ip_version < IP_VERSION(9, 5, 0))
+ if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else
mapping_flags |= AMDGPU_VM_MTYPE_NC;
@@ -2394,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op != SVM_OP_UNMAP_RANGE)
prange->work_item.op = op;
} else {
- prange->work_item.op = op;
-
- /* Pairs with mmput in deferred_list_work */
- mmget(mm);
- prange->work_item.mm = mm;
- list_add_tail(&prange->deferred_list,
- &prange->svms->deferred_range_list);
- pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
- prange, prange->start, prange->last, op);
+ /* Pairs with mmput in deferred_list_work.
+ * If process is exiting and mm is gone, don't update mmu notifier.
+ */
+ if (mmget_not_zero(mm)) {
+ prange->work_item.mm = mm;
+ prange->work_item.op = op;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
}
spin_unlock(&svms->deferred_list_lock);
}
@@ -2416,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms)
}
static void
-svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
- struct svm_range *prange, unsigned long start,
+svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
unsigned long last)
{
struct svm_range *head;
@@ -2438,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
svm_range_split(tail, last + 1, tail->last, &head);
if (head != prange && tail != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
} else if (tail != prange) {
- svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
} else if (head != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
} else if (parent != prange) {
prange->work_item.op = SVM_OP_UNMAP_RANGE;
}
@@ -2520,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
- svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_split(prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l, trigger);
- svm_range_unmap_split(mm, prange, prange, start, last);
+ svm_range_unmap_split(prange, prange, start, last);
if (unmap_parent)
svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
@@ -2570,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
if (range->event == MMU_NOTIFY_RELEASE)
return true;
- if (!mmget_not_zero(mni->mm))
- return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
@@ -2598,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
}
svm_range_unlock(prange);
- mmput(mni->mm);
return true;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index baa2374acdeb..4ec73f33535e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -510,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) &&
+ (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
+
sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
@@ -2008,8 +2012,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (!amdgpu_sriov_vf(dev->gpu->adev))
dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
- if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
- dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;