summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c152
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c150
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h1
13 files changed, 286 insertions, 134 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 3f0300e53727..0ec5f25adf56 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -127,6 +127,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
return PTR_ERR(process);
if (kfd_is_locked()) {
+ dev_dbg(kfd_device, "kfd is locked!\n"
+ "process %d unreferenced", process->pasid);
kfd_unref_process(process);
return -EAGAIN;
}
@@ -1167,7 +1169,7 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
if (!dev)
return -EINVAL;
- dev->kfd2kgd->get_tile_config(dev->kgd, &config);
+ amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
args->gb_addr_config = config.gb_addr_config;
args->num_banks = config.num_banks;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 2a9e40131735..d5386f15c4a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -648,6 +648,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
if (kfd->kfd2kgd->get_hive_id)
kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
+ if (kfd->kfd2kgd->get_unique_id)
+ kfd->unique_id = kfd->kfd2kgd->get_unique_id(kfd->kgd);
+
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device, "Error initializing interrupts\n");
goto kfd_interrupt_error;
@@ -710,7 +713,7 @@ out:
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
- kgd2kfd_suspend(kfd);
+ kgd2kfd_suspend(kfd, false);
device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
@@ -731,7 +734,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
kfd->dqm->ops.pre_reset(kfd->dqm);
- kgd2kfd_suspend(kfd);
+ kgd2kfd_suspend(kfd, false);
kfd_signal_reset_event(kfd);
return 0;
@@ -765,21 +768,23 @@ bool kfd_is_locked(void)
return (atomic_read(&kfd_locked) > 0);
}
-void kgd2kfd_suspend(struct kfd_dev *kfd)
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
if (!kfd->init_complete)
return;
- /* For first KFD device suspend all the KFD processes */
- if (atomic_inc_return(&kfd_locked) == 1)
- kfd_suspend_all_processes();
+ /* for runtime suspend, skip locking kfd */
+ if (!run_pm) {
+ /* For first KFD device suspend all the KFD processes */
+ if (atomic_inc_return(&kfd_locked) == 1)
+ kfd_suspend_all_processes();
+ }
kfd->dqm->ops.stop(kfd->dqm);
-
kfd_iommu_suspend(kfd);
}
-int kgd2kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
int ret, count;
@@ -790,10 +795,13 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
if (ret)
return ret;
- count = atomic_dec_return(&kfd_locked);
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
- ret = kfd_resume_all_processes();
+ /* for runtime resume, skip unlocking kfd */
+ if (!run_pm) {
+ count = atomic_dec_return(&kfd_locked);
+ WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
+ if (count == 0)
+ ret = kfd_resume_all_processes();
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 80d22bf702e8..77ea0f0cb163 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -78,14 +78,14 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
/* queue is available for KFD usage if bit is 1 */
for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
if (test_bit(pipe_offset + i,
- dqm->dev->shared_resources.queue_bitmap))
+ dqm->dev->shared_resources.cp_queue_bitmap))
return true;
return false;
}
-unsigned int get_queues_num(struct device_queue_manager *dqm)
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
- return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+ return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
KGD_MAX_QUEUES);
}
@@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
return dqm->dev->device_info->num_xgmi_sdma_engines;
}
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+ return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_sdma_engines
@@ -132,6 +137,22 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
+void increment_queue_count(struct device_queue_manager *dqm,
+ enum kfd_queue_type type)
+{
+ dqm->active_queue_count++;
+ if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ dqm->active_cp_queue_count++;
+}
+
+void decrement_queue_count(struct device_queue_manager *dqm,
+ enum kfd_queue_type type)
+{
+ dqm->active_queue_count--;
+ if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ dqm->active_cp_queue_count--;
+}
+
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -281,8 +302,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
- print_queue(q);
-
dqm_lock(dqm);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -359,12 +378,7 @@ add_queue_to_list:
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active)
- dqm->queue_count++;
-
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- dqm->sdma_queue_count++;
- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
- dqm->xgmi_sdma_queue_count++;
+ increment_queue_count(dqm, q->properties.type);
/*
* Unconditionally increment this counter, regardless of the queue's
@@ -446,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
- if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- dqm->sdma_queue_count--;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- dqm->xgmi_sdma_queue_count--;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
- } else {
+ else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
return -EINVAL;
@@ -494,7 +506,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
}
qpd->queue_count--;
if (q->properties.is_active)
- dqm->queue_count--;
+ decrement_queue_count(dqm, q->properties.type);
return retval;
}
@@ -563,13 +575,13 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
/*
* check active state vs. the previous state and modify
* counter accordingly. map_queues_cpsch uses the
- * dqm->queue_count to determine whether a new runlist must be
+ * dqm->active_queue_count to determine whether a new runlist must be
* uploaded.
*/
if (q->properties.is_active && !prev_active)
- dqm->queue_count++;
+ increment_queue_count(dqm, q->properties.type);
else if (!q->properties.is_active && prev_active)
- dqm->queue_count--;
+ decrement_queue_count(dqm, q->properties.type);
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
@@ -618,7 +630,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
- dqm->queue_count--;
+ decrement_queue_count(dqm, q->properties.type);
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -662,7 +674,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = false;
- dqm->queue_count--;
+ decrement_queue_count(dqm, q->properties.type);
}
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
@@ -731,7 +743,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
- dqm->queue_count++;
+ increment_queue_count(dqm, q->properties.type);
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -786,7 +798,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = true;
- dqm->queue_count++;
+ increment_queue_count(dqm, q->properties.type);
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -899,16 +911,15 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
mutex_init(&dqm->lock_hidden);
INIT_LIST_HEAD(&dqm->queues);
- dqm->queue_count = dqm->next_pipe_to_allocate = 0;
- dqm->sdma_queue_count = 0;
- dqm->xgmi_sdma_queue_count = 0;
+ dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
+ dqm->active_cp_queue_count = 0;
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
if (test_bit(pipe_offset + queue,
- dqm->dev->shared_resources.queue_bitmap))
+ dqm->dev->shared_resources.cp_queue_bitmap))
dqm->allocated_queues[pipe] |= 1 << queue;
}
@@ -924,7 +935,7 @@ static void uninitialize(struct device_queue_manager *dqm)
{
int i;
- WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+ WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
kfree(dqm->allocated_queues);
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
@@ -966,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
int bit;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- if (dqm->sdma_bitmap == 0)
+ if (dqm->sdma_bitmap == 0) {
+ pr_err("No more SDMA queue to allocate\n");
return -ENOMEM;
+ }
+
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -976,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id /
get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- if (dqm->xgmi_sdma_bitmap == 0)
+ if (dqm->xgmi_sdma_bitmap == 0) {
+ pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
+ }
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -1029,7 +1045,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
/ dqm->dev->shared_resources.num_pipe_per_mec;
- if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+ if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
continue;
/* only acquire queues from the first MEC */
@@ -1064,9 +1080,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
mutex_init(&dqm->lock_hidden);
INIT_LIST_HEAD(&dqm->queues);
- dqm->queue_count = dqm->processes_count = 0;
- dqm->sdma_queue_count = 0;
- dqm->xgmi_sdma_queue_count = 0;
+ dqm->active_queue_count = dqm->processes_count = 0;
+ dqm->active_cp_queue_count = 0;
+
dqm->active_runlist = false;
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1158,7 +1174,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
dqm->total_queue_count);
list_add(&kq->list, &qpd->priv_queue_list);
- dqm->queue_count++;
+ increment_queue_count(dqm, kq->queue->properties.type);
qpd->is_debug = true;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
@@ -1172,7 +1188,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
{
dqm_lock(dqm);
list_del(&kq->list);
- dqm->queue_count--;
+ decrement_queue_count(dqm, kq->queue->properties.type);
qpd->is_debug = false;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
/*
@@ -1238,13 +1254,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- dqm->sdma_queue_count++;
- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
- dqm->xgmi_sdma_queue_count++;
-
if (q->properties.is_active) {
- dqm->queue_count++;
+ increment_queue_count(dqm, q->properties.type);
+
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
}
@@ -1298,20 +1310,6 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
return 0;
}
-static int unmap_sdma_queues(struct device_queue_manager *dqm)
-{
- int i, retval = 0;
-
- for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
- dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
- retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
- if (retval)
- return retval;
- }
- return retval;
-}
-
/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
@@ -1319,7 +1317,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
if (!dqm->sched_running)
return 0;
- if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
+ if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
return 0;
if (dqm->active_runlist)
return 0;
@@ -1349,12 +1347,6 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
- pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
- dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
-
- if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
- unmap_sdma_queues(dqm);
-
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
filter, filter_param, false, 0);
if (retval)
@@ -1427,18 +1419,15 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
deallocate_doorbell(qpd, q);
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- dqm->sdma_queue_count--;
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- dqm->xgmi_sdma_queue_count--;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
- }
list_del(&q->list);
qpd->queue_count--;
if (q->properties.is_active) {
- dqm->queue_count--;
+ decrement_queue_count(dqm, q->properties.type);
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (retval == -ETIME)
@@ -1648,7 +1637,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
/* Clean all kernel queues */
list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
list_del(&kq->list);
- dqm->queue_count--;
+ decrement_queue_count(dqm, kq->queue->properties.type);
qpd->is_debug = false;
dqm->total_queue_count--;
filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
@@ -1656,16 +1645,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
/* Clear all user mode queues */
list_for_each_entry(q, &qpd->queues_list, list) {
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- dqm->sdma_queue_count--;
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- dqm->xgmi_sdma_queue_count--;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
- }
if (q->properties.is_active)
- dqm->queue_count--;
+ decrement_queue_count(dqm, q->properties.type);
dqm->total_queue_count--;
}
@@ -1742,14 +1728,13 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
struct kfd_dev *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
- (dev->device_info->num_sdma_engines +
- dev->device_info->num_xgmi_sdma_engines) *
+ get_num_all_sdma_engines(dqm) *
dev->device_info->num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
- (void *)&(mem_obj->cpu_ptr), true);
+ (void *)&(mem_obj->cpu_ptr), false);
return retval;
}
@@ -1979,7 +1964,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
if (!test_bit(pipe_offset + queue,
- dqm->dev->shared_resources.queue_bitmap))
+ dqm->dev->shared_resources.cp_queue_bitmap))
continue;
r = dqm->dev->kfd2kgd->hqd_dump(
@@ -1995,8 +1980,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
}
}
- for (pipe = 0; pipe < get_num_sdma_engines(dqm) +
- get_num_xgmi_sdma_engines(dqm); pipe++) {
+ for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
for (queue = 0;
queue < dqm->dev->device_info->num_sdma_queues_per_engine;
queue++) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 871d3b628d2d..50d919f814e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -180,9 +180,8 @@ struct device_queue_manager {
struct list_head queues;
unsigned int saved_flags;
unsigned int processes_count;
- unsigned int queue_count;
- unsigned int sdma_queue_count;
- unsigned int xgmi_sdma_queue_count;
+ unsigned int active_queue_count;
+ unsigned int active_cp_queue_count;
unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
@@ -219,7 +218,7 @@ void device_queue_manager_init_v10_navi10(
struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
-unsigned int get_queues_num(struct device_queue_manager *dqm);
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 1f8365575b12..15476fca8fa6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -187,7 +187,7 @@ static int create_signal_event(struct file *devkfd,
if (p->signal_mapped_size &&
p->signal_event_count == p->signal_mapped_size / 8) {
if (!p->signal_event_limit_reached) {
- pr_warn("Signal event wasn't created because limit was reached\n");
+ pr_debug("Signal event wasn't created because limit was reached\n");
p->signal_event_limit_reached = true;
}
return -ENOSPC;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index bb77b8890e77..78714f9a8b11 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -316,7 +316,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
{
/*
* node id couldn't be 0 - the three MSB bits of
- * aperture shoudn't be 0
+ * aperture shouldn't be 0
*/
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 436b7f518979..48cda3073b70 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -87,9 +87,21 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
int retval;
struct kfd_mem_obj *mqd_mem_obj = NULL;
- /* From V9, for CWSR, the control stack is located on the next page
- * boundary after the mqd, we will use the gtt allocation function
- * instead of sub-allocation function.
+ /* For V9 only, due to a HW bug, the control stack of a user mode
+ * compute queue needs to be allocated just behind the page boundary
+ * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
+ * the first page of the buffer serves as the regular MQD buffer
+ * purpose and the remaining is for control stack. Although the two
+ * parts are in the same buffer object, they need different memory
+ * types: MQD part needs UC (uncached) as usual, while control stack
+ * needs NC (non coherent), which is different from the UC type which
+ * is used when control stack is allocated in user space.
+ *
+ * Because of all those, we use the gtt allocation function instead
+ * of sub-allocation function for this enlarged MQD buffer. Moreover,
+ * in order to achieve two memory types in a single buffer object, we
+ * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
+ * amdgpu memory functions to do so.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index dc406e6dee23..efdb75e7677b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -47,9 +47,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
struct kfd_dev *dev = pm->dqm->dev;
process_count = pm->dqm->processes_count;
- queue_count = pm->dqm->queue_count;
- compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
- pm->dqm->xgmi_sdma_queue_count;
+ queue_count = pm->dqm->active_queue_count;
+ compute_queue_count = pm->dqm->active_cp_queue_count;
/* check if there is over subscription
* Note: the arbitration between the number of VMIDs and
@@ -62,7 +61,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
max_proc_per_quantum = dev->max_proc_per_quantum;
if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_queues_num(pm->dqm)) {
+ compute_queue_count > get_cp_queues_num(pm->dqm)) {
*over_subscription = true;
pr_debug("Over subscribed runlist\n");
}
@@ -141,7 +140,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pm->ib_size_bytes = alloc_size_bytes;
pr_debug("Building runlist ib process count: %d queues count %d\n",
- pm->dqm->processes_count, pm->dqm->queue_count);
+ pm->dqm->processes_count, pm->dqm->active_queue_count);
/* build the run list ib packet */
list_for_each_entry(cur, queues, list) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6af1b5881f43..4a3049841086 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -41,6 +41,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_device.h>
#include <kgd_kfd_interface.h>
+#include <linux/swap.h>
#include "amd_shared.h"
@@ -293,6 +294,9 @@ struct kfd_dev {
/* xGMI */
uint64_t hive_id;
+
+ /* UUID */
+ uint64_t unique_id;
bool pci_atomic_requested;
@@ -502,6 +506,9 @@ struct queue {
struct kfd_process *process;
struct kfd_dev *device;
void *gws;
+
+ /* procfs */
+ struct kobject kobj;
};
/*
@@ -646,6 +653,7 @@ struct kfd_process_device {
* function.
*/
bool already_dequeued;
+ bool runtime_inuse;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
@@ -729,6 +737,7 @@ struct kfd_process {
/* Kobj for our procfs */
struct kobject *kobj;
+ struct kobject *kobj_queues;
struct attribute attr_pasid;
};
@@ -835,6 +844,8 @@ extern struct device *kfd_device;
/* KFD's procfs */
void kfd_procfs_init(void);
void kfd_procfs_shutdown(void);
+int kfd_procfs_add_queue(struct queue *q);
+void kfd_procfs_del_queue(struct queue *q);
/* Topology */
int kfd_topology_init(void);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 25b90f70aecd..fe0cd49d4ea7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -31,6 +31,7 @@
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
+#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
@@ -132,6 +133,88 @@ void kfd_procfs_shutdown(void)
}
}
+static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
+ struct attribute *attr, char *buffer)
+{
+ struct queue *q = container_of(kobj, struct queue, kobj);
+
+ if (!strcmp(attr->name, "size"))
+ return snprintf(buffer, PAGE_SIZE, "%llu",
+ q->properties.queue_size);
+ else if (!strcmp(attr->name, "type"))
+ return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
+ else if (!strcmp(attr->name, "gpuid"))
+ return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
+ else
+ pr_err("Invalid attribute");
+
+ return 0;
+}
+
+static struct attribute attr_queue_size = {
+ .name = "size",
+ .mode = KFD_SYSFS_FILE_MODE
+};
+
+static struct attribute attr_queue_type = {
+ .name = "type",
+ .mode = KFD_SYSFS_FILE_MODE
+};
+
+static struct attribute attr_queue_gpuid = {
+ .name = "gpuid",
+ .mode = KFD_SYSFS_FILE_MODE
+};
+
+static struct attribute *procfs_queue_attrs[] = {
+ &attr_queue_size,
+ &attr_queue_type,
+ &attr_queue_gpuid,
+ NULL
+};
+
+static const struct sysfs_ops procfs_queue_ops = {
+ .show = kfd_procfs_queue_show,
+};
+
+static struct kobj_type procfs_queue_type = {
+ .sysfs_ops = &procfs_queue_ops,
+ .default_attrs = procfs_queue_attrs,
+};
+
+int kfd_procfs_add_queue(struct queue *q)
+{
+ struct kfd_process *proc;
+ int ret;
+
+ if (!q || !q->process)
+ return -EINVAL;
+ proc = q->process;
+
+ /* Create proc/<pid>/queues/<queue id> folder */
+ if (!proc->kobj_queues)
+ return -EFAULT;
+ ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
+ proc->kobj_queues, "%u", q->properties.queue_id);
+ if (ret < 0) {
+ pr_warn("Creating proc/<pid>/queues/%u failed",
+ q->properties.queue_id);
+ kobject_put(&q->kobj);
+ return ret;
+ }
+
+ return 0;
+}
+
+void kfd_procfs_del_queue(struct queue *q)
+{
+ if (!q)
+ return;
+
+ kobject_del(&q->kobj);
+ kobject_put(&q->kobj);
+}
+
int kfd_process_create_wq(void)
{
if (!kfd_process_wq)
@@ -244,10 +327,10 @@ err_alloc_mem:
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
struct qcm_process_device *qpd = &pdd->qpd;
- uint32_t flags = ALLOC_MEM_FLAGS_GTT |
- ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
- ALLOC_MEM_FLAGS_WRITABLE |
- ALLOC_MEM_FLAGS_EXECUTABLE;
+ uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
+ KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+ KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
+ KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
void *kaddr;
int ret;
@@ -323,6 +406,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (ret)
pr_warn("Creating pasid for pid %d failed",
(int)process->lead_thread->pid);
+
+ process->kobj_queues = kobject_create_and_add("queues",
+ process->kobj);
+ if (!process->kobj_queues)
+ pr_warn("Creating KFD proc/queues folder failed");
}
out:
if (!IS_ERR(process))
@@ -440,6 +528,16 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
+ /*
+ * before destroying pdd, make sure to report availability
+ * for auto suspend
+ */
+ if (pdd->runtime_inuse) {
+ pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
+ pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
+ pdd->runtime_inuse = false;
+ }
+
kfree(pdd);
}
}
@@ -457,6 +555,9 @@ static void kfd_process_wq_release(struct work_struct *work)
/* Remove the procfs files */
if (p->kobj) {
sysfs_remove_file(p->kobj, &p->attr_pasid);
+ kobject_del(p->kobj_queues);
+ kobject_put(p->kobj_queues);
+ p->kobj_queues = NULL;
kobject_del(p->kobj);
kobject_put(p->kobj);
p->kobj = NULL;
@@ -540,6 +641,11 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
+ /* Signal the eviction fence after user mode queues are
+ * destroyed. This allows any BOs to be freed without
+ * triggering pointless evictions or waiting for fences.
+ */
+ dma_fence_signal(p->ef);
mutex_unlock(&p->mutex);
@@ -591,8 +697,9 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
struct qcm_process_device *qpd = &pdd->qpd;
- uint32_t flags = ALLOC_MEM_FLAGS_GTT |
- ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
+ uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
+ | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
void *kaddr;
int ret;
@@ -754,6 +861,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->process = p;
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
+ pdd->runtime_inuse = false;
list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */
@@ -843,15 +951,41 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
return ERR_PTR(-ENOMEM);
}
+ /*
+ * signal runtime-pm system to auto resume and prevent
+ * further runtime suspend once device pdd is created until
+ * pdd is destroyed.
+ */
+ if (!pdd->runtime_inuse) {
+ err = pm_runtime_get_sync(dev->ddev->dev);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
err = kfd_iommu_bind_process_to_device(pdd);
if (err)
- return ERR_PTR(err);
+ goto out;
err = kfd_process_device_init_vm(pdd, NULL);
if (err)
- return ERR_PTR(err);
+ goto out;
+
+ /*
+ * make sure that runtime_usage counter is incremented just once
+ * per pdd
+ */
+ pdd->runtime_inuse = true;
return pdd;
+
+out:
+ /* balance runpm reference count and exit with error */
+ if (!pdd->runtime_inuse) {
+ pm_runtime_mark_last_busy(dev->ddev->dev);
+ pm_runtime_put_autosuspend(dev->ddev->dev);
+ }
+
+ return ERR_PTR(err);
}
struct kfd_process_device *kfd_get_first_process_device_data(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 31fcd1b51f00..084c35f55d59 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -241,23 +241,18 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
- if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
- >= get_num_sdma_queues(dev->dqm)) ||
- (type == KFD_QUEUE_TYPE_SDMA_XGMI &&
- dev->dqm->xgmi_sdma_queue_count
- >= get_num_xgmi_sdma_queues(dev->dqm))) {
- pr_debug("Over-subscription is not allowed for SDMA.\n");
- retval = -EPERM;
- goto err_create_queue;
- }
-
+ /* SDMA queues are always allocated statically no matter
+ * which scheduler mode is used. We also do not need to
+ * check whether a SDMA queue can be allocated here, because
+ * allocate_sdma_queue() in create_queue() has the
+ * corresponding check logic.
+ */
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
- pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
@@ -266,7 +261,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if ((dev->dqm->sched_policy ==
KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
- (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
+ (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
retval = -EPERM;
goto err_create_queue;
@@ -278,7 +273,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
- pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -299,7 +293,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
+ pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
pqm->process->pasid, type, retval);
goto err_create_queue;
}
@@ -322,12 +316,16 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (q) {
pr_debug("PQM done creating queue\n");
+ kfd_procfs_add_queue(q);
print_queue_properties(&q->properties);
}
return retval;
err_create_queue:
+ uninit_queue(q);
+ if (kq)
+ kernel_queue_uninit(kq, false);
kfree(pqn);
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
@@ -378,6 +376,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
}
if (pqn->q) {
+ kfd_procfs_del_queue(pqn->q);
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 203c823d65f1..aa0bfa78a667 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -490,6 +490,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.num_sdma_queues_per_engine);
sysfs_show_32bit_prop(buffer, "num_cp_queues",
dev->node_props.num_cp_queues);
+ sysfs_show_64bit_prop(buffer, "unique_id",
+ dev->node_props.unique_id);
if (dev->gpu) {
log_max_watch_addr =
@@ -1318,7 +1320,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.num_gws = (hws_gws_support &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
- dev->node_props.num_cp_queues = get_queues_num(dev->gpu->dqm);
+ dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
+ dev->node_props.unique_id = gpu->unique_id;
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 74e9b1682af8..46eeecaf1b68 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -54,6 +54,7 @@
struct kfd_node_properties {
uint64_t hive_id;
+ uint64_t unique_id;
uint32_t cpu_cores_count;
uint32_t simd_count;
uint32_t mem_banks_count;