Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c  251
1 file changed, 182 insertions, 69 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index d8e940f03102..b1a6eb349bb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,6 +28,11 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
+#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -39,12 +45,14 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription,
+ int xnack_conflict)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
- struct kfd_dev *dev = pm->dqm->dev;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->active_queue_count;
@@ -56,17 +64,22 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
- if (dev->max_proc_per_quantum > 1)
- max_proc_per_quantum = dev->max_proc_per_quantum;
+ if (node->max_proc_per_quantum > 1)
+ max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
- pr_debug("Over subscribed runlist\n");
- }
+ if (process_count > max_proc_per_quantum)
+ *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+ if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
+
+ if (*over_subscription)
+ dev_dbg(dev, "Over subscribed runlist\n");
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -80,29 +93,32 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
if (*over_subscription)
*rlib_size += pm->pmf->runlist_size;
- pr_debug("runlist ib size %d\n", *rlib_size);
+ dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}
static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription,
+ int xnack_conflict)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval;
if (WARN_ON(pm->allocated))
return -EINVAL;
- pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
+ xnack_conflict);
mutex_lock(&pm->lock);
- retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
- &pm->ib_buffer_obj);
+ retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
if (retval) {
- pr_err("Failed to allocate runlist IB\n");
+ dev_err(dev, "Failed to allocate runlist IB\n");
goto out;
}
@@ -124,32 +140,54 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
+ int xnack_enabled = -1;
+ bool xnack_conflict = 0;
rl_wptr = retval = processes_mapped = 0;
+ /* Check if processes set different xnack modes */
+ list_for_each_entry(cur, queues, list) {
+ qpd = cur->qpd;
+ if (xnack_enabled < 0)
+ /* First process */
+ xnack_enabled = qpd->pqm->process->xnack_enabled;
+ else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
+ /* Found a process with a different xnack mode */
+ xnack_conflict = 1;
+ break;
+ }
+ }
+
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
- &alloc_size_bytes, &is_over_subscription);
+ &alloc_size_bytes, &is_over_subscription,
+ xnack_conflict);
if (retval)
return retval;
*rl_size_bytes = alloc_size_bytes;
pm->ib_size_bytes = alloc_size_bytes;
- pr_debug("Building runlist ib process count: %d queues count %d\n",
+ dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->active_queue_count);
+build_runlist_ib:
/* build the run list ib packet */
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
+ /* group processes with the same xnack mode together */
+ if (qpd->pqm->process->xnack_enabled != xnack_enabled)
+ continue;
/* build map process packet */
if (processes_mapped >= pm->dqm->processes_count) {
- pr_debug("Not enough space left in runlist IB\n");
+ dev_dbg(dev, "Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
}
@@ -166,7 +204,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!kq->queue->properties.is_active)
continue;
- pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -185,7 +224,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!q->properties.is_active)
continue;
- pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -201,18 +241,33 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
alloc_size_bytes);
}
}
+ if (xnack_conflict) {
+ /* pick up processes with the other xnack mode */
+ xnack_enabled = !xnack_enabled;
+ xnack_conflict = 0;
+ goto build_runlist_ib;
+ }
- pr_debug("Finished map process and queues to runlist\n");
+ dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
if (!pm->is_over_subscription)
- pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
+ is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
+ " too many processes" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
+ " too many queues" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
+ " multiple processes using cooperative launch" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
+ " xnack on/off processes mixed on gfx9" : "");
+
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = !!is_over_subscription;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
@@ -223,7 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
- switch (dqm->dev->device_info->asic_family) {
+ switch (dqm->dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
/* PM4 packet structures on CIK are the same as on VI */
@@ -236,30 +291,19 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_VEGAM:
pm->pmf = &kfd_vi_pm_funcs;
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- pm->pmf = &kfd_v9_pm_funcs;
- break;
- case CHIP_ALDEBARAN:
- pm->pmf = &kfd_aldebaran_pm_funcs;
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dqm->dev->device_info->asic_family);
- return -EINVAL;
+ if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
+ pm->pmf = &kfd_aldebaran_pm_funcs;
+ else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
+ pm->pmf = &kfd_v9_pm_funcs;
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->adev->asic_type);
+ return -EINVAL;
+ }
}
pm->dqm = dqm;
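The rewritten default branch in pm_init() stops enumerating chip names and keys off the GC (graphics core) IP version instead: dedicated packet functions for the 9.4.2/9.4.3/9.4.4/9.5.0 parts, and the generic gfx9 functions for anything else at 9.0.1 or newer. KFD_GC_VERSION() and IP_VERSION() are existing amdgpu/KFD macros; the sketch below only illustrates why packing major/minor/revision into one ordered integer makes a range check such as >= IP_VERSION(9, 0, 1) meaningful. The packing shown is an assumption for illustration, not the authoritative macro from the amd headers, whose layout has changed across kernel releases.

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative packing only -- NOT the authoritative amdgpu macro.  The point
 * is that (major, minor, revision) collapses into one integer whose natural
 * ordering matches the ASIC generation ordering.
 */
#define EXAMPLE_IP_VERSION(mj, mn, rv)	(((mj) << 16) | ((mn) << 8) | (rv))

static bool uses_gfx9_packet_funcs(unsigned int gc_ver)
{
	/* Mirrors the shape of the pm_init() check, using the example macro */
	return gc_ver >= EXAMPLE_IP_VERSION(9, 0, 1);
}

int main(void)
{
	printf("gfx 9.4.1 -> %d\n", uses_gfx9_packet_funcs(EXAMPLE_IP_VERSION(9, 4, 1)));
	printf("gfx 8.0.3 -> %d\n", uses_gfx9_packet_funcs(EXAMPLE_IP_VERSION(8, 0, 3)));
	return 0;
}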
@@ -274,15 +318,18 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
return 0;
}
-void pm_uninit(struct packet_manager *pm, bool hanging)
+void pm_uninit(struct packet_manager *pm)
{
mutex_destroy(&pm->lock);
- kernel_queue_uninit(pm->priv_queue, hanging);
+ kernel_queue_uninit(pm->priv_queue);
+ pm->priv_queue = NULL;
}
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -292,14 +339,14 @@ int pm_send_set_resources(struct packet_manager *pm,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->set_resources(pm, buffer, res);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -336,7 +383,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_create_runlist;
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
@@ -354,6 +401,8 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -365,14 +414,14 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -381,11 +430,71 @@ out:
return retval;
}
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
+ * by writing to CP_IQ_WAIT_TIME2 registers.
+ *
+ * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition
+ * @value: Depends on the cmd. This parameter is unused for
+ * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
+ * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds the value to be set
+ *
+ */
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
+{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
+ int retval = 0;
+ uint32_t *buffer, size;
+
+ if (!pm->pmf->config_dequeue_wait_counts ||
+ !pm->pmf->config_dequeue_wait_counts_size)
+ return 0;
+
+ if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
+ return 0;
+
+ size = pm->pmf->config_dequeue_wait_counts_size;
+
+ mutex_lock(&pm->lock);
+
+ if (size) {
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+
+ if (!buffer) {
+ dev_err(dev,
+ "Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
+ cmd, value);
+ if (!retval) {
+ retval = kq_submit_packet(pm->priv_queue);
+
+ /* If default value is modified, cache that in dqm->wait_times */
+ if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
+ update_dqm_wait_times(pm->dqm);
+ } else {
+ kq_rollback_packet(pm->priv_queue);
+ }
+ }
+out:
+ mutex_unlock(&pm->lock);
+ return retval;
+}
+
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -394,15 +503,14 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
- reset, sdma_engine);
+ retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
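pm_config_dequeue_wait_counts() above is the single entry point for programming CP_IQ_WAIT_TIME2, keyed by enum kfd_config_dequeue_wait_counts_cmd. A hedged caller sketch follows; only the function itself and the KFD_DEQUEUE_WAIT_* command names are taken from this patch, while the dqm->packet_mgr field and the helper around the calls are assumptions for illustration.

/*
 * Hypothetical caller: apply the driver's preferred dequeue-wait defaults and
 * then override only the SCH_WAVE count.  pm_config_dequeue_wait_counts() and
 * the KFD_DEQUEUE_WAIT_* commands come from this patch; dqm->packet_mgr and
 * this helper are illustrative assumptions.
 */
static int example_tune_dequeue_waits(struct device_queue_manager *dqm,
				      uint32_t sch_wave)
{
	int r;

	/* @value is ignored for INIT; the call is a no-op outside gfx 9.4.1..9.x */
	r = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
					  KFD_DEQUEUE_WAIT_INIT, 0);
	if (r)
		return r;

	/* @value carries the new SCH_WAVE wait count for this command */
	return pm_config_dequeue_wait_counts(&dqm->packet_mgr,
					     KFD_DEQUEUE_WAIT_SET_SCH_WAVE,
					     sch_wave);
}

On a successful INIT the patch also refreshes the cached values via update_dqm_wait_times(), so later reads of dqm->wait_times reflect the counts actually programmed.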
@@ -444,24 +552,29 @@ out:
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int r = 0;
+ if (!pm->priv_queue)
+ return -EAGAIN;
+
size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
r = -ENOMEM;
goto out;
}
memset(buffer, 0x55, size);
kq_submit_packet(pm->priv_queue);
- pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
- buffer[0], buffer[1], buffer[2], buffer[3],
- buffer[4], buffer[5], buffer[6]);
+ dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+ buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
+ buffer[5], buffer[6]);
out:
mutex_unlock(&pm->lock);
return r;