diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 215 |
1 files changed, 169 insertions, 46 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 1439420925a0..b1a6eb349bb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2014-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,6 +28,11 @@ #include "kfd_kernel_queue.h" #include "kfd_priv.h" +#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0) +#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1) +#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2) +#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3) + static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) { @@ -39,12 +45,14 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, - bool *over_subscription) + int *over_subscription, + int xnack_conflict) { unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; unsigned int map_queue_size; unsigned int max_proc_per_quantum = 1; - struct kfd_dev *dev = pm->dqm->dev; + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; process_count = pm->dqm->processes_count; queue_count = pm->dqm->active_queue_count; @@ -56,17 +64,22 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). 
*/ - *over_subscription = false; + *over_subscription = 0; - if (dev->max_proc_per_quantum > 1) - max_proc_per_quantum = dev->max_proc_per_quantum; + if (node->max_proc_per_quantum > 1) + max_proc_per_quantum = node->max_proc_per_quantum; - if ((process_count > max_proc_per_quantum) || - compute_queue_count > get_cp_queues_num(pm->dqm) || - gws_queue_count > 1) { - *over_subscription = true; - pr_debug("Over subscribed runlist\n"); - } + if (process_count > max_proc_per_quantum) + *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT; + if (compute_queue_count > get_cp_queues_num(pm->dqm)) + *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT; + if (gws_queue_count > 1) + *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT; + if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)) + *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT; + + if (*over_subscription) + dev_dbg(dev, "Over subscribed runlist\n"); map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ @@ -80,29 +93,32 @@ static void pm_calc_rlib_size(struct packet_manager *pm, if (*over_subscription) *rlib_size += pm->pmf->runlist_size; - pr_debug("runlist ib size %d\n", *rlib_size); + dev_dbg(dev, "runlist ib size %d\n", *rlib_size); } static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer, unsigned int *rl_buffer_size, - bool *is_over_subscription) + int *is_over_subscription, + int xnack_conflict) { + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; int retval; if (WARN_ON(pm->allocated)) return -EINVAL; - pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription, + xnack_conflict); mutex_lock(&pm->lock); - retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, - &pm->ib_buffer_obj); + retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj); if (retval) { - 
pr_err("Failed to allocate runlist IB\n"); + dev_err(dev, "Failed to allocate runlist IB\n"); goto out; } @@ -124,32 +140,54 @@ static int pm_create_runlist_ib(struct packet_manager *pm, { unsigned int alloc_size_bytes; unsigned int *rl_buffer, rl_wptr, i; + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; int retval, processes_mapped; struct device_process_node *cur; struct qcm_process_device *qpd; struct queue *q; struct kernel_queue *kq; - bool is_over_subscription; + int is_over_subscription; + int xnack_enabled = -1; + bool xnack_conflict = 0; rl_wptr = retval = processes_mapped = 0; + /* Check if processes set different xnack modes */ + list_for_each_entry(cur, queues, list) { + qpd = cur->qpd; + if (xnack_enabled < 0) + /* First process */ + xnack_enabled = qpd->pqm->process->xnack_enabled; + else if (qpd->pqm->process->xnack_enabled != xnack_enabled) { + /* Found a process with a different xnack mode */ + xnack_conflict = 1; + break; + } + } + retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, - &alloc_size_bytes, &is_over_subscription); + &alloc_size_bytes, &is_over_subscription, + xnack_conflict); if (retval) return retval; *rl_size_bytes = alloc_size_bytes; pm->ib_size_bytes = alloc_size_bytes; - pr_debug("Building runlist ib process count: %d queues count %d\n", + dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->active_queue_count); +build_runlist_ib: /* build the run list ib packet */ list_for_each_entry(cur, queues, list) { qpd = cur->qpd; + /* group processes with the same xnack mode together */ + if (qpd->pqm->process->xnack_enabled != xnack_enabled) + continue; /* build map process packet */ if (processes_mapped >= pm->dqm->processes_count) { - pr_debug("Not enough space left in runlist IB\n"); + dev_dbg(dev, "Not enough space left in runlist IB\n"); pm_release_ib(pm); return -ENOMEM; } @@ -166,7 +204,8 @@ static int pm_create_runlist_ib(struct 
packet_manager *pm, if (!kq->queue->properties.is_active) continue; - pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", + dev_dbg(dev, + "static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); retval = pm->pmf->map_queues(pm, @@ -185,7 +224,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (!q->properties.is_active) continue; - pr_debug("static_queue, mapping user queue %d, is debug status %d\n", + dev_dbg(dev, + "static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); retval = pm->pmf->map_queues(pm, @@ -201,18 +241,33 @@ static int pm_create_runlist_ib(struct packet_manager *pm, alloc_size_bytes); } } + if (xnack_conflict) { + /* pick up processes with the other xnack mode */ + xnack_enabled = !xnack_enabled; + xnack_conflict = 0; + goto build_runlist_ib; + } - pr_debug("Finished map process and queues to runlist\n"); + dev_dbg(dev, "Finished map process and queues to runlist\n"); if (is_over_subscription) { if (!pm->is_over_subscription) - pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n"); + dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n", + is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? + " too many processes" : "", + is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? + " too many queues" : "", + is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? + " multiple processes using cooperative launch" : "", + is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ? 
+ " xnack on/off processes mixed on gfx9" : ""); + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); } - pm->is_over_subscription = is_over_subscription; + pm->is_over_subscription = !!is_over_subscription; for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]); @@ -237,7 +292,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) pm->pmf = &kfd_vi_pm_funcs; break; default: - if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) + if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) || + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) || + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0)) pm->pmf = &kfd_aldebaran_pm_funcs; else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) pm->pmf = &kfd_v9_pm_funcs; @@ -260,16 +318,18 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) return 0; } -void pm_uninit(struct packet_manager *pm, bool hanging) +void pm_uninit(struct packet_manager *pm) { mutex_destroy(&pm->lock); - kernel_queue_uninit(pm->priv_queue, hanging); + kernel_queue_uninit(pm->priv_queue); pm->priv_queue = NULL; } int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; uint32_t *buffer, size; int retval = 0; @@ -279,14 +339,14 @@ int pm_send_set_resources(struct packet_manager *pm, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { - pr_err("Failed to allocate buffer on kernel queue\n"); + dev_err(dev, "Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } retval = pm->pmf->set_resources(pm, buffer, res); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -323,7 +383,7 @@ int pm_send_runlist(struct packet_manager *pm, struct 
list_head *dqm_queues) if (retval) goto fail_create_runlist; - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); @@ -341,6 +401,8 @@ fail_create_runlist_ib: int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint64_t fence_value) { + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; uint32_t *buffer, size; int retval = 0; @@ -352,14 +414,14 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { - pr_err("Failed to allocate buffer on kernel queue\n"); + dev_err(dev, "Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -368,11 +430,71 @@ out: return retval; } -int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts + * by writing to CP_IQ_WAIT_TIME2 registers. + * + * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition + * @value: Depends on the cmd. This parameter is unused for + * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. 
For + * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set + * + */ +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) +{ + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; + int retval = 0; + uint32_t *buffer, size; + + if (!pm->pmf->config_dequeue_wait_counts || + !pm->pmf->config_dequeue_wait_counts_size) + return 0; + + if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))) + return 0; + + size = pm->pmf->config_dequeue_wait_counts_size; + + mutex_lock(&pm->lock); + + if (size) { + kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), + (unsigned int **)&buffer); + + if (!buffer) { + dev_err(dev, + "Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; + } + + retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, + cmd, value); + if (!retval) { + retval = kq_submit_packet(pm->priv_queue); + + /* If default value is modified, cache that in dqm->wait_times */ + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) + update_dqm_wait_times(pm->dqm); + } else { + kq_rollback_packet(pm->priv_queue); + } + } +out: + mutex_unlock(&pm->lock); + return retval; +} + +int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param, bool reset, - unsigned int sdma_engine) + uint32_t filter_param, bool reset) { + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; uint32_t *buffer, size; int retval = 0; @@ -381,15 +503,14 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { - pr_err("Failed to allocate buffer on kernel queue\n"); + dev_err(dev, "Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } - retval = 
pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, - reset, sdma_engine); + retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -431,6 +552,8 @@ out: int pm_debugfs_hang_hws(struct packet_manager *pm) { + struct kfd_node *node = pm->dqm->dev; + struct device *dev = node->adev->dev; uint32_t *buffer, size; int r = 0; @@ -442,16 +565,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { - pr_err("Failed to allocate buffer on kernel queue\n"); + dev_err(dev, "Failed to allocate buffer on kernel queue\n"); r = -ENOMEM; goto out; } memset(buffer, 0x55, size); kq_submit_packet(pm->priv_queue); - pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", - buffer[0], buffer[1], buffer[2], buffer[3], - buffer[4], buffer[5], buffer[6]); + dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", + buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], + buffer[5], buffer[6]); out: mutex_unlock(&pm->lock); return r; |
