Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c               | 64
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c                   |  8
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   | 29
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h   |  3
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c        | 15
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c         | 40
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c        |  4
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                   |  6
 drivers/gpu/drm/amd/amdkfd/kfd_process.c                | 30
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  |  3
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c             | 45
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c                    | 22
 12 files changed, 192 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 00350eccd571..9044bdb38cf4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -36,7 +36,6 @@
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <linux/fdtable.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
}
static int criu_get_prime_handle(struct kgd_mem *mem,
- int flags, u32 *shared_fd)
+ int flags, u32 *shared_fd,
+ struct file **file)
{
struct dma_buf *dmabuf;
int ret;
@@ -1846,13 +1846,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret;
}
- ret = dma_buf_fd(dmabuf, flags);
+ ret = get_unused_fd_flags(flags);
if (ret < 0) {
pr_err("dmabuf create fd failed, ret:%d\n", ret);
goto out_free_dmabuf;
}
*shared_fd = ret;
+ *file = dmabuf->file;
return 0;
out_free_dmabuf:
@@ -1860,6 +1861,25 @@ out_free_dmabuf:
return ret;
}
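+/* Reserved dmabuf fds are only published to userspace at the very end:
+ * on success each fd reserved with get_unused_fd_flags() is bound to its
+ * dmabuf file with fd_install(); on error the file reference is dropped
+ * with fput() and the fd slot released with put_unused_fd().
+ */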
+static void commit_files(struct file **files,
+ struct kfd_criu_bo_bucket *bo_buckets,
+ unsigned int count,
+ int err)
+{
+ while (count--) {
+ struct file *file = files[count];
+
+ if (!file)
+ continue;
+ if (err) {
+ fput(file);
+ put_unused_fd(bo_buckets[count].dmabuf_fd);
+ } else {
+ fd_install(bo_buckets[count].dmabuf_fd, file);
+ }
+ }
+}
+
static int criu_checkpoint_bos(struct kfd_process *p,
uint32_t num_bos,
uint8_t __user *user_bos,
@@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets;
struct kfd_criu_bo_priv_data *bo_privs;
+ struct file **files = NULL;
int ret = 0, pdd_index, bo_index = 0, id;
void *mem;
@@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
+ files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
struct kfd_process_device *pdd = p->pdds[pdd_index];
struct amdgpu_bo *dumper_bo;
@@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, &files[bo_index]);
if (ret)
goto exit;
} else {
@@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs);
exit:
- while (ret && bo_index--) {
- if (bo_buckets[bo_index].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[bo_index].dmabuf_fd);
- }
-
+ commit_files(files, bo_buckets, bo_index, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
static int criu_restore_bo(struct kfd_process *p,
struct kfd_criu_bo_bucket *bo_bucket,
- struct kfd_criu_bo_priv_data *bo_priv)
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct file **file)
{
struct kfd_process_device *pdd;
struct kgd_mem *kgd_mem;
@@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p,
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, file);
if (ret)
return ret;
} else {
@@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ struct file **files = NULL;
int ret = 0;
uint32_t i = 0;
@@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (!bo_buckets)
return -ENOMEM;
+ files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
args->num_bos * sizeof(*bo_buckets));
if (ret) {
@@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p,
/* Create and map new BOs */
for (; i < args->num_bos; i++) {
- ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
if (ret) {
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
goto exit;
@@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p,
ret = -EFAULT;
exit:
- while (ret && i--) {
- if (bo_buckets[i].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[i].dmabuf_fd);
- }
+ commit_files(files, bo_buckets, i, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cd7b81b7b939..48caecf7e72e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
- pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
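+ /* also report the cache line sizes parsed from the GC config */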
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
i++;
}
/* Scalar L1 Data Cache per SQC */
@@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
i++;
}
/* GL1 Data Cache per SA */
@@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 0;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
i++;
}
/* L3 Data Cache per GPU */
@@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 0;
i++;
}
return i;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 577d121cc6d1..648f40091aa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2407,10 +2407,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
}
- list_del(&q->list);
- qpd->queue_count--;
if (q->properties.is_active) {
decrement_queue_count(dqm, qpd, q);
+ q->properties.is_active = false;
if (!dqm->dev->kfd->shared_resources.enable_mes) {
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
@@ -2421,6 +2420,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = remove_queue_mes(dqm, q, qpd);
}
}
+ list_del(&q->list);
+ qpd->queue_count--;
/*
* Unconditionally decrement this counter, regardless of the queue's
@@ -3539,6 +3540,30 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
return debug_map_and_unlock(dqm);
}
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format)
+{
+ struct queue *q;
+ bool r = false;
+
+ if (!queue_format)
+ return r;
+
+ dqm_lock(dqm);
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.doorbell_off == doorbell_off) {
+ *queue_format = q->properties.format;
+ r = true;
+ goto out;
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
+ return r;
+}
#if defined(CONFIG_DEBUG_FS)
static void seq_reg_dump(struct seq_file *m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 08b40826ad1e..09ab36f8e8c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -324,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q,
int debug_lock_and_unmap(struct device_queue_manager *dqm);
int debug_map_and_unlock(struct device_queue_manager *dqm);
int debug_refresh_runlist(struct device_queue_manager *dqm);
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index bb8cbfc39b90..37b69fe0ede3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -306,23 +306,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_UTCL2) {
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
- uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
- uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
- int hub_inst = 0;
struct kfd_hsa_memory_exception_data exception_data;
- /* gfxhub */
- if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
- hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
- node_id);
- if (hub_inst < 0)
- hub_inst = 0;
- }
-
- /* mmhub */
- if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
- hub_inst = node_id / 4;
-
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index fecdbbab9894..d46a13156ee9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -167,11 +167,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SE3SH:
case SOC15_IH_CLIENTID_UTCL2:
block = AMDGPU_RAS_BLOCK__GFX;
- if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
- reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
- else
+ if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
break;
case SOC15_IH_CLIENTID_VMC:
case SOC15_IH_CLIENTID_VMC1:
@@ -184,11 +196,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
block = AMDGPU_RAS_BLOCK__SDMA;
- if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
- reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
- else
+ if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+ /* driver mode-2 for SDMA poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* driver mode-2 for SDMA poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
break;
default:
dev_warn(dev->adev->dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
index d163d92a692f..2b72d5b4949b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -341,6 +341,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdmax_rlcx_doorbell_offset =
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
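+ /* Program the SDMA context-switch quantum for this queue from the
+ * amdgpu_sdma_phase_quantum module parameter, masked to the
+ * CONTEXT_QUANTUM field of SDMA0_QUEUE0_SCHEDULE_CNTL.
+ */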
+ m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
+ << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
+ & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
+
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9ae9abc6eb43..d6530febabad 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -868,6 +868,12 @@ struct svm_range_list {
struct task_struct *faulting_task;
/* check point ts decides if page fault recovery need be dropped */
uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
+
+ /* Default granularity to use in buffer migration
+ * and restoration of backing memory while handling
+ * recoverable page faults
+ */
+ uint8_t default_granularity;
};
/* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a902950cc060..d07acf1b2f93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -270,6 +270,11 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
+ int i;
+ struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
+ u32 queue_format;
+
+ memset(cu_occupancy, 0x0, sizeof(cu_occupancy));
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
@@ -287,8 +292,29 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
- &max_waves_per_cu, 0);
+
+ /*
+ * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
+ * For AQL queues, because of cooperative dispatch we multiply the wave count
+ * by the number of XCCs in the partition to get the total wave count across
+ * all XCCs in the partition.
+ * For PM4 queues, there is no cooperative dispatch so wave_cnt stays as it is.
+ */
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
+ &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
+ if (cu_occupancy[i].wave_cnt != 0 &&
+ kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
+ cu_occupancy[i].doorbell_off,
+ &queue_format)) {
+ if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
+ wave_cnt += cu_occupancy[i].wave_cnt;
+ else
+ wave_cnt += (NUM_XCC(dev->xcc_mask) *
+ cu_occupancy[i].wave_cnt);
+ }
+ }
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 20ea745729ee..01b960b15274 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -517,7 +517,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval)
goto err_destroy_queue;
- kfd_procfs_del_queue(pqn->q);
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
@@ -527,6 +526,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != -ETIME)
goto err_destroy_queue;
}
+ kfd_procfs_del_queue(pqn->q);
kfd_queue_release_buffers(pdd, &pqn->q->properties);
pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
@@ -1046,6 +1046,7 @@ exit:
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
+ kfree(q_extra_data);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ea6a8e43bd5b..de8b9abf7afc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -235,17 +235,16 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
amdgpu_reset_get_desc(reset_context, reset_cause,
sizeof(reset_cause));
- kfd_smi_event_add(0, dev, event, "%x %s\n",
- dev->reset_seq_num,
- reset_cause);
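+ /* The KFD_EVENT_FMT_* helpers expand to the event's format string and
+ * argument list, so the SMI event text layout is defined in one place
+ * rather than repeated at every call site.
+ */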
+ kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+ dev->reset_seq_num, reset_cause));
}
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
throttle_bitmask,
- amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
}
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
@@ -256,8 +255,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
if (task_info->pid)
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
- task_info->pid, task_info->task_name);
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
+ task_info->pid, task_info->task_name));
amdgpu_vm_put_task_info(task_info);
}
}
@@ -267,16 +266,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
ktime_t ts)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
- "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
- address, node->id, write_fault ? 'W' : 'R');
+ KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
+ address, node->id, write_fault ? 'W' : 'R'));
}
void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
unsigned long address, bool migration)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
- "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
- pid, address, node->id, migration ? 'M' : 'U');
+ KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
+ pid, address, node->id, migration ? 'M' : 'U'));
}
void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
@@ -286,9 +285,9 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
- "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_START(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, prefetch_loc, preferred_loc, trigger);
+ from, to, prefetch_loc, preferred_loc, trigger));
}
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
@@ -296,24 +295,24 @@ void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
uint32_t from, uint32_t to, uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
- "%lld -%d @%lx(%lx) %x->%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_END(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, trigger);
+ from, to, trigger));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
- "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
- node->id, trigger);
+ KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
+ node->id, trigger));
}
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
- node->id);
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
+ node->id, 0));
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -330,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x %c\n", ktime_get_boottime_ns(),
- p->lead_thread->pid, pdd->dev->id, 'R');
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R'));
}
kfd_unref_process(p);
}
@@ -341,8 +340,8 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
- "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
- pid, address, last - address + 1, node->id, trigger);
+ KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
+ pid, address, last - address + 1, node->id, trigger));
}
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 40c94c4cdd96..04e746923697 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -309,12 +309,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
}
static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
- uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+ int32_t *prefetch_loc, uint8_t *granularity,
+ uint32_t *flags)
{
*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
- *granularity = 9;
+ *granularity = svms->default_granularity;
*flags =
KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
}
@@ -358,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
- svm_range_set_default_attributes(&prange->preferred_loc,
+ svm_range_set_default_attributes(svms, &prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@@ -2703,9 +2704,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
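+ /* Clamp to the VMA and align to the configured default granularity
+ * (1UL << default_granularity pages) instead of the previous hard-coded
+ * 512-page (2UL << 8) alignment.
+ */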
start_limit = max(vma->vm_start >> PAGE_SHIFT,
- (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+ (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
- (unsigned long)ALIGN(addr + 1, 2UL << 8));
+ (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
/* First range that starts after the fault address */
node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
if (node) {
@@ -3249,6 +3251,12 @@ int svm_range_list_init(struct kfd_process *p)
if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
+ /* Value of default granularity cannot exceed 0x1B, the
+ * number of pages supported by a 4-level paging table
+ */
+ svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
+ pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
+
return 0;
}
@@ -3776,7 +3784,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
node = interval_tree_iter_first(&svms->objects, start, last);
if (!node) {
pr_debug("range attrs not found return default values\n");
- svm_range_set_default_attributes(&location, &prefetch_loc,
+ svm_range_set_default_attributes(svms, &location, &prefetch_loc,
&granularity, &flags_and);
flags_or = flags_and;
if (p->xnack_enabled)