summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2020-07-27 09:06:18 -0400
committerAlex Deucher <alexander.deucher@amd.com>2020-08-06 15:43:08 -0400
commitb80f050ff23d48c9b191e3da7e31ee520d1b2126 (patch)
tree5a10a07aa20af5d5414d7991a7a00b5c6652e59a
parentc35376137e940c3389df2726a92649c01a9844b4 (diff)
drm/amdkfd: option to disable system mem limit
If multiple process share system memory through /dev/shm, KFD allocate memory should not fail if it reaches the system memory limit because one copy of physical system memory are shared by multiple process. Add module parameter no_system_mem_limit to provide user option to disable system memory limit check at runtime using sysfs or during driver module init using kernel boot argument. By default the system memory limit is on. Print out debug message to warn user if KFD allocate memory failed because system memory reaches limit. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c9
3 files changed, 16 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a5ea70697fba..3e82a11577d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -188,9 +188,11 @@ extern int amdgpu_force_asic_type;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
+extern bool no_system_mem_limit;
#else
static const int sched_policy = KFD_SCHED_POLICY_HWS;
static const bool debug_evictions; /* = false */
+static const bool no_system_mem_limit;
#endif
extern int amdgpu_tmz;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a21cf84b882c..fcf72f337785 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -148,8 +148,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
+ kfd_mem_limit.max_system_mem_limit)
+ pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit) ||
+ kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev->kfd.vram_used + vram_needed >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6d66705a1141..5156c67ec67b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -717,6 +717,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1
bool debug_evictions;
module_param(debug_evictions, bool, 0644);
MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+
#endif
/**