diff options
author | Daniel Phillips <Daniel.Phillips@amd.com> | 2022-05-30 11:21:22 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2022-06-14 21:38:40 -0400 |
commit | 9731dd4cadc53251ef80b3655c8d841fed52fa3d (patch) | |
tree | d2385079c1d0da89501f0e8cc3aacb33244d9479 /drivers/gpu | |
parent | 1a65327a84db5b9081a51ccb1c562083f59bfcec (diff) |
drm/amdkfd: Add available memory ioctl
Add a new KFD ioctl to return the largest possible memory size that
can be allocated as a buffer object using
kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same
accept/reject criteria as that function so that allocating a new
buffer object of the size returned by this new ioctl is guaranteed to
succeed, barring races with other allocating tasks.
This IOCTL will be used by libhsakmt:
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
Signed-off-by: Daniel Phillips <Daniel.Phillips@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 |
3 files changed, 69 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ffb2b7d9b9a5..648c031942e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -268,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f386b0d256d2..8805bd1eed37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -38,6 +38,12 @@ */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +/* + * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * BO chunk + */ +#define VRAM_ALLOCATION_ALIGN (1 << 21) + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) static size_t amdgpu_amdkfd_acc_size(uint64_t size) { @@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { system_mem_needed = acc_size; ttm_mem_needed = acc_size; - vram_needed = size; + + /* + * Conservatively round up the allocation requirement to 2 MB + * to avoid fragmentation caused by 4K allocations in the tail + * 2M BO chunk. + */ + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size; @@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - reserved_for_pt)) { + adev->gmc.real_vram_size - + atomic64_read(&adev->vram_pin_size) - + reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; - adev->kfd.vram_used -= size; + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= acc_size; @@ -1633,6 +1647,22 @@ out_unlock: return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available = adev->gmc.real_vram_size + - adev->kfd.vram_used + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN); +} + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1c7016958d6d..3bcf9bf29acb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1; static struct class *kfd_class; struct device *kfd_device; +static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) +{ + struct kfd_process_device *pdd; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, gpu_id); + + if (pdd) + return pdd; + + mutex_unlock(&p->mutex); + return NULL; +} + +static inline void kfd_unlock_pdd(struct kfd_process_device *pdd) +{ + mutex_unlock(&pdd->process->mutex); +} + int kfd_chardev_init(void) { int err = 0; @@ -958,6 +977,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) return false; } +static int kfd_ioctl_get_available_memory(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id); + + if (!pdd) + return -EINVAL; + args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev); + kfd_unlock_pdd(pdd); + return 0; +} + static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -2648,6 +2680,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) |