Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  53
1 file changed, 47 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ac0a432a9bf7..e93850f2f3b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -31,6 +31,7 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_xgmi.h"
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -96,7 +97,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
uint64_t mem;
si_meminfo(&si);
- mem = si.totalram - si.totalhigh;
+ mem = si.freeram - si.freehigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
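
A minimal userspace sketch of the arithmetic in the changed line above, assuming made-up sysinfo values: freeram and freehigh are both counts of si.mem_unit-sized blocks, so the subtraction happens first and the result is scaled to bytes once.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical sysinfo snapshot: 16 GiB free, no highmem, 4 KiB unit */
	uint64_t freeram  = 4194304;	/* blocks of mem_unit bytes */
	uint64_t freehigh = 0;
	uint64_t mem_unit = 4096;

	/* mirrors: mem = si.freeram - si.freehigh; mem *= si.mem_unit; */
	uint64_t mem = (freeram - freehigh) * mem_unit;

	printf("low free memory = %llu bytes\n", (unsigned long long)mem);
	return 0;
}
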
@@ -119,6 +120,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
*/
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+static size_t amdgpu_amdkfd_acc_size(uint64_t size)
+{
+ size >>= PAGE_SHIFT;
+ size *= sizeof(dma_addr_t) + sizeof(void *);
+
+ return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) +
+ __roundup_pow_of_two(sizeof(struct ttm_tt)) +
+ PAGE_ALIGN(size);
+}
+
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
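
A minimal standalone sketch of the accounting estimate introduced by amdgpu_amdkfd_acc_size() above, assuming 4 KiB pages, a 64-bit host, and placeholder sizes for struct amdgpu_bo and struct ttm_tt (the real sizes depend on the kernel build):

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12			/* assume 4 KiB pages */
#define SKETCH_PAGE_SIZE  (1ULL << SKETCH_PAGE_SHIFT)
#define SKETCH_BO_SIZE    1024ULL		/* placeholder sizeof(struct amdgpu_bo) */
#define SKETCH_TT_SIZE    256ULL		/* placeholder sizeof(struct ttm_tt) */

static uint64_t roundup_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

static uint64_t page_align(uint64_t x)
{
	return (x + SKETCH_PAGE_SIZE - 1) & ~(SKETCH_PAGE_SIZE - 1);
}

/*
 * Mirrors amdgpu_amdkfd_acc_size(): one dma_addr_t plus one page pointer
 * of bookkeeping per page, plus the two rounded-up object allocations.
 */
static uint64_t acc_size(uint64_t size)
{
	size >>= SKETCH_PAGE_SHIFT;
	size *= sizeof(uint64_t) + sizeof(void *);	/* dma_addr_t + struct page * */

	return roundup_pow_of_two(SKETCH_BO_SIZE) +
	       roundup_pow_of_two(SKETCH_TT_SIZE) +
	       page_align(size);
}

int main(void)
{
	/* e.g. a 2 MiB buffer: 512 pages * 16 bytes = 8 KiB of page arrays */
	printf("acc_size(2 MiB) = %llu bytes\n",
	       (unsigned long long)acc_size(2ULL << 20));
	return 0;
}
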
@@ -127,8 +138,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
- acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
- sizeof(struct amdgpu_bo));
+ acc_size = amdgpu_amdkfd_acc_size(size);
vram_needed = 0;
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
@@ -175,8 +185,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
{
size_t acc_size;
- acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
- sizeof(struct amdgpu_bo));
+ acc_size = amdgpu_amdkfd_acc_size(size);
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
@@ -404,7 +413,10 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
{
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
+ bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
uint32_t mapping_flags;
+ uint64_t pte_flags;
+ bool snoop = false;
mapping_flags = AMDGPU_VM_PAGE_READABLE;
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
@@ -425,12 +437,41 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
+ case CHIP_ALDEBARAN:
+ if (coherent && uncached) {
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+ snoop = true;
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ if (bo_adev == adev) {
+ mapping_flags |= AMDGPU_VM_MTYPE_RW;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ snoop = true;
+ } else {
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ snoop = true;
+ }
+ } else {
+ snoop = true;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ /* system memory uses NC on A+A */
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ else
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+ break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
- return amdgpu_gem_va_map_flags(adev, mapping_flags);
+ pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
+ pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+
+ return pte_flags;
}
/* add_bo_to_vm - Add a BO to a VM
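
A minimal standalone sketch of the Aldebaran mapping decision added to get_pte_flags() above, reduced to plain booleans; the MTYPE names stand in for the AMDGPU_VM_MTYPE_* flags, the snoop result follows the same branches as the hunk, and the inputs used in main() are purely hypothetical.

#include <stdbool.h>
#include <stdio.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_RW };

struct pte_choice {
	enum mtype mtype;
	bool snoop;
};

static struct pte_choice aldebaran_choice(bool coherent, bool uncached,
					  bool is_vram, bool local_gpu,
					  bool same_xgmi_hive,
					  bool connected_to_cpu)
{
	struct pte_choice c = { MTYPE_NC, false };

	if (coherent && uncached) {
		/* uncached mapping: snooped unless it is remote VRAM over PCIe */
		c.snoop = connected_to_cpu || !is_vram;
		c.mtype = MTYPE_UC;
	} else if (is_vram) {
		if (local_gpu) {
			c.mtype = MTYPE_RW;
			c.snoop = connected_to_cpu;	/* A+A: CPU may cache VRAM */
		} else {
			c.mtype = MTYPE_NC;
			c.snoop = same_xgmi_hive;	/* peer VRAM reached over XGMI */
		}
	} else {
		/* system memory is always snooped */
		c.snoop = true;
		c.mtype = connected_to_cpu ? MTYPE_NC :
			  (coherent ? MTYPE_UC : MTYPE_NC);
	}

	return c;
}

int main(void)
{
	/* hypothetical case: local VRAM on a CPU-connected (A+A) GPU */
	struct pte_choice c = aldebaran_choice(false, false, true, true,
					       false, true);

	printf("mtype=%d snoop=%d\n", c.mtype, c.snoop);
	return 0;
}
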