summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c189
1 files changed, 178 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index fe1a39ffda72..4d32233cde92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -32,6 +32,59 @@
#include "amdgpu_xgmi.h"
/**
+ * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for pdb0 and map it for CPU access
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
+{
+ int r;
+ struct amdgpu_bo_param bp;
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
+ uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = PAGE_ALIGN((npdes + 1) * 8);
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
+ if (unlikely(r != 0))
+ goto bo_reserve_failure;
+
+ r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto bo_pin_failure;
+ r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
+ if (r)
+ goto bo_kmap_failure;
+
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+ return 0;
+
+bo_kmap_failure:
+ amdgpu_bo_unpin(adev->gmc.pdb0_bo);
+bo_pin_failure:
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+bo_reserve_failure:
+ amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+ return r;
+}
+
+/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
*
* @bo: the BO to get the PDE for
@@ -158,6 +211,39 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
mc->vram_end, mc->real_vram_size >> 20);
}
+/** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * This function is only used if use GART for FB translation. In such
+ * case, we use sysvm aperture (vmid0 page tables) for both vram
+ * and gart (aka system memory) access.
+ *
+ * GPUVM (and our organization of vmid0 page tables) require sysvm
+ * aperture to be placed at a location aligned with 8 times of native
+ * page size. For example, if vm_context0_cntl.page_table_block_size
+ * is 12, then native page size is 8G (2M*2^12), sysvm should start
+ * with a 64G aligned address. For simplicity, we just put sysvm at
+ * address 0. So vram start at address 0 and gart is right after vram.
+ */
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ u64 hive_vram_start = 0;
+ u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
+ mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
+ mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+ mc->gart_start = hive_vram_end + 1;
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
/**
* amdgpu_gmc_gart_location - try to find GART location
*
@@ -165,7 +251,6 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
* @mc: memory controller structure holding memory information
*
* Function will place try to place GART before or after VRAM.
- *
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
@@ -176,8 +261,6 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
- mc->gart_size += adev->pm.smu_prv_buffer_size;
-
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
@@ -308,26 +391,46 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
int r;
- if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
- r = adev->umc.funcs->ras_late_init(adev);
+ if (adev->umc.ras_funcs &&
+ adev->umc.ras_funcs->ras_late_init) {
+ r = adev->umc.ras_funcs->ras_late_init(adev);
if (r)
return r;
}
- if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
- r = adev->mmhub.funcs->ras_late_init(adev);
+ if (adev->mmhub.ras_funcs &&
+ adev->mmhub.ras_funcs->ras_late_init) {
+ r = adev->mmhub.ras_funcs->ras_late_init(adev);
if (r)
return r;
}
- return amdgpu_xgmi_ras_late_init(adev);
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
+
+ if (adev->gmc.xgmi.ras_funcs &&
+ adev->gmc.xgmi.ras_funcs->ras_late_init) {
+ r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
- amdgpu_umc_ras_fini(adev);
- amdgpu_mmhub_ras_fini(adev);
- amdgpu_xgmi_ras_fini(adev);
+ if (adev->umc.ras_funcs &&
+ adev->umc.ras_funcs->ras_fini)
+ adev->umc.ras_funcs->ras_fini(adev);
+
+ if (adev->mmhub.ras_funcs &&
+ adev->mmhub.ras_funcs->ras_fini)
+ amdgpu_mmhub_ras_fini(adev);
+
+ if (adev->gmc.xgmi.ras_funcs &&
+ adev->gmc.xgmi.ras_funcs->ras_fini)
+ adev->gmc.xgmi.ras_funcs->ras_fini(adev);
}
/*
@@ -384,6 +487,16 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_RAVEN:
+ if (amdgpu_tmz == 0) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled\n");
+ }
+ break;
case CHIP_RENOIR:
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -423,6 +536,8 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
/*
* noretry = 0 will cause kfd page fault tests fail
* for some ASICs, so set default to 1 for these ASICs.
@@ -518,3 +633,55 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
adev->mman.stolen_extended_size = 0;
}
}
+
+/**
+ * amdgpu_gmc_init_pdb0 - initialize PDB0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This function is only used when GART page table is used
+ * for FB address translatioin. In such a case, we construct
+ * a 2-level system VM page table: PDB0->PTB, to cover both
+ * VRAM of the hive and system memory.
+ *
+ * PDB0 is static, initialized once on driver initialization.
+ * The first n entries of PDB0 are used as PTE by setting
+ * P bit to 1, pointing to VRAM. The n+1'th entry points
+ * to a big PTB covering system memory.
+ *
+ */
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
+{
+ int i;
+ uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
+ /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
+ */
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
+ u64 vram_addr = adev->vm_manager.vram_base_offset -
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ u64 vram_end = vram_addr + vram_size;
+ u64 gart_ptb_gpu_pa = amdgpu_bo_gpu_offset(adev->gart.bo) +
+ adev->vm_manager.vram_base_offset - adev->gmc.vram_start;
+
+ flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+ flags |= AMDGPU_PTE_WRITEABLE;
+ flags |= AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
+ flags |= AMDGPU_PDE_PTE;
+
+ /* The first n PDE0 entries are used as PTE,
+ * pointing to vram
+ */
+ for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
+
+ /* The n+1'th PDE0 entry points to a huge
+ * PTB who has more than 512 entries each
+ * pointing to a 4K system page
+ */
+ flags = AMDGPU_PTE_VALID;
+ flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
+ /* Requires gart_ptb_gpu_pa to be 4K aligned */
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+}