diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 385 |
1 files changed, 243 insertions, 142 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e785f128411d..2b931e855abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -61,7 +61,7 @@ #include "amdgpu_res_cursor.h" #include "bif/bif_4_1_d.h" -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); #define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128) @@ -123,6 +123,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GWS: case AMDGPU_PL_OA: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: placement->num_placement = 0; return; @@ -187,7 +188,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct amdgpu_job *job; void *cpu_addr; uint64_t flags; - unsigned int i; int r; BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < @@ -226,7 +226,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER); if (r) return r; @@ -253,16 +254,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT]; amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr); } else { - dma_addr_t dma_address; - - dma_address = mm_cur->start; - dma_address += adev->vm_manager.vram_base_offset; + u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset; - for (i = 0; i < num_pages; ++i) { - amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address, - flags, cpu_addr); - dma_address += PAGE_SIZE; - } + amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr); } dma_fence_put(amdgpu_job_submit(job)); @@ -284,22 +278,24 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, * move and different for a BO to BO copy. * */ -int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - const struct amdgpu_copy_mem *src, - const struct amdgpu_copy_mem *dst, - uint64_t size, bool tmz, - struct dma_resv *resv, - struct dma_fence **f) +__attribute__((nonnull)) +static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, + uint64_t size, bool tmz, + struct dma_resv *resv, + struct dma_fence **f) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor src_mm, dst_mm; struct dma_fence *fence = NULL; int r = 0; - uint32_t copy_flags = 0; + struct amdgpu_bo *abo_src, *abo_dst; if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to move memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -308,7 +304,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { - uint64_t from, to, cur_size; + uint64_t from, to, cur_size, tiling_flags; + uint32_t num_type, data_format, max_com, write_compress_disable; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -325,8 +322,28 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (r) goto error; + abo_src = ttm_to_amdgpu_bo(src->bo); + abo_dst = ttm_to_amdgpu_bo(dst->bo); if (tmz) copy_flags |= AMDGPU_COPY_FLAGS_TMZ; + if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) && + (abo_src->tbo.resource->mem_type == TTM_PL_VRAM)) + copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED; + if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) && + (dst->mem->mem_type == TTM_PL_VRAM)) { + copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED; + amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags); + max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); + num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); + data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + write_compress_disable = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE); + copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | + AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) | + AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE, + write_compress_disable)); + } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, &next, false, true, copy_flags); @@ -341,9 +358,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } error: mutex_unlock(&adev->mman.gtt_window_lock); - if (f) - *f = dma_fence_get(fence); - dma_fence_put(fence); + *f = fence; return r; } @@ -384,7 +399,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, - false); + false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); if (r) { goto error; } else if (wipe_fence) { @@ -425,7 +440,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, return false; if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || - res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL) + res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL || + res->mem_type == AMDGPU_PL_MMIO_REMAP) return true; if (res->mem_type != TTM_PL_VRAM) @@ -516,10 +532,12 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || old_mem->mem_type == AMDGPU_PL_DOORBELL || + old_mem->mem_type == AMDGPU_PL_MMIO_REMAP || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || new_mem->mem_type == AMDGPU_PL_OA || - new_mem->mem_type == AMDGPU_PL_DOORBELL) { + new_mem->mem_type == AMDGPU_PL_DOORBELL || + new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) { /* Nothing to save here */ amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); @@ -607,6 +625,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.is_iomem = true; mem->bus.caching = ttm_uncached; break; + case AMDGPU_PL_MMIO_REMAP: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->rmmio_remap.bus_addr; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_uncached; + break; default: return -EINVAL; } @@ -624,6 +648,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, if (bo->resource->mem_type == AMDGPU_PL_DOORBELL) return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT; + else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP) + return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT; return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT; } @@ -671,10 +697,11 @@ struct amdgpu_ttm_tt { * memory and start HMM tracking CPU page table update * * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only - * once afterwards to stop HMM tracking + * once afterwards to stop HMM tracking. Its the caller responsibility to ensure + * that range is a valid memory and it is freed too. */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, - struct hmm_range **range) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, + struct amdgpu_hmm_range *range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -684,9 +711,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, bool readonly; int r = 0; - /* Make sure get_user_pages_done() can cleanup gracefully */ - *range = NULL; - mm = bo->notifier.mm; if (unlikely(!mm)) { DRM_DEBUG_DRIVER("BO is not registered?\n"); @@ -710,7 +734,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages, - readonly, NULL, pages, range); + readonly, NULL, range); out_unlock: mmap_read_unlock(mm); if (r) @@ -721,38 +745,6 @@ out_unlock: return r; } -/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations - */ -void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, - struct hmm_range *range) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - - if (gtt && gtt->userptr && range) - amdgpu_hmm_range_get_pages_done(range); -} - -/* - * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change - * Check if the pages backing this ttm range have been invalidated - * - * Returns: true if pages are still valid - */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, - struct hmm_range *range) -{ - struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); - - if (!gtt || !gtt->userptr || !range) - return false; - - DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", - gtt->userptr, ttm->num_pages); - - WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - - return !amdgpu_hmm_range_get_pages_done(range); -} #endif /* @@ -762,12 +754,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, * that backs user memory and will ultimately be mapped into the device * address space. */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range) { unsigned long i; for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i] = pages ? pages[i] : NULL; + ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL; } /* @@ -795,7 +787,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, /* Map SG to device */ r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0); if (r) - goto release_sg; + goto release_sg_table; /* convert SG to linear array of pages and dma addresses */ drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address, @@ -803,6 +795,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, return 0; +release_sg_table: + sg_free_table(ttm->sg); release_sg: kfree(ttm->sg); ttm->sg = NULL; @@ -845,8 +839,7 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); uint64_t page_idx, pages_per_xcc; int i; - uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC); pages_per_xcc = total_pages; do_div(pages_per_xcc, num_xcc); @@ -912,7 +905,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, if (gtt->userptr) { r = amdgpu_ttm_tt_pin_userptr(bdev, ttm); if (r) { - DRM_ERROR("failed to pin userptr\n"); + dev_err(adev->dev, "failed to pin userptr\n"); return r; } } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) { @@ -1038,7 +1031,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { amdgpu_ttm_tt_unpin_userptr(bdev, ttm); - } else if (ttm->sg && gtt->gobj->import_attach) { + } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) { struct dma_buf_attachment *attach; attach = gtt->gobj->import_attach; @@ -1332,10 +1325,11 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) if (mem && (mem->mem_type == TTM_PL_TT || mem->mem_type == AMDGPU_PL_DOORBELL || - mem->mem_type == AMDGPU_PL_PREEMPT)) { + mem->mem_type == AMDGPU_PL_PREEMPT || + mem->mem_type == AMDGPU_PL_MMIO_REMAP)) { flags |= AMDGPU_PTE_SYSTEM; - if (ttm->caching == ttm_cached) + if (ttm && ttm->caching == ttm_cached) flags |= AMDGPU_PTE_SNOOPED; } @@ -1401,7 +1395,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, */ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, f) { - if (amdkfd_fence_check_mm(f, current->mm)) + if (amdkfd_fence_check_mm(f, current->mm) && + !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) return false; } @@ -1486,10 +1481,12 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, AMDGPU_IB_POOL_DELAYED, - &job); + &job, + AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA); if (r) goto out; + mutex_lock(&adev->mman.gtt_window_lock); amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm); src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start; @@ -1504,6 +1501,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, WARN_ON(job->ibs[0].length_dw > num_dw); fence = amdgpu_job_submit(job); + mutex_unlock(&adev->mman.gtt_window_lock); if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) r = -ETIMEDOUT; @@ -1742,7 +1740,9 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); if (!adev->bios && - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) reserve_size = max(reserve_size, (uint32_t)280 << 20); else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; @@ -1756,25 +1756,21 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) &ctx->c2p_bo, NULL); if (ret) { - DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret); amdgpu_ttm_training_reserve_vram_fini(adev); return ret; } ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - if (!adev->gmc.is_app_apu) { - ret = amdgpu_bo_create_kernel_at( - adev, adev->gmc.real_vram_size - reserve_size, - reserve_size, &adev->mman.fw_reserved_memory, NULL); - if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, - NULL, NULL); - return ret; - } - } else { - DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); + ret = amdgpu_bo_create_kernel_at( + adev, adev->gmc.real_vram_size - reserve_size, reserve_size, + &adev->mman.fw_reserved_memory, NULL); + if (ret) { + dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, + NULL); + return ret; } return 0; @@ -1796,7 +1792,7 @@ static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) for (i = 0; i < adev->gmc.num_mem_partitions; i++) { ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev, adev->gmc.mem_partitions[i].numa.node, - false, false); + TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M))); } return 0; } @@ -1815,6 +1811,59 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) adev->mman.ttm_pools = NULL; } +/** + * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO + * @adev: amdgpu device + * + * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the + * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host + * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular + * GEM object (amdgpu_bo_create). + * + * Return: + * * 0 on success or intentional skip (feature not present/unsupported) + * * negative errno on allocation failure + */ +static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) +{ + struct amdgpu_bo_param bp; + int r; + + /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */ + if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE) + return 0; + + memset(&bp, 0, sizeof(bp)); + + /* Create exactly one GEM BO in the MMIO_REMAP domain. */ + bp.type = ttm_bo_type_device; /* userspace-mappable GEM */ + bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */ + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP; + bp.flags = 0; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo); + if (r) + return r; + + return 0; +} + +/** + * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO + * @adev: amdgpu device + * + * Frees the kernel-owned MMIO_REMAP BO if it was allocated by + * amdgpu_ttm_mmio_remap_bo_init(). + */ +static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_unref(&adev->rmmio_remap.bo); + adev->rmmio_remap.bo = NULL; +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1831,29 +1880,36 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); + dma_set_max_seg_size(adev->dev, UINT_MAX); /* No others user of address space so set it to 0 */ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev, adev_to_drm(adev)->anon_inode->i_mapping, adev_to_drm(adev)->vma_offset_manager, - adev->need_swiotlb, - dma_addressing_limited(adev->dev)); + (adev->need_swiotlb ? + TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) | + (dma_addressing_limited(adev->dev) ? + TTM_ALLOCATION_POOL_USE_DMA32 : 0) | + TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M))); if (r) { - DRM_ERROR("failed initializing buffer object driver(%d).\n", r); + dev_err(adev->dev, + "failed initializing buffer object driver(%d).\n", r); return r; } r = amdgpu_ttm_pools_init(adev); if (r) { - DRM_ERROR("failed to init ttm pools(%d).\n", r); + dev_err(adev->dev, "failed to init ttm pools(%d).\n", r); return r; } adev->mman.initialized = true; - /* Initialize VRAM pool with all of VRAM divided into pages */ - r = amdgpu_vram_mgr_init(adev); - if (r) { - DRM_ERROR("Failed initializing VRAM heap.\n"); - return r; + if (!adev->gmc.is_app_apu) { + /* Initialize VRAM pool with all of VRAM divided into pages */ + r = amdgpu_vram_mgr_init(adev); + if (r) { + dev_err(adev->dev, "Failed initializing VRAM heap.\n"); + return r; + } } /* Change the size here instead of the init above so only lpfn is affected */ @@ -1882,19 +1938,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; /* - *The reserved vram for driver must be pinned to the specified - *place on the VRAM, so reserve it early. + * The reserved VRAM for the driver must be pinned to a specific + * location in VRAM, so reserve it early. */ r = amdgpu_ttm_drv_reserve_vram_init(adev); if (r) return r; /* - * only NAVI10 and onwards ASIC support for IP discovery. - * If IP discovery enabled, a block of memory should be - * reserved for IP discovey. + * only NAVI10 and later ASICs support IP discovery. + * If IP discovery is enabled, a block of memory should be + * reserved for it. */ - if (adev->mman.discovery_bin) { + if (adev->discovery.reserve_tmr) { r = amdgpu_ttm_reserve_tmr(adev); if (r) return r; @@ -1932,63 +1988,89 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); } - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", + dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n", (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); /* Compute GTT size, either based on TTM limit * or whatever the user passed on module init. */ - if (amdgpu_gtt_size == -1) - gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; - else - gtt_size = (uint64_t)amdgpu_gtt_size << 20; + gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; + if (amdgpu_gtt_size != -1) { + uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20; + + drm_warn(&adev->ddev, + "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n"); + if (gtt_size != configured_size) + drm_warn(&adev->ddev, + "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n", + configured_size, gtt_size); + + gtt_size = configured_size; + } /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); if (r) { - DRM_ERROR("Failed initializing GTT heap.\n"); + dev_err(adev->dev, "Failed initializing GTT heap.\n"); return r; } - DRM_INFO("amdgpu: %uM of GTT memory ready.\n", + dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); - /* Initiailize doorbell pool on PCI BAR */ + if (adev->flags & AMD_IS_APU) { + if (adev->gmc.real_vram_size < gtt_size) + adev->apu_prefer_gtt = true; + } + + /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { - DRM_ERROR("Failed initializing doorbell heap.\n"); + dev_err(adev->dev, "Failed initializing doorbell heap.\n"); return r; } /* Create a boorbell page for kernel usages */ r = amdgpu_doorbell_create_kernel_doorbells(adev); if (r) { - DRM_ERROR("Failed to initialize kernel doorbells.\n"); + dev_err(adev->dev, "Failed to initialize kernel doorbells.\n"); return r; } + /* Initialize MMIO-remap pool (single page 4K) */ + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1); + if (r) { + dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n"); + return r; + } + + /* Allocate the singleton MMIO_REMAP BO (4K) if supported */ + r = amdgpu_ttm_mmio_remap_bo_init(adev); + if (r) + return r; + /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); if (r) { - DRM_ERROR("Failed initializing PREEMPT heap.\n"); + dev_err(adev->dev, "Failed initializing PREEMPT heap.\n"); return r; } /* Initialize various on-chip memory pools */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size); if (r) { - DRM_ERROR("Failed initializing GDS heap.\n"); + dev_err(adev->dev, "Failed initializing GDS heap.\n"); return r; } r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size); if (r) { - DRM_ERROR("Failed initializing gws heap.\n"); + dev_err(adev->dev, "Failed initializing gws heap.\n"); return r; } r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size); if (r) { - DRM_ERROR("Failed initializing oa heap.\n"); + dev_err(adev->dev, "Failed initializing oa heap.\n"); return r; } if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, @@ -2020,12 +2102,16 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) /* return the FW reserved memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, + NULL); if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, NULL, NULL); } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); + + amdgpu_ttm_mmio_remap_bo_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); @@ -2038,15 +2124,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) drm_dev_exit(idx); } - amdgpu_vram_mgr_fini(adev); + if (!adev->gmc.is_app_apu) + amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); + amdgpu_doorbell_fini(adev); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; - DRM_INFO("amdgpu: ttm finalized\n"); + dev_info(adev->dev, "amdgpu: ttm finalized\n"); } /** @@ -2078,8 +2169,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) DRM_SCHED_PRIORITY_KERNEL, &sched, 1, NULL); if (r) { - DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", - r); + dev_err(adev->dev, + "Failed setting up TTM BO move entity (%d)\n", + r); return; } @@ -2087,15 +2179,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) DRM_SCHED_PRIORITY_NORMAL, &sched, 1, NULL); if (r) { - DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", - r); + dev_err(adev->dev, + "Failed setting up TTM BO move entity (%d)\n", + r); goto error_free_entity; } } else { drm_sched_entity_destroy(&adev->mman.high_pr); drm_sched_entity_destroy(&adev->mman.low_pr); - dma_fence_put(man->move); - man->move = NULL; + /* Drop all the old fences since re-creating the scheduler entities + * will allocate new contexts. + */ + ttm_resource_manager_cleanup(man); } /* this just adjusts TTM size idea, which sets lpfn to the correct value */ @@ -2118,7 +2213,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, struct dma_resv *resv, bool vm_needs_flush, struct amdgpu_job **job, - bool delayed) + bool delayed, u64 k_job_id) { enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : @@ -2128,7 +2223,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, &adev->mman.high_pr; r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, - num_dw * 4, pool, job); + num_dw * 4, pool, job, k_job_id); if (r) return r; @@ -2159,7 +2254,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, int r; if (!direct_submit && !ring->sched.ready) { - DRM_ERROR("Trying to move memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -2167,7 +2263,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job, false); + resv, vm_needs_flush, &job, false, + AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER); if (r) return r; @@ -2194,7 +2291,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, error_free: amdgpu_job_free(job); - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, "Error scheduling IBs (%d)\n", r); return r; } @@ -2202,7 +2299,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush, bool delayed) + bool vm_needs_flush, bool delayed, + u64 k_job_id) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2215,7 +2313,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job, delayed); + &job, delayed, k_job_id); if (r) return r; @@ -2254,7 +2352,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; - int r; + int r = 0; if (!adev->mman.buffer_funcs_enabled) return -EINVAL; @@ -2285,7 +2383,8 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, goto err; r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, - &next, true, true); + &next, true, true, + AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER); if (r) goto err; @@ -2304,7 +2403,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, struct dma_fence **f, - bool delayed) + bool delayed, + u64 k_job_id) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -2313,7 +2413,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, int r; if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to clear memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to clear memory with ring turned off.\n"); return -EINVAL; } @@ -2333,7 +2434,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, goto error; r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true, delayed); + &next, true, delayed, k_job_id); if (r) goto error; @@ -2373,7 +2474,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) man = ttm_manager_type(&adev->mman.bdev, mem_type); break; default: - DRM_ERROR("Trying to evict invalid memory type\n"); + dev_err(adev->dev, "Trying to evict invalid memory type\n"); return -EINVAL; } |
