diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 663 |
1 files changed, 440 insertions, 223 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b692ad402252..a7d8f1ce6ac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -27,35 +27,46 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <drm/drmP.h> + #include <drm/drm.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_vce.h" +#include "amdgpu_cs.h" #include "cikd.h" /* 1 second timeout */ #define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000) /* Firmware Names */ +#ifdef CONFIG_DRM_AMDGPU_SI +#define FIRMWARE_VCE_V1_0 "amdgpu/vce_1_0_0.bin" +#endif #ifdef CONFIG_DRM_AMDGPU_CIK -#define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin" -#define FIRMWARE_KABINI "radeon/kabini_vce.bin" -#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" -#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin" -#define FIRMWARE_MULLINS "radeon/mullins_vce.bin" +#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin" +#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin" +#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin" +#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin" +#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin" #endif #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" #define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin" #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" -#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" -#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" +#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" +#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" +#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin" +#ifdef CONFIG_DRM_AMDGPU_SI +MODULE_FIRMWARE(FIRMWARE_VCE_V1_0); +#endif #ifdef CONFIG_DRM_AMDGPU_CIK MODULE_FIRMWARE(FIRMWARE_BONAIRE); 
MODULE_FIRMWARE(FIRMWARE_KABINI); @@ -70,88 +81,106 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); +MODULE_FIRMWARE(FIRMWARE_VEGA12); +MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_vce_idle_work_handler(struct work_struct *work); +static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence); +static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence); /** - * amdgpu_vce_init - allocate memory, load vce firmware + * amdgpu_vce_firmware_name() - determine the firmware file name for VCE * * @adev: amdgpu_device pointer * - * First step to get VCE online, allocate memory and load the firmware + * Each chip that has VCE IP may need a different firmware. + * This function returns the name of the VCE firmware file + * appropriate for the current chip. 
*/ -int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) +static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - struct amd_sched_rq *rq; - const char *fw_name; - const struct common_firmware_header *hdr; - unsigned ucode_version, version_major, version_minor, binary_id; - int i, r; - switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_PITCAIRN: + case CHIP_TAHITI: + case CHIP_VERDE: + return FIRMWARE_VCE_V1_0; +#endif #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_BONAIRE: - fw_name = FIRMWARE_BONAIRE; - break; + return FIRMWARE_BONAIRE; case CHIP_KAVERI: - fw_name = FIRMWARE_KAVERI; - break; + return FIRMWARE_KAVERI; case CHIP_KABINI: - fw_name = FIRMWARE_KABINI; - break; + return FIRMWARE_KABINI; case CHIP_HAWAII: - fw_name = FIRMWARE_HAWAII; - break; + return FIRMWARE_HAWAII; case CHIP_MULLINS: - fw_name = FIRMWARE_MULLINS; - break; + return FIRMWARE_MULLINS; #endif case CHIP_TONGA: - fw_name = FIRMWARE_TONGA; - break; + return FIRMWARE_TONGA; case CHIP_CARRIZO: - fw_name = FIRMWARE_CARRIZO; - break; + return FIRMWARE_CARRIZO; case CHIP_FIJI: - fw_name = FIRMWARE_FIJI; - break; + return FIRMWARE_FIJI; case CHIP_STONEY: - fw_name = FIRMWARE_STONEY; - break; + return FIRMWARE_STONEY; case CHIP_POLARIS10: - fw_name = FIRMWARE_POLARIS10; - break; + return FIRMWARE_POLARIS10; case CHIP_POLARIS11: - fw_name = FIRMWARE_POLARIS11; - break; - case CHIP_VEGA10: - fw_name = FIRMWARE_VEGA10; - break; + return FIRMWARE_POLARIS11; case CHIP_POLARIS12: - fw_name = FIRMWARE_POLARIS12; - break; + return FIRMWARE_POLARIS12; + case CHIP_VEGAM: + return FIRMWARE_VEGAM; + case CHIP_VEGA10: + return FIRMWARE_VEGA10; + case CHIP_VEGA12: + return FIRMWARE_VEGA12; + case CHIP_VEGA20: + return FIRMWARE_VEGA20; default: - return -EINVAL; + return NULL; } +} - r = request_firmware(&adev->vce.fw, fw_name, adev->dev); - if (r) { - dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n", - fw_name); - return r; 
- } +/** + * amdgpu_vce_early_init() - try to load VCE firmware + * + * @adev: amdgpu_device pointer + * + * Tries to load the VCE firmware. + * + * When not found, returns -ENOENT so that the driver can + * still load and initialize the rest of the IP blocks. + * The GPU can function just fine without VCE, it will just + * not support video encoding. + */ +int amdgpu_vce_early_init(struct amdgpu_device *adev) +{ + const char *fw_name = amdgpu_vce_firmware_name(adev); + const struct common_firmware_header *hdr; + unsigned int ucode_version, version_major, version_minor, binary_id; + int r; - r = amdgpu_ucode_validate(adev->vce.fw); + if (!fw_name) + return -ENOENT; + + r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { - dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", - fw_name); - release_firmware(adev->vce.fw); - adev->vce.fw = NULL; - return r; + dev_err(adev->dev, + "amdgpu_vce: Firmware \"%s\" not found or failed to validate (%d)\n", + fw_name, r); + + amdgpu_ucode_release(&adev->vce.fw); + return -ENOENT; } hdr = (const struct common_firmware_header *)adev->vce.fw->data; @@ -160,28 +189,45 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) version_major = (ucode_version >> 20) & 0xfff; version_minor = (ucode_version >> 8) & 0xfff; binary_id = ucode_version & 0xff; - DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n", + dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n", version_major, version_minor, binary_id); adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | (binary_id << 8)); + return 0; +} + +/** + * amdgpu_vce_sw_init() - allocate memory for VCE BO + * + * @adev: amdgpu_device pointer + * @size: size for the new BO + * + * First step to get VCE online: allocate memory for VCE BO. + * The VCE firmware binary is copied into the VCE BO later, + * in amdgpu_vce_resume. 
The VCE executes its code from the + * VCE BO and also uses the space in this BO for its stack and data. + * + * Ideally this BO should be placed in VRAM for optimal performance, + * although technically it also runs from system RAM (albeit slowly). + */ +int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) +{ + int i, r; + + if (!adev->vce.fw) + return -ENOENT; + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->vce.vcpu_bo, &adev->vce.gpu_addr, &adev->vce.cpu_addr); if (r) { dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); return r; } - ring = &adev->vce.ring[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, amdgpu_sched_jobs); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; - } - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { atomic_set(&adev->vce.handles[i], 0); adev->vce.filp[i] = NULL; @@ -194,7 +240,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) } /** - * amdgpu_vce_fini - free memory + * amdgpu_vce_sw_fini - free memory * * @adev: amdgpu_device pointer * @@ -202,22 +248,47 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) */ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) { - unsigned i; + unsigned int i; if (adev->vce.vcpu_bo == NULL) return 0; - amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); - - amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, - (void **)&adev->vce.cpu_addr); + drm_sched_entity_destroy(&adev->vce.entity); for (i = 0; i < adev->vce.num_rings; i++) amdgpu_ring_fini(&adev->vce.ring[i]); - release_firmware(adev->vce.fw); + amdgpu_ucode_release(&adev->vce.fw); mutex_destroy(&adev->vce.idle_mutex); + amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, + (void **)&adev->vce.cpu_addr); + + return 0; +} + 
+/** + * amdgpu_vce_entity_init - init entity + * + * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check + * + * Initialize the entity used for handle management in the kernel driver. + */ +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (ring == &adev->vce.ring[0]) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; + + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } + } + return 0; } @@ -231,6 +302,8 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) { int i; + cancel_delayed_work_sync(&adev->vce.idle_work); + if (adev->vce.vcpu_bo == NULL) return 0; @@ -241,7 +314,6 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_VCE_HANDLES) return 0; - cancel_delayed_work_sync(&adev->vce.idle_work); /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } @@ -254,35 +326,22 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) */ int amdgpu_vce_resume(struct amdgpu_device *adev) { - void *cpu_addr; const struct common_firmware_header *hdr; - unsigned offset; - int r; + unsigned int offset; + int idx; if (adev->vce.vcpu_bo == NULL) return -EINVAL; - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); - if (r) { - dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); - return r; - } - - r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) VCE map failed\n", r); - return r; - } - hdr = (const struct common_firmware_header *)adev->vce.fw->data; offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(cpu_addr, adev->vce.fw->data + offset, - adev->vce.fw->size - offset); - - amdgpu_bo_kunmap(adev->vce.vcpu_bo); - amdgpu_bo_unreserve(adev->vce.vcpu_bo); + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memset_io(adev->vce.cpu_addr, 0, 
amdgpu_bo_size(adev->vce.vcpu_bo)); + memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset, + adev->vce.fw->size - offset); + drm_dev_exit(idx); + } return 0; } @@ -298,10 +357,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vce.idle_work.work); - unsigned i, count = 0; - - if (amdgpu_sriov_vf(adev)) - return; + unsigned int i, count = 0; for (i = 0; i < adev->vce.num_rings; i++) count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); @@ -311,10 +367,10 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) amdgpu_dpm_enable_vce(adev, false); } else { amdgpu_asic_set_vce_clocks(adev, 0, 0); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); } } else { schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT); @@ -343,10 +399,10 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) amdgpu_dpm_enable_vce(adev, true); } else { amdgpu_asic_set_vce_clocks(adev, 53300, 40000); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); } } @@ -362,7 +418,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) */ void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); + if (!amdgpu_sriov_vf(ring->adev)) + schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); } /** @@ 
-377,6 +434,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) { struct amdgpu_ring *ring = &adev->vce.ring[0]; int i, r; + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { uint32_t handle = atomic_read(&adev->vce.handles[i]); @@ -393,32 +451,61 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } /** - * amdgpu_vce_get_create_msg - generate a VCE create msg + * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE * * @adev: amdgpu_device pointer + * + * Returns how many GART pages we need before GTT for the VCE IP block. + * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details. + * For VCE2+, this is not needed so return zero. + */ +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev) +{ + /* VCE IP block not added yet, so can't use amdgpu_ip_version */ + if (adev->family == AMDGPU_FAMILY_SI) + return 512; + + return 0; +} + +/** + * amdgpu_vce_get_create_msg - generate a VCE create msg + * * @ring: ring we should submit the msg to * @handle: VCE session handle to use * @fence: optional fence to return * * Open up a stream for HW test */ -int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) +static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) { - const unsigned ib_size_dw = 1024; + const unsigned int ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; + struct amdgpu_ib ib_msg; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; - ib = &job->ibs[0]; + memset(&ib_msg, 0, sizeof(ib_msg)); + /* only one gpu page is needed, alloc +1 page to make addr aligned. 
*/ + r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2, + AMDGPU_IB_POOL_DIRECT, + &ib_msg); + if (r) + goto err; - dummy = ib->gpu_addr + 1024; + ib = &job->ibs[0]; + /* let addr point to page boundary */ + addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr); /* stitch together an VCE create msg */ ib->length_dw = 0; @@ -450,19 +537,18 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000001; for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); + r = amdgpu_job_submit_direct(job, ring, &f); + amdgpu_ib_free(&ib_msg, f); if (r) goto err; - amdgpu_job_free(job); if (fence) *fence = dma_fence_get(f); dma_fence_put(f); @@ -476,23 +562,28 @@ err: /** * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: VCE session handle to use + * @direct: direct or delayed pool * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so */ -int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) { - const unsigned ib_size_dw = 1024; + const unsigned int ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + ib_size_dw * 4, + direct ? 
AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -519,19 +610,12 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); - if (r) - goto err; - - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err; - } + if (direct) + r = amdgpu_job_submit_direct(job, ring, &f); + else + f = amdgpu_job_submit(job); + if (r) + goto err; if (fence) *fence = dma_fence_get(f); @@ -544,39 +628,93 @@ err: } /** + * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary + * + * @p: cs parser + * @ib: indirect buffer to use + * @lo: address of lower dword + * @hi: address of higher dword + * @size: minimum size + * @index: bs/fb index + * + * Make sure that no BO crosses a 4GB boundary. 
+ */ +static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, + struct amdgpu_ib *ib, int lo, int hi, + unsigned int size, int32_t index) +{ + int64_t offset = ((uint64_t)size) * ((int64_t)index); + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *mapping; + unsigned int i, fpfn, lpfn; + struct amdgpu_bo *bo; + uint64_t addr; + int r; + + addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | + ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; + if (index >= 0) { + addr += offset; + fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT; + lpfn = 0x100000000ULL >> PAGE_SHIFT; + } else { + fpfn = 0; + lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT; + } + + r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n", + addr, lo, hi, size, index); + return r; + } + + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn); + bo->placements[i].lpfn = bo->placements[i].lpfn ? 
+ min(bo->placements[i].lpfn, lpfn) : lpfn; + } + return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + + +/** * amdgpu_vce_cs_reloc - command submission relocation * * @p: parser context + * @ib: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size + * @index: bs/fb index * * Patch relocation inside command stream with real buffer address */ -static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, - int lo, int hi, unsigned size, uint32_t index) +static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, + int lo, int hi, unsigned int size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; uint64_t addr; + int r; if (index == 0xffffffff) index = 0; - addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | - ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | + ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; addr += ((uint64_t)size) * ((uint64_t)index); - mapping = amdgpu_cs_find_mapping(p, addr, &bo); - if (mapping == NULL) { - DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n", addr, lo, hi, size, index); - return -EINVAL; + return r; } if ((addr + (uint64_t)size) > (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", + DRM_ERROR("BO too small for addr 0x%010llx %d %d\n", addr, lo, hi); return -EINVAL; } @@ -585,8 +723,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, addr += amdgpu_bo_gpu_offset(bo); addr -= ((uint64_t)size) * ((uint64_t)index); - amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); - amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); + amdgpu_ib_set_value(ib, lo, lower_32_bits(addr)); + amdgpu_ib_set_value(ib, hi, upper_32_bits(addr)); return 0; } 
@@ -599,12 +737,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, * @allocated: allocated a new handle? * * Validates the handle and return the found session index or -EINVAL - * we we don't have another free session index. + * if we don't have another free session index. */ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, uint32_t handle, uint32_t *allocated) { - unsigned i; + unsigned int i; /* validate the handle */ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { @@ -632,33 +770,32 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, } /** - * amdgpu_vce_cs_parse - parse and validate the command stream + * amdgpu_vce_ring_parse_cs - parse and validate the command stream * * @p: parser context - * + * @job: the job to parse + * @ib: the IB to patch */ -int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) +int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; - unsigned fb_idx = 0, bs_idx = 0; + unsigned int fb_idx = 0, bs_idx = 0; int session_idx = -1; uint32_t destroyed = 0; uint32_t created = 0; uint32_t allocated = 0; uint32_t tmp, handle = 0; - uint32_t *size = &tmp; - int i, r, idx = 0; + uint32_t dummy = 0xffffffff; + uint32_t *size = &dummy; + unsigned int idx; + int i, r = 0; - p->job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); + job->vm = NULL; - r = amdgpu_cs_sysvm_access_required(p); - if (r) - return r; - - while (idx < ib->length_dw) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + for (idx = 0; idx < ib->length_dw;) { + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); @@ -667,8 +804,68 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t 
ib_idx) } switch (cmd) { + case 0x00000002: /* task info */ + fb_idx = amdgpu_ib_get_value(ib, idx + 6); + bs_idx = amdgpu_ib_get_value(ib, idx + 7); + break; + + case 0x03000001: /* encode */ + r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9, + 0, 0); + if (r) + goto out; + + r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11, + 0, 0); + if (r) + goto out; + break; + + case 0x05000001: /* context buffer */ + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 0, 0); + if (r) + goto out; + break; + + case 0x05000004: /* video bitstream buffer */ + tmp = amdgpu_ib_get_value(ib, idx + 4); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + tmp, bs_idx); + if (r) + goto out; + break; + + case 0x05000005: /* feedback buffer */ + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 4096, fb_idx); + if (r) + goto out; + break; + + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 0, 0); + if (r) + goto out; + + r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7, + 0, 0); + if (r) + goto out; + break; + } + + idx += len / 4; + } + + for (idx = 0; idx < ib->length_dw;) { + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); + + switch (cmd) { case 0x00000001: /* session */ - handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + handle = amdgpu_ib_get_value(ib, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, &allocated); if (session_idx < 0) { @@ -679,8 +876,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x00000002: /* task info */ - fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); - bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + fb_idx = amdgpu_ib_get_value(ib, idx + 6); + bs_idx = amdgpu_ib_get_value(ib, idx + 7); break; case 0x01000001: /* create */ @@ -695,8 +892,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) goto out; } - *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * - 
amdgpu_get_ib_value(p, ib_idx, idx + 10) * + *size = amdgpu_ib_get_value(ib, idx + 8) * + amdgpu_ib_get_value(ib, idx + 10) * 8 * 3 / 2; break; @@ -725,12 +922,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x03000001: /* encode */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, + r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9, *size, 0); if (r) goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, + r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11, *size / 3, 0); if (r) goto out; @@ -741,27 +938,39 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x05000001: /* context buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, *size * 2, 0); if (r) goto out; break; case 0x05000004: /* video bitstream buffer */ - tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + tmp = amdgpu_ib_get_value(ib, idx + 4); + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, tmp, bs_idx); if (r) goto out; break; case 0x05000005: /* feedback buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, 4096, fb_idx); if (r) goto out; break; + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, + idx + 2, *size, 0); + if (r) + goto out; + + r = amdgpu_vce_cs_reloc(p, ib, idx + 8, + idx + 7, *size / 12, 0); + if (r) + goto out; + break; + default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); r = -EINVAL; @@ -799,14 +1008,16 @@ out: } /** - * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode + * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode * * @p: parser context - * + * @job: the job to parse + * @ib: the IB to patch */ -int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) +int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, + struct 
amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; int session_idx = -1; uint32_t destroyed = 0; uint32_t created = 0; @@ -815,8 +1026,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) int i, r = 0, idx = 0; while (idx < ib->length_dw) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); @@ -826,7 +1037,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) switch (cmd) { case 0x00000001: /* session */ - handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + handle = amdgpu_ib_get_value(ib, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, &allocated); if (session_idx < 0) { @@ -875,7 +1086,6 @@ out: if (!r) { /* No error, free all destroyed handle slots */ tmp = destroyed; - amdgpu_ib_free(p->adev, ib, NULL); } else { /* Error during parsing, free all allocated handle slots */ tmp = allocated; @@ -892,11 +1102,15 @@ out: * amdgpu_vce_ring_emit_ib - execute indirect buffer * * @ring: engine to use + * @job: job to retrieve vmid from * @ib: the IB to execute + * @flags: unused * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -908,11 +1122,13 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, * amdgpu_vce_ring_emit_fence - add a fence command to the ring * * @ring: engine to use - * @fence: the fence + * @addr: address + * @seq: sequence number + * @flags: fence related flags * */ void 
amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) + unsigned int flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -933,37 +1149,31 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); - unsigned i; + uint32_t rptr; + unsigned int i; int r, timeout = adev->usec_timeout; - /* workaround VCE ring test slow issue for sriov*/ + /* skip ring test for sriov*/ if (amdgpu_sriov_vf(adev)) - timeout *= 10; + return 0; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, VCE_CMD_END); amdgpu_ring_commit(ring); for (i = 0; i < timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) break; - DRM_UDELAY(1); + udelay(1); } - if (i < timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= timeout) r = -ETIMEDOUT; - } return r; } @@ -972,6 +1182,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) * amdgpu_vce_ring_test_ib - test if VCE IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -984,28 +1195,34 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) return 0; r = amdgpu_vce_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test 
timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; } + +enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring) +{ + switch (ring) { + case 0: + return AMDGPU_RING_PRIO_0; + case 1: + return AMDGPU_RING_PRIO_1; + case 2: + return AMDGPU_RING_PRIO_2; + default: + return AMDGPU_RING_PRIO_0; + } +} |
