Diffstat (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c')
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.c | 905 |
1 file changed, 683 insertions(+), 222 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2e4372ef17a3..1c80909e63ca 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -1,49 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 Red Hat
  * Author: Rob Clark <robdclark@gmail.com>
  *
  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
  */

 #include <linux/ascii85.h>
+#include <linux/interconnect.h>
+#include <linux/firmware/qcom/qcom_scm.h>
 #include <linux/kernel.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/pm_opp.h>
 #include <linux/slab.h>
+#include <linux/soc/qcom/mdt_loader.h>
+#include <linux/nvmem-consumer.h>
+#include <soc/qcom/ocmem.h>

 #include "adreno_gpu.h"
+#include "a6xx_gpu.h"
 #include "msm_gem.h"
 #include "msm_mmu.h"

-int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
+static u64 address_space_size = 0;
+MODULE_PARM_DESC(address_space_size, "Override for size of processes private GPU address space");
+module_param(address_space_size, ullong, 0600);
+
+static bool zap_available = true;
+
+static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
+        u32 pasid)
+{
+    struct device *dev = &gpu->pdev->dev;
+    const struct firmware *fw;
+    const char *signed_fwname = NULL;
+    struct device_node *np;
+    struct resource r;
+    phys_addr_t mem_phys;
+    ssize_t mem_size;
+    void *mem_region = NULL;
+    int ret;
+
+    if (!IS_ENABLED(CONFIG_ARCH_QCOM)) {
+        zap_available = false;
+        return -EINVAL;
+    }
+
+    np = of_get_child_by_name(dev->of_node, "zap-shader");
+    if (!of_device_is_available(np)) {
+        zap_available = false;
+        return -ENODEV;
+    }
+
+    ret = of_reserved_mem_region_to_resource(np, 0, &r);
+    if (ret) {
+        zap_available = false;
+        return ret;
+    }
+    mem_phys = r.start;
+
+    /*
+     * Check for a firmware-name property. This is the new scheme
+     * to handle firmware that may be signed with device specific
+     * keys, allowing us to have a different zap fw path for different
+     * devices.
+     *
+     * If the firmware-name property is found, we bypass the
+     * adreno_request_fw() mechanism, because we don't need to handle
+     * the /lib/firmware/qcom/... vs /lib/firmware/... case.
+     *
+     * If the firmware-name property is not found, for backwards
+     * compatibility we fall back to the fwname from the gpulist
+     * table.
+     */
+    of_property_read_string_index(np, "firmware-name", 0, &signed_fwname);
+    if (signed_fwname) {
+        fwname = signed_fwname;
+        ret = request_firmware_direct(&fw, fwname, gpu->dev->dev);
+        if (ret)
+            fw = ERR_PTR(ret);
+    } else if (fwname) {
+        /* Request the MDT file from the default location: */
+        fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
+    } else {
+        /*
+         * For new targets, we require the firmware-name property,
+         * if a zap-shader is required, rather than falling back
+         * to a firmware name specified in gpulist.
+         *
+         * Because the firmware is signed with a (potentially)
+         * device specific key, having the name come from gpulist
+         * was a bad idea, and is only provided for backwards
+         * compatibility for older targets.
+         */
+        return -ENOENT;
+    }
+
+    if (IS_ERR(fw)) {
+        DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
+        return PTR_ERR(fw);
+    }
+
+    /* Figure out how much memory we need */
+    mem_size = qcom_mdt_get_size(fw);
+    if (mem_size < 0) {
+        ret = mem_size;
+        goto out;
+    }
+
+    if (mem_size > resource_size(&r)) {
+        DRM_DEV_ERROR(dev,
+            "memory region is too small to load the MDT\n");
+        ret = -E2BIG;
+        goto out;
+    }
+
+    /* Allocate memory for the firmware image */
+    mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
+    if (!mem_region) {
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    /*
+     * Load the rest of the MDT
+     *
+     * Note that we could be dealing with two different paths, since
+     * with upstream linux-firmware it would be in a qcom/ subdir..
+     * adreno_request_fw() handles this, but qcom_mdt_load() does
+     * not. But since we've already gotten through adreno_request_fw()
+     * we know which of the two cases it is:
+     */
+    if (signed_fwname || (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY)) {
+        ret = qcom_mdt_load(dev, fw, fwname, pasid,
+                mem_region, mem_phys, mem_size, NULL);
+    } else {
+        char *newname;
+
+        newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
+
+        ret = qcom_mdt_load(dev, fw, newname, pasid,
+                mem_region, mem_phys, mem_size, NULL);
+        kfree(newname);
+    }
+    if (ret)
+        goto out;
+
+    /* Send the image to the secure world */
+    ret = qcom_scm_pas_auth_and_reset(pasid);
+
+    /*
+     * If the scm call returns -EOPNOTSUPP we assume that this target
+     * doesn't need/support the zap shader so quietly fail
+     */
+    if (ret == -EOPNOTSUPP)
+        zap_available = false;
+    else if (ret)
+        DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
+
+out:
+    if (mem_region)
+        memunmap(mem_region);
+
+    release_firmware(fw);
+
+    return ret;
+}
+
+int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
 {
     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+    struct platform_device *pdev = gpu->pdev;
+
+    /* Short cut if we determine the zap shader isn't available/needed */
+    if (!zap_available)
+        return -ENODEV;
+
+    /* We need SCM to be able to load the firmware */
+    if (!qcom_scm_is_available()) {
+        DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
+        return -EPROBE_DEFER;
+    }
+
+    return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
+}
+
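The zap loader above is consumed from target code at hw_init time. A minimal caller sketch follows (not part of this patch; EXAMPLE_PAS_ID and EXAMPLE_REG_SECVID_TRUST_CNTL are placeholders for target-specific names) — the real a5xx/a6xx code follows this shape, treating -ENODEV as "no zap shader needed" and dropping the GPU out of secure mode instead of failing:

    /* Hypothetical caller sketch -- names are placeholders, not driver API */
    static int example_enable_secure_mode(struct msm_gpu *gpu)
    {
        int ret = adreno_zap_shader_load(gpu, EXAMPLE_PAS_ID);

        if (!ret)
            return 0;        /* zap fw authorized, stay in secure mode */

        if (ret == -ENODEV) {
            /* No zap shader available/needed: leave secure mode instead */
            gpu_write(gpu, EXAMPLE_REG_SECVID_TRUST_CNTL, 0x0);
            return 0;
        }

        return ret;          /* real error, fail hw_init */
    }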
+struct drm_gpuvm *
+adreno_create_vm(struct msm_gpu *gpu,
+        struct platform_device *pdev)
+{
+    return adreno_iommu_create_vm(gpu, pdev, 0);
+}
+
+struct drm_gpuvm *
+adreno_iommu_create_vm(struct msm_gpu *gpu,
+        struct platform_device *pdev,
+        unsigned long quirks)
+{
+    struct iommu_domain_geometry *geometry;
+    struct msm_mmu *mmu;
+    struct drm_gpuvm *vm;
+    u64 start, size;
+
+    mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks);
+    if (IS_ERR(mmu))
+        return ERR_CAST(mmu);
+
+    geometry = msm_iommu_get_geometry(mmu);
+    if (IS_ERR(geometry))
+        return ERR_CAST(geometry);
+
+    /*
+     * Use the aperture start or SZ_16M, whichever is greater. This will
+     * ensure that we align with the allocated pagetable range while still
+     * allowing room in the lower 32 bits for GMEM and whatnot
+     */
+    start = max_t(u64, SZ_16M, geometry->aperture_start);
+    size = geometry->aperture_end - start + 1;
+
+    vm = msm_gem_vm_create(gpu->dev, mmu, "gpu", start & GENMASK_ULL(48, 0),
+            size, true);
+
+    if (IS_ERR(vm) && !IS_ERR(mmu))
+        mmu->funcs->destroy(mmu);
+
+    return vm;
+}
+
This will + * ensure that we align with the allocated pagetable range while still + * allowing room in the lower 32 bits for GMEM and whatnot + */ + start = max_t(u64, SZ_16M, geometry->aperture_start); + size = geometry->aperture_end - start + 1; + + vm = msm_gem_vm_create(gpu->dev, mmu, "gpu", start & GENMASK_ULL(48, 0), + size, true); + + if (IS_ERR(vm) && !IS_ERR(mmu)) + mmu->funcs->destroy(mmu); + + return vm; +} + +u64 adreno_private_vm_size(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); + const struct io_pgtable_cfg *ttbr1_cfg; + + if (address_space_size) + return address_space_size; + + if (adreno_gpu->info->quirks & ADRENO_QUIRK_4GB_VA) + return SZ_4G; + + if (!adreno_smmu || !adreno_smmu->get_ttbr1_cfg) + return SZ_4G; + + ttbr1_cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); + + /* + * Userspace VM is actually using TTBR0, but both are the same size, + * with b48 (sign bit) selecting which TTBRn to use. So if IAS is + * 48, the total (kernel+user) address space size is effectively + * 49 bits. But what userspace is control of is the lower 48. + */ + return BIT(ttbr1_cfg->ias) - ADRENO_VM_START; +} + +void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + struct msm_drm_private *priv = gpu->dev->dev_private; + unsigned long flags; + + /* + * Wait until the cooldown period has passed and we would actually + * collect a crashdump to re-enable stall-on-fault. + */ + spin_lock_irqsave(&priv->fault_stall_lock, flags); + if (!priv->stall_enabled && + ktime_after(ktime_get(), priv->stall_reenable_time) && + !READ_ONCE(gpu->crashstate)) { + struct msm_mmu *mmu = to_msm_vm(gpu->vm)->mmu; + + priv->stall_enabled = true; + + mmu->funcs->set_stall(mmu, true); + } + spin_unlock_irqrestore(&priv->fault_stall_lock, flags); +} + +#define ARM_SMMU_FSR_TF BIT(1) +#define ARM_SMMU_FSR_PF BIT(3) +#define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_SS BIT(30) + +int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, + struct adreno_smmu_fault_info *info, const char *block, + u32 scratch[4]) +{ + struct adreno_gpu *adreno_gpu = container_of(gpu, struct adreno_gpu, base); + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_mmu *mmu = to_msm_vm(gpu->vm)->mmu; + const char *type = "UNKNOWN"; + bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) && + !READ_ONCE(gpu->crashstate); + unsigned long irq_flags; + + /* + * In case there is a subsequent storm of pagefaults, disable + * stall-on-fault for at least half a second. 
+void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu)
+{
+    struct msm_gpu *gpu = &adreno_gpu->base;
+    struct msm_drm_private *priv = gpu->dev->dev_private;
+    unsigned long flags;
+
+    /*
+     * Wait until the cooldown period has passed and we would actually
+     * collect a crashdump to re-enable stall-on-fault.
+     */
+    spin_lock_irqsave(&priv->fault_stall_lock, flags);
+    if (!priv->stall_enabled &&
+            ktime_after(ktime_get(), priv->stall_reenable_time) &&
+            !READ_ONCE(gpu->crashstate)) {
+        struct msm_mmu *mmu = to_msm_vm(gpu->vm)->mmu;
+
+        priv->stall_enabled = true;
+
+        mmu->funcs->set_stall(mmu, true);
+    }
+    spin_unlock_irqrestore(&priv->fault_stall_lock, flags);
+}
+
+#define ARM_SMMU_FSR_TF    BIT(1)
+#define ARM_SMMU_FSR_PF    BIT(3)
+#define ARM_SMMU_FSR_EF    BIT(4)
+#define ARM_SMMU_FSR_SS    BIT(30)
+
+int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
+        struct adreno_smmu_fault_info *info, const char *block,
+        u32 scratch[4])
+{
+    struct adreno_gpu *adreno_gpu = container_of(gpu, struct adreno_gpu, base);
+    struct msm_drm_private *priv = gpu->dev->dev_private;
+    struct msm_mmu *mmu = to_msm_vm(gpu->vm)->mmu;
+    const char *type = "UNKNOWN";
+    bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) &&
+        !READ_ONCE(gpu->crashstate);
+    unsigned long irq_flags;
+
+    /*
+     * In case there is a subsequent storm of pagefaults, disable
+     * stall-on-fault for at least half a second.
+     */
+    spin_lock_irqsave(&priv->fault_stall_lock, irq_flags);
+    if (priv->stall_enabled) {
+        priv->stall_enabled = false;
+
+        mmu->funcs->set_stall(mmu, false);
+    }
+
+    priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500);
+    spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags);
+
+    /*
+     * Print a default message if we couldn't get the data from the
+     * adreno-smmu-priv
+     */
+    if (!info) {
+        pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
+            iova, flags,
+            scratch[0], scratch[1], scratch[2], scratch[3]);
+
+        return 0;
+    }
+
+    if (info->fsr & ARM_SMMU_FSR_TF)
+        type = "TRANSLATION";
+    else if (info->fsr & ARM_SMMU_FSR_PF)
+        type = "PERMISSION";
+    else if (info->fsr & ARM_SMMU_FSR_EF)
+        type = "EXTERNAL";
+
+    pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
+            info->ttbr0, iova,
+            flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
+            type, block,
+            scratch[0], scratch[1], scratch[2], scratch[3]);
+
+    if (do_devcoredump) {
+        struct msm_gpu_fault_info fault_info = {};
+
+        /* Turn off the hangcheck timer to keep it from bothering us */
+        timer_delete(&gpu->hangcheck_timer);
+
+        /*
+         * Let any concurrent GMU transactions know that the MMU may be
+         * blocked for a while and they should wait on us.
+         */
+        reinit_completion(&adreno_gpu->fault_coredump_done);
+
+        fault_info.ttbr0 = info->ttbr0;
+        fault_info.iova = iova;
+        fault_info.flags = flags;
+        fault_info.type = type;
+        fault_info.block = block;
+
+        msm_gpu_fault_crashstate_capture(gpu, &fault_info);
+
+        complete_all(&adreno_gpu->fault_coredump_done);
+    }
+
+    return 0;
+}
+
+int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
+        uint32_t param, uint64_t *value, uint32_t *len)
+{
+    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+    struct drm_device *drm = gpu->dev;
+    /* Note ctx can be NULL when called from rd_open(): */
+    struct drm_gpuvm *vm = ctx ? msm_context_vm(drm, ctx) : NULL;
+
+    /* No pointer params yet */
+    if (*len != 0)
+        return UERR(EINVAL, drm, "invalid len");

     switch (param) {
     case MSM_PARAM_GPU_ID:
         *value = adreno_gpu->info->revn;
         return 0;
     case MSM_PARAM_GMEM_SIZE:
-        *value = adreno_gpu->gmem;
+        *value = adreno_gpu->info->gmem;
         return 0;
     case MSM_PARAM_GMEM_BASE:
-        *value = 0x100000;
+        if (adreno_is_a650_family(adreno_gpu) ||
+                adreno_is_a740_family(adreno_gpu))
+            *value = 0;
+        else
+            *value = 0x100000;
         return 0;
     case MSM_PARAM_CHIP_ID:
-        *value = adreno_gpu->rev.patchid |
-            (adreno_gpu->rev.minor << 8) |
-            (adreno_gpu->rev.major << 16) |
-            (adreno_gpu->rev.core << 24);
+        *value = adreno_gpu->chip_id;
+        if (!adreno_gpu->info->revn)
+            *value |= ((uint64_t) adreno_gpu->speedbin) << 32;
         return 0;
     case MSM_PARAM_MAX_FREQ:
         *value = adreno_gpu->base.fast_rate;
@@ -59,12 +401,118 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
             return ret;
         }
         return -EINVAL;
-    case MSM_PARAM_NR_RINGS:
-        *value = gpu->nr_rings;
+    case MSM_PARAM_PRIORITIES:
+        *value = gpu->nr_rings * NR_SCHED_PRIORITIES;
+        return 0;
+    case MSM_PARAM_PP_PGTABLE:
+        *value = 0;
+        return 0;
+    case MSM_PARAM_FAULTS:
+        if (vm)
+            *value = gpu->global_faults + to_msm_vm(vm)->faults;
+        else
+            *value = gpu->global_faults;
+        return 0;
+    case MSM_PARAM_SUSPENDS:
+        *value = gpu->suspend_count;
+        return 0;
+    case MSM_PARAM_VA_START:
+        if (vm == gpu->vm)
+            return UERR(EINVAL, drm, "requires per-process pgtables");
+        *value = vm->mm_start;
+        return 0;
+    case MSM_PARAM_VA_SIZE:
+        if (vm == gpu->vm)
+            return UERR(EINVAL, drm, "requires per-process pgtables");
+        *value = vm->mm_range;
+        return 0;
+    case MSM_PARAM_HIGHEST_BANK_BIT:
+        *value = adreno_gpu->ubwc_config->highest_bank_bit;
+        return 0;
+    case MSM_PARAM_RAYTRACING:
+        *value = adreno_gpu->has_ray_tracing;
+        return 0;
+    case MSM_PARAM_UBWC_SWIZZLE:
+        *value = adreno_gpu->ubwc_config->ubwc_swizzle;
+        return 0;
+    case MSM_PARAM_MACROTILE_MODE:
+        *value = adreno_gpu->ubwc_config->macrotile_mode;
+        return 0;
+    case MSM_PARAM_UCHE_TRAP_BASE:
+        *value = adreno_gpu->uche_trap_base;
+        return 0;
+    case MSM_PARAM_HAS_PRR:
+        *value = adreno_smmu_has_prr(gpu);
         return 0;
     default:
-        DBG("%s: invalid param: %u", gpu->name, param);
-        return -EINVAL;
+        return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param);
+    }
+}
+
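For reference, the other side of this interface: userspace queries go through DRM_IOCTL_MSM_GET_PARAM with len left at zero, matching the "no pointer params" check above. A minimal sketch (the render-node path is an assumption for the example):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <drm/msm_drm.h>

    int main(void)
    {
        struct drm_msm_param req = {
            .pipe  = MSM_PIPE_3D0,
            .param = MSM_PARAM_GMEM_SIZE,
            /* .len stays 0: GMEM_SIZE is not a pointer param */
        };
        int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);

        if (fd < 0 || ioctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req))
            return 1;

        printf("gmem: %llu bytes\n", (unsigned long long)req.value);
        return 0;
    }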
per-process pgtables"); + + /* + * We can only swtich to VM_BIND mode if the VM has not yet + * been created: + */ + if (ctx->vm) + return UERR(EBUSY, drm, "VM already created"); + + ctx->userspace_managed_vm = value; + + return 0; + default: + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } @@ -110,7 +558,7 @@ adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) ret = request_firmware_direct(&fw, fwname, drm->dev); if (!ret) { DRM_DEV_INFO(drm->dev, "loaded %s from legacy location\n", - newname); + fwname); adreno_gpu->fwloc = FW_LOCATION_LEGACY; goto out; } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { @@ -159,6 +607,10 @@ int adreno_load_fw(struct adreno_gpu *adreno_gpu) if (!adreno_gpu->info->fw[i]) continue; + /* Skip loading GMU firmware with GMU Wrapper */ + if (adreno_has_gmu_wrapper(adreno_gpu) && i == ADRENO_FW_GMU) + continue; + /* Skip if the firmware has already been loaded */ if (adreno_gpu->fw[i]) continue; @@ -179,8 +631,8 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, struct drm_gem_object *bo; void *ptr; - ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4, - MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); + ptr = msm_gem_kernel_new(gpu->dev, fw->size - 4, + MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->vm, &bo, iova); if (IS_ERR(ptr)) return ERR_CAST(ptr); @@ -195,15 +647,19 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret, i; + int ret; - DBG("%s", gpu->name); + VERB("%s", gpu->name); - ret = adreno_load_fw(adreno_gpu); - if (ret) - return ret; + if (adreno_gpu->info->family >= ADRENO_6XX_GEN1 && + qcom_scm_set_gpu_smmu_aperture_is_available()) { + /* We currently always use context bank 0, so hard code this */ + ret = qcom_scm_set_gpu_smmu_aperture(0); + if (ret) + DRM_DEV_ERROR(gpu->dev->dev, "unable to set SMMU aperture: %d\n", ret); + } - for (i = 0; i < gpu->nr_rings; i++) { + for (int i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i]; if (!ring) @@ -211,30 +667,16 @@ int adreno_hw_init(struct msm_gpu *gpu) ring->cur = ring->start; ring->next = ring->start; - - /* reset completed fence seqno: */ - ring->memptrs->fence = ring->seqno; ring->memptrs->rptr = 0; - } - - /* - * Setup REG_CP_RB_CNTL. The same value is used across targets (with - * the excpetion of A430 that disables the RPTR shadow) - the cacluation - * for the ringbuffer size and block size is moved to msm_gpu.h for the - * pre-processor to deal with and the A430 variant is ORed in here - */ - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - MSM_GPU_RB_CNTL_DEFAULT | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + ring->memptrs->bv_fence = ring->fctx->completed_fence; - /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ - adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); - - if (!adreno_is_a430(adreno_gpu)) { - adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, - rbmemptr(gpu->rb[0], rptr)); + /* Detect and clean up an impossible fence, ie. if GPU managed + * to scribble something invalid, we don't want that to confuse + * us into mistakingly believing that submits have completed. 
@@ -195,15 +647,19 @@
 int adreno_hw_init(struct msm_gpu *gpu)
 {
     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-    int ret, i;
+    int ret;

-    DBG("%s", gpu->name);
+    VERB("%s", gpu->name);

-    ret = adreno_load_fw(adreno_gpu);
-    if (ret)
-        return ret;
+    if (adreno_gpu->info->family >= ADRENO_6XX_GEN1 &&
+            qcom_scm_set_gpu_smmu_aperture_is_available()) {
+        /* We currently always use context bank 0, so hard code this */
+        ret = qcom_scm_set_gpu_smmu_aperture(0);
+        if (ret)
+            DRM_DEV_ERROR(gpu->dev->dev, "unable to set SMMU aperture: %d\n", ret);
+    }

-    for (i = 0; i < gpu->nr_rings; i++) {
+    for (int i = 0; i < gpu->nr_rings; i++) {
         struct msm_ringbuffer *ring = gpu->rb[i];

         if (!ring)
@@ -211,30 +667,16 @@ int adreno_hw_init(struct msm_gpu *gpu)
         ring->cur = ring->start;
         ring->next = ring->start;
-
-        /* reset completed fence seqno: */
-        ring->memptrs->fence = ring->seqno;
         ring->memptrs->rptr = 0;
-    }
-
-    /*
-     * Setup REG_CP_RB_CNTL. The same value is used across targets (with
-     * the excpetion of A430 that disables the RPTR shadow) - the cacluation
-     * for the ringbuffer size and block size is moved to msm_gpu.h for the
-     * pre-processor to deal with and the A430 variant is ORed in here
-     */
-    adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
-            MSM_GPU_RB_CNTL_DEFAULT |
-            (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
+        ring->memptrs->bv_fence = ring->fctx->completed_fence;

-    /* Setup ringbuffer address - use ringbuffer[0] for GPU init */
-    adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
-            REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);
-
-    if (!adreno_is_a430(adreno_gpu)) {
-        adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
-                REG_ADRENO_CP_RB_RPTR_ADDR_HI,
-                rbmemptr(gpu->rb[0], rptr));
+        /* Detect and clean up an impossible fence, ie. if GPU managed
+         * to scribble something invalid, we don't want that to confuse
+         * us into mistakenly believing that submits have completed.
+         */
+        if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) {
+            ring->memptrs->fence = ring->fctx->last_fence;
+        }
     }

     return 0;
@@ -244,11 +686,9 @@
 static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, struct msm_ringbuffer *ring)
 {
-    if (adreno_is_a430(adreno_gpu))
-        return ring->memptrs->rptr = adreno_gpu_read(
-            adreno_gpu, REG_ADRENO_CP_RB_RPTR);
-    else
-        return ring->memptrs->rptr;
+    struct msm_gpu *gpu = &adreno_gpu->base;
+
+    return gpu->funcs->get_rptr(gpu, ring);
 }

 struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
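The fence_before() check in adreno_hw_init() relies on wraparound-safe sequence comparison: fence seqnos are 32-bit and eventually wrap, so ordering is decided by signed distance rather than a plain less-than. A sketch of the semantics (the driver's actual helper lives in the msm headers; this is an illustrative equivalent, not a quote):

    /* Wraparound-safe: "a is before b" iff the signed distance is negative */
    static inline bool example_fence_before(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) < 0;
    }

    /*
     * example_fence_before(5, 10)         -> true
     * example_fence_before(0xfffffffe, 1) -> true (across the 2^32 wrap)
     *
     * So a scribbled ring->memptrs->fence sitting "ahead" of the last
     * submitted fence is impossible, and hw_init clamps it back.
     */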
@@ -274,80 +714,8 @@ void adreno_recover(struct msm_gpu *gpu)
     }
 }

-void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-        struct msm_file_private *ctx)
-{
-    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-    struct msm_drm_private *priv = gpu->dev->dev_private;
-    struct msm_ringbuffer *ring = submit->ring;
-    unsigned i;
-
-    for (i = 0; i < submit->nr_cmds; i++) {
-        switch (submit->cmd[i].type) {
-        case MSM_SUBMIT_CMD_IB_TARGET_BUF:
-            /* ignore IB-targets */
-            break;
-        case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-            /* ignore if there has not been a ctx switch: */
-            if (priv->lastctx == ctx)
-                break;
-        case MSM_SUBMIT_CMD_BUF:
-            OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
-                CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
-            OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
-            OUT_RING(ring, submit->cmd[i].size);
-            OUT_PKT2(ring);
-            break;
-        }
-    }
-
-    OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
-    OUT_RING(ring, submit->seqno);
-
-    if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
-        /* Flush HLSQ lazy updates to make sure there is nothing
-         * pending for indirect loads after the timestamp has
-         * passed:
-         */
-        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-        OUT_RING(ring, HLSQ_FLUSH);
-    }
-
-    /* wait for idle before cache flush/interrupt */
-    OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
-    OUT_RING(ring, 0x00000000);
-
-    if (!adreno_is_a2xx(adreno_gpu)) {
-        /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
-        OUT_PKT3(ring, CP_EVENT_WRITE, 3);
-        OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
-        OUT_RING(ring, rbmemptr(ring, fence));
-        OUT_RING(ring, submit->seqno);
-    } else {
-        /* BIT(31) means something else on a2xx */
-        OUT_PKT3(ring, CP_EVENT_WRITE, 3);
-        OUT_RING(ring, CACHE_FLUSH_TS);
-        OUT_RING(ring, rbmemptr(ring, fence));
-        OUT_RING(ring, submit->seqno);
-        OUT_PKT3(ring, CP_INTERRUPT, 1);
-        OUT_RING(ring, 0x80000000);
-    }
-
-#if 0
-    if (adreno_is_a3xx(adreno_gpu)) {
-        /* Dummy set-constant to trigger context rollover */
-        OUT_PKT3(ring, CP_SET_CONSTANT, 2);
-        OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
-        OUT_RING(ring, 0x00000000);
-    }
-#endif
-
-    gpu->funcs->flush(gpu, ring);
-}
-
-void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg)
 {
-    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     uint32_t wptr;

     /* Copy the shadow to the actual register */
@@ -363,7 +731,7 @@ void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
     /* ensure writes to ringbuffer have hit system memory: */
     mb();

-    adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
+    gpu_write(gpu, reg, wptr);
 }

 bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
@@ -387,6 +755,8 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     int i, count = 0;

+    WARN_ON(!mutex_is_locked(&gpu->lock));
+
     kref_init(&state->ref);

     ktime_get_real_ts64(&state->time);
@@ -396,7 +766,7 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)

         state->ring[i].fence = gpu->rb[i]->memptrs->fence;
         state->ring[i].iova = gpu->rb[i]->iova;
-        state->ring[i].seqno = gpu->rb[i]->seqno;
+        state->ring[i].seqno = gpu->rb[i]->fctx->last_fence;
         state->ring[i].rptr = get_rptr(adreno_gpu, gpu->rb[i]);
         state->ring[i].wptr = get_wptr(gpu->rb[i]);

@@ -409,11 +779,9 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
             size = j + 1;

         if (size) {
-            state->ring[i].data = kvmalloc(size << 2, GFP_KERNEL);
-            if (state->ring[i].data) {
-                memcpy(state->ring[i].data, gpu->rb[i]->start, size << 2);
+            state->ring[i].data = kvmemdup(gpu->rb[i]->start, size << 2, GFP_KERNEL);
+            if (state->ring[i].data)
                 state->ring[i].data_size = size << 2;
-            }
         }
     }

@@ -457,6 +825,7 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state)
     for (i = 0; state->bos && i < state->nr_bos; i++)
         kvfree(state->bos[i].data);

+    kfree(state->vm_logs);
     kfree(state->bos);
     kfree(state->comm);
     kfree(state->cmd);
@@ -506,14 +875,19 @@ static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
         return NULL;

     for (i = 0; i < l; i++)
-        buf_itr += snprintf(buf + buf_itr, buffer_size - buf_itr, "%s",
+        buf_itr += scnprintf(buf + buf_itr, buffer_size - buf_itr, "%s",
                 ascii85_encode(src[i], out));

     return buf;
 }

-/* len is expected to be in bytes */
-static void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+/* len is expected to be in bytes
+ *
+ * WARNING: *ptr should be allocated with kvmalloc or friends. It can be free'd
+ * with kvfree() and replaced with a newly kvmalloc'd buffer on the first call
+ * when the unencoded raw data is encoded
+ */
+void adreno_show_object(struct drm_printer *p, void **ptr, int len,
         bool *encoded)
 {
     if (!*ptr || !len)
         return;
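adreno_gpu_ascii85_encode() above leans on the kernel's <linux/ascii85.h> helper, which encodes one 32-bit word at a time into a caller-provided ASCII85_BUFSZ buffer (an all-zero word compresses to the single character "z"). A minimal sketch of that building block:

    #include <linux/ascii85.h>
    #include <drm/drm_print.h>

    /* Sketch: print one ring/BO word the way the crashdump encoder does */
    static void example_encode_word(struct drm_printer *p, u32 word)
    {
        char out[ASCII85_BUFSZ];

        drm_printf(p, "%s", ascii85_encode(word, out));
    }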
@@ -563,10 +937,44 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
     if (IS_ERR_OR_NULL(state))
         return;

-    drm_printf(p, "revision: %d (%d.%d.%d.%d)\n",
-            adreno_gpu->info->revn, adreno_gpu->rev.core,
-            adreno_gpu->rev.major, adreno_gpu->rev.minor,
-            adreno_gpu->rev.patchid);
+    drm_printf(p, "revision: %u (%"ADRENO_CHIPID_FMT")\n",
+            adreno_gpu->info->revn,
+            ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));
+    /*
+     * If this is state collected due to an iova fault, print the fault
+     * related info. TTBR0 would not be zero in that case, so it is a
+     * good way to distinguish.
+     */
+    if (state->fault_info.ttbr0) {
+        const struct msm_gpu_fault_info *info = &state->fault_info;
+
+        drm_puts(p, "fault-info:\n");
+        drm_printf(p, "  - ttbr0=%.16llx\n", info->ttbr0);
+        drm_printf(p, "  - iova=%.16lx\n", info->iova);
+        drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
+        drm_printf(p, "  - type=%s\n", info->type);
+        drm_printf(p, "  - source=%s\n", info->block);
+
+        /*
+         * Information extracted from what we think are the current
+         * pgtables. Hopefully the TTBR0 matches what we've extracted
+         * from the SMMU registers in smmu_info!
+         */
+        drm_puts(p, "pgtable-fault-info:\n");
+        drm_printf(p, "  - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0);
+        drm_printf(p, "  - asid: %d\n", info->asid);
+        drm_printf(p, "  - ptes: %.16llx %.16llx %.16llx %.16llx\n",
+                info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]);
+    }
+
+    if (state->vm_logs) {
+        drm_puts(p, "vm-log:\n");
+        for (i = 0; i < state->nr_vm_logs; i++) {
+            struct msm_gem_vm_log_entry *e = &state->vm_logs[i];
+            drm_printf(p, "  - %s:%d: 0x%016llx-0x%016llx\n",
+                    e->op, e->queue_id, e->iova,
+                    e->iova + e->range);
+        }
+    }

     drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);

@@ -575,11 +983,11 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
     for (i = 0; i < gpu->nr_rings; i++) {
         drm_printf(p, "  - id: %d\n", i);
         drm_printf(p, "    iova:     0x%016llx\n", state->ring[i].iova);
-        drm_printf(p, "    last-fence: %d\n", state->ring[i].seqno);
-        drm_printf(p, "    retired-fence: %d\n", state->ring[i].fence);
-        drm_printf(p, "    rptr:     %d\n", state->ring[i].rptr);
-        drm_printf(p, "    wptr:     %d\n", state->ring[i].wptr);
-        drm_printf(p, "    size:     %d\n", MSM_GPU_RINGBUFFER_SZ);
+        drm_printf(p, "    last-fence: %u\n", state->ring[i].seqno);
+        drm_printf(p, "    retired-fence: %u\n", state->ring[i].fence);
+        drm_printf(p, "    rptr:     %u\n", state->ring[i].rptr);
+        drm_printf(p, "    wptr:     %u\n", state->ring[i].wptr);
+        drm_printf(p, "    size:     %u\n", MSM_GPU_RINGBUFFER_SZ);

         adreno_show_object(p, &state->ring[i].data,
             state->ring[i].data_size, &state->ring[i].encoded);
@@ -592,6 +1000,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
             drm_printf(p, "  - iova: 0x%016llx\n", state->bos[i].iova);
             drm_printf(p, "    size: %zd\n", state->bos[i].size);
+            drm_printf(p, "    flags: 0x%x\n", state->bos[i].flags);
+            drm_printf(p, "    name: %-32s\n", state->bos[i].name);

             adreno_show_object(p, &state->bos[i].data,
                 state->bos[i].size, &state->bos[i].encoded);
@@ -621,17 +1031,16 @@ void adreno_dump_info(struct msm_gpu *gpu)
     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     int i;

-    printk("revision: %d (%d.%d.%d.%d)\n",
-            adreno_gpu->info->revn, adreno_gpu->rev.core,
-            adreno_gpu->rev.major, adreno_gpu->rev.minor,
-            adreno_gpu->rev.patchid);
+    printk("revision: %u (%"ADRENO_CHIPID_FMT")\n",
+            adreno_gpu->info->revn,
+            ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));

     for (i = 0; i < gpu->nr_rings; i++) {
         struct msm_ringbuffer *ring = gpu->rb[i];

         printk("rb %d: fence:    %d/%d\n", i,
             ring->memptrs->fence,
-            ring->seqno);
+            ring->fctx->last_fence);

         printk("rptr:     %d\n", get_rptr(adreno_gpu, ring));
         printk("rb wptr:  %d\n", get_wptr(ring));
@@ -679,119 +1088,171 @@ void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
             ring->id);
 }

-/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
-static int adreno_get_legacy_pwrlevels(struct device *dev)
+static int adreno_get_pwrlevels(struct device *dev,
+        struct msm_gpu *gpu)
 {
-    struct device_node *child, *node;
+    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+    unsigned long freq = ULONG_MAX;
+    struct dev_pm_opp *opp;
     int ret;

-    node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
-    if (!node) {
-        DRM_DEV_ERROR(dev, "Could not find the GPU powerlevels\n");
-        return -ENXIO;
-    }
+    gpu->fast_rate = 0;

-    for_each_child_of_node(node, child) {
-        unsigned int val;
+    /* devm_pm_opp_of_add_table may error out but will still create an OPP table */
+    ret = devm_pm_opp_of_add_table(dev);
+    if (ret == -ENODEV) {
+        /* Special cases for ancient hw with ancient DT bindings */
+        if (adreno_is_a2xx(adreno_gpu)) {
+            dev_warn(dev, "Unable to find the OPP table. Falling back to 200 MHz.\n");
+            dev_pm_opp_add(dev, 200000000, 0);
+        } else if (adreno_is_a320(adreno_gpu)) {
+            dev_warn(dev, "Unable to find the OPP table. Falling back to 450 MHz.\n");
+            dev_pm_opp_add(dev, 450000000, 0);
+        } else {
+            DRM_DEV_ERROR(dev, "Unable to find the OPP table\n");
+            return -ENODEV;
+        }
+    } else if (ret) {
+        DRM_DEV_ERROR(dev, "Unable to set the OPP table\n");
+        return ret;
+    }

-        ret = of_property_read_u32(child, "qcom,gpu-freq", &val);
-        if (ret)
-            continue;
+    /* Find the fastest defined rate */
+    opp = dev_pm_opp_find_freq_floor(dev, &freq);
+    if (IS_ERR(opp))
+        return PTR_ERR(opp);

-        /*
-         * Skip the intentionally bogus clock value found at the bottom
-         * of most legacy frequency tables
-         */
-        if (val != 27000000)
-            dev_pm_opp_add(dev, val, 0);
-    }
+    gpu->fast_rate = freq;
+    dev_pm_opp_put(opp);

-    of_node_put(node);
+    DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);

     return 0;
 }

-static int adreno_get_pwrlevels(struct device *dev,
-        struct msm_gpu *gpu)
+int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu,
+        struct adreno_ocmem *adreno_ocmem)
 {
-    unsigned long freq = ULONG_MAX;
-    struct dev_pm_opp *opp;
-    int ret;
-
-    gpu->fast_rate = 0;
+    struct ocmem_buf *ocmem_hdl;
+    struct ocmem *ocmem;
+
+    ocmem = of_get_ocmem(dev);
+    if (IS_ERR(ocmem)) {
+        if (PTR_ERR(ocmem) == -ENODEV) {
+            /*
+             * Return success since either the ocmem property was
+             * not specified in device tree, or ocmem support is
+             * not compiled into the kernel.
+             */
+            return 0;
+        }

-    /* You down with OPP? */
-    if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
-        ret = adreno_get_legacy_pwrlevels(dev);
-    else {
-        ret = dev_pm_opp_of_add_table(dev);
-        if (ret)
-            DRM_DEV_ERROR(dev, "Unable to set the OPP table\n");
+        return PTR_ERR(ocmem);
     }

-    if (!ret) {
-        /* Find the fastest defined rate */
-        opp = dev_pm_opp_find_freq_floor(dev, &freq);
-        if (!IS_ERR(opp)) {
-            gpu->fast_rate = freq;
-            dev_pm_opp_put(opp);
-        }
-    }
+    ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->info->gmem);
+    if (IS_ERR(ocmem_hdl))
+        return PTR_ERR(ocmem_hdl);

-    if (!gpu->fast_rate) {
-        dev_warn(dev,
-            "Could not find a clock rate. Using a reasonable default\n");
-        /* Pick a suitably safe clock speed for any target */
-        gpu->fast_rate = 200000000;
-    }
+    adreno_ocmem->ocmem = ocmem;
+    adreno_ocmem->base = ocmem_hdl->addr;
+    adreno_ocmem->hdl = ocmem_hdl;

-    DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);
+    if (WARN_ON(ocmem_hdl->len != adreno_gpu->info->gmem))
+        return -ENOMEM;

     return 0;
 }

+void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem)
+{
+    if (adreno_ocmem && adreno_ocmem->base)
+        ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS,
+                adreno_ocmem->hdl);
+}
+
+int adreno_read_speedbin(struct device *dev, u32 *speedbin)
+{
+    return nvmem_cell_read_variable_le_u32(dev, "speed_bin", speedbin);
+}
+
 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
         struct adreno_gpu *adreno_gpu,
         const struct adreno_gpu_funcs *funcs, int nr_rings)
 {
-    struct adreno_platform_config *config = pdev->dev.platform_data;
+    struct device *dev = &pdev->dev;
+    struct adreno_platform_config *config = dev->platform_data;
     struct msm_gpu_config adreno_gpu_config = { 0 };
     struct msm_gpu *gpu = &adreno_gpu->base;
+    const char *gpu_name;
+    u32 speedbin;
+    int ret;

     adreno_gpu->funcs = funcs;
-    adreno_gpu->info = adreno_info(config->rev);
-    adreno_gpu->gmem = adreno_gpu->info->gmem;
-    adreno_gpu->revn = adreno_gpu->info->revn;
-    adreno_gpu->rev = config->rev;
+    adreno_gpu->info = config->info;
+    adreno_gpu->chip_id = config->chip_id;

-    adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
-    adreno_gpu_config.irqname = "kgsl_3d0_irq";
+    gpu->allow_relocs = config->info->family < ADRENO_6XX_GEN1;
+    gpu->pdev = pdev;
+
+    /* Only handle the core clock when GMU is not in use (or is absent). */
+    if (adreno_has_gmu_wrapper(adreno_gpu) ||
+            adreno_has_rgmu(adreno_gpu) ||
+            adreno_gpu->info->family < ADRENO_6XX_GEN1) {
+        /*
+         * This can only be done before devm_pm_opp_of_add_table(), or
+         * dev_pm_opp_set_config() will WARN_ON()
+         */
+        if (IS_ERR(devm_clk_get(dev, "core"))) {
+            /*
+             * If "core" is absent, go for the legacy clock name.
+             * If we got this far in probing, it's a given one of
+             * them exists.
+             */
+            devm_pm_opp_set_clkname(dev, "core_clk");
+        } else
+            devm_pm_opp_set_clkname(dev, "core");
+    }

-    adreno_gpu_config.va_start = SZ_16M;
-    adreno_gpu_config.va_end = 0xffffffff;
-    /* maximum range of a2xx mmu */
-    if (adreno_is_a2xx(adreno_gpu))
-        adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K;
+    if (adreno_read_speedbin(dev, &speedbin) || !speedbin)
+        speedbin = 0xffff;
+    adreno_gpu->speedbin = (uint16_t) (0xffff & speedbin);
+
+    gpu_name = devm_kasprintf(dev, GFP_KERNEL, "%"ADRENO_CHIPID_FMT,
+            ADRENO_CHIPID_ARGS(config->chip_id));
+    if (!gpu_name)
+        return -ENOMEM;
+
+    adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";

     adreno_gpu_config.nr_rings = nr_rings;

-    adreno_get_pwrlevels(&pdev->dev, gpu);
+    ret = adreno_get_pwrlevels(dev, gpu);
+    if (ret)
+        return ret;
+
+    init_completion(&adreno_gpu->fault_coredump_done);
+    complete_all(&adreno_gpu->fault_coredump_done);

-    pm_runtime_set_autosuspend_delay(&pdev->dev,
+    pm_runtime_set_autosuspend_delay(dev,
         adreno_gpu->info->inactive_period);
-    pm_runtime_use_autosuspend(&pdev->dev);
-    pm_runtime_enable(&pdev->dev);
+    pm_runtime_use_autosuspend(dev);

     return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
-            adreno_gpu->info->name, &adreno_gpu_config);
+            gpu_name, &adreno_gpu_config);
 }

 void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 {
+    struct msm_gpu *gpu = &adreno_gpu->base;
+    struct msm_drm_private *priv = gpu->dev ? gpu->dev->dev_private : NULL;
     unsigned int i;

     for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
         release_firmware(adreno_gpu->fw[i]);

+    if (priv && pm_runtime_enabled(&priv->gpu_pdev->dev))
+        pm_runtime_disable(&priv->gpu_pdev->dev);
+
     msm_gpu_cleanup(&adreno_gpu->base);
 }
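And the userspace counterpart of adreno_set_param(): MSM_PARAM_COMM and MSM_PARAM_CMDLINE are the two pointer params, so value carries a user pointer and len its size, bounded by the PAGE_SIZE check in the patch above. A minimal sketch:

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/msm_drm.h>

    /* Label a GPU context so it can be identified in crashdumps */
    static int example_set_comm(int fd, const char *name)
    {
        struct drm_msm_param req = {
            .pipe  = MSM_PIPE_3D0,
            .param = MSM_PARAM_COMM,
            .value = (uintptr_t)name,   /* pointer param: user address */
            .len   = strlen(name) + 1,  /* must not exceed PAGE_SIZE */
        };

        return ioctl(fd, DRM_IOCTL_MSM_SET_PARAM, &req);
    }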
