Diffstat (limited to 'drivers/gpu/drm/panthor')
20 files changed, 1429 insertions, 270 deletions
diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile index 02db21748c12..753a32c446df 100644 --- a/drivers/gpu/drm/panthor/Makefile +++ b/drivers/gpu/drm/panthor/Makefile @@ -10,6 +10,7 @@ panthor-y := \ panthor_heap.o \ panthor_hw.o \ panthor_mmu.o \ + panthor_pwr.o \ panthor_sched.o obj-$(CONFIG_DRM_PANTHOR) += panthor.o diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c index 3686515d368d..2249b41ca4af 100644 --- a/drivers/gpu/drm/panthor/panthor_devfreq.c +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c @@ -8,6 +8,7 @@ #include <linux/pm_opp.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "panthor_devfreq.h" #include "panthor_device.h" @@ -62,7 +63,6 @@ static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) static int panthor_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { - struct panthor_device *ptdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; int err; @@ -72,8 +72,6 @@ static int panthor_devfreq_target(struct device *dev, unsigned long *freq, dev_pm_opp_put(opp); err = dev_pm_opp_set_rate(dev, *freq); - if (!err) - ptdev->current_frequency = *freq; return err; } @@ -115,11 +113,21 @@ static int panthor_devfreq_get_dev_status(struct device *dev, return 0; } +static int panthor_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct panthor_device *ptdev = dev_get_drvdata(dev); + + *freq = clk_get_rate(ptdev->clks.core); + + return 0; +} + static struct devfreq_dev_profile panthor_devfreq_profile = { .timer = DEVFREQ_TIMER_DELAYED, .polling_ms = 50, /* ~3 frames */ .target = panthor_devfreq_target, .get_dev_status = panthor_devfreq_get_dev_status, + .get_cur_freq = panthor_devfreq_get_cur_freq, }; int panthor_devfreq_init(struct panthor_device *ptdev) @@ -134,6 +142,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev) struct thermal_cooling_device *cooling; struct device *dev = ptdev->base.dev; struct panthor_devfreq *pdevfreq; + struct opp_table *table; struct dev_pm_opp *opp; unsigned long cur_freq; unsigned long freq = ULONG_MAX; @@ -145,18 +154,30 @@ int panthor_devfreq_init(struct panthor_device *ptdev) ptdev->devfreq = pdevfreq; - ret = devm_pm_opp_set_regulators(dev, reg_names); - if (ret) { - if (ret != -EPROBE_DEFER) - DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); - - return ret; + /* + * The power domain associated with the GPU may have already added an + * OPP table, complete with OPPs, as part of the platform bus + * initialization. If this is the case, the power domain is in charge of + * also controlling the performance, with a set_performance callback. + * Only add a new OPP table from DT if there isn't such a table present + * already. 
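[Editor's sketch] A condensed, standalone version of the OPP-table guard described in the comment above (the full hunk follows): regulators and the DT OPP table are only registered when nothing else, such as a performance-state-managing power domain, has attached a table already. The helper name is made up and error handling is trimmed.

        #include <linux/pm_opp.h>

        /* Hypothetical helper: skip DT OPP setup when a table already exists
         * (e.g. attached by the GPU's power domain).
         */
        static int example_opp_setup(struct device *dev)
        {
                struct opp_table *table = dev_pm_opp_get_opp_table(dev);

                if (!IS_ERR_OR_NULL(table)) {
                        /* A table exists already: just drop the reference we took. */
                        dev_pm_opp_put_opp_table(table);
                        return 0;
                }

                return devm_pm_opp_of_add_table(dev);
        }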
+ */ + table = dev_pm_opp_get_opp_table(dev); + if (IS_ERR_OR_NULL(table)) { + ret = devm_pm_opp_set_regulators(dev, reg_names); + if (ret && ret != -ENODEV) { + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); + return ret; + } + + ret = devm_pm_opp_of_add_table(dev); + if (ret) + return ret; + } else { + dev_pm_opp_put_opp_table(table); } - ret = devm_pm_opp_of_add_table(dev); - if (ret) - return ret; - spin_lock_init(&pdevfreq->lock); panthor_devfreq_reset(pdevfreq); @@ -198,7 +219,6 @@ int panthor_devfreq_init(struct panthor_device *ptdev) return PTR_ERR(opp); panthor_devfreq_profile.initial_freq = cur_freq; - ptdev->current_frequency = cur_freq; /* * Set the recommend OPP this will enable and configure the regulator @@ -296,3 +316,19 @@ void panthor_devfreq_record_idle(struct panthor_device *ptdev) spin_unlock_irqrestore(&pdevfreq->lock, irqflags); } + +unsigned long panthor_devfreq_get_freq(struct panthor_device *ptdev) +{ + struct panthor_devfreq *pdevfreq = ptdev->devfreq; + unsigned long freq = 0; + int ret; + + if (!pdevfreq->devfreq) + return 0; + + ret = pdevfreq->devfreq->profile->get_cur_freq(ptdev->base.dev, &freq); + if (ret) + return 0; + + return freq; +} diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h index b7631de695f7..f8e29e02f66c 100644 --- a/drivers/gpu/drm/panthor/panthor_devfreq.h +++ b/drivers/gpu/drm/panthor/panthor_devfreq.h @@ -18,4 +18,6 @@ void panthor_devfreq_suspend(struct panthor_device *ptdev); void panthor_devfreq_record_busy(struct panthor_device *ptdev); void panthor_devfreq_record_idle(struct panthor_device *ptdev); +unsigned long panthor_devfreq_get_freq(struct panthor_device *ptdev); + #endif /* __PANTHOR_DEVFREQ_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 81df49880bd8..e133b1e0ad6d 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -13,6 +13,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "panthor_devfreq.h" #include "panthor_device.h" @@ -20,6 +21,7 @@ #include "panthor_gpu.h" #include "panthor_hw.h" #include "panthor_mmu.h" +#include "panthor_pwr.h" #include "panthor_regs.h" #include "panthor_sched.h" @@ -65,6 +67,16 @@ static int panthor_clk_init(struct panthor_device *ptdev) return 0; } +static int panthor_init_power(struct device *dev) +{ + struct dev_pm_domain_list *pd_list = NULL; + + if (dev->pm_domain) + return 0; + + return devm_pm_domain_attach_list(dev, NULL, &pd_list); +} + void panthor_device_unplug(struct panthor_device *ptdev) { /* This function can be called from two different path: the reset work @@ -83,6 +95,8 @@ void panthor_device_unplug(struct panthor_device *ptdev) return; } + drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); + /* Call drm_dev_unplug() so any access to HW blocks happening after * that point get rejected. */ @@ -93,8 +107,6 @@ void panthor_device_unplug(struct panthor_device *ptdev) */ mutex_unlock(&ptdev->unplug.lock); - drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); - /* Now, try to cleanly shutdown the GPU before the device resources * get reclaimed. 
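[Editor's sketch] The panthor_init_power() hunk above relies on devm_pm_domain_attach_list(). A minimal sketch of that pattern follows; the behavioural notes are my reading of the PM-domain core rather than something stated in the patch: a device with a single power domain gets it attached automatically at probe time (dev->pm_domain is set), several domains must be attached explicitly, and a NULL attach-data is taken to mean "attach every domain listed in DT".

        #include <linux/pm_domain.h>

        /* Sketch only: attach all of the device's power domains when the core
         * has not already attached a single one.
         */
        static int example_attach_power_domains(struct device *dev)
        {
                struct dev_pm_domain_list *pd_list = NULL;

                if (dev->pm_domain)
                        return 0;       /* single domain, already attached by the core */

                /* NULL attach data: attach every "power-domains" entry (assumption). */
                return devm_pm_domain_attach_list(dev, NULL, &pd_list);
        }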
*/ @@ -102,6 +114,7 @@ void panthor_device_unplug(struct panthor_device *ptdev) panthor_fw_unplug(ptdev); panthor_mmu_unplug(ptdev); panthor_gpu_unplug(ptdev); + panthor_pwr_unplug(ptdev); pm_runtime_dont_use_autosuspend(ptdev->base.dev); pm_runtime_put_sync_suspend(ptdev->base.dev); @@ -120,7 +133,7 @@ static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data) { struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); - cancel_work_sync(&ptdev->reset.work); + disable_work_sync(&ptdev->reset.work); destroy_workqueue(ptdev->reset.wq); } @@ -141,8 +154,8 @@ static void panthor_device_reset_work(struct work_struct *work) panthor_sched_pre_reset(ptdev); panthor_fw_pre_reset(ptdev, true); panthor_mmu_pre_reset(ptdev); - panthor_gpu_soft_reset(ptdev); - panthor_gpu_l2_power_on(ptdev); + panthor_hw_soft_reset(ptdev); + panthor_hw_l2_power_on(ptdev); panthor_mmu_post_reset(ptdev); ret = panthor_fw_post_reset(ptdev); atomic_set(&ptdev->reset.pending, 0); @@ -172,6 +185,8 @@ int panthor_device_init(struct panthor_device *ptdev) struct page *p; int ret; + ptdev->soc_data = of_device_get_match_data(ptdev->base.dev); + init_completion(&ptdev->unplug.done); ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); if (ret) @@ -219,6 +234,12 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) return ret; + ret = panthor_init_power(ptdev->base.dev); + if (ret < 0) { + drm_err(&ptdev->base, "init power domains failed, ret=%d", ret); + return ret; + } + ret = panthor_devfreq_init(ptdev); if (ret) return ret; @@ -249,10 +270,14 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) goto err_rpm_put; - ret = panthor_gpu_init(ptdev); + ret = panthor_pwr_init(ptdev); if (ret) goto err_rpm_put; + ret = panthor_gpu_init(ptdev); + if (ret) + goto err_unplug_pwr; + ret = panthor_gpu_coherency_init(ptdev); if (ret) goto err_unplug_gpu; @@ -293,6 +318,9 @@ err_unplug_mmu: err_unplug_gpu: panthor_gpu_unplug(ptdev); +err_unplug_pwr: + panthor_pwr_unplug(ptdev); + err_rpm_put: pm_runtime_put_sync_suspend(ptdev->base.dev); return ret; @@ -446,6 +474,7 @@ static int panthor_device_resume_hw_components(struct panthor_device *ptdev) { int ret; + panthor_pwr_resume(ptdev); panthor_gpu_resume(ptdev); panthor_mmu_resume(ptdev); @@ -455,6 +484,7 @@ static int panthor_device_resume_hw_components(struct panthor_device *ptdev) panthor_mmu_suspend(ptdev); panthor_gpu_suspend(ptdev); + panthor_pwr_suspend(ptdev); return ret; } @@ -568,6 +598,7 @@ int panthor_device_suspend(struct device *dev) panthor_fw_suspend(ptdev); panthor_mmu_suspend(ptdev); panthor_gpu_suspend(ptdev); + panthor_pwr_suspend(ptdev); drm_dev_exit(cookie); } diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h index 4fc7cf2aeed5..f35e52b9546a 100644 --- a/drivers/gpu/drm/panthor/panthor_device.h +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -24,14 +24,27 @@ struct panthor_device; struct panthor_gpu; struct panthor_group_pool; struct panthor_heap_pool; +struct panthor_hw; struct panthor_job; struct panthor_mmu; struct panthor_fw; struct panthor_perfcnt; +struct panthor_pwr; struct panthor_vm; struct panthor_vm_pool; /** + * struct panthor_soc_data - Panthor SoC Data + */ +struct panthor_soc_data { + /** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */ + bool asn_hash_enable; + + /** @asn_hash: ASN_HASH values when asn_hash_enable is true. 
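[Editor's sketch] The struct being defined here carries per-SoC quirks. Below is a sketch of how such data flows from the OF match table (the real MT8196 entry appears in panthor_drv.c further down) into ptdev->soc_data via of_device_get_match_data(); the compatible string is made up.

        /* Hypothetical SoC entry: quirk data hung off the OF match table. */
        static const struct panthor_soc_data example_soc_data = {
                .asn_hash_enable = true,
                .asn_hash = { 0xb, 0xe, 0x0 },
        };

        static const struct of_device_id example_dt_match[] = {
                { .compatible = "vendor,example-mali", .data = &example_soc_data },
                {}
        };

        /* In panthor_device_init(): ptdev->soc_data = of_device_get_match_data(dev);
         * soc_data stays NULL on SoCs without quirks, so users must check for that.
         */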
*/ + u32 asn_hash[3]; +}; + +/** * enum panthor_device_pm_state - PM state */ enum panthor_device_pm_state { @@ -93,6 +106,9 @@ struct panthor_device { /** @base: Base drm_device. */ struct drm_device base; + /** @soc_data: Optional SoC data. */ + const struct panthor_soc_data *soc_data; + /** @phys_addr: Physical address of the iomem region. */ phys_addr_t phys_addr; @@ -120,6 +136,12 @@ struct panthor_device { /** @csif_info: Command stream interface information. */ struct drm_panthor_csif_info csif_info; + /** @hw: GPU-specific data. */ + struct panthor_hw *hw; + + /** @pwr: Power control management data. */ + struct panthor_pwr *pwr; + /** @gpu: GPU management data. */ struct panthor_gpu *gpu; @@ -200,9 +222,6 @@ struct panthor_device { /** @profile_mask: User-set profiling flags for job accounting. */ u32 profile_mask; - /** @current_frequency: Device clock frequency at present. Set by DVFS*/ - unsigned long current_frequency; - /** @fast_rate: Maximum device clock frequency. Set by DVFS */ unsigned long fast_rate; diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 4c202fc5ce05..d1d4c50da5bf 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -20,11 +20,13 @@ #include <drm/drm_drv.h> #include <drm/drm_exec.h> #include <drm/drm_ioctl.h> +#include <drm/drm_print.h> #include <drm/drm_syncobj.h> #include <drm/drm_utils.h> #include <drm/gpu_scheduler.h> #include <drm/panthor_drm.h> +#include "panthor_devfreq.h" #include "panthor_device.h" #include "panthor_fw.h" #include "panthor_gem.h" @@ -1105,7 +1107,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, if (ret) goto out; - ret = panthor_group_create(pfile, args, queue_args); + ret = panthor_group_create(pfile, args, queue_args, file->client_id); if (ret < 0) goto out; args->group_handle = ret; @@ -1519,7 +1521,8 @@ static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev, drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles); drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate); - drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency); + drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", + panthor_devfreq_get_freq(ptdev)); } static void panthor_show_internal_memory_stats(struct drm_printer *p, struct drm_file *file) @@ -1682,7 +1685,13 @@ static struct attribute *panthor_attrs[] = { ATTRIBUTE_GROUPS(panthor); +static const struct panthor_soc_data soc_data_mediatek_mt8196 = { + .asn_hash_enable = true, + .asn_hash = { 0xb, 0xe, 0x0, }, +}; + static const struct of_device_id dt_match[] = { + { .compatible = "mediatek,mt8196-mali", .data = &soc_data_mediatek_mt8196, }, { .compatible = "rockchip,rk3588-mali" }, { .compatible = "arm,mali-valhall-csf" }, {} diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index df767e82148a..1a5e3c1a27fb 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -16,11 +16,13 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "panthor_device.h" #include "panthor_fw.h" #include "panthor_gem.h" #include "panthor_gpu.h" +#include "panthor_hw.h" #include "panthor_mmu.h" #include "panthor_regs.h" #include "panthor_sched.h" @@ -32,6 +34,7 @@ #define PROGRESS_TIMEOUT_SCALE_SHIFT 10 #define IDLE_HYSTERESIS_US 800 #define PWROFF_HYSTERESIS_US 10000 +#define MCU_HALT_TIMEOUT_US (1ULL * USEC_PER_SEC) /** * struct panthor_fw_binary_hdr - 
Firmware binary header. @@ -316,6 +319,49 @@ panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) return &ptdev->fw->iface.streams[csg_slot][cs_slot]; } +static bool panthor_fw_has_glb_state(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0); +} + +static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0); +} + +u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface) +{ + if (panthor_fw_has_64bit_ep_req(ptdev)) + return csg_iface->input->endpoint_req2; + else + return csg_iface->input->endpoint_req; +} + +void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value) +{ + if (panthor_fw_has_64bit_ep_req(ptdev)) + csg_iface->input->endpoint_req2 = value; + else + csg_iface->input->endpoint_req = lower_32_bits(value); +} + +void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value, + u64 mask) +{ + if (panthor_fw_has_64bit_ep_req(ptdev)) + panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask); + else + panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value), + lower_32_bits(mask)); +} + /** * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count * @ptdev: Device. @@ -995,6 +1041,9 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev) GLB_IDLE_EN | GLB_IDLE; + if (panthor_fw_has_glb_state(ptdev)) + glb_iface->input->ack_irq_mask |= GLB_STATE_MASK; + panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_CFG_ALLOC_EN | @@ -1068,6 +1117,54 @@ static void panthor_fw_stop(struct panthor_device *ptdev) drm_err(&ptdev->base, "Failed to stop MCU"); } +static bool panthor_fw_mcu_halted(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + bool halted; + + halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT; + + if (panthor_fw_has_glb_state(ptdev)) + halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT); + + return halted; +} + +static void panthor_fw_halt_mcu(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + if (panthor_fw_has_glb_state(ptdev)) + panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK); + else + panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); + + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); +} + +static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev) +{ + bool halted = false; + + if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10, + MCU_HALT_TIMEOUT_US, 0, ptdev)) { + drm_warn(&ptdev->base, "Timed out waiting for MCU to halt"); + return false; + } + + return true; +} + +static void panthor_fw_mcu_set_active(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + if (panthor_fw_has_glb_state(ptdev)) + panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK); + else + panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); +} + /** * panthor_fw_pre_reset() - Call before a reset. 
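[Editor's sketch] An illustrative caller (not taken from the patch) for the new width-agnostic CSG endpoint-request helpers above: on CSF interfaces >= 4.0 the write lands in the 64-bit endpoint_req2 register, on older ones in the 32-bit endpoint_req, but the call site stays the same. The priority macros come from panthor_fw.h later in this diff, and the assumption is that the field keeps the same bit position in both registers.

        /* Illustrative only: update just the priority field of a CSG slot. */
        static void example_set_csg_priority(struct panthor_device *ptdev,
                                             struct panthor_fw_csg_iface *csg_iface,
                                             u8 priority)
        {
                panthor_fw_csg_endpoint_req_update(ptdev, csg_iface,
                                                   CSG_EP_REQ_PRIORITY(priority),
                                                   CSG_EP_REQ_PRIORITY_MASK);
        }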
* @ptdev: Device. @@ -1084,19 +1181,13 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) ptdev->reset.fast = false; if (!on_hang) { - struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); - u32 status; - - panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); - gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); - if (!gpu_read_poll_timeout(ptdev, MCU_STATUS, status, - status == MCU_STATUS_HALT, 10, - 100000)) { - ptdev->reset.fast = true; - } else { + panthor_fw_halt_mcu(ptdev); + if (!panthor_fw_wait_mcu_halted(ptdev)) drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); - } + else + ptdev->reset.fast = true; } + panthor_fw_stop(ptdev); panthor_job_irq_suspend(&ptdev->fw->irq); panthor_fw_stop(ptdev); @@ -1125,14 +1216,14 @@ int panthor_fw_post_reset(struct panthor_device *ptdev) */ panthor_reload_fw_sections(ptdev, true); } else { - /* The FW detects 0 -> 1 transitions. Make sure we reset - * the HALT bit before the FW is rebooted. + /* + * If the FW was previously successfully halted in the pre-reset + * operation, we need to transition it to active again before + * the FW is rebooted. * This is not needed on a slow reset because FW sections are * re-initialized. */ - struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); - - panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); + panthor_fw_mcu_set_active(ptdev); } ret = panthor_fw_start(ptdev); @@ -1163,13 +1254,17 @@ void panthor_fw_unplug(struct panthor_device *ptdev) { struct panthor_fw_section *section; - cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); + disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work); if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) { /* Make sure the IRQ handler cannot be called after that point. 
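[Editor's sketch] A condensed restatement (not verbatim) of how the rewritten pre/post-reset pair above decides between a fast and a slow reset: only a cleanly halted MCU allows the fast path, in which firmware sections are kept and the MCU is simply switched back to ACTIVE after the reset; otherwise the sections are reloaded.

        /* Sketch of the decision in panthor_fw_pre_reset(..., on_hang == false): */
        panthor_fw_halt_mcu(ptdev);
        if (panthor_fw_wait_mcu_halted(ptdev))
                ptdev->reset.fast = true;       /* post-reset just sets the MCU ACTIVE again */
        else
                drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); /* slow path */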
*/ if (ptdev->fw->irq.irq) panthor_job_irq_suspend(&ptdev->fw->irq); + panthor_fw_halt_mcu(ptdev); + if (!panthor_fw_wait_mcu_halted(ptdev)) + drm_warn(&ptdev->base, "Failed to halt MCU on unplug"); + panthor_fw_stop(ptdev); } @@ -1185,7 +1280,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev) ptdev->fw->vm = NULL; if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) - panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); + panthor_hw_l2_power_off(ptdev); } /** @@ -1364,7 +1459,7 @@ int panthor_fw_init(struct panthor_device *ptdev) return ret; } - ret = panthor_gpu_l2_power_on(ptdev); + ret = panthor_hw_l2_power_on(ptdev); if (ret) return ret; @@ -1408,3 +1503,4 @@ MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin"); MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin"); MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin"); MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin"); diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h index 6598d96c6d2a..fbdc21469ba3 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.h +++ b/drivers/gpu/drm/panthor/panthor_fw.h @@ -167,10 +167,11 @@ struct panthor_fw_csg_input_iface { #define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) #define CSG_EP_REQ_EXCL_COMPUTE BIT(20) #define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) -#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) #define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) +#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & CSG_EP_REQ_PRIORITY_MASK) +#define CSG_EP_REQ_PRIORITY_GET(x) (((x) & CSG_EP_REQ_PRIORITY_MASK) >> 28) u32 endpoint_req; - u32 reserved2[2]; + u64 endpoint_req2; u64 suspend_buf; u64 protm_suspend_buf; u32 config; @@ -214,6 +215,13 @@ struct panthor_fw_global_input_iface { #define GLB_FWCFG_UPDATE BIT(9) #define GLB_IDLE_EN BIT(10) #define GLB_SLEEP BIT(12) +#define GLB_STATE_MASK GENMASK(14, 12) +#define GLB_STATE_ACTIVE 0 +#define GLB_STATE_HALT 1 +#define GLB_STATE_SLEEP 2 +#define GLB_STATE_SUSPEND 3 +#define GLB_STATE(x) (((x) << 12) & GLB_STATE_MASK) +#define GLB_STATE_GET(x) (((x) & GLB_STATE_MASK) >> 12) #define GLB_INACTIVE_COMPUTE BIT(20) #define GLB_INACTIVE_FRAGMENT BIT(21) #define GLB_INACTIVE_TILER BIT(22) @@ -457,6 +465,16 @@ struct panthor_fw_global_iface { spin_unlock(&(__iface)->lock); \ } while (0) +#define panthor_fw_update_reqs64(__iface, __in_reg, __val, __mask) \ + do { \ + u64 __cur_val, __new_val; \ + spin_lock(&(__iface)->lock); \ + __cur_val = READ_ONCE((__iface)->input->__in_reg); \ + __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ + WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ + spin_unlock(&(__iface)->lock); \ + } while (0) + struct panthor_fw_global_iface * panthor_fw_get_glb_iface(struct panthor_device *ptdev); @@ -466,6 +484,16 @@ panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); struct panthor_fw_cs_iface * panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); +u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface); + +void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value); + +void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value, + u64 mask); + int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, u32 *acked, u32 timeout_ms); diff --git 
a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 3f43686f0195..fbde78db270a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -8,6 +8,7 @@ #include <linux/err.h> #include <linux/slab.h> +#include <drm/drm_print.h> #include <drm/panthor_drm.h> #include "panthor_device.h" @@ -86,7 +87,6 @@ static void panthor_gem_free_object(struct drm_gem_object *obj) void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo) { struct panthor_vm *vm; - int ret; if (IS_ERR_OR_NULL(bo)) return; @@ -94,18 +94,11 @@ void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo) vm = bo->vm; panthor_kernel_bo_vunmap(bo); - if (drm_WARN_ON(bo->obj->dev, - to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) - goto out_free_bo; - - ret = panthor_vm_unmap_range(vm, bo->va_node.start, bo->va_node.size); - if (ret) - goto out_free_bo; - + drm_WARN_ON(bo->obj->dev, + to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm)); + panthor_vm_unmap_range(vm, bo->va_node.start, bo->va_node.size); panthor_vm_free_va(vm, &bo->va_node); drm_gem_object_put(bo->obj); - -out_free_bo: panthor_vm_put(vm); kfree(bo); } @@ -152,6 +145,9 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, bo = to_panthor_bo(&obj->base); kbo->obj = &obj->base; bo->flags = bo_flags; + bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); + drm_gem_object_get(bo->exclusive_vm_root_gem); + bo->base.base.resv = bo->exclusive_vm_root_gem->resv; if (vm == panthor_fw_vm(ptdev)) debug_flags |= PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED; @@ -175,9 +171,6 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, goto err_free_va; kbo->vm = panthor_vm_get(vm); - bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); - drm_gem_object_get(bo->exclusive_vm_root_gem); - bo->base.base.resv = bo->exclusive_vm_root_gem->resv; return kbo; err_free_va: diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index db69449a5be0..06b231b2460a 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -15,9 +15,11 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "panthor_device.h" #include "panthor_gpu.h" +#include "panthor_hw.h" #include "panthor_regs.h" /** @@ -52,6 +54,28 @@ static void panthor_gpu_coherency_set(struct panthor_device *ptdev) ptdev->coherent ? 
GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE); } +static void panthor_gpu_l2_config_set(struct panthor_device *ptdev) +{ + const struct panthor_soc_data *data = ptdev->soc_data; + u32 l2_config; + u32 i; + + if (!data || !data->asn_hash_enable) + return; + + if (GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) < 11) { + drm_err(&ptdev->base, "Custom ASN hash not supported by the device"); + return; + } + + for (i = 0; i < ARRAY_SIZE(data->asn_hash); i++) + gpu_write(ptdev, GPU_ASN_HASH(i), data->asn_hash[i]); + + l2_config = gpu_read(ptdev, GPU_L2_CONFIG); + l2_config |= GPU_L2_CONFIG_ASN_HASH_ENABLE; + gpu_write(ptdev, GPU_L2_CONFIG, l2_config); +} + static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) { gpu_write(ptdev, GPU_INT_CLEAR, status); @@ -218,6 +242,11 @@ int panthor_gpu_block_power_on(struct panthor_device *ptdev, return 0; } +void panthor_gpu_l2_power_off(struct panthor_device *ptdev) +{ + panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); +} + /** * panthor_gpu_l2_power_on() - Power-on the L2-cache * @ptdev: Device. @@ -241,8 +270,9 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev) hweight64(ptdev->gpu_info.shader_present)); } - /* Set the desired coherency mode before the power up of L2 */ + /* Set the desired coherency mode and L2 config before the power up of L2 */ panthor_gpu_coherency_set(ptdev); + panthor_gpu_l2_config_set(ptdev); return panthor_gpu_power_on(ptdev, L2, 1, 20000); } @@ -344,9 +374,9 @@ void panthor_gpu_suspend(struct panthor_device *ptdev) { /* On a fast reset, simply power down the L2. */ if (!ptdev->reset.fast) - panthor_gpu_soft_reset(ptdev); + panthor_hw_soft_reset(ptdev); else - panthor_gpu_power_off(ptdev, L2, 1, 20000); + panthor_hw_l2_power_off(ptdev); panthor_gpu_irq_suspend(&ptdev->gpu->irq); } @@ -361,6 +391,6 @@ void panthor_gpu_suspend(struct panthor_device *ptdev) void panthor_gpu_resume(struct panthor_device *ptdev) { panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); - panthor_gpu_l2_power_on(ptdev); + panthor_hw_l2_power_on(ptdev); } diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h index 7c17a8c06858..12e66f48ced1 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.h +++ b/drivers/gpu/drm/panthor/panthor_gpu.h @@ -46,6 +46,7 @@ int panthor_gpu_block_power_off(struct panthor_device *ptdev, type ## _PWRTRANS, \ mask, timeout_us) +void panthor_gpu_l2_power_off(struct panthor_device *ptdev); int panthor_gpu_l2_power_on(struct panthor_device *ptdev); int panthor_gpu_flush_caches(struct panthor_device *ptdev, u32 l2, u32 lsc, u32 other); diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c index d236e9ceade4..0b6ff4c0a11b 100644 --- a/drivers/gpu/drm/panthor/panthor_heap.c +++ b/drivers/gpu/drm/panthor/panthor_heap.c @@ -4,6 +4,7 @@ #include <linux/iosys-map.h> #include <linux/rwsem.h> +#include <drm/drm_print.h> #include <drm/panthor_drm.h> #include "panthor_device.h" diff --git a/drivers/gpu/drm/panthor/panthor_hw.c b/drivers/gpu/drm/panthor/panthor_hw.c index 4f2858114e5e..87ebb7ae42c4 100644 --- a/drivers/gpu/drm/panthor/panthor_hw.c +++ b/drivers/gpu/drm/panthor/panthor_hw.c @@ -1,13 +1,58 @@ // SPDX-License-Identifier: GPL-2.0 or MIT /* Copyright 2025 ARM Limited. All rights reserved. 
*/ +#include <drm/drm_print.h> + #include "panthor_device.h" +#include "panthor_gpu.h" #include "panthor_hw.h" +#include "panthor_pwr.h" #include "panthor_regs.h" #define GPU_PROD_ID_MAKE(arch_major, prod_major) \ (((arch_major) << 24) | (prod_major)) +/** struct panthor_hw_entry - HW arch major to panthor_hw binding entry */ +struct panthor_hw_entry { + /** @arch_min: Minimum supported architecture major value (inclusive) */ + u8 arch_min; + + /** @arch_max: Maximum supported architecture major value (inclusive) */ + u8 arch_max; + + /** @hwdev: Pointer to panthor_hw structure */ + struct panthor_hw *hwdev; +}; + +static struct panthor_hw panthor_hw_arch_v10 = { + .ops = { + .soft_reset = panthor_gpu_soft_reset, + .l2_power_off = panthor_gpu_l2_power_off, + .l2_power_on = panthor_gpu_l2_power_on, + }, +}; + +static struct panthor_hw panthor_hw_arch_v14 = { + .ops = { + .soft_reset = panthor_pwr_reset_soft, + .l2_power_off = panthor_pwr_l2_power_off, + .l2_power_on = panthor_pwr_l2_power_on, + }, +}; + +static struct panthor_hw_entry panthor_hw_match[] = { + { + .arch_min = 10, + .arch_max = 13, + .hwdev = &panthor_hw_arch_v10, + }, + { + .arch_min = 14, + .arch_max = 14, + .hwdev = &panthor_hw_arch_v14, + }, +}; + static char *get_gpu_model_name(struct panthor_device *ptdev) { const u32 gpu_id = ptdev->gpu_info.gpu_id; @@ -53,6 +98,12 @@ static char *get_gpu_model_name(struct panthor_device *ptdev) fallthrough; case GPU_PROD_ID_MAKE(13, 1): return "Mali-G625"; + case GPU_PROD_ID_MAKE(14, 0): + return "Mali-G1-Ultra"; + case GPU_PROD_ID_MAKE(14, 1): + return "Mali-G1-Premium"; + case GPU_PROD_ID_MAKE(14, 3): + return "Mali-G1-Pro"; } return "(Unknown Mali GPU)"; @@ -62,7 +113,6 @@ static void panthor_gpu_info_init(struct panthor_device *ptdev) { unsigned int i; - ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); @@ -80,12 +130,19 @@ static void panthor_gpu_info_init(struct panthor_device *ptdev) ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); - ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT); - ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT); - ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT); - /* Introduced in arch 11.x */ ptdev->gpu_info.gpu_features = gpu_read64(ptdev, GPU_FEATURES); + + if (panthor_hw_has_pwr_ctrl(ptdev)) { + /* Introduced in arch 14.x */ + ptdev->gpu_info.l2_present = gpu_read64(ptdev, PWR_L2_PRESENT); + ptdev->gpu_info.tiler_present = gpu_read64(ptdev, PWR_TILER_PRESENT); + ptdev->gpu_info.shader_present = gpu_read64(ptdev, PWR_SHADER_PRESENT); + } else { + ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT); + ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT); + ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT); + } } static void panthor_hw_info_init(struct panthor_device *ptdev) @@ -117,8 +174,50 @@ static void panthor_hw_info_init(struct panthor_device *ptdev) ptdev->gpu_info.tiler_present); } +static int panthor_hw_bind_device(struct panthor_device *ptdev) +{ + struct panthor_hw *hdev = NULL; + const u32 arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); + int i = 0; + + for (i = 0; i < ARRAY_SIZE(panthor_hw_match); i++) { + struct panthor_hw_entry *entry = &panthor_hw_match[i]; + + if (arch_major >= entry->arch_min && arch_major <= entry->arch_max) { + hdev = 
entry->hwdev; + break; + } + } + + if (!hdev) + return -EOPNOTSUPP; + + ptdev->hw = hdev; + + return 0; +} + +static int panthor_hw_gpu_id_init(struct panthor_device *ptdev) +{ + ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); + if (!ptdev->gpu_info.gpu_id) + return -ENXIO; + + return 0; +} + int panthor_hw_init(struct panthor_device *ptdev) { + int ret = 0; + + ret = panthor_hw_gpu_id_init(ptdev); + if (ret) + return ret; + + ret = panthor_hw_bind_device(ptdev); + if (ret) + return ret; + panthor_hw_info_init(ptdev); return 0; diff --git a/drivers/gpu/drm/panthor/panthor_hw.h b/drivers/gpu/drm/panthor/panthor_hw.h index 0af6acc6aa6a..56c68c1e9c26 100644 --- a/drivers/gpu/drm/panthor/panthor_hw.h +++ b/drivers/gpu/drm/panthor/panthor_hw.h @@ -4,8 +4,53 @@ #ifndef __PANTHOR_HW_H__ #define __PANTHOR_HW_H__ -struct panthor_device; +#include "panthor_device.h" +#include "panthor_regs.h" + +/** + * struct panthor_hw_ops - HW operations that are specific to a GPU + */ +struct panthor_hw_ops { + /** @soft_reset: Soft reset function pointer */ + int (*soft_reset)(struct panthor_device *ptdev); + + /** @l2_power_off: L2 power off function pointer */ + void (*l2_power_off)(struct panthor_device *ptdev); + + /** @l2_power_on: L2 power on function pointer */ + int (*l2_power_on)(struct panthor_device *ptdev); +}; + +/** + * struct panthor_hw - GPU specific register mapping and functions + */ +struct panthor_hw { + /** @features: Bitmap containing panthor_hw_feature */ + + /** @ops: Panthor HW specific operations */ + struct panthor_hw_ops ops; +}; int panthor_hw_init(struct panthor_device *ptdev); +static inline int panthor_hw_soft_reset(struct panthor_device *ptdev) +{ + return ptdev->hw->ops.soft_reset(ptdev); +} + +static inline int panthor_hw_l2_power_on(struct panthor_device *ptdev) +{ + return ptdev->hw->ops.l2_power_on(ptdev); +} + +static inline void panthor_hw_l2_power_off(struct panthor_device *ptdev) +{ + ptdev->hw->ops.l2_power_off(ptdev); +} + +static inline bool panthor_hw_has_pwr_ctrl(struct panthor_device *ptdev) +{ + return GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) >= 14; +} + #endif /* __PANTHOR_HW_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 7870e7dbaa5d..d4839d282689 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -7,6 +7,7 @@ #include <drm/drm_exec.h> #include <drm/drm_gpuvm.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include <drm/gpu_scheduler.h> #include <drm/panthor_drm.h> @@ -181,20 +182,6 @@ struct panthor_vm_op_ctx { u64 range; } va; - /** - * @returned_vmas: List of panthor_vma objects returned after a VM operation. - * - * For unmap operations, this will contain all VMAs that were covered by the - * specified VA range. - * - * For map operations, this will contain all VMAs that previously mapped to - * the specified VA range. - * - * Those VMAs, and the resources they point to will be released as part of - * the op_ctx cleanup operation. - */ - struct list_head returned_vmas; - /** @map: Fields specific to a map operation. */ struct { /** @map.vm_bo: Buffer object to map. 
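[Editor's sketch] For context on the panthor_hw ops table introduced above: supporting a further architecture only needs a new panthor_hw instance plus a panthor_hw_match[] entry, which panthor_hw_bind_device() selects from GPU_ARCH_MAJOR(gpu_id). Everything below is hypothetical and only shows the shape of such an addition.

        /* Hypothetical arch 15 binding, reusing the PWR_CONTROL-based ops. */
        static struct panthor_hw panthor_hw_arch_v15 = {
                .ops = {
                        .soft_reset = panthor_pwr_reset_soft,
                        .l2_power_off = panthor_pwr_l2_power_off,
                        .l2_power_on = panthor_pwr_l2_power_on,
                },
        };

        /* ...and the matching entry to append to panthor_hw_match[]:
         * { .arch_min = 15, .arch_max = 15, .hwdev = &panthor_hw_arch_v15 },
         */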
*/ @@ -917,10 +904,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; struct io_pgtable_ops *ops = vm->pgtbl_ops; + u64 start_iova = iova; u64 offset = 0; - drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); - while (offset < size) { size_t unmapped_sz = 0, pgcount; size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); @@ -935,6 +921,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) panthor_vm_flush_range(vm, iova, offset + unmapped_sz); return -EINVAL; } + + drm_dbg(&ptdev->base, + "unmap: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pgcnt=%zu, pgsz=%zu", + vm->as.id, start_iova, size, iova + offset, + unmapped_sz / pgsize, pgsize); + offset += unmapped_sz; } @@ -950,6 +942,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, struct scatterlist *sgl; struct io_pgtable_ops *ops = vm->pgtbl_ops; u64 start_iova = iova; + u64 start_size = size; int ret; if (!size) @@ -969,15 +962,18 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, len = min_t(size_t, len, size); size -= len; - drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", - vm->as.id, iova, &paddr, len); - while (len) { size_t pgcount, mapped = 0; size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, GFP_KERNEL, &mapped); + + drm_dbg(&ptdev->base, + "map: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pa=%pad, pgcnt=%zu, pgsz=%zu", + vm->as.id, start_iova, start_size, iova, &paddr, + mapped / pgsize, pgsize); + iova += mapped; paddr += mapped; len -= mapped; @@ -1081,47 +1077,18 @@ void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) mutex_unlock(&vm->mm_lock); } -static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) +static void panthor_vm_bo_free(struct drm_gpuvm_bo *vm_bo) { struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); - struct drm_gpuvm *vm = vm_bo->vm; - bool unpin; - /* We must retain the GEM before calling drm_gpuvm_bo_put(), - * otherwise the mutex might be destroyed while we hold it. - * Same goes for the VM, since we take the VM resv lock. - */ - drm_gem_object_get(&bo->base.base); - drm_gpuvm_get(vm); - - /* We take the resv lock to protect against concurrent accesses to the - * gpuvm evicted/extobj lists that are modified in - * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put() - * releases sthe last vm_bo reference. - * We take the BO GPUVA list lock to protect the vm_bo removal from the - * GEM vm_bo list. - */ - dma_resv_lock(drm_gpuvm_resv(vm), NULL); - mutex_lock(&bo->base.base.gpuva.lock); - unpin = drm_gpuvm_bo_put(vm_bo); - mutex_unlock(&bo->base.base.gpuva.lock); - dma_resv_unlock(drm_gpuvm_resv(vm)); - - /* If the vm_bo object was destroyed, release the pin reference that - * was hold by this object. 
- */ - if (unpin && !drm_gem_is_imported(&bo->base.base)) + if (!drm_gem_is_imported(&bo->base.base)) drm_gem_shmem_unpin(&bo->base); - - drm_gpuvm_put(vm); - drm_gem_object_put(&bo->base.base); + kfree(vm_bo); } static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, struct panthor_vm *vm) { - struct panthor_vma *vma, *tmp_vma; - u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - op_ctx->rsvd_page_tables.ptr; @@ -1134,16 +1101,26 @@ static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, kfree(op_ctx->rsvd_page_tables.pages); if (op_ctx->map.vm_bo) - panthor_vm_bo_put(op_ctx->map.vm_bo); + drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo); for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) kfree(op_ctx->preallocated_vmas[i]); - list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { - list_del(&vma->node); - panthor_vm_bo_put(vma->base.vm_bo); - kfree(vma); + drm_gpuvm_bo_deferred_cleanup(&vm->base); +} + +static void +panthor_vm_op_ctx_return_vma(struct panthor_vm_op_ctx *op_ctx, + struct panthor_vma *vma) +{ + for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { + if (!op_ctx->preallocated_vmas[i]) { + op_ctx->preallocated_vmas[i] = vma; + return; + } } + + WARN_ON_ONCE(1); } static struct panthor_vma * @@ -1236,7 +1213,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, return -EINVAL; memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->returned_vmas); op_ctx->flags = flags; op_ctx->va.range = size; op_ctx->va.addr = va; @@ -1247,7 +1223,9 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, if (!drm_gem_is_imported(&bo->base.base)) { /* Pre-reserve the BO pages, so the map operation doesn't have to - * allocate. + * allocate. This pin is dropped in panthor_vm_bo_free(), so + * once we have successfully called drm_gpuvm_bo_create(), + * GPUVM will take care of dropping the pin for us. */ ret = drm_gem_shmem_pin(&bo->base); if (ret) @@ -1286,16 +1264,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, mutex_unlock(&bo->base.base.gpuva.lock); dma_resv_unlock(panthor_vm_resv(vm)); - /* If the a vm_bo for this <VM,BO> combination exists, it already - * retains a pin ref, and we can release the one we took earlier. - * - * If our pre-allocated vm_bo is picked, it now retains the pin ref, - * which will be released in panthor_vm_bo_put(). - */ - if (preallocated_vm_bo != op_ctx->map.vm_bo && - !drm_gem_is_imported(&bo->base.base)) - drm_gem_shmem_unpin(&bo->base); - op_ctx->map.bo_offset = offset; /* L1, L2 and L3 page tables. 
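[Editor's sketch] A condensed view of the deferred GPUVM BO release scheme this rework switches to, assuming the drm_gpuvm deferred-cleanup API the series builds on: puts issued from reclaim- or signalling-sensitive paths are queued with drm_gpuvm_bo_put_deferred(), and drm_gpuvm_bo_deferred_cleanup() later runs the driver's vm_bo_free() callback, which is where the shmem pin taken at map-prepare time is finally dropped.

        /* Sketch only: release a mapping's vm_bo without freeing it in place. */
        static void example_release_map_ctx(struct panthor_vm_op_ctx *op_ctx,
                                            struct panthor_vm *vm)
        {
                if (op_ctx->map.vm_bo)
                        drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);

                /* Invokes panthor_vm_bo_free() for every vm_bo queued above. */
                drm_gpuvm_bo_deferred_cleanup(&vm->base);
        }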
@@ -1343,7 +1311,6 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, int ret; memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->returned_vmas); op_ctx->va.range = size; op_ctx->va.addr = va; op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; @@ -1391,7 +1358,6 @@ static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx struct panthor_vm *vm) { memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->returned_vmas); op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; } @@ -2037,26 +2003,13 @@ static void panthor_vma_link(struct panthor_vm *vm, mutex_lock(&bo->base.base.gpuva.lock); drm_gpuva_link(&vma->base, vm_bo); - drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); mutex_unlock(&bo->base.base.gpuva.lock); } -static void panthor_vma_unlink(struct panthor_vm *vm, - struct panthor_vma *vma) +static void panthor_vma_unlink(struct panthor_vma *vma) { - struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); - struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); - - mutex_lock(&bo->base.base.gpuva.lock); - drm_gpuva_unlink(&vma->base); - mutex_unlock(&bo->base.base.gpuva.lock); - - /* drm_gpuva_unlink() release the vm_bo, but we manually retained it - * when entering this function, so we can implement deferred VMA - * destruction. Re-assign it here. - */ - vma->base.vm_bo = vm_bo; - list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); + drm_gpuva_unlink_defer(&vma->base); + kfree(vma); } static void panthor_vma_init(struct panthor_vma *vma, u32 flags) @@ -2085,15 +2038,17 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), op_ctx->map.sgt, op->map.gem.offset, op->map.va.range); - if (ret) + if (ret) { + panthor_vm_op_ctx_return_vma(op_ctx, vma); return ret; + } - /* Ref owned by the mapping now, clear the obj field so we don't release the - * pinning/obj ref behind GPUVA's back. - */ drm_gpuva_map(&vm->base, &vma->base, &op->map); panthor_vma_link(vm, vma, op_ctx->map.vm_bo); + + drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo); op_ctx->map.vm_bo = NULL; + return 0; } @@ -2132,16 +2087,14 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, * owned by the old mapping which will be released when this * mapping is destroyed, we need to grab a ref here. */ - panthor_vma_link(vm, prev_vma, - drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); + panthor_vma_link(vm, prev_vma, op->remap.unmap->va->vm_bo); } if (next_vma) { - panthor_vma_link(vm, next_vma, - drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); + panthor_vma_link(vm, next_vma, op->remap.unmap->va->vm_bo); } - panthor_vma_unlink(vm, unmap_vma); + panthor_vma_unlink(unmap_vma); return 0; } @@ -2158,12 +2111,13 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, return ret; drm_gpuva_unmap(&op->unmap); - panthor_vma_unlink(vm, unmap_vma); + panthor_vma_unlink(unmap_vma); return 0; } static const struct drm_gpuvm_ops panthor_gpuvm_ops = { .vm_free = panthor_vm_free, + .vm_bo_free = panthor_vm_bo_free, .sm_step_map = panthor_gpuva_sm_step_map, .sm_step_remap = panthor_gpuva_sm_step_remap, .sm_step_unmap = panthor_gpuva_sm_step_unmap, diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c new file mode 100644 index 000000000000..57cfc7ce715b --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_pwr.c @@ -0,0 +1,549 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2025 ARM Limited. 
All rights reserved. */ + +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/cleanup.h> +#include <linux/iopoll.h> +#include <linux/wait.h> + +#include <drm/drm_managed.h> +#include <drm/drm_print.h> + +#include "panthor_device.h" +#include "panthor_hw.h" +#include "panthor_pwr.h" +#include "panthor_regs.h" + +#define PWR_INTERRUPTS_MASK \ + (PWR_IRQ_POWER_CHANGED_SINGLE | \ + PWR_IRQ_POWER_CHANGED_ALL | \ + PWR_IRQ_DELEGATION_CHANGED | \ + PWR_IRQ_RESET_COMPLETED | \ + PWR_IRQ_RETRACT_COMPLETED | \ + PWR_IRQ_INSPECT_COMPLETED | \ + PWR_IRQ_COMMAND_NOT_ALLOWED | \ + PWR_IRQ_COMMAND_INVALID) + +#define PWR_ALL_CORES_MASK GENMASK_U64(63, 0) + +#define PWR_DOMAIN_MAX_BITS 16 + +#define PWR_TRANSITION_TIMEOUT_US (2ULL * USEC_PER_SEC) + +#define PWR_RETRACT_TIMEOUT_US (2ULL * USEC_PER_MSEC) + +#define PWR_RESET_TIMEOUT_MS 500 + +/** + * struct panthor_pwr - PWR_CONTROL block management data. + */ +struct panthor_pwr { + /** @irq: PWR irq. */ + struct panthor_irq irq; + + /** @reqs_lock: Lock protecting access to pending_reqs. */ + spinlock_t reqs_lock; + + /** @pending_reqs: Pending PWR requests. */ + u32 pending_reqs; + + /** @reqs_acked: PWR request wait queue. */ + wait_queue_head_t reqs_acked; +}; + +static void panthor_pwr_irq_handler(struct panthor_device *ptdev, u32 status) +{ + spin_lock(&ptdev->pwr->reqs_lock); + gpu_write(ptdev, PWR_INT_CLEAR, status); + + if (unlikely(status & PWR_IRQ_COMMAND_NOT_ALLOWED)) + drm_err(&ptdev->base, "PWR_IRQ: COMMAND_NOT_ALLOWED"); + + if (unlikely(status & PWR_IRQ_COMMAND_INVALID)) + drm_err(&ptdev->base, "PWR_IRQ: COMMAND_INVALID"); + + if (status & ptdev->pwr->pending_reqs) { + ptdev->pwr->pending_reqs &= ~status; + wake_up_all(&ptdev->pwr->reqs_acked); + } + spin_unlock(&ptdev->pwr->reqs_lock); +} +PANTHOR_IRQ_HANDLER(pwr, PWR, panthor_pwr_irq_handler); + +static void panthor_pwr_write_command(struct panthor_device *ptdev, u32 command, u64 args) +{ + if (args) + gpu_write64(ptdev, PWR_CMDARG, args); + + gpu_write(ptdev, PWR_COMMAND, command); +} + +static bool reset_irq_raised(struct panthor_device *ptdev) +{ + return gpu_read(ptdev, PWR_INT_RAWSTAT) & PWR_IRQ_RESET_COMPLETED; +} + +static bool reset_pending(struct panthor_device *ptdev) +{ + return (ptdev->pwr->pending_reqs & PWR_IRQ_RESET_COMPLETED); +} + +static int panthor_pwr_reset(struct panthor_device *ptdev, u32 reset_cmd) +{ + scoped_guard(spinlock_irqsave, &ptdev->pwr->reqs_lock) { + if (reset_pending(ptdev)) { + drm_WARN(&ptdev->base, 1, "Reset already pending"); + } else { + ptdev->pwr->pending_reqs |= PWR_IRQ_RESET_COMPLETED; + gpu_write(ptdev, PWR_INT_CLEAR, PWR_IRQ_RESET_COMPLETED); + panthor_pwr_write_command(ptdev, reset_cmd, 0); + } + } + + if (!wait_event_timeout(ptdev->pwr->reqs_acked, !reset_pending(ptdev), + msecs_to_jiffies(PWR_RESET_TIMEOUT_MS))) { + guard(spinlock_irqsave)(&ptdev->pwr->reqs_lock); + + if (reset_pending(ptdev) && !reset_irq_raised(ptdev)) { + drm_err(&ptdev->base, "RESET timed out (0x%x)", reset_cmd); + return -ETIMEDOUT; + } + + ptdev->pwr->pending_reqs &= ~PWR_IRQ_RESET_COMPLETED; + } + + return 0; +} + +static const char *get_domain_name(u8 domain) +{ + switch (domain) { + case PWR_COMMAND_DOMAIN_L2: + return "L2"; + case PWR_COMMAND_DOMAIN_TILER: + return "Tiler"; + case PWR_COMMAND_DOMAIN_SHADER: + return "Shader"; + case PWR_COMMAND_DOMAIN_BASE: + return "Base"; + case PWR_COMMAND_DOMAIN_STACK: + return "Stack"; + } + return "Unknown"; +} + +static u32 get_domain_base(u8 domain) +{ + switch (domain) { + case 
PWR_COMMAND_DOMAIN_L2: + return PWR_L2_PRESENT; + case PWR_COMMAND_DOMAIN_TILER: + return PWR_TILER_PRESENT; + case PWR_COMMAND_DOMAIN_SHADER: + return PWR_SHADER_PRESENT; + case PWR_COMMAND_DOMAIN_BASE: + return PWR_BASE_PRESENT; + case PWR_COMMAND_DOMAIN_STACK: + return PWR_STACK_PRESENT; + } + return 0; +} + +static u32 get_domain_ready_reg(u32 domain) +{ + return get_domain_base(domain) + (PWR_L2_READY - PWR_L2_PRESENT); +} + +static u32 get_domain_pwrtrans_reg(u32 domain) +{ + return get_domain_base(domain) + (PWR_L2_PWRTRANS - PWR_L2_PRESENT); +} + +static bool is_valid_domain(u32 domain) +{ + return get_domain_base(domain) != 0; +} + +static bool has_rtu(struct panthor_device *ptdev) +{ + return ptdev->gpu_info.gpu_features & GPU_FEATURES_RAY_TRAVERSAL; +} + +static u8 get_domain_subdomain(struct panthor_device *ptdev, u32 domain) +{ + if (domain == PWR_COMMAND_DOMAIN_SHADER && has_rtu(ptdev)) + return PWR_COMMAND_SUBDOMAIN_RTU; + + return 0; +} + +static int panthor_pwr_domain_wait_transition(struct panthor_device *ptdev, u32 domain, + u32 timeout_us) +{ + u32 pwrtrans_reg = get_domain_pwrtrans_reg(domain); + u64 val; + int ret = 0; + + ret = gpu_read64_poll_timeout(ptdev, pwrtrans_reg, val, !(PWR_ALL_CORES_MASK & val), 100, + timeout_us); + if (ret) { + drm_err(&ptdev->base, "%s domain power in transition, pwrtrans(0x%llx)", + get_domain_name(domain), val); + return ret; + } + + return 0; +} + +static void panthor_pwr_debug_info_show(struct panthor_device *ptdev) +{ + drm_info(&ptdev->base, "GPU_FEATURES: 0x%016llx", gpu_read64(ptdev, GPU_FEATURES)); + drm_info(&ptdev->base, "PWR_STATUS: 0x%016llx", gpu_read64(ptdev, PWR_STATUS)); + drm_info(&ptdev->base, "L2_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_L2_PRESENT)); + drm_info(&ptdev->base, "L2_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_L2_PWRTRANS)); + drm_info(&ptdev->base, "L2_READY: 0x%016llx", gpu_read64(ptdev, PWR_L2_READY)); + drm_info(&ptdev->base, "TILER_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_TILER_PRESENT)); + drm_info(&ptdev->base, "TILER_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_TILER_PWRTRANS)); + drm_info(&ptdev->base, "TILER_READY: 0x%016llx", gpu_read64(ptdev, PWR_TILER_READY)); + drm_info(&ptdev->base, "SHADER_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_PRESENT)); + drm_info(&ptdev->base, "SHADER_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_PWRTRANS)); + drm_info(&ptdev->base, "SHADER_READY: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_READY)); +} + +static int panthor_pwr_domain_transition(struct panthor_device *ptdev, u32 cmd, u32 domain, + u64 mask, u32 timeout_us) +{ + u32 ready_reg = get_domain_ready_reg(domain); + u32 pwr_cmd = PWR_COMMAND_DEF(cmd, domain, get_domain_subdomain(ptdev, domain)); + u64 expected_val = 0; + u64 val; + int ret = 0; + + if (drm_WARN_ON(&ptdev->base, !is_valid_domain(domain))) + return -EINVAL; + + switch (cmd) { + case PWR_COMMAND_POWER_DOWN: + expected_val = 0; + break; + case PWR_COMMAND_POWER_UP: + expected_val = mask; + break; + default: + drm_err(&ptdev->base, "Invalid power domain transition command (0x%x)", cmd); + return -EINVAL; + } + + ret = panthor_pwr_domain_wait_transition(ptdev, domain, timeout_us); + if (ret) + return ret; + + /* domain already in target state, return early */ + if ((gpu_read64(ptdev, ready_reg) & mask) == expected_val) + return 0; + + panthor_pwr_write_command(ptdev, pwr_cmd, mask); + + ret = gpu_read64_poll_timeout(ptdev, ready_reg, val, (mask & val) == expected_val, 100, + timeout_us); + if (ret) { + drm_err(&ptdev->base, + "timeout 
waiting on %s power domain transition, cmd(0x%x), arg(0x%llx)", + get_domain_name(domain), pwr_cmd, mask); + panthor_pwr_debug_info_show(ptdev); + return ret; + } + + return 0; +} + +#define panthor_pwr_domain_power_off(__ptdev, __domain, __mask, __timeout_us) \ + panthor_pwr_domain_transition(__ptdev, PWR_COMMAND_POWER_DOWN, __domain, __mask, \ + __timeout_us) + +#define panthor_pwr_domain_power_on(__ptdev, __domain, __mask, __timeout_us) \ + panthor_pwr_domain_transition(__ptdev, PWR_COMMAND_POWER_UP, __domain, __mask, __timeout_us) + +/** + * retract_domain() - Retract control of a domain from MCU + * @ptdev: Device. + * @domain: Domain to retract the control + * + * Retracting L2 domain is not expected since it won't be delegated. + * + * Return: 0 on success or retracted already. + * -EPERM if domain is L2. + * A negative error code otherwise. + */ +static int retract_domain(struct panthor_device *ptdev, u32 domain) +{ + const u32 pwr_cmd = PWR_COMMAND_DEF(PWR_COMMAND_RETRACT, domain, 0); + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + const u64 delegated_mask = PWR_STATUS_DOMAIN_DELEGATED(domain); + const u64 allow_mask = PWR_STATUS_DOMAIN_ALLOWED(domain); + u64 val; + int ret; + + if (drm_WARN_ON(&ptdev->base, domain == PWR_COMMAND_DOMAIN_L2)) + return -EPERM; + + ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, !(PWR_STATUS_RETRACT_PENDING & val), + 0, PWR_RETRACT_TIMEOUT_US); + if (ret) { + drm_err(&ptdev->base, "%s domain retract pending", get_domain_name(domain)); + return ret; + } + + if (!(pwr_status & delegated_mask)) { + drm_dbg(&ptdev->base, "%s domain already retracted", get_domain_name(domain)); + return 0; + } + + panthor_pwr_write_command(ptdev, pwr_cmd, 0); + + /* + * On successful retraction + * allow-flag will be set with delegated-flag being cleared. + */ + ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, + ((delegated_mask | allow_mask) & val) == allow_mask, 10, + PWR_TRANSITION_TIMEOUT_US); + if (ret) { + drm_err(&ptdev->base, "Retracting %s domain timeout, cmd(0x%x)", + get_domain_name(domain), pwr_cmd); + return ret; + } + + return 0; +} + +/** + * delegate_domain() - Delegate control of a domain to MCU + * @ptdev: Device. + * @domain: Domain to delegate the control + * + * Delegating L2 domain is prohibited. + * + * Return: + * * 0 on success or delegated already. + * * -EPERM if domain is L2. + * * A negative error code otherwise. + */ +static int delegate_domain(struct panthor_device *ptdev, u32 domain) +{ + const u32 pwr_cmd = PWR_COMMAND_DEF(PWR_COMMAND_DELEGATE, domain, 0); + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + const u64 allow_mask = PWR_STATUS_DOMAIN_ALLOWED(domain); + const u64 delegated_mask = PWR_STATUS_DOMAIN_DELEGATED(domain); + u64 val; + int ret; + + if (drm_WARN_ON(&ptdev->base, domain == PWR_COMMAND_DOMAIN_L2)) + return -EPERM; + + /* Already delegated, exit early */ + if (pwr_status & delegated_mask) + return 0; + + /* Check if the command is allowed before delegating. */ + if (!(pwr_status & allow_mask)) { + drm_warn(&ptdev->base, "Delegating %s domain not allowed", get_domain_name(domain)); + return -EPERM; + } + + ret = panthor_pwr_domain_wait_transition(ptdev, domain, PWR_TRANSITION_TIMEOUT_US); + if (ret) + return ret; + + panthor_pwr_write_command(ptdev, pwr_cmd, 0); + + /* + * On successful delegation + * allow-flag will be cleared with delegated-flag being set. 
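[Editor's sketch] A worked example of the PWR_STATUS layout that retract_domain()/delegate_domain() rely on (bit positions from panthor_regs.h at the end of this diff): for a domain id D, bit D reports that host control is allowed and bit D + 16 that the domain is delegated to the MCU, so for the shader domain (id 8) these are bits 8 and 24.

        /* Example query: is the shader domain currently delegated to the MCU? */
        static bool example_shader_delegated(struct panthor_device *ptdev)
        {
                u64 status = gpu_read64(ptdev, PWR_STATUS);

                return status & PWR_STATUS_DOMAIN_DELEGATED(PWR_COMMAND_DOMAIN_SHADER);
        }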
+ */ + ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, + ((delegated_mask | allow_mask) & val) == delegated_mask, + 10, PWR_TRANSITION_TIMEOUT_US); + if (ret) { + drm_err(&ptdev->base, "Delegating %s domain timeout, cmd(0x%x)", + get_domain_name(domain), pwr_cmd); + return ret; + } + + return 0; +} + +static int panthor_pwr_delegate_domains(struct panthor_device *ptdev) +{ + int ret; + + if (!ptdev->pwr) + return 0; + + ret = delegate_domain(ptdev, PWR_COMMAND_DOMAIN_SHADER); + if (ret) + return ret; + + ret = delegate_domain(ptdev, PWR_COMMAND_DOMAIN_TILER); + if (ret) + goto err_retract_shader; + + return 0; + +err_retract_shader: + retract_domain(ptdev, PWR_COMMAND_DOMAIN_SHADER); + + return ret; +} + +/** + * panthor_pwr_domain_force_off - Forcefully power down a domain. + * @ptdev: Device. + * @domain: Domain to forcefully power down. + * + * This function will attempt to retract and power off the requested power + * domain. However, if retraction fails, the operation is aborted. If power off + * fails, the domain will remain retracted and under the host control. + * + * Return: 0 on success or a negative error code on failure. + */ +static int panthor_pwr_domain_force_off(struct panthor_device *ptdev, u32 domain) +{ + const u64 domain_ready = gpu_read64(ptdev, get_domain_ready_reg(domain)); + int ret; + + /* Domain already powered down, early exit. */ + if (!domain_ready) + return 0; + + /* Domain has to be in host control to issue power off command. */ + ret = retract_domain(ptdev, domain); + if (ret) + return ret; + + return panthor_pwr_domain_power_off(ptdev, domain, domain_ready, PWR_TRANSITION_TIMEOUT_US); +} + +void panthor_pwr_unplug(struct panthor_device *ptdev) +{ + unsigned long flags; + + if (!ptdev->pwr) + return; + + /* Make sure the IRQ handler is not running after that point. */ + panthor_pwr_irq_suspend(&ptdev->pwr->irq); + + /* Wake-up all waiters. */ + spin_lock_irqsave(&ptdev->pwr->reqs_lock, flags); + ptdev->pwr->pending_reqs = 0; + wake_up_all(&ptdev->pwr->reqs_acked); + spin_unlock_irqrestore(&ptdev->pwr->reqs_lock, flags); +} + +int panthor_pwr_init(struct panthor_device *ptdev) +{ + struct panthor_pwr *pwr; + int err, irq; + + if (!panthor_hw_has_pwr_ctrl(ptdev)) + return 0; + + pwr = drmm_kzalloc(&ptdev->base, sizeof(*pwr), GFP_KERNEL); + if (!pwr) + return -ENOMEM; + + spin_lock_init(&pwr->reqs_lock); + init_waitqueue_head(&pwr->reqs_acked); + ptdev->pwr = pwr; + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); + if (irq < 0) + return irq; + + err = panthor_request_pwr_irq(ptdev, &pwr->irq, irq, PWR_INTERRUPTS_MASK); + if (err) + return err; + + return 0; +} + +int panthor_pwr_reset_soft(struct panthor_device *ptdev) +{ + if (!(gpu_read64(ptdev, PWR_STATUS) & PWR_STATUS_ALLOW_SOFT_RESET)) { + drm_err(&ptdev->base, "RESET_SOFT not allowed"); + return -EOPNOTSUPP; + } + + return panthor_pwr_reset(ptdev, PWR_COMMAND_RESET_SOFT); +} + +void panthor_pwr_l2_power_off(struct panthor_device *ptdev) +{ + const u64 l2_allow_mask = PWR_STATUS_DOMAIN_ALLOWED(PWR_COMMAND_DOMAIN_L2); + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + + /* Abort if L2 power off constraints are not satisfied */ + if (!(pwr_status & l2_allow_mask)) { + drm_warn(&ptdev->base, "Power off L2 domain not allowed"); + return; + } + + /* It is expected that when halting the MCU, it would power down its + * delegated domains. 
However, an unresponsive or hung MCU may not do + * so, which is why we need to check and retract the domains back into + * host control to be powered down in the right order before powering + * down the L2. + */ + if (panthor_pwr_domain_force_off(ptdev, PWR_COMMAND_DOMAIN_TILER)) + return; + + if (panthor_pwr_domain_force_off(ptdev, PWR_COMMAND_DOMAIN_SHADER)) + return; + + panthor_pwr_domain_power_off(ptdev, PWR_COMMAND_DOMAIN_L2, ptdev->gpu_info.l2_present, + PWR_TRANSITION_TIMEOUT_US); +} + +int panthor_pwr_l2_power_on(struct panthor_device *ptdev) +{ + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + const u64 l2_allow_mask = PWR_STATUS_DOMAIN_ALLOWED(PWR_COMMAND_DOMAIN_L2); + int ret; + + if ((pwr_status & l2_allow_mask) == 0) { + drm_warn(&ptdev->base, "Power on L2 domain not allowed"); + return -EPERM; + } + + ret = panthor_pwr_domain_power_on(ptdev, PWR_COMMAND_DOMAIN_L2, ptdev->gpu_info.l2_present, + PWR_TRANSITION_TIMEOUT_US); + if (ret) + return ret; + + /* Delegate control of the shader and tiler power domains to the MCU as + * it can better manage which shader/tiler cores need to be powered up + * or can be powered down based on currently running jobs. + * + * If the shader and tiler domains are already delegated to the MCU, + * this call just returns early. + */ + return panthor_pwr_delegate_domains(ptdev); +} + +void panthor_pwr_suspend(struct panthor_device *ptdev) +{ + if (!ptdev->pwr) + return; + + panthor_pwr_irq_suspend(&ptdev->pwr->irq); +} + +void panthor_pwr_resume(struct panthor_device *ptdev) +{ + if (!ptdev->pwr) + return; + + panthor_pwr_irq_resume(&ptdev->pwr->irq, PWR_INTERRUPTS_MASK); +} diff --git a/drivers/gpu/drm/panthor/panthor_pwr.h b/drivers/gpu/drm/panthor/panthor_pwr.h new file mode 100644 index 000000000000..adf1f6136abc --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_pwr.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2025 ARM Limited. All rights reserved.
*/ + +#ifndef __PANTHOR_PWR_H__ +#define __PANTHOR_PWR_H__ + +struct panthor_device; + +void panthor_pwr_unplug(struct panthor_device *ptdev); + +int panthor_pwr_init(struct panthor_device *ptdev); + +int panthor_pwr_reset_soft(struct panthor_device *ptdev); + +void panthor_pwr_l2_power_off(struct panthor_device *ptdev); + +int panthor_pwr_l2_power_on(struct panthor_device *ptdev); + +void panthor_pwr_suspend(struct panthor_device *ptdev); + +void panthor_pwr_resume(struct panthor_device *ptdev); + +#endif /* __PANTHOR_PWR_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index 8bee76d01bf8..08bf06c452d6 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -64,6 +64,8 @@ #define GPU_FAULT_STATUS 0x3C #define GPU_FAULT_ADDR 0x40 +#define GPU_L2_CONFIG 0x48 +#define GPU_L2_CONFIG_ASN_HASH_ENABLE BIT(24) #define GPU_PWR_KEY 0x50 #define GPU_PWR_KEY_UNLOCK 0x2968A819 @@ -72,6 +74,7 @@ #define GPU_FEATURES 0x60 #define GPU_FEATURES_RAY_INTERSECTION BIT(2) +#define GPU_FEATURES_RAY_TRAVERSAL BIT(5) #define GPU_TIMESTAMP_OFFSET 0x88 #define GPU_CYCLE_COUNT 0x90 @@ -110,6 +113,8 @@ #define GPU_REVID 0x280 +#define GPU_ASN_HASH(n) (0x2C0 + ((n) * 4)) + #define GPU_COHERENCY_FEATURES 0x300 #define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) @@ -205,4 +210,82 @@ #define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) #define CSF_GLB_DOORBELL_ID 0 +/* PWR Control registers */ + +#define PWR_CONTROL_BASE 0x800 +#define PWR_CTRL_REG(x) (PWR_CONTROL_BASE + (x)) + +#define PWR_INT_RAWSTAT PWR_CTRL_REG(0x0) +#define PWR_INT_CLEAR PWR_CTRL_REG(0x4) +#define PWR_INT_MASK PWR_CTRL_REG(0x8) +#define PWR_INT_STAT PWR_CTRL_REG(0xc) +#define PWR_IRQ_POWER_CHANGED_SINGLE BIT(0) +#define PWR_IRQ_POWER_CHANGED_ALL BIT(1) +#define PWR_IRQ_DELEGATION_CHANGED BIT(2) +#define PWR_IRQ_RESET_COMPLETED BIT(3) +#define PWR_IRQ_RETRACT_COMPLETED BIT(4) +#define PWR_IRQ_INSPECT_COMPLETED BIT(5) +#define PWR_IRQ_COMMAND_NOT_ALLOWED BIT(30) +#define PWR_IRQ_COMMAND_INVALID BIT(31) + +#define PWR_STATUS PWR_CTRL_REG(0x20) +#define PWR_STATUS_ALLOW_L2 BIT_U64(0) +#define PWR_STATUS_ALLOW_TILER BIT_U64(1) +#define PWR_STATUS_ALLOW_SHADER BIT_U64(8) +#define PWR_STATUS_ALLOW_BASE BIT_U64(14) +#define PWR_STATUS_ALLOW_STACK BIT_U64(15) +#define PWR_STATUS_DOMAIN_ALLOWED(x) BIT_U64(x) +#define PWR_STATUS_DELEGATED_L2 BIT_U64(16) +#define PWR_STATUS_DELEGATED_TILER BIT_U64(17) +#define PWR_STATUS_DELEGATED_SHADER BIT_U64(24) +#define PWR_STATUS_DELEGATED_BASE BIT_U64(30) +#define PWR_STATUS_DELEGATED_STACK BIT_U64(31) +#define PWR_STATUS_DELEGATED_SHIFT 16 +#define PWR_STATUS_DOMAIN_DELEGATED(x) BIT_U64((x) + PWR_STATUS_DELEGATED_SHIFT) +#define PWR_STATUS_ALLOW_SOFT_RESET BIT_U64(33) +#define PWR_STATUS_ALLOW_FAST_RESET BIT_U64(34) +#define PWR_STATUS_POWER_PENDING BIT_U64(41) +#define PWR_STATUS_RESET_PENDING BIT_U64(42) +#define PWR_STATUS_RETRACT_PENDING BIT_U64(43) +#define PWR_STATUS_INSPECT_PENDING BIT_U64(44) + +#define PWR_COMMAND PWR_CTRL_REG(0x28) +#define PWR_COMMAND_POWER_UP 0x10 +#define PWR_COMMAND_POWER_DOWN 0x11 +#define PWR_COMMAND_DELEGATE 0x20 +#define PWR_COMMAND_RETRACT 0x21 +#define PWR_COMMAND_RESET_SOFT 0x31 +#define PWR_COMMAND_RESET_FAST 0x32 +#define PWR_COMMAND_INSPECT 0xF0 +#define PWR_COMMAND_DOMAIN_L2 0 +#define PWR_COMMAND_DOMAIN_TILER 1 +#define PWR_COMMAND_DOMAIN_SHADER 8 +#define PWR_COMMAND_DOMAIN_BASE 14 +#define PWR_COMMAND_DOMAIN_STACK 15 +#define PWR_COMMAND_SUBDOMAIN_RTU BIT(0) +#define 
PWR_COMMAND_DEF(cmd, domain, subdomain) \ + (((subdomain) << 16) | ((domain) << 8) | (cmd)) + +#define PWR_CMDARG PWR_CTRL_REG(0x30) + +#define PWR_L2_PRESENT PWR_CTRL_REG(0x100) +#define PWR_L2_READY PWR_CTRL_REG(0x108) +#define PWR_L2_PWRTRANS PWR_CTRL_REG(0x110) +#define PWR_L2_PWRACTIVE PWR_CTRL_REG(0x118) +#define PWR_TILER_PRESENT PWR_CTRL_REG(0x140) +#define PWR_TILER_READY PWR_CTRL_REG(0x148) +#define PWR_TILER_PWRTRANS PWR_CTRL_REG(0x150) +#define PWR_TILER_PWRACTIVE PWR_CTRL_REG(0x158) +#define PWR_SHADER_PRESENT PWR_CTRL_REG(0x200) +#define PWR_SHADER_READY PWR_CTRL_REG(0x208) +#define PWR_SHADER_PWRTRANS PWR_CTRL_REG(0x210) +#define PWR_SHADER_PWRACTIVE PWR_CTRL_REG(0x218) +#define PWR_BASE_PRESENT PWR_CTRL_REG(0x380) +#define PWR_BASE_READY PWR_CTRL_REG(0x388) +#define PWR_BASE_PWRTRANS PWR_CTRL_REG(0x390) +#define PWR_BASE_PWRACTIVE PWR_CTRL_REG(0x398) +#define PWR_STACK_PRESENT PWR_CTRL_REG(0x3c0) +#define PWR_STACK_READY PWR_CTRL_REG(0x3c8) +#define PWR_STACK_PWRTRANS PWR_CTRL_REG(0x3d0) + #endif diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 3d1f57e3990f..b834123a6560 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -5,6 +5,7 @@ #include <drm/drm_exec.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include <drm/gpu_scheduler.h> #include <drm/panthor_drm.h> @@ -360,17 +361,23 @@ struct panthor_queue { /** @entity: DRM scheduling entity used for this queue. */ struct drm_sched_entity entity; - /** - * @remaining_time: Time remaining before the job timeout expires. - * - * The job timeout is suspended when the queue is not scheduled by the - * FW. Every time we suspend the timer, we need to save the remaining - * time so we can restore it later on. - */ - unsigned long remaining_time; + /** @name: DRM scheduler name for this queue. */ + char *name; - /** @timeout_suspended: True if the job timeout was suspended. */ - bool timeout_suspended; + /** @timeout: Queue timeout related fields. */ + struct { + /** @timeout.work: Work executed when a queue timeout occurs. */ + struct delayed_work work; + + /** + * @timeout.remaining: Time remaining before a queue timeout. + * + * When the timer is running, this value is set to MAX_SCHEDULE_TIMEOUT. + * When the timer is suspended, it's set to the time remaining when the + * timer was suspended. + */ + unsigned long remaining; + } timeout; /** * @doorbell_id: Doorbell assigned to this queue. @@ -895,11 +902,18 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * if (IS_ERR_OR_NULL(queue)) return; - drm_sched_entity_destroy(&queue->entity); + /* This should have been disabled before that point. 
*/ + drm_WARN_ON(&group->ptdev->base, + disable_delayed_work_sync(&queue->timeout.work)); + + if (queue->entity.fence_context) + drm_sched_entity_destroy(&queue->entity); if (queue->scheduler.ops) drm_sched_fini(&queue->scheduler); + kfree(queue->name); + panthor_queue_put_syncwait_obj(queue); panthor_kernel_bo_destroy(queue->ringbuf); @@ -1039,6 +1053,115 @@ group_unbind_locked(struct panthor_group *group) return 0; } +static bool +group_is_idle(struct panthor_group *group) +{ + struct panthor_device *ptdev = group->ptdev; + u32 inactive_queues; + + if (group->csg_id >= 0) + return ptdev->scheduler->csg_slots[group->csg_id].idle; + + inactive_queues = group->idle_queues | group->blocked_queues; + return hweight32(inactive_queues) == group->queue_count; +} + +static void +queue_reset_timeout_locked(struct panthor_queue *queue) +{ + lockdep_assert_held(&queue->fence_ctx.lock); + + if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT) { + mod_delayed_work(queue->scheduler.timeout_wq, + &queue->timeout.work, + msecs_to_jiffies(JOB_TIMEOUT_MS)); + } +} + +static bool +group_can_run(struct panthor_group *group) +{ + return group->state != PANTHOR_CS_GROUP_TERMINATED && + group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE && + !group->destroyed && group->fatal_queues == 0 && + !group->timedout; +} + +static bool +queue_timeout_is_suspended(struct panthor_queue *queue) +{ + /* When running, the remaining time is set to MAX_SCHEDULE_TIMEOUT. */ + return queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT; +} + +static void +queue_suspend_timeout_locked(struct panthor_queue *queue) +{ + unsigned long qtimeout, now; + struct panthor_group *group; + struct panthor_job *job; + bool timer_was_active; + + lockdep_assert_held(&queue->fence_ctx.lock); + + /* Already suspended, nothing to do. */ + if (queue_timeout_is_suspended(queue)) + return; + + job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs, + struct panthor_job, node); + group = job ? job->group : NULL; + + /* If the queue is blocked and the group is idle, we want the timer to + * keep running because the group can't be unblocked by other queues, + * so it has to come from an external source, and we want to timebox + * this external signalling. + */ + if (group && group_can_run(group) && + (group->blocked_queues & BIT(job->queue_idx)) && + group_is_idle(group)) + return; + + now = jiffies; + qtimeout = queue->timeout.work.timer.expires; + + /* Cancel the timer. */ + timer_was_active = cancel_delayed_work(&queue->timeout.work); + if (!timer_was_active || !job) + queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); + else if (time_after(qtimeout, now)) + queue->timeout.remaining = qtimeout - now; + else + queue->timeout.remaining = 0; + + if (WARN_ON_ONCE(queue->timeout.remaining > msecs_to_jiffies(JOB_TIMEOUT_MS))) + queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); +} + +static void +queue_suspend_timeout(struct panthor_queue *queue) +{ + spin_lock(&queue->fence_ctx.lock); + queue_suspend_timeout_locked(queue); + spin_unlock(&queue->fence_ctx.lock); +} + +static void +queue_resume_timeout(struct panthor_queue *queue) +{ + spin_lock(&queue->fence_ctx.lock); + + if (queue_timeout_is_suspended(queue)) { + mod_delayed_work(queue->scheduler.timeout_wq, + &queue->timeout.work, + queue->timeout.remaining); + + queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT; + } + + spin_unlock(&queue->fence_ctx.lock); +} + /** * cs_slot_prog_locked() - Program a queue slot * @ptdev: Device. 
@@ -1077,10 +1200,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) CS_IDLE_EMPTY | CS_STATE_MASK | CS_EXTRACT_EVENT); - if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) { - drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time); - queue->timeout_suspended = false; - } + if (queue->iface.input->insert != queue->iface.input->extract) + queue_resume_timeout(queue); } /** @@ -1107,14 +1228,7 @@ cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) CS_STATE_STOP, CS_STATE_MASK); - /* If the queue is blocked, we want to keep the timeout running, so - * we can detect unbounded waits and kill the group when that happens. - */ - if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) { - queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); - queue->timeout_suspended = true; - WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS)); - } + queue_suspend_timeout(queue); return 0; } @@ -1133,11 +1247,13 @@ csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id) { struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; struct panthor_fw_csg_iface *csg_iface; + u64 endpoint_req; lockdep_assert_held(&ptdev->scheduler->lock); csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); - csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28; + endpoint_req = panthor_fw_csg_endpoint_req_get(ptdev, csg_iface); + csg_slot->priority = CSG_EP_REQ_PRIORITY_GET(endpoint_req); } /** @@ -1297,6 +1413,7 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) struct panthor_csg_slot *csg_slot; struct panthor_group *group; u32 queue_mask = 0, i; + u64 endpoint_req; lockdep_assert_held(&ptdev->scheduler->lock); @@ -1323,10 +1440,12 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) csg_iface->input->allow_compute = group->compute_core_mask; csg_iface->input->allow_fragment = group->fragment_core_mask; csg_iface->input->allow_other = group->tiler_core_mask; - csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | - CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | - CSG_EP_REQ_TILER(group->max_tiler_cores) | - CSG_EP_REQ_PRIORITY(priority); + endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | + CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | + CSG_EP_REQ_TILER(group->max_tiler_cores) | + CSG_EP_REQ_PRIORITY(priority); + panthor_fw_csg_endpoint_req_set(ptdev, csg_iface, endpoint_req); + csg_iface->input->config = panthor_vm_as(group->vm); if (group->suspend_buf) @@ -1411,7 +1530,7 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev, fault = cs_iface->output->fault; info = cs_iface->output->fault_info; - if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) { + if (queue) { u64 cs_extract = queue->iface.output->extract; struct panthor_job *job; @@ -1909,28 +2028,6 @@ tick_ctx_is_full(const struct panthor_scheduler *sched, return ctx->group_count == sched->csg_slot_count; } -static bool -group_is_idle(struct panthor_group *group) -{ - struct panthor_device *ptdev = group->ptdev; - u32 inactive_queues; - - if (group->csg_id >= 0) - return ptdev->scheduler->csg_slots[group->csg_id].idle; - - inactive_queues = group->idle_queues | group->blocked_queues; - return hweight32(inactive_queues) == group->queue_count; -} - -static bool -group_can_run(struct panthor_group *group) -{ - return group->state 
!= PANTHOR_CS_GROUP_TERMINATED && - group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE && - !group->destroyed && group->fatal_queues == 0 && - !group->timedout; -} - static void tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx, @@ -2224,9 +2321,9 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c continue; } - panthor_fw_update_reqs(csg_iface, endpoint_req, - CSG_EP_REQ_PRIORITY(new_csg_prio), - CSG_EP_REQ_PRIORITY_MASK); + panthor_fw_csg_endpoint_req_update(ptdev, csg_iface, + CSG_EP_REQ_PRIORITY(new_csg_prio), + CSG_EP_REQ_PRIORITY_MASK); csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, CSG_ENDPOINT_CONFIG); @@ -2612,6 +2709,7 @@ static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) static void queue_stop(struct panthor_queue *queue, struct panthor_job *bad_job) { + disable_delayed_work_sync(&queue->timeout.work); drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL); } @@ -2623,6 +2721,7 @@ static void queue_start(struct panthor_queue *queue) list_for_each_entry(job, &queue->scheduler.pending_list, base.list) job->base.s_fence->parent = dma_fence_get(job->done_fence); + enable_delayed_work(&queue->timeout.work); drm_sched_start(&queue->scheduler, 0); } @@ -2689,7 +2788,6 @@ void panthor_sched_suspend(struct panthor_device *ptdev) { struct panthor_scheduler *sched = ptdev->scheduler; struct panthor_csg_slots_upd_ctx upd_ctx; - struct panthor_group *group; u32 suspended_slots; u32 i; @@ -2743,13 +2841,23 @@ void panthor_sched_suspend(struct panthor_device *ptdev) while (slot_mask) { u32 csg_id = ffs(slot_mask) - 1; struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; /* Terminate command timedout, but the soft-reset will * automatically terminate all active groups, so let's * force the state to halted here. */ - if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) - csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; + if (group->state != PANTHOR_CS_GROUP_TERMINATED) { + group->state = PANTHOR_CS_GROUP_TERMINATED; + + /* Reset the queue slots manually if the termination + * request failed. 
+ */ + for (i = 0; i < group->queue_count; i++) { + if (group->queues[i]) + cs_slot_reset_locked(ptdev, csg_id, i); + } + } slot_mask &= ~BIT(csg_id); } } @@ -2779,8 +2887,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev) for (i = 0; i < sched->csg_slot_count; i++) { struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; + struct panthor_group *group = csg_slot->group; - group = csg_slot->group; if (!group) continue; @@ -2909,35 +3017,47 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile) xa_unlock(&gpool->xa); } -static void group_sync_upd_work(struct work_struct *work) +static bool queue_check_job_completion(struct panthor_queue *queue) { - struct panthor_group *group = - container_of(work, struct panthor_group, sync_upd_work); + struct panthor_syncobj_64b *syncobj = NULL; struct panthor_job *job, *job_tmp; + bool cookie, progress = false; LIST_HEAD(done_jobs); - u32 queue_idx; - bool cookie; cookie = dma_fence_begin_signalling(); - for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { - struct panthor_queue *queue = group->queues[queue_idx]; - struct panthor_syncobj_64b *syncobj; + spin_lock(&queue->fence_ctx.lock); + list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { + if (!syncobj) { + struct panthor_group *group = job->group; - if (!queue) - continue; + syncobj = group->syncobjs->kmap + + (job->queue_idx * sizeof(*syncobj)); + } - syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); + if (syncobj->seqno < job->done_fence->seqno) + break; - spin_lock(&queue->fence_ctx.lock); - list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { - if (syncobj->seqno < job->done_fence->seqno) - break; + list_move_tail(&job->node, &done_jobs); + dma_fence_signal_locked(job->done_fence); + } - list_move_tail(&job->node, &done_jobs); - dma_fence_signal_locked(job->done_fence); - } - spin_unlock(&queue->fence_ctx.lock); + if (list_empty(&queue->fence_ctx.in_flight_jobs)) { + /* If we have no job left, we cancel the timer, and reset remaining + * time to its default so it can be restarted next time + * queue_resume_timeout() is called. + */ + queue_suspend_timeout_locked(queue); + + /* If there's no job pending, we consider it progress to avoid a + * spurious timeout if the timeout handler and the sync update + * handler raced. + */ + progress = true; + } else if (!list_empty(&done_jobs)) { + queue_reset_timeout_locked(queue); + progress = true; } + spin_unlock(&queue->fence_ctx.lock); dma_fence_end_signalling(cookie); list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { @@ -2947,6 +3067,27 @@ static void group_sync_upd_work(struct work_struct *work) panthor_job_put(&job->base); } + return progress; +} + +static void group_sync_upd_work(struct work_struct *work) +{ + struct panthor_group *group = + container_of(work, struct panthor_group, sync_upd_work); + u32 queue_idx; + bool cookie; + + cookie = dma_fence_begin_signalling(); + for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { + struct panthor_queue *queue = group->queues[queue_idx]; + + if (!queue) + continue; + + queue_check_job_completion(queue); + } + dma_fence_end_signalling(cookie); + group_put(group); } @@ -3194,17 +3335,6 @@ queue_run_job(struct drm_sched_job *sched_job) queue->iface.input->insert = job->ringbuf.end; if (group->csg_id < 0) { - /* If the queue is blocked, we want to keep the timeout running, so we - * can detect unbounded waits and kill the group when that happens. 
- * Otherwise, we suspend the timeout so the time we spend waiting for - * a CSG slot is not counted. - */ - if (!(group->blocked_queues & BIT(job->queue_idx)) && - !queue->timeout_suspended) { - queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); - queue->timeout_suspended = true; - } - group_schedule_locked(group, BIT(job->queue_idx)); } else { gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); @@ -3213,6 +3343,7 @@ queue_run_job(struct drm_sched_job *sched_job) pm_runtime_get(ptdev->base.dev); sched->pm.has_ref = true; } + queue_resume_timeout(queue); panthor_devfreq_record_busy(sched->ptdev); } @@ -3262,7 +3393,6 @@ queue_timedout_job(struct drm_sched_job *sched_job) mutex_unlock(&sched->lock); queue_start(queue); - return DRM_GPU_SCHED_STAT_RESET; } @@ -3305,11 +3435,23 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev, return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64)); } +static void queue_timeout_work(struct work_struct *work) +{ + struct panthor_queue *queue = container_of(work, struct panthor_queue, + timeout.work.work); + bool progress; + + progress = queue_check_job_completion(queue); + if (!progress) + drm_sched_fault(&queue->scheduler); +} + static struct panthor_queue * group_create_queue(struct panthor_group *group, - const struct drm_panthor_queue_create *args) + const struct drm_panthor_queue_create *args, + u64 drm_client_id, u32 gid, u32 qid) { - const struct drm_sched_init_args sched_args = { + struct drm_sched_init_args sched_args = { .ops = &panthor_queue_sched_ops, .submit_wq = group->ptdev->scheduler->wq, .num_rqs = 1, @@ -3320,9 +3462,8 @@ group_create_queue(struct panthor_group *group, * their profiling status. */ .credit_limit = args->ringbuf_size / sizeof(u64), - .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), + .timeout = MAX_SCHEDULE_TIMEOUT, .timeout_wq = group->ptdev->reset.wq, - .name = "panthor-queue", .dev = group->ptdev->base.dev, }; struct drm_gpu_scheduler *drm_sched; @@ -3343,6 +3484,8 @@ group_create_queue(struct panthor_group *group, if (!queue) return ERR_PTR(-ENOMEM); + queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); + INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work); queue->fence_ctx.id = dma_fence_context_alloc(1); spin_lock_init(&queue->fence_ctx.lock); INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); @@ -3397,12 +3540,23 @@ group_create_queue(struct panthor_group *group, if (ret) goto err_free_queue; + /* assign a unique name */ + queue->name = kasprintf(GFP_KERNEL, "panthor-queue-%llu-%u-%u", drm_client_id, gid, qid); + if (!queue->name) { + ret = -ENOMEM; + goto err_free_queue; + } + + sched_args.name = queue->name; + ret = drm_sched_init(&queue->scheduler, &sched_args); if (ret) goto err_free_queue; drm_sched = &queue->scheduler; ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL); + if (ret) + goto err_free_queue; return queue; @@ -3446,7 +3600,8 @@ static void add_group_kbo_sizes(struct panthor_device *ptdev, int panthor_group_create(struct panthor_file *pfile, const struct drm_panthor_group_create *group_args, - const struct drm_panthor_queue_create *queue_args) + const struct drm_panthor_queue_create *queue_args, + u64 drm_client_id) { struct panthor_device *ptdev = pfile->ptdev; struct panthor_group_pool *gpool = pfile->groups; @@ -3539,12 +3694,16 @@ int panthor_group_create(struct panthor_file *pfile, memset(group->syncobjs->kmap, 0, group_args->queues.count * sizeof(struct panthor_syncobj_64b)); + ret = 
xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); + if (ret) + goto err_put_group; + for (i = 0; i < group_args->queues.count; i++) { - group->queues[i] = group_create_queue(group, &queue_args[i]); + group->queues[i] = group_create_queue(group, &queue_args[i], drm_client_id, gid, i); if (IS_ERR(group->queues[i])) { ret = PTR_ERR(group->queues[i]); group->queues[i] = NULL; - goto err_put_group; + goto err_erase_gid; } group->queue_count++; @@ -3552,10 +3711,6 @@ int panthor_group_create(struct panthor_file *pfile, group->idle_queues = GENMASK(group->queue_count - 1, 0); - ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); - if (ret) - goto err_put_group; - mutex_lock(&sched->reset.lock); if (atomic_read(&sched->reset.in_progress)) { panthor_group_stop(group); @@ -3574,6 +3729,9 @@ int panthor_group_create(struct panthor_file *pfile, return gid; +err_erase_gid: + xa_erase(&gpool->xa, gid); + err_put_group: group_put(group); return ret; @@ -3855,7 +4013,9 @@ void panthor_sched_unplug(struct panthor_device *ptdev) { struct panthor_scheduler *sched = ptdev->scheduler; - cancel_delayed_work_sync(&sched->tick_work); + disable_delayed_work_sync(&sched->tick_work); + disable_work_sync(&sched->fw_events_work); + disable_work_sync(&sched->sync_upd_work); mutex_lock(&sched->lock); if (sched->pm.has_ref) { @@ -3873,8 +4033,6 @@ static void panthor_sched_fini(struct drm_device *ddev, void *res) if (!sched || !sched->csg_slot_count) return; - cancel_delayed_work_sync(&sched->tick_work); - if (sched->wq) destroy_workqueue(sched->wq); diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h index 742b0b4ff3a3..f4a475aa34c0 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.h +++ b/drivers/gpu/drm/panthor/panthor_sched.h @@ -21,7 +21,8 @@ struct panthor_job; int panthor_group_create(struct panthor_file *pfile, const struct drm_panthor_group_create *group_args, - const struct drm_panthor_queue_create *queue_args); + const struct drm_panthor_queue_create *queue_args, + u64 drm_client_id); int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle); int panthor_group_get_state(struct panthor_file *pfile, struct drm_panthor_group_get_state *get_state); |
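Note on the PWR_COMMAND encoding added in the panthor_regs.h hunk above: PWR_COMMAND_DEF() shifts the sub-domain mask to bit 16, the domain index to bit 8, and keeps the command opcode in the low byte. The standalone sketch below is illustrative only and is not part of the patch; the macro and constants are copied from that hunk, while the surrounding program exists purely to show the command words the driver ends up writing.

#include <stdio.h>

/* Names and values copied from the panthor_regs.h hunk above. */
#define PWR_COMMAND_POWER_UP		0x10
#define PWR_COMMAND_DELEGATE		0x20
#define PWR_COMMAND_DOMAIN_L2		0
#define PWR_COMMAND_DOMAIN_SHADER	8
#define PWR_COMMAND_DEF(cmd, domain, subdomain) \
	(((subdomain) << 16) | ((domain) << 8) | (cmd))

int main(void)
{
	/* Power up the L2 domain: domain index 0, no sub-domain bits. */
	unsigned int power_up_l2 =
		PWR_COMMAND_DEF(PWR_COMMAND_POWER_UP, PWR_COMMAND_DOMAIN_L2, 0);

	/* Delegate the shader domain to the MCU, as delegate_domain() does. */
	unsigned int delegate_shader =
		PWR_COMMAND_DEF(PWR_COMMAND_DELEGATE, PWR_COMMAND_DOMAIN_SHADER, 0);

	printf("POWER_UP(L2):     0x%x\n", power_up_l2);     /* prints 0x10 */
	printf("DELEGATE(SHADER): 0x%x\n", delegate_shader); /* prints 0x820 */
	return 0;
}

In the driver itself, such a word is handed to panthor_pwr_write_command() (defined earlier in panthor_pwr.c, not shown in this excerpt) together with an optional command argument written to PWR_CMDARG.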
