Diffstat (limited to 'drivers/gpu/drm/panthor')
-rw-r--r--  drivers/gpu/drm/panthor/Makefile           |    1
-rw-r--r--  drivers/gpu/drm/panthor/panthor_devfreq.c  |   64
-rw-r--r--  drivers/gpu/drm/panthor/panthor_devfreq.h  |    2
-rw-r--r--  drivers/gpu/drm/panthor/panthor_device.c   |   43
-rw-r--r--  drivers/gpu/drm/panthor/panthor_device.h   |   25
-rw-r--r--  drivers/gpu/drm/panthor/panthor_drv.c      |   13
-rw-r--r--  drivers/gpu/drm/panthor/panthor_fw.c       |  134
-rw-r--r--  drivers/gpu/drm/panthor/panthor_fw.h       |   32
-rw-r--r--  drivers/gpu/drm/panthor/panthor_gem.c      |   21
-rw-r--r--  drivers/gpu/drm/panthor/panthor_gpu.c      |   38
-rw-r--r--  drivers/gpu/drm/panthor/panthor_gpu.h      |    1
-rw-r--r--  drivers/gpu/drm/panthor/panthor_heap.c     |    1
-rw-r--r--  drivers/gpu/drm/panthor/panthor_hw.c       |  109
-rw-r--r--  drivers/gpu/drm/panthor/panthor_hw.h       |   47
-rw-r--r--  drivers/gpu/drm/panthor/panthor_mmu.c      |  146
-rw-r--r--  drivers/gpu/drm/panthor/panthor_pwr.c      |  549
-rw-r--r--  drivers/gpu/drm/panthor/panthor_pwr.h      |   23
-rw-r--r--  drivers/gpu/drm/panthor/panthor_regs.h     |   83
-rw-r--r--  drivers/gpu/drm/panthor/panthor_sched.c    |  364
-rw-r--r--  drivers/gpu/drm/panthor/panthor_sched.h    |    3
20 files changed, 1429 insertions, 270 deletions
diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile
index 02db21748c12..753a32c446df 100644
--- a/drivers/gpu/drm/panthor/Makefile
+++ b/drivers/gpu/drm/panthor/Makefile
@@ -10,6 +10,7 @@ panthor-y := \
panthor_heap.o \
panthor_hw.o \
panthor_mmu.o \
+ panthor_pwr.o \
panthor_sched.o
obj-$(CONFIG_DRM_PANTHOR) += panthor.o
diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
index 3686515d368d..2249b41ca4af 100644
--- a/drivers/gpu/drm/panthor/panthor_devfreq.c
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
@@ -8,6 +8,7 @@
#include <linux/pm_opp.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "panthor_devfreq.h"
#include "panthor_device.h"
@@ -62,7 +63,6 @@ static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq)
static int panthor_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
- struct panthor_device *ptdev = dev_get_drvdata(dev);
struct dev_pm_opp *opp;
int err;
@@ -72,8 +72,6 @@ static int panthor_devfreq_target(struct device *dev, unsigned long *freq,
dev_pm_opp_put(opp);
err = dev_pm_opp_set_rate(dev, *freq);
- if (!err)
- ptdev->current_frequency = *freq;
return err;
}
@@ -115,11 +113,21 @@ static int panthor_devfreq_get_dev_status(struct device *dev,
return 0;
}
+static int panthor_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct panthor_device *ptdev = dev_get_drvdata(dev);
+
+ *freq = clk_get_rate(ptdev->clks.core);
+
+ return 0;
+}
+
static struct devfreq_dev_profile panthor_devfreq_profile = {
.timer = DEVFREQ_TIMER_DELAYED,
.polling_ms = 50, /* ~3 frames */
.target = panthor_devfreq_target,
.get_dev_status = panthor_devfreq_get_dev_status,
+ .get_cur_freq = panthor_devfreq_get_cur_freq,
};
int panthor_devfreq_init(struct panthor_device *ptdev)
@@ -134,6 +142,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
struct thermal_cooling_device *cooling;
struct device *dev = ptdev->base.dev;
struct panthor_devfreq *pdevfreq;
+ struct opp_table *table;
struct dev_pm_opp *opp;
unsigned long cur_freq;
unsigned long freq = ULONG_MAX;
@@ -145,18 +154,30 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
ptdev->devfreq = pdevfreq;
- ret = devm_pm_opp_set_regulators(dev, reg_names);
- if (ret) {
- if (ret != -EPROBE_DEFER)
- DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n");
-
- return ret;
+ /*
+ * The power domain associated with the GPU may have already added an
+ * OPP table, complete with OPPs, as part of the platform bus
+	 * initialization. If this is the case, the power domain is also in
+	 * charge of controlling performance, through a set_performance callback.
+ * Only add a new OPP table from DT if there isn't such a table present
+ * already.
+ */
+ table = dev_pm_opp_get_opp_table(dev);
+ if (IS_ERR_OR_NULL(table)) {
+ ret = devm_pm_opp_set_regulators(dev, reg_names);
+ if (ret && ret != -ENODEV) {
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n");
+ return ret;
+ }
+
+ ret = devm_pm_opp_of_add_table(dev);
+ if (ret)
+ return ret;
+ } else {
+ dev_pm_opp_put_opp_table(table);
}
- ret = devm_pm_opp_of_add_table(dev);
- if (ret)
- return ret;
-
spin_lock_init(&pdevfreq->lock);
panthor_devfreq_reset(pdevfreq);
@@ -198,7 +219,6 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
return PTR_ERR(opp);
panthor_devfreq_profile.initial_freq = cur_freq;
- ptdev->current_frequency = cur_freq;
/*
* Set the recommended OPP; this will enable and configure the regulator
@@ -296,3 +316,19 @@ void panthor_devfreq_record_idle(struct panthor_device *ptdev)
spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
}
+
+unsigned long panthor_devfreq_get_freq(struct panthor_device *ptdev)
+{
+ struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+ unsigned long freq = 0;
+ int ret;
+
+ if (!pdevfreq->devfreq)
+ return 0;
+
+ ret = pdevfreq->devfreq->profile->get_cur_freq(ptdev->base.dev, &freq);
+ if (ret)
+ return 0;
+
+ return freq;
+}
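
For illustration, a minimal caller sketch (not part of this series; the only
symbol assumed from it is panthor_devfreq_get_freq() as added above, which
returns 0 whenever devfreq is absent or the query fails):

	/* Hypothetical helper, illustration only: log the current GPU
	 * frequency. No extra error handling is needed because
	 * panthor_devfreq_get_freq() already folds failures into 0.
	 */
	static void panthor_example_report_freq(struct panthor_device *ptdev)
	{
		unsigned long hz = panthor_devfreq_get_freq(ptdev);

		if (hz)
			drm_info(&ptdev->base, "GPU clocked at %lu Hz", hz);
		else
			drm_info(&ptdev->base, "GPU frequency unavailable");
	}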
diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h
index b7631de695f7..f8e29e02f66c 100644
--- a/drivers/gpu/drm/panthor/panthor_devfreq.h
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.h
@@ -18,4 +18,6 @@ void panthor_devfreq_suspend(struct panthor_device *ptdev);
void panthor_devfreq_record_busy(struct panthor_device *ptdev);
void panthor_devfreq_record_idle(struct panthor_device *ptdev);
+unsigned long panthor_devfreq_get_freq(struct panthor_device *ptdev);
+
#endif /* __PANTHOR_DEVFREQ_H__ */
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index 81df49880bd8..e133b1e0ad6d 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -13,6 +13,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "panthor_devfreq.h"
#include "panthor_device.h"
@@ -20,6 +21,7 @@
#include "panthor_gpu.h"
#include "panthor_hw.h"
#include "panthor_mmu.h"
+#include "panthor_pwr.h"
#include "panthor_regs.h"
#include "panthor_sched.h"
@@ -65,6 +67,16 @@ static int panthor_clk_init(struct panthor_device *ptdev)
return 0;
}
+static int panthor_init_power(struct device *dev)
+{
+ struct dev_pm_domain_list *pd_list = NULL;
+
+ if (dev->pm_domain)
+ return 0;
+
+ return devm_pm_domain_attach_list(dev, NULL, &pd_list);
+}
+
void panthor_device_unplug(struct panthor_device *ptdev)
{
/* This function can be called from two different paths: the reset work
@@ -83,6 +95,8 @@ void panthor_device_unplug(struct panthor_device *ptdev)
return;
}
+ drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
+
/* Call drm_dev_unplug() so any access to HW blocks happening after
* that point get rejected.
*/
@@ -93,8 +107,6 @@ void panthor_device_unplug(struct panthor_device *ptdev)
*/
mutex_unlock(&ptdev->unplug.lock);
- drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
-
/* Now, try to cleanly shutdown the GPU before the device resources
* get reclaimed.
*/
@@ -102,6 +114,7 @@ void panthor_device_unplug(struct panthor_device *ptdev)
panthor_fw_unplug(ptdev);
panthor_mmu_unplug(ptdev);
panthor_gpu_unplug(ptdev);
+ panthor_pwr_unplug(ptdev);
pm_runtime_dont_use_autosuspend(ptdev->base.dev);
pm_runtime_put_sync_suspend(ptdev->base.dev);
@@ -120,7 +133,7 @@ static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
{
struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
- cancel_work_sync(&ptdev->reset.work);
+ disable_work_sync(&ptdev->reset.work);
destroy_workqueue(ptdev->reset.wq);
}
@@ -141,8 +154,8 @@ static void panthor_device_reset_work(struct work_struct *work)
panthor_sched_pre_reset(ptdev);
panthor_fw_pre_reset(ptdev, true);
panthor_mmu_pre_reset(ptdev);
- panthor_gpu_soft_reset(ptdev);
- panthor_gpu_l2_power_on(ptdev);
+ panthor_hw_soft_reset(ptdev);
+ panthor_hw_l2_power_on(ptdev);
panthor_mmu_post_reset(ptdev);
ret = panthor_fw_post_reset(ptdev);
atomic_set(&ptdev->reset.pending, 0);
@@ -172,6 +185,8 @@ int panthor_device_init(struct panthor_device *ptdev)
struct page *p;
int ret;
+ ptdev->soc_data = of_device_get_match_data(ptdev->base.dev);
+
init_completion(&ptdev->unplug.done);
ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock);
if (ret)
@@ -219,6 +234,12 @@ int panthor_device_init(struct panthor_device *ptdev)
if (ret)
return ret;
+ ret = panthor_init_power(ptdev->base.dev);
+ if (ret < 0) {
+ drm_err(&ptdev->base, "init power domains failed, ret=%d", ret);
+ return ret;
+ }
+
ret = panthor_devfreq_init(ptdev);
if (ret)
return ret;
@@ -249,10 +270,14 @@ int panthor_device_init(struct panthor_device *ptdev)
if (ret)
goto err_rpm_put;
- ret = panthor_gpu_init(ptdev);
+ ret = panthor_pwr_init(ptdev);
if (ret)
goto err_rpm_put;
+ ret = panthor_gpu_init(ptdev);
+ if (ret)
+ goto err_unplug_pwr;
+
ret = panthor_gpu_coherency_init(ptdev);
if (ret)
goto err_unplug_gpu;
@@ -293,6 +318,9 @@ err_unplug_mmu:
err_unplug_gpu:
panthor_gpu_unplug(ptdev);
+err_unplug_pwr:
+ panthor_pwr_unplug(ptdev);
+
err_rpm_put:
pm_runtime_put_sync_suspend(ptdev->base.dev);
return ret;
@@ -446,6 +474,7 @@ static int panthor_device_resume_hw_components(struct panthor_device *ptdev)
{
int ret;
+ panthor_pwr_resume(ptdev);
panthor_gpu_resume(ptdev);
panthor_mmu_resume(ptdev);
@@ -455,6 +484,7 @@ static int panthor_device_resume_hw_components(struct panthor_device *ptdev)
panthor_mmu_suspend(ptdev);
panthor_gpu_suspend(ptdev);
+ panthor_pwr_suspend(ptdev);
return ret;
}
@@ -568,6 +598,7 @@ int panthor_device_suspend(struct device *dev)
panthor_fw_suspend(ptdev);
panthor_mmu_suspend(ptdev);
panthor_gpu_suspend(ptdev);
+ panthor_pwr_suspend(ptdev);
drm_dev_exit(cookie);
}
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 4fc7cf2aeed5..f35e52b9546a 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -24,14 +24,27 @@ struct panthor_device;
struct panthor_gpu;
struct panthor_group_pool;
struct panthor_heap_pool;
+struct panthor_hw;
struct panthor_job;
struct panthor_mmu;
struct panthor_fw;
struct panthor_perfcnt;
+struct panthor_pwr;
struct panthor_vm;
struct panthor_vm_pool;
/**
+ * struct panthor_soc_data - Panthor SoC Data
+ */
+struct panthor_soc_data {
+ /** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */
+ bool asn_hash_enable;
+
+ /** @asn_hash: ASN_HASH values when asn_hash_enable is true. */
+ u32 asn_hash[3];
+};
+
+/**
* enum panthor_device_pm_state - PM state
*/
enum panthor_device_pm_state {
@@ -93,6 +106,9 @@ struct panthor_device {
/** @base: Base drm_device. */
struct drm_device base;
+ /** @soc_data: Optional SoC data. */
+ const struct panthor_soc_data *soc_data;
+
/** @phys_addr: Physical address of the iomem region. */
phys_addr_t phys_addr;
@@ -120,6 +136,12 @@ struct panthor_device {
/** @csif_info: Command stream interface information. */
struct drm_panthor_csif_info csif_info;
+ /** @hw: GPU-specific data. */
+ struct panthor_hw *hw;
+
+ /** @pwr: Power control management data. */
+ struct panthor_pwr *pwr;
+
/** @gpu: GPU management data. */
struct panthor_gpu *gpu;
@@ -200,9 +222,6 @@ struct panthor_device {
/** @profile_mask: User-set profiling flags for job accounting. */
u32 profile_mask;
- /** @current_frequency: Device clock frequency at present. Set by DVFS*/
- unsigned long current_frequency;
-
/** @fast_rate: Maximum device clock frequency. Set by DVFS */
unsigned long fast_rate;
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 4c202fc5ce05..d1d4c50da5bf 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -20,11 +20,13 @@
#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <drm/drm_utils.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>
+#include "panthor_devfreq.h"
#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
@@ -1105,7 +1107,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
if (ret)
goto out;
- ret = panthor_group_create(pfile, args, queue_args);
+ ret = panthor_group_create(pfile, args, queue_args, file->client_id);
if (ret < 0)
goto out;
args->group_handle = ret;
@@ -1519,7 +1521,8 @@ static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev,
drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles);
drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate);
- drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency);
+ drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n",
+ panthor_devfreq_get_freq(ptdev));
}
static void panthor_show_internal_memory_stats(struct drm_printer *p, struct drm_file *file)
@@ -1682,7 +1685,13 @@ static struct attribute *panthor_attrs[] = {
ATTRIBUTE_GROUPS(panthor);
+static const struct panthor_soc_data soc_data_mediatek_mt8196 = {
+ .asn_hash_enable = true,
+ .asn_hash = { 0xb, 0xe, 0x0, },
+};
+
static const struct of_device_id dt_match[] = {
+ { .compatible = "mediatek,mt8196-mali", .data = &soc_data_mediatek_mt8196, },
{ .compatible = "rockchip,rk3588-mali" },
{ .compatible = "arm,mali-valhall-csf" },
{}
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index df767e82148a..1a5e3c1a27fb 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -16,11 +16,13 @@
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
+#include "panthor_hw.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"
@@ -32,6 +34,7 @@
#define PROGRESS_TIMEOUT_SCALE_SHIFT 10
#define IDLE_HYSTERESIS_US 800
#define PWROFF_HYSTERESIS_US 10000
+#define MCU_HALT_TIMEOUT_US (1ULL * USEC_PER_SEC)
/**
* struct panthor_fw_binary_hdr - Firmware binary header.
@@ -316,6 +319,49 @@ panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}
+static bool panthor_fw_has_glb_state(struct panthor_device *ptdev)
+{
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+ return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0);
+}
+
+static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev)
+{
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+ return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0);
+}
+
+u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
+ struct panthor_fw_csg_iface *csg_iface)
+{
+ if (panthor_fw_has_64bit_ep_req(ptdev))
+ return csg_iface->input->endpoint_req2;
+ else
+ return csg_iface->input->endpoint_req;
+}
+
+void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
+ struct panthor_fw_csg_iface *csg_iface, u64 value)
+{
+ if (panthor_fw_has_64bit_ep_req(ptdev))
+ csg_iface->input->endpoint_req2 = value;
+ else
+ csg_iface->input->endpoint_req = lower_32_bits(value);
+}
+
+void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
+ struct panthor_fw_csg_iface *csg_iface, u64 value,
+ u64 mask)
+{
+ if (panthor_fw_has_64bit_ep_req(ptdev))
+ panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask);
+ else
+ panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value),
+ lower_32_bits(mask));
+}
+
/**
* panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
* @ptdev: Device.
@@ -995,6 +1041,9 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
GLB_IDLE_EN |
GLB_IDLE;
+ if (panthor_fw_has_glb_state(ptdev))
+ glb_iface->input->ack_irq_mask |= GLB_STATE_MASK;
+
panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
panthor_fw_toggle_reqs(glb_iface, req, ack,
GLB_CFG_ALLOC_EN |
@@ -1068,6 +1117,54 @@ static void panthor_fw_stop(struct panthor_device *ptdev)
drm_err(&ptdev->base, "Failed to stop MCU");
}
+static bool panthor_fw_mcu_halted(struct panthor_device *ptdev)
+{
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+ bool halted;
+
+ halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT;
+
+ if (panthor_fw_has_glb_state(ptdev))
+ halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT);
+
+ return halted;
+}
+
+static void panthor_fw_halt_mcu(struct panthor_device *ptdev)
+{
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+ if (panthor_fw_has_glb_state(ptdev))
+ panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK);
+ else
+ panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
+
+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
+}
+
+static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev)
+{
+ bool halted = false;
+
+ if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10,
+ MCU_HALT_TIMEOUT_US, 0, ptdev)) {
+ drm_warn(&ptdev->base, "Timed out waiting for MCU to halt");
+ return false;
+ }
+
+ return true;
+}
+
+static void panthor_fw_mcu_set_active(struct panthor_device *ptdev)
+{
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+ if (panthor_fw_has_glb_state(ptdev))
+ panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK);
+ else
+ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
+}
+
/**
* panthor_fw_pre_reset() - Call before a reset.
* @ptdev: Device.
@@ -1084,19 +1181,13 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
ptdev->reset.fast = false;
if (!on_hang) {
- struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
- u32 status;
-
- panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
- gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
- if (!gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
- status == MCU_STATUS_HALT, 10,
- 100000)) {
- ptdev->reset.fast = true;
- } else {
+ panthor_fw_halt_mcu(ptdev);
+ if (!panthor_fw_wait_mcu_halted(ptdev))
drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
- }
+ else
+ ptdev->reset.fast = true;
}
panthor_job_irq_suspend(&ptdev->fw->irq);
panthor_fw_stop(ptdev);
@@ -1125,14 +1216,14 @@ int panthor_fw_post_reset(struct panthor_device *ptdev)
*/
panthor_reload_fw_sections(ptdev, true);
} else {
- /* The FW detects 0 -> 1 transitions. Make sure we reset
- * the HALT bit before the FW is rebooted.
+ /*
+ * If the FW was previously successfully halted in the pre-reset
+ * operation, we need to transition it to active again before
+ * the FW is rebooted.
* This is not needed on a slow reset because FW sections are
* re-initialized.
*/
- struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
-
- panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
+ panthor_fw_mcu_set_active(ptdev);
}
ret = panthor_fw_start(ptdev);
@@ -1163,13 +1254,17 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
{
struct panthor_fw_section *section;
- cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
+ disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
/* Make sure the IRQ handler cannot be called after that point. */
if (ptdev->fw->irq.irq)
panthor_job_irq_suspend(&ptdev->fw->irq);
+ panthor_fw_halt_mcu(ptdev);
+ if (!panthor_fw_wait_mcu_halted(ptdev))
+ drm_warn(&ptdev->base, "Failed to halt MCU on unplug");
+
panthor_fw_stop(ptdev);
}
@@ -1185,7 +1280,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
ptdev->fw->vm = NULL;
if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
- panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
+ panthor_hw_l2_power_off(ptdev);
}
/**
@@ -1364,7 +1459,7 @@ int panthor_fw_init(struct panthor_device *ptdev)
return ret;
}
- ret = panthor_gpu_l2_power_on(ptdev);
+ ret = panthor_hw_l2_power_on(ptdev);
if (ret)
return ret;
@@ -1408,3 +1503,4 @@ MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin");
+MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin");
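
For illustration, the versioned accessors above let callers stay agnostic of
the interface width: on CSF interface >= 4.0 they operate on the 64-bit
endpoint_req2 word, on older firmware on the 32-bit endpoint_req. A sketch of
a hypothetical caller updating only the CSG priority field (the helper name is
made up; panthor_fw_get_csg_iface() and the CSG_EP_REQ_PRIORITY* macros are
the ones in this series):

	/* Sketch only: raise a CSG slot's priority without disturbing the
	 * other endpoint-request fields, whatever the interface version.
	 */
	static void example_set_csg_priority(struct panthor_device *ptdev,
					     u32 csg_id, u32 prio)
	{
		struct panthor_fw_csg_iface *csg_iface =
			panthor_fw_get_csg_iface(ptdev, csg_id);

		panthor_fw_csg_endpoint_req_update(ptdev, csg_iface,
						   CSG_EP_REQ_PRIORITY(prio),
						   CSG_EP_REQ_PRIORITY_MASK);
	}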
diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h
index 6598d96c6d2a..fbdc21469ba3 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.h
+++ b/drivers/gpu/drm/panthor/panthor_fw.h
@@ -167,10 +167,11 @@ struct panthor_fw_csg_input_iface {
#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16))
#define CSG_EP_REQ_EXCL_COMPUTE BIT(20)
#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21)
-#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28))
#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28)
+#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & CSG_EP_REQ_PRIORITY_MASK)
+#define CSG_EP_REQ_PRIORITY_GET(x) (((x) & CSG_EP_REQ_PRIORITY_MASK) >> 28)
u32 endpoint_req;
- u32 reserved2[2];
+ u64 endpoint_req2;
u64 suspend_buf;
u64 protm_suspend_buf;
u32 config;
@@ -214,6 +215,13 @@ struct panthor_fw_global_input_iface {
#define GLB_FWCFG_UPDATE BIT(9)
#define GLB_IDLE_EN BIT(10)
#define GLB_SLEEP BIT(12)
+#define GLB_STATE_MASK GENMASK(14, 12)
+#define GLB_STATE_ACTIVE 0
+#define GLB_STATE_HALT 1
+#define GLB_STATE_SLEEP 2
+#define GLB_STATE_SUSPEND 3
+#define GLB_STATE(x) (((x) << 12) & GLB_STATE_MASK)
+#define GLB_STATE_GET(x) (((x) & GLB_STATE_MASK) >> 12)
#define GLB_INACTIVE_COMPUTE BIT(20)
#define GLB_INACTIVE_FRAGMENT BIT(21)
#define GLB_INACTIVE_TILER BIT(22)
@@ -457,6 +465,16 @@ struct panthor_fw_global_iface {
spin_unlock(&(__iface)->lock); \
} while (0)
+#define panthor_fw_update_reqs64(__iface, __in_reg, __val, __mask) \
+ do { \
+ u64 __cur_val, __new_val; \
+ spin_lock(&(__iface)->lock); \
+ __cur_val = READ_ONCE((__iface)->input->__in_reg); \
+ __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
+ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
+ spin_unlock(&(__iface)->lock); \
+ } while (0)
+
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev);
@@ -466,6 +484,16 @@ panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
+u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
+ struct panthor_fw_csg_iface *csg_iface);
+
+void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
+ struct panthor_fw_csg_iface *csg_iface, u64 value);
+
+void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
+ struct panthor_fw_csg_iface *csg_iface, u64 value,
+ u64 mask);
+
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
u32 *acked, u32 timeout_ms);
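
The panthor_fw_update_reqs64() macro above is a masked read-modify-write under
the interface lock: bits covered by the mask take the new value, everything
else is preserved. A standalone model of that bit manipulation (plain
user-space C, locking and I/O elided, for illustration only):

	#include <stdint.h>
	#include <stdio.h>

	/* Same masked update as panthor_fw_update_reqs64(). */
	static uint64_t masked_update(uint64_t cur, uint64_t val, uint64_t mask)
	{
		return (cur & ~mask) | (val & mask);
	}

	int main(void)
	{
		uint64_t reg = 0xffff0000ffff0000ULL;

		/* Replace bits 31:28 (the CSG priority field) with 0x3;
		 * all other bits keep their previous value.
		 */
		reg = masked_update(reg, 0x3ULL << 28, 0xfULL << 28);
		printf("0x%016llx\n", (unsigned long long)reg);
		/* Prints 0xffff00003fff0000. */
		return 0;
	}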
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 3f43686f0195..fbde78db270a 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -8,6 +8,7 @@
#include <linux/err.h>
#include <linux/slab.h>
+#include <drm/drm_print.h>
#include <drm/panthor_drm.h>
#include "panthor_device.h"
@@ -86,7 +87,6 @@ static void panthor_gem_free_object(struct drm_gem_object *obj)
void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
{
struct panthor_vm *vm;
- int ret;
if (IS_ERR_OR_NULL(bo))
return;
@@ -94,18 +94,11 @@ void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
vm = bo->vm;
panthor_kernel_bo_vunmap(bo);
- if (drm_WARN_ON(bo->obj->dev,
- to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm)))
- goto out_free_bo;
-
- ret = panthor_vm_unmap_range(vm, bo->va_node.start, bo->va_node.size);
- if (ret)
- goto out_free_bo;
-
+ drm_WARN_ON(bo->obj->dev,
+ to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm));
+ panthor_vm_unmap_range(vm, bo->va_node.start, bo->va_node.size);
panthor_vm_free_va(vm, &bo->va_node);
drm_gem_object_put(bo->obj);
-
-out_free_bo:
panthor_vm_put(vm);
kfree(bo);
}
@@ -152,6 +145,9 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
bo = to_panthor_bo(&obj->base);
kbo->obj = &obj->base;
bo->flags = bo_flags;
+ bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
+ drm_gem_object_get(bo->exclusive_vm_root_gem);
+ bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
if (vm == panthor_fw_vm(ptdev))
debug_flags |= PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED;
@@ -175,9 +171,6 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
goto err_free_va;
kbo->vm = panthor_vm_get(vm);
- bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
- drm_gem_object_get(bo->exclusive_vm_root_gem);
- bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
return kbo;
err_free_va:
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index db69449a5be0..06b231b2460a 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -15,9 +15,11 @@
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "panthor_device.h"
#include "panthor_gpu.h"
+#include "panthor_hw.h"
#include "panthor_regs.h"
/**
@@ -52,6 +54,28 @@ static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
}
+static void panthor_gpu_l2_config_set(struct panthor_device *ptdev)
+{
+ const struct panthor_soc_data *data = ptdev->soc_data;
+ u32 l2_config;
+ u32 i;
+
+ if (!data || !data->asn_hash_enable)
+ return;
+
+ if (GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) < 11) {
+ drm_err(&ptdev->base, "Custom ASN hash not supported by the device");
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(data->asn_hash); i++)
+ gpu_write(ptdev, GPU_ASN_HASH(i), data->asn_hash[i]);
+
+ l2_config = gpu_read(ptdev, GPU_L2_CONFIG);
+ l2_config |= GPU_L2_CONFIG_ASN_HASH_ENABLE;
+ gpu_write(ptdev, GPU_L2_CONFIG, l2_config);
+}
+
static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
{
gpu_write(ptdev, GPU_INT_CLEAR, status);
@@ -218,6 +242,11 @@ int panthor_gpu_block_power_on(struct panthor_device *ptdev,
return 0;
}
+void panthor_gpu_l2_power_off(struct panthor_device *ptdev)
+{
+ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
+}
+
/**
* panthor_gpu_l2_power_on() - Power-on the L2-cache
* @ptdev: Device.
@@ -241,8 +270,9 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
hweight64(ptdev->gpu_info.shader_present));
}
- /* Set the desired coherency mode before the power up of L2 */
+ /* Set the desired coherency mode and L2 config before the power up of L2 */
panthor_gpu_coherency_set(ptdev);
+ panthor_gpu_l2_config_set(ptdev);
return panthor_gpu_power_on(ptdev, L2, 1, 20000);
}
@@ -344,9 +374,9 @@ void panthor_gpu_suspend(struct panthor_device *ptdev)
{
/* On a fast reset, simply power down the L2. */
if (!ptdev->reset.fast)
- panthor_gpu_soft_reset(ptdev);
+ panthor_hw_soft_reset(ptdev);
else
- panthor_gpu_power_off(ptdev, L2, 1, 20000);
+ panthor_hw_l2_power_off(ptdev);
panthor_gpu_irq_suspend(&ptdev->gpu->irq);
}
@@ -361,6 +391,6 @@ void panthor_gpu_suspend(struct panthor_device *ptdev)
void panthor_gpu_resume(struct panthor_device *ptdev)
{
panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
- panthor_gpu_l2_power_on(ptdev);
+ panthor_hw_l2_power_on(ptdev);
}
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h
index 7c17a8c06858..12e66f48ced1 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.h
+++ b/drivers/gpu/drm/panthor/panthor_gpu.h
@@ -46,6 +46,7 @@ int panthor_gpu_block_power_off(struct panthor_device *ptdev,
type ## _PWRTRANS, \
mask, timeout_us)
+void panthor_gpu_l2_power_off(struct panthor_device *ptdev);
int panthor_gpu_l2_power_on(struct panthor_device *ptdev);
int panthor_gpu_flush_caches(struct panthor_device *ptdev,
u32 l2, u32 lsc, u32 other);
diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index d236e9ceade4..0b6ff4c0a11b 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -4,6 +4,7 @@
#include <linux/iosys-map.h>
#include <linux/rwsem.h>
+#include <drm/drm_print.h>
#include <drm/panthor_drm.h>
#include "panthor_device.h"
diff --git a/drivers/gpu/drm/panthor/panthor_hw.c b/drivers/gpu/drm/panthor/panthor_hw.c
index 4f2858114e5e..87ebb7ae42c4 100644
--- a/drivers/gpu/drm/panthor/panthor_hw.c
+++ b/drivers/gpu/drm/panthor/panthor_hw.c
@@ -1,13 +1,58 @@
// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2025 ARM Limited. All rights reserved. */
+#include <drm/drm_print.h>
+
#include "panthor_device.h"
+#include "panthor_gpu.h"
#include "panthor_hw.h"
+#include "panthor_pwr.h"
#include "panthor_regs.h"
#define GPU_PROD_ID_MAKE(arch_major, prod_major) \
(((arch_major) << 24) | (prod_major))
+/**
+ * struct panthor_hw_entry - HW arch major to panthor_hw binding entry
+ */
+struct panthor_hw_entry {
+ /** @arch_min: Minimum supported architecture major value (inclusive) */
+ u8 arch_min;
+
+ /** @arch_max: Maximum supported architecture major value (inclusive) */
+ u8 arch_max;
+
+ /** @hwdev: Pointer to panthor_hw structure */
+ struct panthor_hw *hwdev;
+};
+
+static struct panthor_hw panthor_hw_arch_v10 = {
+ .ops = {
+ .soft_reset = panthor_gpu_soft_reset,
+ .l2_power_off = panthor_gpu_l2_power_off,
+ .l2_power_on = panthor_gpu_l2_power_on,
+ },
+};
+
+static struct panthor_hw panthor_hw_arch_v14 = {
+ .ops = {
+ .soft_reset = panthor_pwr_reset_soft,
+ .l2_power_off = panthor_pwr_l2_power_off,
+ .l2_power_on = panthor_pwr_l2_power_on,
+ },
+};
+
+static struct panthor_hw_entry panthor_hw_match[] = {
+ {
+ .arch_min = 10,
+ .arch_max = 13,
+ .hwdev = &panthor_hw_arch_v10,
+ },
+ {
+ .arch_min = 14,
+ .arch_max = 14,
+ .hwdev = &panthor_hw_arch_v14,
+ },
+};
+
static char *get_gpu_model_name(struct panthor_device *ptdev)
{
const u32 gpu_id = ptdev->gpu_info.gpu_id;
@@ -53,6 +98,12 @@ static char *get_gpu_model_name(struct panthor_device *ptdev)
fallthrough;
case GPU_PROD_ID_MAKE(13, 1):
return "Mali-G625";
+ case GPU_PROD_ID_MAKE(14, 0):
+ return "Mali-G1-Ultra";
+ case GPU_PROD_ID_MAKE(14, 1):
+ return "Mali-G1-Premium";
+ case GPU_PROD_ID_MAKE(14, 3):
+ return "Mali-G1-Pro";
}
return "(Unknown Mali GPU)";
@@ -62,7 +113,6 @@ static void panthor_gpu_info_init(struct panthor_device *ptdev)
{
unsigned int i;
- ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID);
ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID);
ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID);
ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES);
@@ -80,12 +130,19 @@ static void panthor_gpu_info_init(struct panthor_device *ptdev)
ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT);
- ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT);
- ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT);
- ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT);
-
/* Introduced in arch 11.x */
ptdev->gpu_info.gpu_features = gpu_read64(ptdev, GPU_FEATURES);
+
+ if (panthor_hw_has_pwr_ctrl(ptdev)) {
+ /* Introduced in arch 14.x */
+ ptdev->gpu_info.l2_present = gpu_read64(ptdev, PWR_L2_PRESENT);
+ ptdev->gpu_info.tiler_present = gpu_read64(ptdev, PWR_TILER_PRESENT);
+ ptdev->gpu_info.shader_present = gpu_read64(ptdev, PWR_SHADER_PRESENT);
+ } else {
+ ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT);
+ ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT);
+ ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT);
+ }
}
static void panthor_hw_info_init(struct panthor_device *ptdev)
@@ -117,8 +174,50 @@ static void panthor_hw_info_init(struct panthor_device *ptdev)
ptdev->gpu_info.tiler_present);
}
+static int panthor_hw_bind_device(struct panthor_device *ptdev)
+{
+ struct panthor_hw *hdev = NULL;
+ const u32 arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id);
+ int i = 0;
+
+ for (i = 0; i < ARRAY_SIZE(panthor_hw_match); i++) {
+ struct panthor_hw_entry *entry = &panthor_hw_match[i];
+
+ if (arch_major >= entry->arch_min && arch_major <= entry->arch_max) {
+ hdev = entry->hwdev;
+ break;
+ }
+ }
+
+ if (!hdev)
+ return -EOPNOTSUPP;
+
+ ptdev->hw = hdev;
+
+ return 0;
+}
+
+static int panthor_hw_gpu_id_init(struct panthor_device *ptdev)
+{
+ ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID);
+ if (!ptdev->gpu_info.gpu_id)
+ return -ENXIO;
+
+ return 0;
+}
+
int panthor_hw_init(struct panthor_device *ptdev)
{
+ int ret = 0;
+
+ ret = panthor_hw_gpu_id_init(ptdev);
+ if (ret)
+ return ret;
+
+ ret = panthor_hw_bind_device(ptdev);
+ if (ret)
+ return ret;
+
panthor_hw_info_init(ptdev);
return 0;
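
With the table-driven binding above, enabling a future GPU generation is
mostly a matter of adding a match entry. A hypothetical example (arch 15 is
made up here; it simply reuses the PWR_CONTROL-based ops, assuming the
register interface stayed unchanged):

	/* Hypothetical entry, illustration only. */
	{
		.arch_min = 15,
		.arch_max = 15,
		.hwdev = &panthor_hw_arch_v14,
	},

panthor_hw_bind_device() would then resolve ptdev->hw from
GPU_ARCH_MAJOR(gpu_id) with no other driver changes.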
diff --git a/drivers/gpu/drm/panthor/panthor_hw.h b/drivers/gpu/drm/panthor/panthor_hw.h
index 0af6acc6aa6a..56c68c1e9c26 100644
--- a/drivers/gpu/drm/panthor/panthor_hw.h
+++ b/drivers/gpu/drm/panthor/panthor_hw.h
@@ -4,8 +4,53 @@
#ifndef __PANTHOR_HW_H__
#define __PANTHOR_HW_H__
-struct panthor_device;
+#include "panthor_device.h"
+#include "panthor_regs.h"
+
+/**
+ * struct panthor_hw_ops - HW operations that are specific to a GPU
+ */
+struct panthor_hw_ops {
+ /** @soft_reset: Soft reset function pointer */
+ int (*soft_reset)(struct panthor_device *ptdev);
+
+ /** @l2_power_off: L2 power off function pointer */
+ void (*l2_power_off)(struct panthor_device *ptdev);
+
+ /** @l2_power_on: L2 power on function pointer */
+ int (*l2_power_on)(struct panthor_device *ptdev);
+};
+
+/**
+ * struct panthor_hw - GPU specific register mapping and functions
+ */
+struct panthor_hw {
+ /** @ops: Panthor HW specific operations */
+ struct panthor_hw_ops ops;
+};
int panthor_hw_init(struct panthor_device *ptdev);
+static inline int panthor_hw_soft_reset(struct panthor_device *ptdev)
+{
+ return ptdev->hw->ops.soft_reset(ptdev);
+}
+
+static inline int panthor_hw_l2_power_on(struct panthor_device *ptdev)
+{
+ return ptdev->hw->ops.l2_power_on(ptdev);
+}
+
+static inline void panthor_hw_l2_power_off(struct panthor_device *ptdev)
+{
+ ptdev->hw->ops.l2_power_off(ptdev);
+}
+
+static inline bool panthor_hw_has_pwr_ctrl(struct panthor_device *ptdev)
+{
+ return GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) >= 14;
+}
+
#endif /* __PANTHOR_HW_H__ */
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 7870e7dbaa5d..d4839d282689 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -7,6 +7,7 @@
#include <drm/drm_exec.h>
#include <drm/drm_gpuvm.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>
@@ -181,20 +182,6 @@ struct panthor_vm_op_ctx {
u64 range;
} va;
- /**
- * @returned_vmas: List of panthor_vma objects returned after a VM operation.
- *
- * For unmap operations, this will contain all VMAs that were covered by the
- * specified VA range.
- *
- * For map operations, this will contain all VMAs that previously mapped to
- * the specified VA range.
- *
- * Those VMAs, and the resources they point to will be released as part of
- * the op_ctx cleanup operation.
- */
- struct list_head returned_vmas;
-
/** @map: Fields specific to a map operation. */
struct {
/** @map.vm_bo: Buffer object to map. */
@@ -917,10 +904,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
{
struct panthor_device *ptdev = vm->ptdev;
struct io_pgtable_ops *ops = vm->pgtbl_ops;
+ u64 start_iova = iova;
u64 offset = 0;
- drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
-
while (offset < size) {
size_t unmapped_sz = 0, pgcount;
size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);
@@ -935,6 +921,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
return -EINVAL;
}
+
+ drm_dbg(&ptdev->base,
+ "unmap: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pgcnt=%zu, pgsz=%zu",
+ vm->as.id, start_iova, size, iova + offset,
+ unmapped_sz / pgsize, pgsize);
+
offset += unmapped_sz;
}
@@ -950,6 +942,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
struct scatterlist *sgl;
struct io_pgtable_ops *ops = vm->pgtbl_ops;
u64 start_iova = iova;
+ u64 start_size = size;
int ret;
if (!size)
@@ -969,15 +962,18 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
len = min_t(size_t, len, size);
size -= len;
- drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx",
- vm->as.id, iova, &paddr, len);
-
while (len) {
size_t pgcount, mapped = 0;
size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
GFP_KERNEL, &mapped);
+
+ drm_dbg(&ptdev->base,
+ "map: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pa=%pad, pgcnt=%zu, pgsz=%zu",
+ vm->as.id, start_iova, start_size, iova, &paddr,
+ mapped / pgsize, pgsize);
+
iova += mapped;
paddr += mapped;
len -= mapped;
@@ -1081,47 +1077,18 @@ void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
mutex_unlock(&vm->mm_lock);
}
-static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
+static void panthor_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
{
struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj);
- struct drm_gpuvm *vm = vm_bo->vm;
- bool unpin;
- /* We must retain the GEM before calling drm_gpuvm_bo_put(),
- * otherwise the mutex might be destroyed while we hold it.
- * Same goes for the VM, since we take the VM resv lock.
- */
- drm_gem_object_get(&bo->base.base);
- drm_gpuvm_get(vm);
-
- /* We take the resv lock to protect against concurrent accesses to the
- * gpuvm evicted/extobj lists that are modified in
- * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put()
- * releases sthe last vm_bo reference.
- * We take the BO GPUVA list lock to protect the vm_bo removal from the
- * GEM vm_bo list.
- */
- dma_resv_lock(drm_gpuvm_resv(vm), NULL);
- mutex_lock(&bo->base.base.gpuva.lock);
- unpin = drm_gpuvm_bo_put(vm_bo);
- mutex_unlock(&bo->base.base.gpuva.lock);
- dma_resv_unlock(drm_gpuvm_resv(vm));
-
- /* If the vm_bo object was destroyed, release the pin reference that
- * was hold by this object.
- */
- if (unpin && !drm_gem_is_imported(&bo->base.base))
+ if (!drm_gem_is_imported(&bo->base.base))
drm_gem_shmem_unpin(&bo->base);
-
- drm_gpuvm_put(vm);
- drm_gem_object_put(&bo->base.base);
+ kfree(vm_bo);
}
static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
struct panthor_vm *vm)
{
- struct panthor_vma *vma, *tmp_vma;
-
u32 remaining_pt_count = op_ctx->rsvd_page_tables.count -
op_ctx->rsvd_page_tables.ptr;
@@ -1134,16 +1101,26 @@ static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
kfree(op_ctx->rsvd_page_tables.pages);
if (op_ctx->map.vm_bo)
- panthor_vm_bo_put(op_ctx->map.vm_bo);
+ drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++)
kfree(op_ctx->preallocated_vmas[i]);
- list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) {
- list_del(&vma->node);
- panthor_vm_bo_put(vma->base.vm_bo);
- kfree(vma);
+ drm_gpuvm_bo_deferred_cleanup(&vm->base);
+}
+
+static void
+panthor_vm_op_ctx_return_vma(struct panthor_vm_op_ctx *op_ctx,
+ struct panthor_vma *vma)
+{
+ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
+ if (!op_ctx->preallocated_vmas[i]) {
+ op_ctx->preallocated_vmas[i] = vma;
+ return;
+ }
}
+
+ WARN_ON_ONCE(1);
}
static struct panthor_vma *
@@ -1236,7 +1213,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
return -EINVAL;
memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->returned_vmas);
op_ctx->flags = flags;
op_ctx->va.range = size;
op_ctx->va.addr = va;
@@ -1247,7 +1223,9 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
if (!drm_gem_is_imported(&bo->base.base)) {
/* Pre-reserve the BO pages, so the map operation doesn't have to
- * allocate.
+ * allocate. This pin is dropped in panthor_vm_bo_free(), so
+ * once we have successfully called drm_gpuvm_bo_create(),
+ * GPUVM will take care of dropping the pin for us.
*/
ret = drm_gem_shmem_pin(&bo->base);
if (ret)
@@ -1286,16 +1264,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
mutex_unlock(&bo->base.base.gpuva.lock);
dma_resv_unlock(panthor_vm_resv(vm));
- /* If the a vm_bo for this <VM,BO> combination exists, it already
- * retains a pin ref, and we can release the one we took earlier.
- *
- * If our pre-allocated vm_bo is picked, it now retains the pin ref,
- * which will be released in panthor_vm_bo_put().
- */
- if (preallocated_vm_bo != op_ctx->map.vm_bo &&
- !drm_gem_is_imported(&bo->base.base))
- drm_gem_shmem_unpin(&bo->base);
-
op_ctx->map.bo_offset = offset;
/* L1, L2 and L3 page tables.
@@ -1343,7 +1311,6 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
int ret;
memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->returned_vmas);
op_ctx->va.range = size;
op_ctx->va.addr = va;
op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP;
@@ -1391,7 +1358,6 @@ static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx
struct panthor_vm *vm)
{
memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->returned_vmas);
op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY;
}
@@ -2037,26 +2003,13 @@ static void panthor_vma_link(struct panthor_vm *vm,
mutex_lock(&bo->base.base.gpuva.lock);
drm_gpuva_link(&vma->base, vm_bo);
- drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo));
mutex_unlock(&bo->base.base.gpuva.lock);
}
-static void panthor_vma_unlink(struct panthor_vm *vm,
- struct panthor_vma *vma)
+static void panthor_vma_unlink(struct panthor_vma *vma)
{
- struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
- struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo);
-
- mutex_lock(&bo->base.base.gpuva.lock);
- drm_gpuva_unlink(&vma->base);
- mutex_unlock(&bo->base.base.gpuva.lock);
-
- /* drm_gpuva_unlink() release the vm_bo, but we manually retained it
- * when entering this function, so we can implement deferred VMA
- * destruction. Re-assign it here.
- */
- vma->base.vm_bo = vm_bo;
- list_add_tail(&vma->node, &vm->op_ctx->returned_vmas);
+ drm_gpuva_unlink_defer(&vma->base);
+ kfree(vma);
}
static void panthor_vma_init(struct panthor_vma *vma, u32 flags)
@@ -2085,15 +2038,17 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv)
ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
op_ctx->map.sgt, op->map.gem.offset,
op->map.va.range);
- if (ret)
+ if (ret) {
+ panthor_vm_op_ctx_return_vma(op_ctx, vma);
return ret;
+ }
- /* Ref owned by the mapping now, clear the obj field so we don't release the
- * pinning/obj ref behind GPUVA's back.
- */
drm_gpuva_map(&vm->base, &vma->base, &op->map);
panthor_vma_link(vm, vma, op_ctx->map.vm_bo);
+
+ drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
op_ctx->map.vm_bo = NULL;
+
return 0;
}
@@ -2132,16 +2087,14 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op,
* owned by the old mapping which will be released when this
* mapping is destroyed, we need to grab a ref here.
*/
- panthor_vma_link(vm, prev_vma,
- drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+ panthor_vma_link(vm, prev_vma, op->remap.unmap->va->vm_bo);
}
if (next_vma) {
- panthor_vma_link(vm, next_vma,
- drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+ panthor_vma_link(vm, next_vma, op->remap.unmap->va->vm_bo);
}
- panthor_vma_unlink(vm, unmap_vma);
+ panthor_vma_unlink(unmap_vma);
return 0;
}
@@ -2158,12 +2111,13 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op,
return ret;
drm_gpuva_unmap(&op->unmap);
- panthor_vma_unlink(vm, unmap_vma);
+ panthor_vma_unlink(unmap_vma);
return 0;
}
static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
.vm_free = panthor_vm_free,
+ .vm_bo_free = panthor_vm_bo_free,
.sm_step_map = panthor_gpuva_sm_step_map,
.sm_step_remap = panthor_gpuva_sm_step_remap,
.sm_step_unmap = panthor_gpuva_sm_step_unmap,
diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c
new file mode 100644
index 000000000000..57cfc7ce715b
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_pwr.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2025 ARM Limited. All rights reserved. */
+
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/cleanup.h>
+#include <linux/iopoll.h>
+#include <linux/wait.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "panthor_device.h"
+#include "panthor_hw.h"
+#include "panthor_pwr.h"
+#include "panthor_regs.h"
+
+#define PWR_INTERRUPTS_MASK \
+ (PWR_IRQ_POWER_CHANGED_SINGLE | \
+ PWR_IRQ_POWER_CHANGED_ALL | \
+ PWR_IRQ_DELEGATION_CHANGED | \
+ PWR_IRQ_RESET_COMPLETED | \
+ PWR_IRQ_RETRACT_COMPLETED | \
+ PWR_IRQ_INSPECT_COMPLETED | \
+ PWR_IRQ_COMMAND_NOT_ALLOWED | \
+ PWR_IRQ_COMMAND_INVALID)
+
+#define PWR_ALL_CORES_MASK GENMASK_U64(63, 0)
+
+#define PWR_DOMAIN_MAX_BITS 16
+
+#define PWR_TRANSITION_TIMEOUT_US (2ULL * USEC_PER_SEC)
+
+#define PWR_RETRACT_TIMEOUT_US (2ULL * USEC_PER_MSEC)
+
+#define PWR_RESET_TIMEOUT_MS 500
+
+/**
+ * struct panthor_pwr - PWR_CONTROL block management data.
+ */
+struct panthor_pwr {
+ /** @irq: PWR irq. */
+ struct panthor_irq irq;
+
+ /** @reqs_lock: Lock protecting access to pending_reqs. */
+ spinlock_t reqs_lock;
+
+ /** @pending_reqs: Pending PWR requests. */
+ u32 pending_reqs;
+
+ /** @reqs_acked: PWR request wait queue. */
+ wait_queue_head_t reqs_acked;
+};
+
+static void panthor_pwr_irq_handler(struct panthor_device *ptdev, u32 status)
+{
+ spin_lock(&ptdev->pwr->reqs_lock);
+ gpu_write(ptdev, PWR_INT_CLEAR, status);
+
+ if (unlikely(status & PWR_IRQ_COMMAND_NOT_ALLOWED))
+ drm_err(&ptdev->base, "PWR_IRQ: COMMAND_NOT_ALLOWED");
+
+ if (unlikely(status & PWR_IRQ_COMMAND_INVALID))
+ drm_err(&ptdev->base, "PWR_IRQ: COMMAND_INVALID");
+
+ if (status & ptdev->pwr->pending_reqs) {
+ ptdev->pwr->pending_reqs &= ~status;
+ wake_up_all(&ptdev->pwr->reqs_acked);
+ }
+ spin_unlock(&ptdev->pwr->reqs_lock);
+}
+PANTHOR_IRQ_HANDLER(pwr, PWR, panthor_pwr_irq_handler);
+
+static void panthor_pwr_write_command(struct panthor_device *ptdev, u32 command, u64 args)
+{
+ if (args)
+ gpu_write64(ptdev, PWR_CMDARG, args);
+
+ gpu_write(ptdev, PWR_COMMAND, command);
+}
+
+static bool reset_irq_raised(struct panthor_device *ptdev)
+{
+ return gpu_read(ptdev, PWR_INT_RAWSTAT) & PWR_IRQ_RESET_COMPLETED;
+}
+
+static bool reset_pending(struct panthor_device *ptdev)
+{
+ return (ptdev->pwr->pending_reqs & PWR_IRQ_RESET_COMPLETED);
+}
+
+static int panthor_pwr_reset(struct panthor_device *ptdev, u32 reset_cmd)
+{
+ scoped_guard(spinlock_irqsave, &ptdev->pwr->reqs_lock) {
+ if (reset_pending(ptdev)) {
+ drm_WARN(&ptdev->base, 1, "Reset already pending");
+ } else {
+ ptdev->pwr->pending_reqs |= PWR_IRQ_RESET_COMPLETED;
+ gpu_write(ptdev, PWR_INT_CLEAR, PWR_IRQ_RESET_COMPLETED);
+ panthor_pwr_write_command(ptdev, reset_cmd, 0);
+ }
+ }
+
+ if (!wait_event_timeout(ptdev->pwr->reqs_acked, !reset_pending(ptdev),
+ msecs_to_jiffies(PWR_RESET_TIMEOUT_MS))) {
+ guard(spinlock_irqsave)(&ptdev->pwr->reqs_lock);
+
+ if (reset_pending(ptdev) && !reset_irq_raised(ptdev)) {
+ drm_err(&ptdev->base, "RESET timed out (0x%x)", reset_cmd);
+ return -ETIMEDOUT;
+ }
+
+ ptdev->pwr->pending_reqs &= ~PWR_IRQ_RESET_COMPLETED;
+ }
+
+ return 0;
+}
+
+static const char *get_domain_name(u8 domain)
+{
+ switch (domain) {
+ case PWR_COMMAND_DOMAIN_L2:
+ return "L2";
+ case PWR_COMMAND_DOMAIN_TILER:
+ return "Tiler";
+ case PWR_COMMAND_DOMAIN_SHADER:
+ return "Shader";
+ case PWR_COMMAND_DOMAIN_BASE:
+ return "Base";
+ case PWR_COMMAND_DOMAIN_STACK:
+ return "Stack";
+ }
+ return "Unknown";
+}
+
+static u32 get_domain_base(u8 domain)
+{
+ switch (domain) {
+ case PWR_COMMAND_DOMAIN_L2:
+ return PWR_L2_PRESENT;
+ case PWR_COMMAND_DOMAIN_TILER:
+ return PWR_TILER_PRESENT;
+ case PWR_COMMAND_DOMAIN_SHADER:
+ return PWR_SHADER_PRESENT;
+ case PWR_COMMAND_DOMAIN_BASE:
+ return PWR_BASE_PRESENT;
+ case PWR_COMMAND_DOMAIN_STACK:
+ return PWR_STACK_PRESENT;
+ }
+ return 0;
+}
+
+static u32 get_domain_ready_reg(u32 domain)
+{
+ return get_domain_base(domain) + (PWR_L2_READY - PWR_L2_PRESENT);
+}
+
+static u32 get_domain_pwrtrans_reg(u32 domain)
+{
+ return get_domain_base(domain) + (PWR_L2_PWRTRANS - PWR_L2_PRESENT);
+}
+
+static bool is_valid_domain(u32 domain)
+{
+ return get_domain_base(domain) != 0;
+}
+
+static bool has_rtu(struct panthor_device *ptdev)
+{
+ return ptdev->gpu_info.gpu_features & GPU_FEATURES_RAY_TRAVERSAL;
+}
+
+static u8 get_domain_subdomain(struct panthor_device *ptdev, u32 domain)
+{
+ if (domain == PWR_COMMAND_DOMAIN_SHADER && has_rtu(ptdev))
+ return PWR_COMMAND_SUBDOMAIN_RTU;
+
+ return 0;
+}
+
+static int panthor_pwr_domain_wait_transition(struct panthor_device *ptdev, u32 domain,
+ u32 timeout_us)
+{
+ u32 pwrtrans_reg = get_domain_pwrtrans_reg(domain);
+ u64 val;
+ int ret = 0;
+
+ ret = gpu_read64_poll_timeout(ptdev, pwrtrans_reg, val, !(PWR_ALL_CORES_MASK & val), 100,
+ timeout_us);
+ if (ret) {
+ drm_err(&ptdev->base, "%s domain power in transition, pwrtrans(0x%llx)",
+ get_domain_name(domain), val);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void panthor_pwr_debug_info_show(struct panthor_device *ptdev)
+{
+ drm_info(&ptdev->base, "GPU_FEATURES: 0x%016llx", gpu_read64(ptdev, GPU_FEATURES));
+ drm_info(&ptdev->base, "PWR_STATUS: 0x%016llx", gpu_read64(ptdev, PWR_STATUS));
+ drm_info(&ptdev->base, "L2_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_L2_PRESENT));
+ drm_info(&ptdev->base, "L2_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_L2_PWRTRANS));
+ drm_info(&ptdev->base, "L2_READY: 0x%016llx", gpu_read64(ptdev, PWR_L2_READY));
+ drm_info(&ptdev->base, "TILER_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_TILER_PRESENT));
+ drm_info(&ptdev->base, "TILER_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_TILER_PWRTRANS));
+ drm_info(&ptdev->base, "TILER_READY: 0x%016llx", gpu_read64(ptdev, PWR_TILER_READY));
+ drm_info(&ptdev->base, "SHADER_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_PRESENT));
+ drm_info(&ptdev->base, "SHADER_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_PWRTRANS));
+ drm_info(&ptdev->base, "SHADER_READY: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_READY));
+}
+
+static int panthor_pwr_domain_transition(struct panthor_device *ptdev, u32 cmd, u32 domain,
+ u64 mask, u32 timeout_us)
+{
+ u32 ready_reg = get_domain_ready_reg(domain);
+ u32 pwr_cmd = PWR_COMMAND_DEF(cmd, domain, get_domain_subdomain(ptdev, domain));
+ u64 expected_val = 0;
+ u64 val;
+ int ret = 0;
+
+ if (drm_WARN_ON(&ptdev->base, !is_valid_domain(domain)))
+ return -EINVAL;
+
+ switch (cmd) {
+ case PWR_COMMAND_POWER_DOWN:
+ expected_val = 0;
+ break;
+ case PWR_COMMAND_POWER_UP:
+ expected_val = mask;
+ break;
+ default:
+ drm_err(&ptdev->base, "Invalid power domain transition command (0x%x)", cmd);
+ return -EINVAL;
+ }
+
+ ret = panthor_pwr_domain_wait_transition(ptdev, domain, timeout_us);
+ if (ret)
+ return ret;
+
+ /* domain already in target state, return early */
+ if ((gpu_read64(ptdev, ready_reg) & mask) == expected_val)
+ return 0;
+
+ panthor_pwr_write_command(ptdev, pwr_cmd, mask);
+
+ ret = gpu_read64_poll_timeout(ptdev, ready_reg, val, (mask & val) == expected_val, 100,
+ timeout_us);
+ if (ret) {
+ drm_err(&ptdev->base,
+ "timeout waiting on %s power domain transition, cmd(0x%x), arg(0x%llx)",
+ get_domain_name(domain), pwr_cmd, mask);
+ panthor_pwr_debug_info_show(ptdev);
+ return ret;
+ }
+
+ return 0;
+}
+
+#define panthor_pwr_domain_power_off(__ptdev, __domain, __mask, __timeout_us) \
+ panthor_pwr_domain_transition(__ptdev, PWR_COMMAND_POWER_DOWN, __domain, __mask, \
+ __timeout_us)
+
+#define panthor_pwr_domain_power_on(__ptdev, __domain, __mask, __timeout_us) \
+ panthor_pwr_domain_transition(__ptdev, PWR_COMMAND_POWER_UP, __domain, __mask, __timeout_us)
+
+/**
+ * retract_domain() - Retract control of a domain from the MCU
+ * @ptdev: Device.
+ * @domain: Domain to retract control of
+ *
+ * Retracting the L2 domain is not expected since it is never delegated.
+ *
+ * Return:
+ * * 0 on success or if already retracted.
+ * * -EPERM if domain is L2.
+ * * A negative error code otherwise.
+ */
+static int retract_domain(struct panthor_device *ptdev, u32 domain)
+{
+ const u32 pwr_cmd = PWR_COMMAND_DEF(PWR_COMMAND_RETRACT, domain, 0);
+ const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS);
+ const u64 delegated_mask = PWR_STATUS_DOMAIN_DELEGATED(domain);
+ const u64 allow_mask = PWR_STATUS_DOMAIN_ALLOWED(domain);
+ u64 val;
+ int ret;
+
+ if (drm_WARN_ON(&ptdev->base, domain == PWR_COMMAND_DOMAIN_L2))
+ return -EPERM;
+
+ ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, !(PWR_STATUS_RETRACT_PENDING & val),
+ 0, PWR_RETRACT_TIMEOUT_US);
+ if (ret) {
+ drm_err(&ptdev->base, "%s domain retract pending", get_domain_name(domain));
+ return ret;
+ }
+
+ if (!(pwr_status & delegated_mask)) {
+ drm_dbg(&ptdev->base, "%s domain already retracted", get_domain_name(domain));
+ return 0;
+ }
+
+ panthor_pwr_write_command(ptdev, pwr_cmd, 0);
+
+ /*
+	 * On successful retraction, the allow flag is set and the
+	 * delegated flag is cleared.
+ */
+ ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val,
+ ((delegated_mask | allow_mask) & val) == allow_mask, 10,
+ PWR_TRANSITION_TIMEOUT_US);
+ if (ret) {
+ drm_err(&ptdev->base, "Retracting %s domain timeout, cmd(0x%x)",
+ get_domain_name(domain), pwr_cmd);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * delegate_domain() - Delegate control of a domain to the MCU
+ * @ptdev: Device.
+ * @domain: Domain to delegate control of
+ *
+ * Delegating the L2 domain is prohibited.
+ *
+ * Return:
+ * * 0 on success or delegated already.
+ * * -EPERM if domain is L2.
+ * * A negative error code otherwise.
+ */
+static int delegate_domain(struct panthor_device *ptdev, u32 domain)
+{
+ const u32 pwr_cmd = PWR_COMMAND_DEF(PWR_COMMAND_DELEGATE, domain, 0);
+ const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS);
+ const u64 allow_mask = PWR_STATUS_DOMAIN_ALLOWED(domain);
+ const u64 delegated_mask = PWR_STATUS_DOMAIN_DELEGATED(domain);
+ u64 val;
+ int ret;
+
+ if (drm_WARN_ON(&ptdev->base, domain == PWR_COMMAND_DOMAIN_L2))
+ return -EPERM;
+
+ /* Already delegated, exit early */
+ if (pwr_status & delegated_mask)
+ return 0;
+
+ /* Check if the command is allowed before delegating. */
+ if (!(pwr_status & allow_mask)) {
+ drm_warn(&ptdev->base, "Delegating %s domain not allowed", get_domain_name(domain));
+ return -EPERM;
+ }
+
+ ret = panthor_pwr_domain_wait_transition(ptdev, domain, PWR_TRANSITION_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ panthor_pwr_write_command(ptdev, pwr_cmd, 0);
+
+ /*
+	 * On successful delegation, the allow flag is cleared and the
+	 * delegated flag is set.
+ */
+ ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val,
+ ((delegated_mask | allow_mask) & val) == delegated_mask,
+ 10, PWR_TRANSITION_TIMEOUT_US);
+ if (ret) {
+ drm_err(&ptdev->base, "Delegating %s domain timeout, cmd(0x%x)",
+ get_domain_name(domain), pwr_cmd);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int panthor_pwr_delegate_domains(struct panthor_device *ptdev)
+{
+ int ret;
+
+ if (!ptdev->pwr)
+ return 0;
+
+ ret = delegate_domain(ptdev, PWR_COMMAND_DOMAIN_SHADER);
+ if (ret)
+ return ret;
+
+ ret = delegate_domain(ptdev, PWR_COMMAND_DOMAIN_TILER);
+ if (ret)
+ goto err_retract_shader;
+
+ return 0;
+
+err_retract_shader:
+ retract_domain(ptdev, PWR_COMMAND_DOMAIN_SHADER);
+
+ return ret;
+}
+
+/**
+ * panthor_pwr_domain_force_off() - Forcefully power down a domain.
+ * @ptdev: Device.
+ * @domain: Domain to forcefully power down.
+ *
+ * This function will attempt to retract and power off the requested power
+ * domain. However, if retraction fails, the operation is aborted. If power off
+ * fails, the domain will remain retracted and under the host control.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int panthor_pwr_domain_force_off(struct panthor_device *ptdev, u32 domain)
+{
+ const u64 domain_ready = gpu_read64(ptdev, get_domain_ready_reg(domain));
+ int ret;
+
+ /* Domain already powered down, early exit. */
+ if (!domain_ready)
+ return 0;
+
+ /* Domain has to be in host control to issue power off command. */
+ ret = retract_domain(ptdev, domain);
+ if (ret)
+ return ret;
+
+ return panthor_pwr_domain_power_off(ptdev, domain, domain_ready, PWR_TRANSITION_TIMEOUT_US);
+}
+
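+/*
+ * Unplug path: quiesce the PWR IRQ, then clear and wake all pending power
+ * requests so nothing stays blocked waiting for an ack that will never come.
+ */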
+void panthor_pwr_unplug(struct panthor_device *ptdev)
+{
+ unsigned long flags;
+
+ if (!ptdev->pwr)
+ return;
+
+ /* Make sure the IRQ handler is not running after that point. */
+ panthor_pwr_irq_suspend(&ptdev->pwr->irq);
+
+ /* Wake-up all waiters. */
+ spin_lock_irqsave(&ptdev->pwr->reqs_lock, flags);
+ ptdev->pwr->pending_reqs = 0;
+ wake_up_all(&ptdev->pwr->reqs_acked);
+ spin_unlock_irqrestore(&ptdev->pwr->reqs_lock, flags);
+}
+
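+/*
+ * On hardware without the PWR_CONTROL interface this is a no-op and
+ * ptdev->pwr stays NULL, which several panthor_pwr_* helpers check for.
+ */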
+int panthor_pwr_init(struct panthor_device *ptdev)
+{
+ struct panthor_pwr *pwr;
+ int err, irq;
+
+ if (!panthor_hw_has_pwr_ctrl(ptdev))
+ return 0;
+
+ pwr = drmm_kzalloc(&ptdev->base, sizeof(*pwr), GFP_KERNEL);
+ if (!pwr)
+ return -ENOMEM;
+
+ spin_lock_init(&pwr->reqs_lock);
+ init_waitqueue_head(&pwr->reqs_acked);
+ ptdev->pwr = pwr;
+
+ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu");
+ if (irq < 0)
+ return irq;
+
+ err = panthor_request_pwr_irq(ptdev, &pwr->irq, irq, PWR_INTERRUPTS_MASK);
+ if (err)
+ return err;
+
+ return 0;
+}
+
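+/* Issue a soft reset through the PWR interface, if the HW currently allows it. */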
+int panthor_pwr_reset_soft(struct panthor_device *ptdev)
+{
+ if (!(gpu_read64(ptdev, PWR_STATUS) & PWR_STATUS_ALLOW_SOFT_RESET)) {
+ drm_err(&ptdev->base, "RESET_SOFT not allowed");
+ return -EOPNOTSUPP;
+ }
+
+ return panthor_pwr_reset(ptdev, PWR_COMMAND_RESET_SOFT);
+}
+
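+/*
+ * Power down the L2 domain. The tiler and shader domains are forced off
+ * first, as the L2 must be the last domain to be powered down.
+ */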
+void panthor_pwr_l2_power_off(struct panthor_device *ptdev)
+{
+ const u64 l2_allow_mask = PWR_STATUS_DOMAIN_ALLOWED(PWR_COMMAND_DOMAIN_L2);
+ const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS);
+
+ /* Abort if L2 power off constraints are not satisfied */
+ if (!(pwr_status & l2_allow_mask)) {
+ drm_warn(&ptdev->base, "Power off L2 domain not allowed");
+ return;
+ }
+
+ /* When halted, the MCU is expected to power down its delegated
+ * domains. However, an unresponsive or hung MCU may not do so, which
+ * is why we check and retract those domains back into host control so
+ * they can be powered down in the right order before the L2 goes down.
+ */
+ if (panthor_pwr_domain_force_off(ptdev, PWR_COMMAND_DOMAIN_TILER))
+ return;
+
+ if (panthor_pwr_domain_force_off(ptdev, PWR_COMMAND_DOMAIN_SHADER))
+ return;
+
+ panthor_pwr_domain_power_off(ptdev, PWR_COMMAND_DOMAIN_L2, ptdev->gpu_info.l2_present,
+ PWR_TRANSITION_TIMEOUT_US);
+}
+
+int panthor_pwr_l2_power_on(struct panthor_device *ptdev)
+{
+ const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS);
+ const u64 l2_allow_mask = PWR_STATUS_DOMAIN_ALLOWED(PWR_COMMAND_DOMAIN_L2);
+ int ret;
+
+ if (!(pwr_status & l2_allow_mask)) {
+ drm_warn(&ptdev->base, "Power on L2 domain not allowed");
+ return -EPERM;
+ }
+
+ ret = panthor_pwr_domain_power_on(ptdev, PWR_COMMAND_DOMAIN_L2, ptdev->gpu_info.l2_present,
+ PWR_TRANSITION_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ /* Delegate control of the shader and tiler power domains to the MCU as
+ * it can better manage which shader/tiler cores need to be powered up
+ * or can be powered down based on currently running jobs.
+ *
+ * If the shader and tiler domains are already delegated to the MCU,
+ * this call simply returns early.
+ */
+ return panthor_pwr_delegate_domains(ptdev);
+}
+
+void panthor_pwr_suspend(struct panthor_device *ptdev)
+{
+ if (!ptdev->pwr)
+ return;
+
+ panthor_pwr_irq_suspend(&ptdev->pwr->irq);
+}
+
+void panthor_pwr_resume(struct panthor_device *ptdev)
+{
+ if (!ptdev->pwr)
+ return;
+
+ panthor_pwr_irq_resume(&ptdev->pwr->irq, PWR_INTERRUPTS_MASK);
+}
diff --git a/drivers/gpu/drm/panthor/panthor_pwr.h b/drivers/gpu/drm/panthor/panthor_pwr.h
new file mode 100644
index 000000000000..adf1f6136abc
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_pwr.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2025 ARM Limited. All rights reserved. */
+
+#ifndef __PANTHOR_PWR_H__
+#define __PANTHOR_PWR_H__
+
+struct panthor_device;
+
+void panthor_pwr_unplug(struct panthor_device *ptdev);
+
+int panthor_pwr_init(struct panthor_device *ptdev);
+
+int panthor_pwr_reset_soft(struct panthor_device *ptdev);
+
+void panthor_pwr_l2_power_off(struct panthor_device *ptdev);
+
+int panthor_pwr_l2_power_on(struct panthor_device *ptdev);
+
+void panthor_pwr_suspend(struct panthor_device *ptdev);
+
+void panthor_pwr_resume(struct panthor_device *ptdev);
+
+#endif /* __PANTHOR_PWR_H__ */
diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h
index 8bee76d01bf8..08bf06c452d6 100644
--- a/drivers/gpu/drm/panthor/panthor_regs.h
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -64,6 +64,8 @@
#define GPU_FAULT_STATUS 0x3C
#define GPU_FAULT_ADDR 0x40
+#define GPU_L2_CONFIG 0x48
+#define GPU_L2_CONFIG_ASN_HASH_ENABLE BIT(24)
#define GPU_PWR_KEY 0x50
#define GPU_PWR_KEY_UNLOCK 0x2968A819
@@ -72,6 +74,7 @@
#define GPU_FEATURES 0x60
#define GPU_FEATURES_RAY_INTERSECTION BIT(2)
+#define GPU_FEATURES_RAY_TRAVERSAL BIT(5)
#define GPU_TIMESTAMP_OFFSET 0x88
#define GPU_CYCLE_COUNT 0x90
@@ -110,6 +113,8 @@
#define GPU_REVID 0x280
+#define GPU_ASN_HASH(n) (0x2C0 + ((n) * 4))
+
#define GPU_COHERENCY_FEATURES 0x300
#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name)
@@ -205,4 +210,82 @@
#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000))
#define CSF_GLB_DOORBELL_ID 0
+/* PWR Control registers */
+
+#define PWR_CONTROL_BASE 0x800
+#define PWR_CTRL_REG(x) (PWR_CONTROL_BASE + (x))
+
+#define PWR_INT_RAWSTAT PWR_CTRL_REG(0x0)
+#define PWR_INT_CLEAR PWR_CTRL_REG(0x4)
+#define PWR_INT_MASK PWR_CTRL_REG(0x8)
+#define PWR_INT_STAT PWR_CTRL_REG(0xc)
+#define PWR_IRQ_POWER_CHANGED_SINGLE BIT(0)
+#define PWR_IRQ_POWER_CHANGED_ALL BIT(1)
+#define PWR_IRQ_DELEGATION_CHANGED BIT(2)
+#define PWR_IRQ_RESET_COMPLETED BIT(3)
+#define PWR_IRQ_RETRACT_COMPLETED BIT(4)
+#define PWR_IRQ_INSPECT_COMPLETED BIT(5)
+#define PWR_IRQ_COMMAND_NOT_ALLOWED BIT(30)
+#define PWR_IRQ_COMMAND_INVALID BIT(31)
+
+#define PWR_STATUS PWR_CTRL_REG(0x20)
+#define PWR_STATUS_ALLOW_L2 BIT_U64(0)
+#define PWR_STATUS_ALLOW_TILER BIT_U64(1)
+#define PWR_STATUS_ALLOW_SHADER BIT_U64(8)
+#define PWR_STATUS_ALLOW_BASE BIT_U64(14)
+#define PWR_STATUS_ALLOW_STACK BIT_U64(15)
+#define PWR_STATUS_DOMAIN_ALLOWED(x) BIT_U64(x)
+#define PWR_STATUS_DELEGATED_L2 BIT_U64(16)
+#define PWR_STATUS_DELEGATED_TILER BIT_U64(17)
+#define PWR_STATUS_DELEGATED_SHADER BIT_U64(24)
+#define PWR_STATUS_DELEGATED_BASE BIT_U64(30)
+#define PWR_STATUS_DELEGATED_STACK BIT_U64(31)
+#define PWR_STATUS_DELEGATED_SHIFT 16
+#define PWR_STATUS_DOMAIN_DELEGATED(x) BIT_U64((x) + PWR_STATUS_DELEGATED_SHIFT)
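+/*
+ * e.g. PWR_STATUS_DOMAIN_DELEGATED(PWR_COMMAND_DOMAIN_SHADER) is
+ * PWR_STATUS_DELEGATED_SHADER (bit 8 + 16 = 24).
+ */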
+#define PWR_STATUS_ALLOW_SOFT_RESET BIT_U64(33)
+#define PWR_STATUS_ALLOW_FAST_RESET BIT_U64(34)
+#define PWR_STATUS_POWER_PENDING BIT_U64(41)
+#define PWR_STATUS_RESET_PENDING BIT_U64(42)
+#define PWR_STATUS_RETRACT_PENDING BIT_U64(43)
+#define PWR_STATUS_INSPECT_PENDING BIT_U64(44)
+
+#define PWR_COMMAND PWR_CTRL_REG(0x28)
+#define PWR_COMMAND_POWER_UP 0x10
+#define PWR_COMMAND_POWER_DOWN 0x11
+#define PWR_COMMAND_DELEGATE 0x20
+#define PWR_COMMAND_RETRACT 0x21
+#define PWR_COMMAND_RESET_SOFT 0x31
+#define PWR_COMMAND_RESET_FAST 0x32
+#define PWR_COMMAND_INSPECT 0xF0
+#define PWR_COMMAND_DOMAIN_L2 0
+#define PWR_COMMAND_DOMAIN_TILER 1
+#define PWR_COMMAND_DOMAIN_SHADER 8
+#define PWR_COMMAND_DOMAIN_BASE 14
+#define PWR_COMMAND_DOMAIN_STACK 15
+#define PWR_COMMAND_SUBDOMAIN_RTU BIT(0)
+#define PWR_COMMAND_DEF(cmd, domain, subdomain) \
+ (((subdomain) << 16) | ((domain) << 8) | (cmd))
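+/*
+ * e.g. PWR_COMMAND_DEF(PWR_COMMAND_POWER_UP, PWR_COMMAND_DOMAIN_SHADER, 0)
+ * encodes to 0x0810: command in bits [7:0], domain in bits [15:8],
+ * subdomain in bits [31:16].
+ */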
+
+#define PWR_CMDARG PWR_CTRL_REG(0x30)
+
+#define PWR_L2_PRESENT PWR_CTRL_REG(0x100)
+#define PWR_L2_READY PWR_CTRL_REG(0x108)
+#define PWR_L2_PWRTRANS PWR_CTRL_REG(0x110)
+#define PWR_L2_PWRACTIVE PWR_CTRL_REG(0x118)
+#define PWR_TILER_PRESENT PWR_CTRL_REG(0x140)
+#define PWR_TILER_READY PWR_CTRL_REG(0x148)
+#define PWR_TILER_PWRTRANS PWR_CTRL_REG(0x150)
+#define PWR_TILER_PWRACTIVE PWR_CTRL_REG(0x158)
+#define PWR_SHADER_PRESENT PWR_CTRL_REG(0x200)
+#define PWR_SHADER_READY PWR_CTRL_REG(0x208)
+#define PWR_SHADER_PWRTRANS PWR_CTRL_REG(0x210)
+#define PWR_SHADER_PWRACTIVE PWR_CTRL_REG(0x218)
+#define PWR_BASE_PRESENT PWR_CTRL_REG(0x380)
+#define PWR_BASE_READY PWR_CTRL_REG(0x388)
+#define PWR_BASE_PWRTRANS PWR_CTRL_REG(0x390)
+#define PWR_BASE_PWRACTIVE PWR_CTRL_REG(0x398)
+#define PWR_STACK_PRESENT PWR_CTRL_REG(0x3c0)
+#define PWR_STACK_READY PWR_CTRL_REG(0x3c8)
+#define PWR_STACK_PWRTRANS PWR_CTRL_REG(0x3d0)
+
#endif
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 3d1f57e3990f..b834123a6560 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -5,6 +5,7 @@
#include <drm/drm_exec.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>
@@ -360,17 +361,23 @@ struct panthor_queue {
/** @entity: DRM scheduling entity used for this queue. */
struct drm_sched_entity entity;
- /**
- * @remaining_time: Time remaining before the job timeout expires.
- *
- * The job timeout is suspended when the queue is not scheduled by the
- * FW. Every time we suspend the timer, we need to save the remaining
- * time so we can restore it later on.
- */
- unsigned long remaining_time;
+ /** @name: DRM scheduler name for this queue. */
+ char *name;
- /** @timeout_suspended: True if the job timeout was suspended. */
- bool timeout_suspended;
+ /** @timeout: Queue timeout related fields. */
+ struct {
+ /** @timeout.work: Work executed when a queue timeout occurs. */
+ struct delayed_work work;
+
+ /**
+ * @timeout.remaining: Time remaining before a queue timeout.
+ *
+ * When the timer is running, this value is set to MAX_SCHEDULE_TIMEOUT.
+ * When the timer is suspended, it's set to the time remaining when the
+ * timer was suspended.
+ */
+ unsigned long remaining;
+ } timeout;
/**
* @doorbell_id: Doorbell assigned to this queue.
@@ -895,11 +902,18 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
if (IS_ERR_OR_NULL(queue))
return;
- drm_sched_entity_destroy(&queue->entity);
+ /* This should have been disabled before that point. */
+ drm_WARN_ON(&group->ptdev->base,
+ disable_delayed_work_sync(&queue->timeout.work));
+
+ if (queue->entity.fence_context)
+ drm_sched_entity_destroy(&queue->entity);
if (queue->scheduler.ops)
drm_sched_fini(&queue->scheduler);
+ kfree(queue->name);
+
panthor_queue_put_syncwait_obj(queue);
panthor_kernel_bo_destroy(queue->ringbuf);
@@ -1039,6 +1053,115 @@ group_unbind_locked(struct panthor_group *group)
return 0;
}
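+/* A group is considered idle when its CSG slot is reported idle by the FW,
+ * or, when off-slot, when all its queues are idle or blocked.
+ */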
+static bool
+group_is_idle(struct panthor_group *group)
+{
+ struct panthor_device *ptdev = group->ptdev;
+ u32 inactive_queues;
+
+ if (group->csg_id >= 0)
+ return ptdev->scheduler->csg_slots[group->csg_id].idle;
+
+ inactive_queues = group->idle_queues | group->blocked_queues;
+ return hweight32(inactive_queues) == group->queue_count;
+}
+
+static void
+queue_reset_timeout_locked(struct panthor_queue *queue)
+{
+ lockdep_assert_held(&queue->fence_ctx.lock);
+
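+ /* Only rearm the timer when it's running; a suspended timeout keeps
+ * its saved remaining time until queue_resume_timeout() is called.
+ */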
+ if (queue->timeout.remaining == MAX_SCHEDULE_TIMEOUT) {
+ mod_delayed_work(queue->scheduler.timeout_wq,
+ &queue->timeout.work,
+ msecs_to_jiffies(JOB_TIMEOUT_MS));
+ }
+}
+
+static bool
+group_can_run(struct panthor_group *group)
+{
+ return group->state != PANTHOR_CS_GROUP_TERMINATED &&
+ group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
+ !group->destroyed && group->fatal_queues == 0 &&
+ !group->timedout;
+}
+
+static bool
+queue_timeout_is_suspended(struct panthor_queue *queue)
+{
+ /* When running, the remaining time is set to MAX_SCHEDULE_TIMEOUT. */
+ return queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT;
+}
+
+static void
+queue_suspend_timeout_locked(struct panthor_queue *queue)
+{
+ unsigned long qtimeout, now;
+ struct panthor_group *group;
+ struct panthor_job *job;
+ bool timer_was_active;
+
+ lockdep_assert_held(&queue->fence_ctx.lock);
+
+ /* Already suspended, nothing to do. */
+ if (queue_timeout_is_suspended(queue))
+ return;
+
+ job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs,
+ struct panthor_job, node);
+ group = job ? job->group : NULL;
+
+ /* If the queue is blocked and the group is idle, we want the timer to
+ * keep running because the group can't be unblocked by other queues,
+ * so it has to come from an external source, and we want to timebox
+ * this external signalling.
+ */
+ if (group && group_can_run(group) &&
+ (group->blocked_queues & BIT(job->queue_idx)) &&
+ group_is_idle(group))
+ return;
+
+ now = jiffies;
+ qtimeout = queue->timeout.work.timer.expires;
+
+ /* Cancel the timer. */
+ timer_was_active = cancel_delayed_work(&queue->timeout.work);
+ if (!timer_was_active || !job)
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+ else if (time_after(qtimeout, now))
+ queue->timeout.remaining = qtimeout - now;
+ else
+ queue->timeout.remaining = 0;
+
+ if (WARN_ON_ONCE(queue->timeout.remaining > msecs_to_jiffies(JOB_TIMEOUT_MS)))
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+}
+
+static void
+queue_suspend_timeout(struct panthor_queue *queue)
+{
+ spin_lock(&queue->fence_ctx.lock);
+ queue_suspend_timeout_locked(queue);
+ spin_unlock(&queue->fence_ctx.lock);
+}
+
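+/*
+ * Restart the timeout with the remaining time saved at suspend, and flag
+ * it as running again by resetting @remaining to MAX_SCHEDULE_TIMEOUT.
+ */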
+static void
+queue_resume_timeout(struct panthor_queue *queue)
+{
+ spin_lock(&queue->fence_ctx.lock);
+
+ if (queue_timeout_is_suspended(queue)) {
+ mod_delayed_work(queue->scheduler.timeout_wq,
+ &queue->timeout.work,
+ queue->timeout.remaining);
+
+ queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT;
+ }
+
+ spin_unlock(&queue->fence_ctx.lock);
+}
+
/**
* cs_slot_prog_locked() - Program a queue slot
* @ptdev: Device.
@@ -1077,10 +1200,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
CS_IDLE_EMPTY |
CS_STATE_MASK |
CS_EXTRACT_EVENT);
- if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
- drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
- queue->timeout_suspended = false;
- }
+ if (queue->iface.input->insert != queue->iface.input->extract)
+ queue_resume_timeout(queue);
}
/**
@@ -1107,14 +1228,7 @@ cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
CS_STATE_STOP,
CS_STATE_MASK);
- /* If the queue is blocked, we want to keep the timeout running, so
- * we can detect unbounded waits and kill the group when that happens.
- */
- if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
- queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
- queue->timeout_suspended = true;
- WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
- }
+ queue_suspend_timeout(queue);
return 0;
}
@@ -1133,11 +1247,13 @@ csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
{
struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
struct panthor_fw_csg_iface *csg_iface;
+ u64 endpoint_req;
lockdep_assert_held(&ptdev->scheduler->lock);
csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
- csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
+ endpoint_req = panthor_fw_csg_endpoint_req_get(ptdev, csg_iface);
+ csg_slot->priority = CSG_EP_REQ_PRIORITY_GET(endpoint_req);
}
/**
@@ -1297,6 +1413,7 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
struct panthor_csg_slot *csg_slot;
struct panthor_group *group;
u32 queue_mask = 0, i;
+ u64 endpoint_req;
lockdep_assert_held(&ptdev->scheduler->lock);
@@ -1323,10 +1440,12 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
csg_iface->input->allow_compute = group->compute_core_mask;
csg_iface->input->allow_fragment = group->fragment_core_mask;
csg_iface->input->allow_other = group->tiler_core_mask;
- csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
- CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
- CSG_EP_REQ_TILER(group->max_tiler_cores) |
- CSG_EP_REQ_PRIORITY(priority);
+ endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
+ CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
+ CSG_EP_REQ_TILER(group->max_tiler_cores) |
+ CSG_EP_REQ_PRIORITY(priority);
+ panthor_fw_csg_endpoint_req_set(ptdev, csg_iface, endpoint_req);
+
csg_iface->input->config = panthor_vm_as(group->vm);
if (group->suspend_buf)
@@ -1411,7 +1530,7 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
fault = cs_iface->output->fault;
info = cs_iface->output->fault_info;
- if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) {
+ if (queue) {
u64 cs_extract = queue->iface.output->extract;
struct panthor_job *job;
@@ -1909,28 +2028,6 @@ tick_ctx_is_full(const struct panthor_scheduler *sched,
return ctx->group_count == sched->csg_slot_count;
}
-static bool
-group_is_idle(struct panthor_group *group)
-{
- struct panthor_device *ptdev = group->ptdev;
- u32 inactive_queues;
-
- if (group->csg_id >= 0)
- return ptdev->scheduler->csg_slots[group->csg_id].idle;
-
- inactive_queues = group->idle_queues | group->blocked_queues;
- return hweight32(inactive_queues) == group->queue_count;
-}
-
-static bool
-group_can_run(struct panthor_group *group)
-{
- return group->state != PANTHOR_CS_GROUP_TERMINATED &&
- group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
- !group->destroyed && group->fatal_queues == 0 &&
- !group->timedout;
-}
-
static void
tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
struct panthor_sched_tick_ctx *ctx,
@@ -2224,9 +2321,9 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c
continue;
}
- panthor_fw_update_reqs(csg_iface, endpoint_req,
- CSG_EP_REQ_PRIORITY(new_csg_prio),
- CSG_EP_REQ_PRIORITY_MASK);
+ panthor_fw_csg_endpoint_req_update(ptdev, csg_iface,
+ CSG_EP_REQ_PRIORITY(new_csg_prio),
+ CSG_EP_REQ_PRIORITY_MASK);
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
CSG_ENDPOINT_CONFIG);
@@ -2612,6 +2709,7 @@ static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
static void queue_stop(struct panthor_queue *queue,
struct panthor_job *bad_job)
{
+ disable_delayed_work_sync(&queue->timeout.work);
drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
}
@@ -2623,6 +2721,7 @@ static void queue_start(struct panthor_queue *queue)
list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
job->base.s_fence->parent = dma_fence_get(job->done_fence);
+ enable_delayed_work(&queue->timeout.work);
drm_sched_start(&queue->scheduler, 0);
}
@@ -2689,7 +2788,6 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_csg_slots_upd_ctx upd_ctx;
- struct panthor_group *group;
u32 suspended_slots;
u32 i;
@@ -2743,13 +2841,23 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
while (slot_mask) {
u32 csg_id = ffs(slot_mask) - 1;
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+ struct panthor_group *group = csg_slot->group;
/* Terminate command timedout, but the soft-reset will
* automatically terminate all active groups, so let's
* force the state to halted here.
*/
- if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
- csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
+ if (group->state != PANTHOR_CS_GROUP_TERMINATED) {
+ group->state = PANTHOR_CS_GROUP_TERMINATED;
+
+ /* Reset the queue slots manually if the termination
+ * request failed.
+ */
+ for (i = 0; i < group->queue_count; i++) {
+ if (group->queues[i])
+ cs_slot_reset_locked(ptdev, csg_id, i);
+ }
+ }
slot_mask &= ~BIT(csg_id);
}
}
@@ -2779,8 +2887,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
for (i = 0; i < sched->csg_slot_count; i++) {
struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
+ struct panthor_group *group = csg_slot->group;
- group = csg_slot->group;
if (!group)
continue;
@@ -2909,35 +3017,47 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
xa_unlock(&gpool->xa);
}
-static void group_sync_upd_work(struct work_struct *work)
+static bool queue_check_job_completion(struct panthor_queue *queue)
{
- struct panthor_group *group =
- container_of(work, struct panthor_group, sync_upd_work);
+ struct panthor_syncobj_64b *syncobj = NULL;
struct panthor_job *job, *job_tmp;
+ bool cookie, progress = false;
LIST_HEAD(done_jobs);
- u32 queue_idx;
- bool cookie;
cookie = dma_fence_begin_signalling();
- for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
- struct panthor_queue *queue = group->queues[queue_idx];
- struct panthor_syncobj_64b *syncobj;
+ spin_lock(&queue->fence_ctx.lock);
+ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
+ if (!syncobj) {
+ struct panthor_group *group = job->group;
- if (!queue)
- continue;
+ syncobj = group->syncobjs->kmap +
+ (job->queue_idx * sizeof(*syncobj));
+ }
- syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
+ if (syncobj->seqno < job->done_fence->seqno)
+ break;
- spin_lock(&queue->fence_ctx.lock);
- list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
- if (syncobj->seqno < job->done_fence->seqno)
- break;
+ list_move_tail(&job->node, &done_jobs);
+ dma_fence_signal_locked(job->done_fence);
+ }
- list_move_tail(&job->node, &done_jobs);
- dma_fence_signal_locked(job->done_fence);
- }
- spin_unlock(&queue->fence_ctx.lock);
+ if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
+ /* If we have no job left, we cancel the timer and reset the
+ * remaining time to its default so it can be restarted the next time
+ * queue_resume_timeout() is called.
+ */
+ queue_suspend_timeout_locked(queue);
+
+ /* If there's no job pending, we consider it progress to avoid a
+ * spurious timeout if the timeout handler and the sync update
+ * handler raced.
+ */
+ progress = true;
+ } else if (!list_empty(&done_jobs)) {
+ queue_reset_timeout_locked(queue);
+ progress = true;
}
+ spin_unlock(&queue->fence_ctx.lock);
dma_fence_end_signalling(cookie);
list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
@@ -2947,6 +3067,27 @@ static void group_sync_upd_work(struct work_struct *work)
panthor_job_put(&job->base);
}
+ return progress;
+}
+
+static void group_sync_upd_work(struct work_struct *work)
+{
+ struct panthor_group *group =
+ container_of(work, struct panthor_group, sync_upd_work);
+ u32 queue_idx;
+ bool cookie;
+
+ cookie = dma_fence_begin_signalling();
+ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
+ struct panthor_queue *queue = group->queues[queue_idx];
+
+ if (!queue)
+ continue;
+
+ queue_check_job_completion(queue);
+ }
+ dma_fence_end_signalling(cookie);
+
group_put(group);
}
@@ -3194,17 +3335,6 @@ queue_run_job(struct drm_sched_job *sched_job)
queue->iface.input->insert = job->ringbuf.end;
if (group->csg_id < 0) {
- /* If the queue is blocked, we want to keep the timeout running, so we
- * can detect unbounded waits and kill the group when that happens.
- * Otherwise, we suspend the timeout so the time we spend waiting for
- * a CSG slot is not counted.
- */
- if (!(group->blocked_queues & BIT(job->queue_idx)) &&
- !queue->timeout_suspended) {
- queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
- queue->timeout_suspended = true;
- }
-
group_schedule_locked(group, BIT(job->queue_idx));
} else {
gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
@@ -3213,6 +3343,7 @@ queue_run_job(struct drm_sched_job *sched_job)
pm_runtime_get(ptdev->base.dev);
sched->pm.has_ref = true;
}
+ queue_resume_timeout(queue);
panthor_devfreq_record_busy(sched->ptdev);
}
@@ -3262,7 +3393,6 @@ queue_timedout_job(struct drm_sched_job *sched_job)
mutex_unlock(&sched->lock);
queue_start(queue);
-
return DRM_GPU_SCHED_STAT_RESET;
}
@@ -3305,11 +3435,23 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64));
}
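+/*
+ * Handler for the per-queue timeout work. The queue is only reported as
+ * faulty to the DRM scheduler if no progress was made, which filters out
+ * races with the sync-update path.
+ */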
+static void queue_timeout_work(struct work_struct *work)
+{
+ struct panthor_queue *queue = container_of(work, struct panthor_queue,
+ timeout.work.work);
+ bool progress;
+
+ progress = queue_check_job_completion(queue);
+ if (!progress)
+ drm_sched_fault(&queue->scheduler);
+}
+
static struct panthor_queue *
group_create_queue(struct panthor_group *group,
- const struct drm_panthor_queue_create *args)
+ const struct drm_panthor_queue_create *args,
+ u64 drm_client_id, u32 gid, u32 qid)
{
- const struct drm_sched_init_args sched_args = {
+ struct drm_sched_init_args sched_args = {
.ops = &panthor_queue_sched_ops,
.submit_wq = group->ptdev->scheduler->wq,
.num_rqs = 1,
@@ -3320,9 +3462,8 @@ group_create_queue(struct panthor_group *group,
* their profiling status.
*/
.credit_limit = args->ringbuf_size / sizeof(u64),
- .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+ .timeout = MAX_SCHEDULE_TIMEOUT,
.timeout_wq = group->ptdev->reset.wq,
- .name = "panthor-queue",
.dev = group->ptdev->base.dev,
};
struct drm_gpu_scheduler *drm_sched;
@@ -3343,6 +3484,8 @@ group_create_queue(struct panthor_group *group,
if (!queue)
return ERR_PTR(-ENOMEM);
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+ INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work);
queue->fence_ctx.id = dma_fence_context_alloc(1);
spin_lock_init(&queue->fence_ctx.lock);
INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
@@ -3397,12 +3540,23 @@ group_create_queue(struct panthor_group *group,
if (ret)
goto err_free_queue;
+ /* Assign a unique scheduler name. */
+ queue->name = kasprintf(GFP_KERNEL, "panthor-queue-%llu-%u-%u", drm_client_id, gid, qid);
+ if (!queue->name) {
+ ret = -ENOMEM;
+ goto err_free_queue;
+ }
+
+ sched_args.name = queue->name;
+
ret = drm_sched_init(&queue->scheduler, &sched_args);
if (ret)
goto err_free_queue;
drm_sched = &queue->scheduler;
ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
+ if (ret)
+ goto err_free_queue;
return queue;
@@ -3446,7 +3600,8 @@ static void add_group_kbo_sizes(struct panthor_device *ptdev,
int panthor_group_create(struct panthor_file *pfile,
const struct drm_panthor_group_create *group_args,
- const struct drm_panthor_queue_create *queue_args)
+ const struct drm_panthor_queue_create *queue_args,
+ u64 drm_client_id)
{
struct panthor_device *ptdev = pfile->ptdev;
struct panthor_group_pool *gpool = pfile->groups;
@@ -3539,12 +3694,16 @@ int panthor_group_create(struct panthor_file *pfile,
memset(group->syncobjs->kmap, 0,
group_args->queues.count * sizeof(struct panthor_syncobj_64b));
+ ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
+ if (ret)
+ goto err_put_group;
+
for (i = 0; i < group_args->queues.count; i++) {
- group->queues[i] = group_create_queue(group, &queue_args[i]);
+ group->queues[i] = group_create_queue(group, &queue_args[i], drm_client_id, gid, i);
if (IS_ERR(group->queues[i])) {
ret = PTR_ERR(group->queues[i]);
group->queues[i] = NULL;
- goto err_put_group;
+ goto err_erase_gid;
}
group->queue_count++;
@@ -3552,10 +3711,6 @@ int panthor_group_create(struct panthor_file *pfile,
group->idle_queues = GENMASK(group->queue_count - 1, 0);
- ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
- if (ret)
- goto err_put_group;
-
mutex_lock(&sched->reset.lock);
if (atomic_read(&sched->reset.in_progress)) {
panthor_group_stop(group);
@@ -3574,6 +3729,9 @@ int panthor_group_create(struct panthor_file *pfile,
return gid;
+err_erase_gid:
+ xa_erase(&gpool->xa, gid);
+
err_put_group:
group_put(group);
return ret;
@@ -3855,7 +4013,9 @@ void panthor_sched_unplug(struct panthor_device *ptdev)
{
struct panthor_scheduler *sched = ptdev->scheduler;
- cancel_delayed_work_sync(&sched->tick_work);
+ disable_delayed_work_sync(&sched->tick_work);
+ disable_work_sync(&sched->fw_events_work);
+ disable_work_sync(&sched->sync_upd_work);
mutex_lock(&sched->lock);
if (sched->pm.has_ref) {
@@ -3873,8 +4033,6 @@ static void panthor_sched_fini(struct drm_device *ddev, void *res)
if (!sched || !sched->csg_slot_count)
return;
- cancel_delayed_work_sync(&sched->tick_work);
-
if (sched->wq)
destroy_workqueue(sched->wq);
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 742b0b4ff3a3..f4a475aa34c0 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -21,7 +21,8 @@ struct panthor_job;
int panthor_group_create(struct panthor_file *pfile,
const struct drm_panthor_group_create *group_args,
- const struct drm_panthor_queue_create *queue_args);
+ const struct drm_panthor_queue_create *queue_args,
+ u64 drm_client_id);
int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
int panthor_group_get_state(struct panthor_file *pfile,
struct drm_panthor_group_get_state *get_state);