summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/Kconfig3
-rw-r--r--drivers/accel/Makefile3
-rw-r--r--drivers/accel/amdxdna/Kconfig18
-rw-r--r--drivers/accel/amdxdna/Makefile25
-rw-r--r--drivers/accel/amdxdna/TODO1
-rw-r--r--drivers/accel/amdxdna/aie2_ctx.c1079
-rw-r--r--drivers/accel/amdxdna/aie2_error.c419
-rw-r--r--drivers/accel/amdxdna/aie2_message.c1074
-rw-r--r--drivers/accel/amdxdna/aie2_msg_priv.h448
-rw-r--r--drivers/accel/amdxdna/aie2_pci.c1187
-rw-r--r--drivers/accel/amdxdna/aie2_pci.h346
-rw-r--r--drivers/accel/amdxdna/aie2_pm.c108
-rw-r--r--drivers/accel/amdxdna/aie2_psp.c146
-rw-r--r--drivers/accel/amdxdna/aie2_smu.c177
-rw-r--r--drivers/accel/amdxdna/aie2_solver.c380
-rw-r--r--drivers/accel/amdxdna/aie2_solver.h155
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.c572
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.h194
-rw-r--r--drivers/accel/amdxdna/amdxdna_error.h59
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.c972
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.h90
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox.c575
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox.h124
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox_helper.c61
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox_helper.h44
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c364
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.h149
-rw-r--r--drivers/accel/amdxdna/amdxdna_pm.c94
-rw-r--r--drivers/accel/amdxdna/amdxdna_pm.h18
-rw-r--r--drivers/accel/amdxdna/amdxdna_sysfs.c67
-rw-r--r--drivers/accel/amdxdna/amdxdna_ubuf.c232
-rw-r--r--drivers/accel/amdxdna/amdxdna_ubuf.h19
-rw-r--r--drivers/accel/amdxdna/npu1_regs.c122
-rw-r--r--drivers/accel/amdxdna/npu2_regs.c115
-rw-r--r--drivers/accel/amdxdna/npu4_regs.c146
-rw-r--r--drivers/accel/amdxdna/npu5_regs.c115
-rw-r--r--drivers/accel/amdxdna/npu6_regs.c116
-rw-r--r--drivers/accel/drm_accel.c126
-rw-r--r--drivers/accel/ethosu/Kconfig11
-rw-r--r--drivers/accel/ethosu/Makefile4
-rw-r--r--drivers/accel/ethosu/ethosu_device.h197
-rw-r--r--drivers/accel/ethosu/ethosu_drv.c403
-rw-r--r--drivers/accel/ethosu/ethosu_drv.h15
-rw-r--r--drivers/accel/ethosu/ethosu_gem.c704
-rw-r--r--drivers/accel/ethosu/ethosu_gem.h46
-rw-r--r--drivers/accel/ethosu/ethosu_job.c497
-rw-r--r--drivers/accel/ethosu/ethosu_job.h40
-rw-r--r--drivers/accel/habanalabs/Kconfig25
-rw-r--r--drivers/accel/habanalabs/common/Makefile5
-rw-r--r--drivers/accel/habanalabs/common/command_submission.c15
-rw-r--r--drivers/accel/habanalabs/common/context.c3
-rw-r--r--drivers/accel/habanalabs/common/debugfs.c348
-rw-r--r--drivers/accel/habanalabs/common/device.c248
-rw-r--r--drivers/accel/habanalabs/common/firmware_if.c229
-rw-r--r--drivers/accel/habanalabs/common/habanalabs.h130
-rw-r--r--drivers/accel/habanalabs/common/habanalabs_drv.c11
-rw-r--r--drivers/accel/habanalabs/common/habanalabs_ioctl.c19
-rw-r--r--drivers/accel/habanalabs/common/hldio.c437
-rw-r--r--drivers/accel/habanalabs/common/hldio.h146
-rw-r--r--drivers/accel/habanalabs/common/hwmon.c60
-rw-r--r--drivers/accel/habanalabs/common/irq.c33
-rw-r--r--drivers/accel/habanalabs/common/memory.c34
-rw-r--r--drivers/accel/habanalabs/common/memory_mgr.c42
-rw-r--r--drivers/accel/habanalabs/common/mmu/mmu.c14
-rw-r--r--drivers/accel/habanalabs/common/pci/pci.c4
-rw-r--r--drivers/accel/habanalabs/common/sysfs.c25
-rw-r--r--drivers/accel/habanalabs/gaudi/gaudi.c31
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2.c476
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2P.h17
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c2
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2_security.c1
-rw-r--r--drivers/accel/habanalabs/goya/goya.c13
-rw-r--r--drivers/accel/habanalabs/include/gaudi2/gaudi2.h4
-rw-r--r--drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h244
-rw-r--r--drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h27
-rw-r--r--drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h8
-rw-r--r--drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h3
-rw-r--r--drivers/accel/ivpu/Kconfig10
-rw-r--r--drivers/accel/ivpu/Makefile18
-rw-r--r--drivers/accel/ivpu/ivpu_coredump.c39
-rw-r--r--drivers/accel/ivpu/ivpu_coredump.h25
-rw-r--r--drivers/accel/ivpu/ivpu_debugfs.c201
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c198
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h112
-rw-r--r--drivers/accel/ivpu/ivpu_fw.c307
-rw-r--r--drivers/accel/ivpu/ivpu_fw.h28
-rw-r--r--drivers/accel/ivpu/ivpu_fw_log.c113
-rw-r--r--drivers/accel/ivpu/ivpu_fw_log.h17
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c256
-rw-r--r--drivers/accel/ivpu/ivpu_gem.h32
-rw-r--r--drivers/accel/ivpu/ivpu_gem_userptr.c213
-rw-r--r--drivers/accel/ivpu/ivpu_hw.c420
-rw-r--r--drivers/accel/ivpu/ivpu_hw.h207
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c1065
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx_reg.h72
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c1250
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx_reg.h96
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs.c905
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs.h50
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h112
-rw-r--r--drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h83
-rw-r--r--drivers/accel/ivpu/ivpu_hw_ip.c1199
-rw-r--r--drivers/accel/ivpu/ivpu_hw_ip.h36
-rw-r--r--drivers/accel/ivpu/ivpu_hw_reg_io.h64
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c66
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.h17
-rw-r--r--drivers/accel/ivpu/ivpu_job.c876
-rw-r--r--drivers/accel/ivpu/ivpu_job.h63
-rw-r--r--drivers/accel/ivpu/ivpu_jsm_msg.c331
-rw-r--r--drivers/accel/ivpu/ivpu_jsm_msg.h24
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c236
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.h6
-rw-r--r--drivers/accel/ivpu/ivpu_mmu_context.c252
-rw-r--r--drivers/accel/ivpu/ivpu_mmu_context.h15
-rw-r--r--drivers/accel/ivpu/ivpu_ms.c342
-rw-r--r--drivers/accel/ivpu/ivpu_ms.h36
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c270
-rw-r--r--drivers/accel/ivpu/ivpu_pm.h12
-rw-r--r--drivers/accel/ivpu/ivpu_sysfs.c159
-rw-r--r--drivers/accel/ivpu/ivpu_sysfs.h13
-rw-r--r--drivers/accel/ivpu/ivpu_trace.h73
-rw-r--r--drivers/accel/ivpu/ivpu_trace_points.c9
-rw-r--r--drivers/accel/ivpu/vpu_boot_api.h70
-rw-r--r--drivers/accel/ivpu/vpu_jsm_api.h883
-rw-r--r--drivers/accel/qaic/Kconfig2
-rw-r--r--drivers/accel/qaic/Makefile3
-rw-r--r--drivers/accel/qaic/mhi_controller.c392
-rw-r--r--drivers/accel/qaic/mhi_controller.h2
-rw-r--r--drivers/accel/qaic/qaic.h66
-rw-r--r--drivers/accel/qaic/qaic_control.c29
-rw-r--r--drivers/accel/qaic/qaic_data.c200
-rw-r--r--drivers/accel/qaic/qaic_debugfs.c50
-rw-r--r--drivers/accel/qaic/qaic_drv.c231
-rw-r--r--drivers/accel/qaic/qaic_ras.c642
-rw-r--r--drivers/accel/qaic/qaic_ras.h10
-rw-r--r--drivers/accel/qaic/qaic_ssr.c815
-rw-r--r--drivers/accel/qaic/qaic_ssr.h17
-rw-r--r--drivers/accel/qaic/qaic_sysfs.c109
-rw-r--r--drivers/accel/qaic/qaic_timesync.c15
-rw-r--r--drivers/accel/qaic/qaic_timesync.h3
-rw-r--r--drivers/accel/qaic/sahara.c560
-rw-r--r--drivers/accel/rocket/Kconfig24
-rw-r--r--drivers/accel/rocket/Makefile10
-rw-r--r--drivers/accel/rocket/rocket_core.c110
-rw-r--r--drivers/accel/rocket/rocket_core.h64
-rw-r--r--drivers/accel/rocket/rocket_device.c60
-rw-r--r--drivers/accel/rocket/rocket_device.h30
-rw-r--r--drivers/accel/rocket/rocket_drv.c290
-rw-r--r--drivers/accel/rocket/rocket_drv.h32
-rw-r--r--drivers/accel/rocket/rocket_gem.c182
-rw-r--r--drivers/accel/rocket/rocket_gem.h34
-rw-r--r--drivers/accel/rocket/rocket_job.c637
-rw-r--r--drivers/accel/rocket/rocket_job.h52
-rw-r--r--drivers/accel/rocket/rocket_registers.h4404
154 files changed, 29928 insertions, 4542 deletions
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 64065fb8922b..bdf48ccafcf2 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -24,8 +24,11 @@ menuconfig DRM_ACCEL
different device files, called accel/accel* (in /dev, sysfs
and debugfs).
+source "drivers/accel/amdxdna/Kconfig"
+source "drivers/accel/ethosu/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
+source "drivers/accel/rocket/Kconfig"
endif
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index ab3df932937f..1d3a7251b950 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
+obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) += ethosu/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
+obj-$(CONFIG_DRM_ACCEL_ROCKET) += rocket/ \ No newline at end of file
diff --git a/drivers/accel/amdxdna/Kconfig b/drivers/accel/amdxdna/Kconfig
new file mode 100644
index 000000000000..f39d7a87296c
--- /dev/null
+++ b/drivers/accel/amdxdna/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_AMDXDNA
+ tristate "AMD AI Engine"
+ depends on AMD_IOMMU
+ depends on DRM_ACCEL
+ depends on PCI && HAS_IOMEM
+ depends on X86_64
+ select DRM_SCHED
+ select DRM_GEM_SHMEM_HELPER
+ select FW_LOADER
+ select HMM_MIRROR
+ help
+ Choose this option to enable support for NPU integrated into AMD
+ client CPUs like AMD Ryzen AI 300 Series. AMD NPU can be used to
+ accelerate machine learning applications.
+
+ If "M" is selected, the driver module will be amdxdna.
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
new file mode 100644
index 000000000000..6344aaf523fa
--- /dev/null
+++ b/drivers/accel/amdxdna/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+amdxdna-y := \
+ aie2_ctx.o \
+ aie2_error.o \
+ aie2_message.o \
+ aie2_pci.o \
+ aie2_pm.o \
+ aie2_psp.o \
+ aie2_smu.o \
+ aie2_solver.o \
+ amdxdna_ctx.o \
+ amdxdna_gem.o \
+ amdxdna_mailbox.o \
+ amdxdna_mailbox_helper.o \
+ amdxdna_pci_drv.o \
+ amdxdna_pm.o \
+ amdxdna_sysfs.o \
+ amdxdna_ubuf.o \
+ npu1_regs.o \
+ npu2_regs.o \
+ npu4_regs.o \
+ npu5_regs.o \
+ npu6_regs.o
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
new file mode 100644
index 000000000000..0e4bbebeaedf
--- /dev/null
+++ b/drivers/accel/amdxdna/TODO
@@ -0,0 +1 @@
+- Add debugfs support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
new file mode 100644
index 000000000000..42d876a427c5
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -0,0 +1,1079 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <linux/hmm.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "aie2_solver.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
+
+static bool force_cmdlist;
+module_param(force_cmdlist, bool, 0600);
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+
+#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
+
+static void aie2_job_release(struct kref *ref)
+{
+ struct amdxdna_sched_job *job;
+
+ job = container_of(ref, struct amdxdna_sched_job, refcnt);
+ amdxdna_sched_job_cleanup(job);
+ atomic64_inc(&job->hwctx->job_free_cnt);
+ wake_up(&job->hwctx->priv->job_free_wq);
+ if (job->out_fence)
+ dma_fence_put(job->out_fence);
+ kfree(job);
+}
+
+static void aie2_job_put(struct amdxdna_sched_job *job)
+{
+ kref_put(&job->refcnt, aie2_job_release);
+}
+
+static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
+{
+ hwctx->old_status = hwctx->status;
+ hwctx->status = HWCTX_STAT_STOP;
+}
+
+static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
+{
+ hwctx->status = hwctx->old_status;
+}
+
+/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
+static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
+ struct drm_sched_job *bad_job)
+{
+ drm_sched_stop(&hwctx->priv->sched, bad_job);
+ aie2_destroy_context(xdna->dev_handle, hwctx);
+}
+
+static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+ int ret;
+
+ ret = aie2_create_context(xdna->dev_handle, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
+ goto out;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
+ goto out;
+ }
+
+ if (hwctx->status != HWCTX_STAT_READY) {
+ XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
+ goto out;
+ }
+
+ ret = aie2_config_cu(hwctx, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
+ goto out;
+ }
+
+out:
+ drm_sched_start(&hwctx->priv->sched, 0);
+ XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
+ return ret;
+}
+
+static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *fence, *out_fence = NULL;
+ int ret;
+
+ fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
+ if (!fence)
+ return NULL;
+
+ ret = dma_fence_chain_find_seqno(&fence, seq);
+ if (ret)
+ goto out;
+
+ out_fence = dma_fence_get(dma_fence_chain_contained(fence));
+
+out:
+ dma_fence_put(fence);
+ return out_fence;
+}
+
+static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
+{
+ struct dma_fence *fence;
+
+ fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
+ if (!fence)
+ return;
+
+ /* Wait up to 2 seconds for fw to finish all pending requests */
+ dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
+ dma_fence_put(fence);
+}
+
+static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ aie2_hwctx_wait_for_idle(hwctx);
+ aie2_hwctx_stop(xdna, hwctx, NULL);
+ aie2_hwctx_status_shift_stop(hwctx);
+
+ return 0;
+}
+
+void aie2_hwctx_suspend(struct amdxdna_client *client)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+
+ /*
+ * Command timeout is unlikely. But if it happens, it doesn't
+ * break the system. aie2_hwctx_stop() will destroy mailbox
+ * and abort all commands.
+ */
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
+}
+
+static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ aie2_hwctx_status_restore(hwctx);
+ return aie2_hwctx_restart(xdna, hwctx);
+}
+
+int aie2_hwctx_resume(struct amdxdna_client *client)
+{
+ /*
+ * The resume path cannot guarantee that mailbox channel can be
+ * regenerated. If this happen, when submit message to this
+ * mailbox channel, error will return.
+ */
+ return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
+}
+
+static void
+aie2_sched_notify(struct amdxdna_sched_job *job)
+{
+ struct dma_fence *fence = job->fence;
+
+ trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+
+ amdxdna_pm_suspend_put(job->hwctx->client->xdna);
+ job->hwctx->priv->completed++;
+ dma_fence_signal(fence);
+
+ up(&job->hwctx->priv->job_sem);
+ job->job_done = true;
+ mmput_async(job->mm);
+ aie2_job_put(job);
+}
+
+static int
+aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ int ret = 0;
+ u32 status;
+
+ cmd_abo = job->cmd_bo;
+
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely(!data) || unlikely(size != sizeof(u32))) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = readl(data);
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ if (status == AIE2_STATUS_SUCCESS)
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ else
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ int ret = 0;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ job->drv_cmd->result = readl(data);
+
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ struct amdxdna_dev *xdna;
+ u32 fail_cmd_status;
+ u32 fail_cmd_idx;
+ u32 cmd_status;
+ int ret = 0;
+
+ cmd_abo = job->cmd_bo;
+
+ if (unlikely(job->job_timeout)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
+ xdna = job->hwctx->client->xdna;
+ XDNA_DBG(xdna, "Status 0x%x", cmd_status);
+ if (cmd_status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ goto out;
+ }
+
+ /* Slow path to handle error, read from ringbuf on BAR */
+ fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
+ fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
+ XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
+ fail_cmd_idx, fail_cmd_status);
+
+ if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+ amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
+ struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
+
+ cc->error_index = fail_cmd_idx;
+ if (cc->error_index >= cc->command_count)
+ cc->error_index = 0;
+ }
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static struct dma_fence *
+aie2_sched_job_run(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct dma_fence *fence;
+ int ret;
+
+ if (!mmget_not_zero(job->mm))
+ return ERR_PTR(-ESRCH);
+
+ kref_get(&job->refcnt);
+ fence = dma_fence_get(job->fence);
+
+ if (job->drv_cmd) {
+ switch (job->drv_cmd->opcode) {
+ case SYNC_DEBUG_BO:
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ case ATTACH_DEBUG_BO:
+ ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ goto out;
+ }
+
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
+ ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else if (force_cmdlist)
+ ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else
+ ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
+
+out:
+ if (ret) {
+ dma_fence_put(job->fence);
+ aie2_job_put(job);
+ mmput(job->mm);
+ fence = ERR_PTR(ret);
+ }
+ trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+
+ return fence;
+}
+
+static void aie2_sched_job_free(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+
+ trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+ if (!job->job_done)
+ up(&hwctx->priv->job_sem);
+
+ drm_sched_job_cleanup(sched_job);
+ aie2_job_put(job);
+}
+
+static enum drm_gpu_sched_stat
+aie2_sched_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct amdxdna_dev *xdna;
+
+ xdna = hwctx->client->xdna;
+ trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ job->job_timeout = true;
+ mutex_lock(&xdna->dev_lock);
+ aie2_hwctx_stop(xdna, hwctx, sched_job);
+
+ aie2_hwctx_restart(xdna, hwctx);
+ mutex_unlock(&xdna->dev_lock);
+
+ return DRM_GPU_SCHED_STAT_RESET;
+}
+
+static const struct drm_sched_backend_ops sched_ops = {
+ .run_job = aie2_sched_job_run,
+ .free_job = aie2_sched_job_free,
+ .timedout_job = aie2_sched_job_timedout,
+};
+
+static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int start, end, first, last;
+ u32 width = 1, entries = 0;
+ int i;
+
+ if (!hwctx->num_tiles) {
+ XDNA_ERR(xdna, "Number of tiles is zero");
+ return -EINVAL;
+ }
+
+ ndev = xdna->dev_handle;
+ if (unlikely(!ndev->metadata.core.row_count)) {
+ XDNA_WARN(xdna, "Core tile row count is zero");
+ return -EINVAL;
+ }
+
+ hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
+ if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
+ XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
+ return -EINVAL;
+ }
+
+ if (ndev->priv->col_align == COL_ALIGN_NATURE)
+ width = hwctx->num_col;
+
+ /*
+ * In range [start, end], find out columns that is multiple of width.
+ * 'first' is the first column,
+ * 'last' is the last column,
+ * 'entries' is the total number of columns.
+ */
+ start = xdna->dev_info->first_col;
+ end = ndev->total_col - hwctx->num_col;
+ if (start > 0 && end == 0) {
+ XDNA_DBG(xdna, "Force start from col 0");
+ start = 0;
+ }
+ first = start + (width - start % width) % width;
+ last = end - end % width;
+ if (last >= first)
+ entries = (last - first) / width + 1;
+ XDNA_DBG(xdna, "start %d end %d first %d last %d",
+ start, end, first, last);
+
+ if (unlikely(!entries)) {
+ XDNA_ERR(xdna, "Start %d end %d width %d",
+ start, end, width);
+ return -EINVAL;
+ }
+
+ hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
+ if (!hwctx->col_list)
+ return -ENOMEM;
+
+ hwctx->col_list_len = entries;
+ hwctx->col_list[0] = first;
+ for (i = 1; i < entries; i++)
+ hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
+
+ print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
+ entries * sizeof(*hwctx->col_list), false);
+ return 0;
+}
+
+static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct alloc_requests *xrs_req;
+ int ret;
+
+ xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
+ if (!xrs_req)
+ return -ENOMEM;
+
+ xrs_req->cdo.start_cols = hwctx->col_list;
+ xrs_req->cdo.cols_len = hwctx->col_list_len;
+ xrs_req->cdo.ncols = hwctx->num_col;
+ xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
+
+ xrs_req->rqos.gops = hwctx->qos.gops;
+ xrs_req->rqos.fps = hwctx->qos.fps;
+ xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
+ xrs_req->rqos.latency = hwctx->qos.latency;
+ xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
+ xrs_req->rqos.priority = hwctx->qos.priority;
+
+ xrs_req->rid = (uintptr_t)hwctx;
+
+ ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
+
+ kfree(xrs_req);
+ return ret;
+}
+
+static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ int ret;
+
+ ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
+}
+
+static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct drm_file *filp = hwctx->client->filp;
+ struct drm_syncobj *syncobj;
+ u32 hdl;
+ int ret;
+
+ hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
+
+ ret = drm_syncobj_create(&syncobj, 0, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
+ return ret;
+ }
+ ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
+ if (ret) {
+ drm_syncobj_put(syncobj);
+ XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
+ return ret;
+ }
+ hwctx->priv->syncobj = syncobj;
+ hwctx->syncobj_hdl = hdl;
+
+ return 0;
+}
+
+static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
+{
+ /*
+ * The syncobj_hdl is owned by user space and will be cleaned up
+ * separately.
+ */
+ drm_syncobj_put(hwctx->priv->syncobj);
+}
+
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ const struct drm_sched_init_args args = {
+ .ops = &sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .credit_limit = HWCTX_MAX_CMDS,
+ .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+ .name = "amdxdna_js",
+ .dev = xdna->ddev.dev,
+ };
+ struct drm_gpu_scheduler *sched;
+ struct amdxdna_hwctx_priv *priv;
+ struct amdxdna_gem_obj *heap;
+ int i, ret;
+
+ priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ hwctx->priv = priv;
+
+ mutex_lock(&client->mm_lock);
+ heap = client->dev_heap;
+ if (!heap) {
+ XDNA_ERR(xdna, "The client dev heap object not exist");
+ mutex_unlock(&client->mm_lock);
+ ret = -ENOENT;
+ goto free_priv;
+ }
+ drm_gem_object_get(to_gobj(heap));
+ mutex_unlock(&client->mm_lock);
+ priv->heap = heap;
+ sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
+
+ ret = amdxdna_gem_pin(heap);
+ if (ret) {
+ XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
+ goto put_heap;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ struct amdxdna_gem_obj *abo;
+ struct amdxdna_drm_create_bo args = {
+ .flags = 0,
+ .type = AMDXDNA_BO_DEV,
+ .vaddr = 0,
+ .size = MAX_CHAIN_CMDBUF_SIZE,
+ };
+
+ abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
+ if (IS_ERR(abo)) {
+ ret = PTR_ERR(abo);
+ goto free_cmd_bufs;
+ }
+
+ XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
+ i, abo->mem.dev_addr, abo->mem.size);
+ priv->cmd_buf[i] = abo;
+ }
+
+ sched = &priv->sched;
+ mutex_init(&priv->io_lock);
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&priv->io_lock);
+ fs_reclaim_release(GFP_KERNEL);
+
+ ret = drm_sched_init(sched, &args);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
+ goto free_cmd_bufs;
+ }
+
+ ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
+ goto free_sched;
+ }
+
+ ret = aie2_hwctx_col_list(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
+ goto free_entity;
+ }
+
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_col_list;
+
+ ret = aie2_alloc_resource(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
+ goto suspend_put;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
+ goto release_resource;
+ }
+
+ ret = aie2_ctx_syncobj_create(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
+ goto release_resource;
+ }
+ amdxdna_pm_suspend_put(xdna);
+
+ hwctx->status = HWCTX_STAT_INIT;
+ init_waitqueue_head(&priv->job_free_wq);
+
+ XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
+
+ return 0;
+
+release_resource:
+ aie2_release_resource(hwctx);
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
+free_col_list:
+ kfree(hwctx->col_list);
+free_entity:
+ drm_sched_entity_destroy(&priv->entity);
+free_sched:
+ drm_sched_fini(&priv->sched);
+free_cmd_bufs:
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ if (!priv->cmd_buf[i])
+ continue;
+ drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
+ }
+ amdxdna_gem_unpin(heap);
+put_heap:
+ drm_gem_object_put(to_gobj(heap));
+free_priv:
+ kfree(priv);
+ return ret;
+}
+
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna;
+ int idx;
+
+ xdna = hwctx->client->xdna;
+
+ XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+ aie2_hwctx_wait_for_idle(hwctx);
+
+ /* Request fw to destroy hwctx and cancel the rest pending requests */
+ aie2_release_resource(hwctx);
+
+ mutex_unlock(&xdna->dev_lock);
+ drm_sched_entity_destroy(&hwctx->priv->entity);
+
+ /* Wait for all submitted jobs to be completed or canceled */
+ wait_event(hwctx->priv->job_free_wq,
+ atomic64_read(&hwctx->job_submit_cnt) ==
+ atomic64_read(&hwctx->job_free_cnt));
+ mutex_lock(&xdna->dev_lock);
+
+ drm_sched_fini(&hwctx->priv->sched);
+ aie2_ctx_syncobj_destroy(hwctx);
+
+ for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
+ drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
+ amdxdna_gem_unpin(hwctx->priv->heap);
+ drm_gem_object_put(to_gobj(hwctx->priv->heap));
+
+ mutex_destroy(&hwctx->priv->io_lock);
+ kfree(hwctx->col_list);
+ kfree(hwctx->priv);
+ kfree(hwctx->cus);
+}
+
+static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_hwctx *hwctx = handle;
+
+ amdxdna_pm_suspend_put(hwctx->client->xdna);
+ return 0;
+}
+
+static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
+{
+ struct amdxdna_hwctx_param_config_cu *config = buf;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 total_size;
+ int ret;
+
+ XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
+ if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
+ return -EINVAL;
+
+ if (hwctx->status != HWCTX_STAT_INIT) {
+ XDNA_ERR(xdna, "Not support re-config CU");
+ return -EINVAL;
+ }
+
+ if (!config->num_cus) {
+ XDNA_ERR(xdna, "Number of CU is zero");
+ return -EINVAL;
+ }
+
+ total_size = struct_size(config, cu_configs, config->num_cus);
+ if (total_size > size) {
+ XDNA_ERR(xdna, "CU config larger than size");
+ return -EINVAL;
+ }
+
+ hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
+ if (!hwctx->cus)
+ return -ENOMEM;
+
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_cus;
+
+ ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
+ if (ret) {
+ XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
+ goto pm_suspend_put;
+ }
+
+ wmb(); /* To avoid locking in command submit when check status */
+ hwctx->status = HWCTX_STAT_READY;
+
+ return 0;
+
+pm_suspend_put:
+ amdxdna_pm_suspend_put(xdna);
+free_cus:
+ kfree(hwctx->cus);
+ hwctx->cus = NULL;
+ return ret;
+}
+
+static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq);
+
+ if (!out_fence) {
+ XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
+ return;
+ }
+
+ dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(out_fence);
+}
+
+static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
+ bool attach)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ struct amdxdna_gem_obj *abo;
+ u64 seq;
+ int ret;
+
+ abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
+ if (!abo) {
+ XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
+ return -EINVAL;
+ }
+
+ if (attach) {
+ if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
+ ret = -EBUSY;
+ goto put_obj;
+ }
+ cmd.opcode = ATTACH_DEBUG_BO;
+ } else {
+ if (abo->assigned_hwctx != hwctx->id) {
+ ret = -EINVAL;
+ goto put_obj;
+ }
+ cmd.opcode = DETACH_DEBUG_BO;
+ }
+
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ goto put_obj;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ goto put_obj;
+ }
+
+ if (attach)
+ abo->assigned_hwctx = hwctx->id;
+ else
+ abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+
+ XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
+
+put_obj:
+ amdxdna_gem_put_obj(abo);
+ return ret;
+}
+
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ switch (type) {
+ case DRM_AMDXDNA_HWCTX_CONFIG_CU:
+ return aie2_hwctx_cu_config(hwctx, buf, size);
+ case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
+ case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
+ return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
+ default:
+ XDNA_DBG(xdna, "Not supported type %d", type);
+ return -EOPNOTSUPP;
+ }
+}
+
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_drv_cmd cmd = { 0 };
+ u64 seq;
+ int ret;
+
+ cmd.opcode = SYNC_DEBUG_BO;
+ ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+ &debug_bo_hdl, 1, hwctx->id, &seq);
+ if (ret) {
+ XDNA_ERR(xdna, "Submit command failed");
+ return ret;
+ }
+
+ aie2_cmd_wait(hwctx, seq);
+ if (cmd.result) {
+ XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int aie2_populate_range(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct amdxdna_umap *mapp;
+ unsigned long timeout;
+ struct mm_struct *mm;
+ bool found;
+ int ret;
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+ found = false;
+ down_write(&xdna->notifier_lock);
+ list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+ if (mapp->invalid) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ abo->mem.map_invalid = false;
+ up_write(&xdna->notifier_lock);
+ return 0;
+ }
+ kref_get(&mapp->refcnt);
+ up_write(&xdna->notifier_lock);
+
+ XDNA_DBG(xdna, "populate memory range %lx %lx",
+ mapp->vma->vm_start, mapp->vma->vm_end);
+ mm = mapp->notifier.mm;
+ if (!mmget_not_zero(mm)) {
+ amdxdna_umap_put(mapp);
+ return -EFAULT;
+ }
+
+ mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
+ mmap_read_lock(mm);
+ ret = hmm_range_fault(&mapp->range);
+ mmap_read_unlock(mm);
+ if (ret) {
+ if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto put_mm;
+ }
+
+ if (ret == -EBUSY) {
+ amdxdna_umap_put(mapp);
+ goto again;
+ }
+
+ goto put_mm;
+ }
+
+ down_write(&xdna->notifier_lock);
+ if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
+ up_write(&xdna->notifier_lock);
+ amdxdna_umap_put(mapp);
+ goto again;
+ }
+ mapp->invalid = false;
+ up_write(&xdna->notifier_lock);
+ amdxdna_umap_put(mapp);
+ goto again;
+
+put_mm:
+ amdxdna_umap_put(mapp);
+ mmput(mm);
+ return ret;
+}
+
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct ww_acquire_ctx acquire_ctx;
+ struct dma_fence_chain *chain;
+ struct amdxdna_gem_obj *abo;
+ unsigned long timeout = 0;
+ int ret, i;
+
+ ret = down_interruptible(&hwctx->priv->job_sem);
+ if (ret) {
+ XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
+ return ret;
+ }
+
+ chain = dma_fence_chain_alloc();
+ if (!chain) {
+ XDNA_ERR(xdna, "Alloc fence chain failed");
+ ret = -ENOMEM;
+ goto up_sem;
+ }
+
+ ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
+ hwctx->client->filp->client_id);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
+ goto free_chain;
+ }
+
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto cleanup_job;
+
+retry:
+ ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
+ goto suspend_put;
+ }
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ goto suspend_put;
+ }
+ }
+
+ down_read(&xdna->notifier_lock);
+ for (i = 0; i < job->bo_cnt; i++) {
+ abo = to_xdna_obj(job->bos[i]);
+ if (abo->mem.map_invalid) {
+ up_read(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (!timeout) {
+ timeout = jiffies +
+ msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ } else if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto suspend_put;
+ }
+
+ ret = aie2_populate_range(abo);
+ if (ret)
+ goto suspend_put;
+ goto retry;
+ }
+ }
+
+ mutex_lock(&hwctx->priv->io_lock);
+ drm_sched_job_arm(&job->base);
+ job->out_fence = dma_fence_get(&job->base.s_fence->finished);
+ for (i = 0; i < job->bo_cnt; i++)
+ dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
+ job->seq = hwctx->priv->seq++;
+ kref_get(&job->refcnt);
+ drm_sched_entity_push_job(&job->base);
+
+ *seq = job->seq;
+ drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
+ mutex_unlock(&hwctx->priv->io_lock);
+
+ up_read(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+
+ aie2_job_put(job);
+ atomic64_inc(&hwctx->job_submit_cnt);
+
+ return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
+cleanup_job:
+ drm_sched_job_cleanup(&job->base);
+free_chain:
+ dma_fence_chain_free(chain);
+up_sem:
+ up(&hwctx->priv->job_sem);
+ job->job_done = true;
+ return ret;
+}
+
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
+ unsigned long cur_seq)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct drm_gem_object *gobj = to_gobj(abo);
+ long ret;
+
+ ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (!ret || ret == -ERESTARTSYS)
+ XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
new file mode 100644
index 000000000000..d452008ec4f4
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "amdxdna_error.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+struct async_event {
+ struct amdxdna_dev_hdl *ndev;
+ struct async_event_msg_resp resp;
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ u8 *buf;
+ dma_addr_t addr;
+ u32 size;
+};
+
+struct async_events {
+ struct workqueue_struct *wq;
+ u8 *buf;
+ dma_addr_t addr;
+ u32 size;
+ u32 event_cnt;
+ struct async_event event[] __counted_by(event_cnt);
+};
+
+/*
+ * Below enum, struct and lookup tables are porting from XAIE util header file.
+ *
+ * Below data is defined by AIE device and it is used for decode error message
+ * from the device.
+ */
+
+enum aie_module_type {
+ AIE_MEM_MOD = 0,
+ AIE_CORE_MOD,
+ AIE_PL_MOD,
+ AIE_UNKNOWN_MOD,
+};
+
+enum aie_error_category {
+ AIE_ERROR_SATURATION = 0,
+ AIE_ERROR_FP,
+ AIE_ERROR_STREAM,
+ AIE_ERROR_ACCESS,
+ AIE_ERROR_BUS,
+ AIE_ERROR_INSTRUCTION,
+ AIE_ERROR_ECC,
+ AIE_ERROR_LOCK,
+ AIE_ERROR_DMA,
+ AIE_ERROR_MEM_PARITY,
+ /* Unknown is not from XAIE, added for better category */
+ AIE_ERROR_UNKNOWN,
+};
+
+/* Don't pack, unless XAIE side changed */
+struct aie_error {
+ __u8 row;
+ __u8 col;
+ __u32 mod_type;
+ __u8 event_id;
+};
+
+struct aie_err_info {
+ u32 err_cnt;
+ u32 ret_code;
+ u32 rsvd;
+ struct aie_error payload[] __counted_by(err_cnt);
+};
+
+struct aie_event_category {
+ u8 event_id;
+ enum aie_error_category category;
+};
+
+#define EVENT_CATEGORY(id, cat) { id, cat }
+static const struct aie_event_category aie_ml_mem_event_cat[] = {
+ EVENT_CATEGORY(88U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(90U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY),
+ EVENT_CATEGORY(97U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(98U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(99U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(100U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(101U, AIE_ERROR_LOCK),
+};
+
+static const struct aie_event_category aie_ml_core_event_cat[] = {
+ EVENT_CATEGORY(55U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(56U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(57U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(58U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION),
+ EVENT_CATEGORY(60U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(62U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(64U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(65U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(66U, AIE_ERROR_ACCESS),
+ EVENT_CATEGORY(67U, AIE_ERROR_LOCK),
+ EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION),
+ EVENT_CATEGORY(71U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(72U, AIE_ERROR_BUS),
+};
+
+static const struct aie_event_category aie_ml_mem_tile_event_cat[] = {
+ EVENT_CATEGORY(130U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(132U, AIE_ERROR_ECC),
+ EVENT_CATEGORY(133U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(134U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(135U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(136U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(137U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(138U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(139U, AIE_ERROR_LOCK),
+};
+
+static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
+ EVENT_CATEGORY(64U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(65U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(66U, AIE_ERROR_STREAM),
+ EVENT_CATEGORY(67U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(68U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(69U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(70U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(71U, AIE_ERROR_BUS),
+ EVENT_CATEGORY(72U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(73U, AIE_ERROR_DMA),
+ EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
+};
+
+static const enum amdxdna_error_num aie_cat_err_num_map[] = {
+ [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION,
+ [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP,
+ [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM,
+ [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS,
+ [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC,
+ [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK,
+ [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA,
+ [AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1);
+
+static const enum amdxdna_error_module aie_err_mod_map[] = {
+ [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE,
+ [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL,
+ [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN,
+};
+
+static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1);
+
+static enum aie_error_category
+aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
+{
+ const struct aie_event_category *lut;
+ int num_entry;
+ int i;
+
+ switch (mod_type) {
+ case AIE_PL_MOD:
+ lut = aie_ml_shim_tile_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_shim_tile_event_cat);
+ break;
+ case AIE_CORE_MOD:
+ lut = aie_ml_core_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_core_event_cat);
+ break;
+ case AIE_MEM_MOD:
+ if (row == 1) {
+ lut = aie_ml_mem_tile_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_mem_tile_event_cat);
+ } else {
+ lut = aie_ml_mem_event_cat;
+ num_entry = ARRAY_SIZE(aie_ml_mem_event_cat);
+ }
+ break;
+ default:
+ return AIE_ERROR_UNKNOWN;
+ }
+
+ for (i = 0; i < num_entry; i++) {
+ if (event_id != lut[i].event_id)
+ continue;
+
+ if (lut[i].category > AIE_ERROR_UNKNOWN)
+ return AIE_ERROR_UNKNOWN;
+
+ return lut[i].category;
+ }
+
+ return AIE_ERROR_UNKNOWN;
+}
+
+static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+ struct aie_error *errs = err_info;
+ enum amdxdna_error_module err_mod;
+ enum aie_error_category aie_err;
+ enum amdxdna_error_num err_num;
+ struct aie_error *last_err;
+
+ last_err = &errs[num_err - 1];
+ if (last_err->mod_type >= AIE_UNKNOWN_MOD) {
+ err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN];
+ err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD];
+ } else {
+ aie_err = aie_get_error_category(last_err->row,
+ last_err->event_id,
+ last_err->mod_type);
+ err_num = aie_cat_err_num_map[aie_err];
+ err_mod = aie_err_mod_map[last_err->mod_type];
+ }
+
+ ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod);
+ ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real());
+ ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col);
+}
+
+static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
+{
+ struct aie_error *errs = err_info;
+ u32 err_col = 0; /* assume that AIE has less than 32 columns */
+ int i;
+
+ /* Get err column bitmap */
+ for (i = 0; i < num_err; i++) {
+ struct aie_error *err = &errs[i];
+ enum aie_error_category cat;
+
+ cat = aie_get_error_category(err->row, err->event_id, err->mod_type);
+ XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d",
+ err->row, err->col, err->mod_type,
+ err->event_id, cat);
+
+ if (err->col >= 32) {
+ XDNA_WARN(ndev->xdna, "Invalid column number");
+ break;
+ }
+
+ err_col |= (1 << err->col);
+ }
+
+ return err_col;
+}
+
+static int aie2_error_async_cb(void *handle, void __iomem *data, size_t size)
+{
+ struct async_event *e = handle;
+
+ if (data) {
+ e->resp.type = readl(data + offsetof(struct async_event_msg_resp, type));
+ wmb(); /* Update status in the end, so that no lock for here */
+ e->resp.status = readl(data + offsetof(struct async_event_msg_resp, status));
+ }
+ queue_work(e->wq, &e->work);
+ return 0;
+}
+
+static int aie2_error_event_send(struct async_event *e)
+{
+ drm_clflush_virt_range(e->buf, e->size); /* device can access */
+ return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e,
+ aie2_error_async_cb);
+}
+
+static void aie2_error_worker(struct work_struct *err_work)
+{
+ struct aie_err_info *info;
+ struct amdxdna_dev *xdna;
+ struct async_event *e;
+ u32 max_err;
+ u32 err_col;
+
+ e = container_of(err_work, struct async_event, work);
+
+ xdna = e->ndev->xdna;
+
+ if (e->resp.status == MAX_AIE2_STATUS_CODE)
+ return;
+
+ e->resp.status = MAX_AIE2_STATUS_CODE;
+
+ print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4,
+ e->buf, 0x100, false);
+
+ info = (struct aie_err_info *)e->buf;
+ XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code);
+
+ max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error);
+ if (unlikely(info->err_cnt > max_err)) {
+ WARN_ONCE(1, "Error count too large %d\n", info->err_cnt);
+ return;
+ }
+ err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt);
+ if (!err_col) {
+ XDNA_WARN(xdna, "Did not get error column");
+ return;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt);
+
+ /* Re-sent this event to firmware */
+ if (aie2_error_event_send(e))
+ XDNA_WARN(xdna, "Unable to register async event");
+ mutex_unlock(&xdna->dev_lock);
+}
+
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct async_events *events;
+
+ events = ndev->async_events;
+
+ mutex_unlock(&xdna->dev_lock);
+ destroy_workqueue(events->wq);
+ mutex_lock(&xdna->dev_lock);
+
+ dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
+ events->addr, DMA_FROM_DEVICE);
+ kfree(events);
+}
+
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 total_col = ndev->total_col;
+ u32 total_size = ASYNC_BUF_SIZE * total_col;
+ struct async_events *events;
+ int i, ret;
+
+ events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL);
+ if (!events)
+ return -ENOMEM;
+
+ events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!events->buf) {
+ ret = -ENOMEM;
+ goto free_events;
+ }
+ events->size = total_size;
+ events->event_cnt = total_col;
+
+ events->wq = alloc_ordered_workqueue("async_wq", 0);
+ if (!events->wq) {
+ ret = -ENOMEM;
+ goto free_buf;
+ }
+
+ for (i = 0; i < events->event_cnt; i++) {
+ struct async_event *e = &events->event[i];
+ u32 offset = i * ASYNC_BUF_SIZE;
+
+ e->ndev = ndev;
+ e->wq = events->wq;
+ e->buf = &events->buf[offset];
+ e->addr = events->addr + offset;
+ e->size = ASYNC_BUF_SIZE;
+ e->resp.status = MAX_AIE2_STATUS_CODE;
+ INIT_WORK(&e->work, aie2_error_worker);
+
+ ret = aie2_error_event_send(e);
+ if (ret)
+ goto free_wq;
+ }
+
+ ndev->async_events = events;
+
+ XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x",
+ events->event_cnt, events->size);
+ return 0;
+
+free_wq:
+ destroy_workqueue(events->wq);
+free_buf:
+ dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
+ events->addr, DMA_FROM_DEVICE);
+free_events:
+ kfree(events);
+ return ret;
+}
+
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ args->num_element = 1;
+ args->element_size = sizeof(ndev->last_async_err);
+ if (copy_to_user(u64_to_user_ptr(args->buffer),
+ &ndev->last_async_err, args->element_size))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
new file mode 100644
index 000000000000..d493bb1c3360
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -0,0 +1,1074 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/bitfield.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+#define DECLARE_AIE2_MSG(name, op) \
+ DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
+
+#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
+
+static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
+ struct xdna_mailbox_msg *msg)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct xdna_notify *hdl = msg->handle;
+ int ret;
+
+ if (!ndev->mgmt_chann)
+ return -ENODEV;
+
+ drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
+ ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
+ if (ret == -ETIME) {
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ }
+
+ if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
+ XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
+ msg->opcode, *hdl->data);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
+{
+ DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
+ int ret;
+
+ req.type = type;
+ req.value = value;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
+{
+ DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
+ int ret;
+
+ req.type = type;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
+ return ret;
+ }
+
+ *value = resp.value;
+ return 0;
+}
+
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
+{
+ DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
+
+ req.pasid = pasid;
+
+ return aie2_send_mgmt_msg_wait(ndev, &msg);
+}
+
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
+{
+ DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
+ resp.major, resp.minor);
+
+ version->major = resp.major;
+ version->minor = resp.minor;
+
+ return 0;
+}
+
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
+{
+ DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ metadata->size = resp.info.size;
+ metadata->cols = resp.info.cols;
+ metadata->rows = resp.info.rows;
+
+ metadata->version.major = resp.info.major;
+ metadata->version.minor = resp.info.minor;
+
+ metadata->core.row_count = resp.info.core_rows;
+ metadata->core.row_start = resp.info.core_row_start;
+ metadata->core.dma_channel_count = resp.info.core_dma_channels;
+ metadata->core.lock_count = resp.info.core_locks;
+ metadata->core.event_reg_count = resp.info.core_events;
+
+ metadata->mem.row_count = resp.info.mem_rows;
+ metadata->mem.row_start = resp.info.mem_row_start;
+ metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
+ metadata->mem.lock_count = resp.info.mem_locks;
+ metadata->mem.event_reg_count = resp.info.mem_events;
+
+ metadata->shim.row_count = resp.info.shim_rows;
+ metadata->shim.row_start = resp.info.shim_row_start;
+ metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
+ metadata->shim.lock_count = resp.info.shim_locks;
+ metadata->shim.event_reg_count = resp.info.shim_events;
+
+ return 0;
+}
+
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_fw_ver *fw_ver)
+{
+ DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
+ int ret;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ fw_ver->major = resp.major;
+ fw_ver->minor = resp.minor;
+ fw_ver->sub = resp.sub;
+ fw_ver->build = resp.build;
+
+ return 0;
+}
+
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct xdna_mailbox_chann_res x2i;
+ struct xdna_mailbox_chann_res i2x;
+ struct cq_pair *cq_pair;
+ u32 intr_reg;
+ int ret;
+
+ req.aie_type = 1;
+ req.start_col = hwctx->start_col;
+ req.num_col = hwctx->num_col;
+ req.num_cq_pairs_requested = 1;
+ req.pasid = hwctx->client->pasid;
+ req.context_priority = 2;
+
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ hwctx->fw_ctx_id = resp.context_id;
+ WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+
+ if (ndev->force_preempt_enabled) {
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
+ return ret;
+ }
+ }
+
+ cq_pair = &resp.cq_pair[0];
+ x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
+ x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
+ x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
+ x2i.rb_size = cq_pair->x2i_q.buf_size;
+
+ i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
+ i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
+ i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
+ i2x.rb_size = cq_pair->i2x_q.buf_size;
+
+ ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
+ if (ret == -EINVAL) {
+ XDNA_ERR(xdna, "not able to create channel");
+ goto out_destroy_context;
+ }
+
+ intr_reg = i2x.mb_head_ptr_reg + 4;
+ hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
+ intr_reg, ret);
+ if (!hwctx->priv->mbox_chann) {
+ XDNA_ERR(xdna, "not able to create channel");
+ ret = -EINVAL;
+ goto out_destroy_context;
+ }
+ ndev->hwctx_num++;
+
+ XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
+ hwctx->name, ret, resp.msix_id);
+ XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
+ hwctx->fw_ctx_id, hwctx->client->pasid);
+
+ return 0;
+
+out_destroy_context:
+ aie2_destroy_context(ndev, hwctx);
+ return ret;
+}
+
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ if (hwctx->fw_ctx_id == -1)
+ return 0;
+
+ xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
+
+ req.context_id = hwctx->fw_ctx_id;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
+
+ xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
+ XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
+ hwctx->fw_ctx_id);
+ hwctx->priv->mbox_chann = NULL;
+ hwctx->fw_ctx_id = -1;
+ ndev->hwctx_num--;
+
+ return ret;
+}
+
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
+{
+ DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ int ret;
+
+ req.context_id = context_id;
+ req.buf_addr = addr;
+ req.buf_size = size;
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret)
+ return ret;
+
+ XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
+ context_id, addr, size);
+
+ return 0;
+}
+
+static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ u32 *bitmap = arg;
+
+ *bitmap |= GENMASK(hwctx->start_col + hwctx->num_col - 1, hwctx->start_col);
+
+ return 0;
+}
+
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
+ u32 size, u32 *cols_filled)
+{
+ DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ struct amdxdna_client *client;
+ dma_addr_t dma_addr;
+ u32 aie_bitmap = 0;
+ u8 *buff_addr;
+ int ret;
+
+ buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!buff_addr)
+ return -ENOMEM;
+
+ /* Go through each hardware context and mark the AIE columns that are active */
+ list_for_each_entry(client, &xdna->client_list, node)
+ amdxdna_hwctx_walk(client, &aie_bitmap, amdxdna_hwctx_col_map);
+
+ *cols_filled = 0;
+ req.dump_buff_addr = dma_addr;
+ req.dump_buff_size = size;
+ req.num_cols = hweight32(aie_bitmap);
+ req.aie_bitmap = aie_bitmap;
+
+ drm_clflush_virt_range(buff_addr, size); /* device can access */
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
+ goto fail;
+ }
+
+ XDNA_DBG(xdna, "Query NPU status completed");
+
+ if (size < resp.size) {
+ ret = -EINVAL;
+ XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+ goto fail;
+ }
+
+ if (copy_to_user(buf, buff_addr, resp.size)) {
+ ret = -EFAULT;
+ XDNA_ERR(xdna, "Failed to copy NPU status to user space");
+ goto fail;
+ }
+
+ *cols_filled = aie_bitmap;
+
+fail:
+ dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE);
+ return ret;
+}
+
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header)
+{
+ DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
+ struct amdxdna_dev *xdna = ndev->xdna;
+ dma_addr_t dma_addr;
+ u8 *addr;
+ int ret;
+
+ if (header->type >= MAX_TELEMETRY_TYPE)
+ return -EINVAL;
+
+ addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+
+ req.buf_addr = dma_addr;
+ req.buf_size = size;
+ req.type = header->type;
+
+ drm_clflush_virt_range(addr, size); /* device can access */
+ ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
+ goto free_buf;
+ }
+
+ if (size < resp.size) {
+ ret = -EINVAL;
+ XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+ goto free_buf;
+ }
+
+ if (copy_to_user(buf, addr, resp.size)) {
+ ret = -EFAULT;
+ XDNA_ERR(xdna, "Failed to copy telemetry to user space");
+ goto free_buf;
+ }
+
+ header->major = resp.major;
+ header->minor = resp.minor;
+
+free_buf:
+ dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE);
+ return ret;
+}
+
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+ void *handle, int (*cb)(void*, void __iomem *, size_t))
+{
+ struct async_event_msg_req req = { 0 };
+ struct xdna_mailbox_msg msg = {
+ .send_data = (u8 *)&req,
+ .send_size = sizeof(req),
+ .handle = handle,
+ .opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
+ .notify_cb = cb,
+ };
+
+ req.buf_addr = addr;
+ req.buf_size = size;
+
+ XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size);
+ return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
+}
+
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 shift = xdna->dev_info->dev_mem_buf_shift;
+ struct config_cu_req req = { 0 };
+ struct xdna_mailbox_msg msg;
+ struct drm_gem_object *gobj;
+ struct amdxdna_gem_obj *abo;
+ int i;
+
+ if (!chann)
+ return -ENODEV;
+
+ if (hwctx->cus->num_cus > MAX_NUM_CUS) {
+ XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < hwctx->cus->num_cus; i++) {
+ struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];
+
+ if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
+ return -EINVAL;
+
+ gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
+ if (!gobj) {
+ XDNA_ERR(xdna, "Lookup GEM object failed");
+ return -EINVAL;
+ }
+ abo = to_xdna_obj(gobj);
+
+ if (abo->type != AMDXDNA_BO_DEV) {
+ drm_gem_object_put(gobj);
+ XDNA_ERR(xdna, "Invalid BO type");
+ return -EINVAL;
+ }
+
+ req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
+ abo->mem.dev_addr >> shift);
+ req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
+ XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
+ abo->mem.dev_addr, req.cfgs[i]);
+ drm_gem_object_put(gobj);
+ }
+ req.num_cus = hwctx->cus->num_cus;
+
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.handle = hwctx;
+ msg.opcode = MSG_OP_CONFIG_CU;
+ msg.notify_cb = notify_cb;
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
+
+static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
+{
+ struct execute_buffer_req *cu_req = req;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len > sizeof(cu_req->payload))
+ return -EINVAL;
+
+ cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cu_req->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memcpy(cu_req->payload, cmd, cmd_len);
+
+ *size = sizeof(*cu_req);
+ *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
+ return 0;
+}
+
+static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
+{
+ struct exec_dpu_req *dpu_req = req;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
+ return -EINVAL;
+
+ dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_req->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ dpu_req->inst_buf_addr = sn->buffer;
+ dpu_req->inst_size = sn->buffer_size;
+ dpu_req->inst_prop_cnt = sn->prop_count;
+ memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
+
+ *size = sizeof(*dpu_req);
+ *msg_op = MSG_OP_EXEC_DPU;
+ return 0;
+}
+
+static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_req *chain_req = req;
+
+ chain_req->buf_addr = slot_addr;
+ chain_req->buf_size = size;
+ chain_req->count = cmd_cnt;
+}
+
+static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_npu_req *npu_chain_req = req;
+
+ npu_chain_req->flags = 0;
+ npu_chain_req->reserved = 0;
+ npu_chain_req->buf_addr = slot_addr;
+ npu_chain_req->buf_size = size;
+ npu_chain_req->count = cmd_cnt;
+}
+
+static int
+aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*cf_slot) + cmd_len)
+ return -EINVAL;
+
+ cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cf_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ cf_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(cf_slot->args, cmd, cmd_len);
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*cf_slot) + cmd_len;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_dpu *dpu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*dpu_slot) + arg_sz)
+ return -EINVAL;
+
+ dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ dpu_slot->inst_buf_addr = sn->buffer;
+ dpu_slot->inst_size = sn->buffer_size;
+ dpu_slot->inst_prop_cnt = sn->prop_count;
+ dpu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(dpu_slot->args, sn->prop_args, arg_sz);
+
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*dpu_slot) + arg_sz;
+ return 0;
+}
+
+static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ return -EOPNOTSUPP;
+}
+
+static u32 aie2_get_chain_msg_op(u32 cmd_op)
+{
+ switch (cmd_op) {
+ case ERT_START_CU:
+ return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+ case ERT_START_NPU:
+ return MSG_OP_CHAIN_EXEC_DPU;
+ default:
+ break;
+ }
+
+ return MSG_OP_MAX_OPCODE;
+}
+
+static struct aie2_exec_msg_ops legacy_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_exec_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_dpu,
+ .fill_preempt_slot = aie2_cmdlist_unsupp,
+ .fill_elf_slot = aie2_cmdlist_unsupp,
+ .get_chain_msg_op = aie2_get_chain_msg_op,
+};
+
+static int
+aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*npu_slot) + cmd_len)
+ return -EINVAL;
+
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
+ npu_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(npu_slot->args, cmd, cmd_len);
+
+ *size = sizeof(*npu_slot) + cmd_len;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
+
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
+ npu_slot->inst_buf_addr = sn->buffer;
+ npu_slot->inst_size = sn->buffer_size;
+ npu_slot->inst_prop_cnt = sn->prop_count;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, sn->prop_args, arg_sz);
+
+ *size = sizeof(*npu_slot) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
+
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, pd->prop_args, arg_sz);
+
+ *size = sizeof(*npu_slot) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_ELF;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = 1;
+ npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;
+
+ *size = struct_size(npu_slot, args, npu_slot->arg_cnt);
+ return 0;
+}
+
+static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
+{
+ return MSG_OP_CHAIN_EXEC_NPU;
+}
+
+static struct aie2_exec_msg_ops npu_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_npu_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+ .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
+ .fill_elf_slot = aie2_cmdlist_fill_npu_elf,
+ .get_chain_msg_op = aie2_get_npu_chain_msg_op,
+};
+
+static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *msg_op)
+{
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
+ int ret;
+ u32 op;
+
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ switch (op) {
+ case ERT_START_CU:
+ ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
+ return ret;
+ }
+ break;
+ case ERT_START_NPU:
+ ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
+ return ret;
+ }
+
+ break;
+ default:
+ XDNA_ERR(xdna, "Unsupported op %d", op);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+static int
+aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *cmd_op)
+{
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ if (*cmd_op == ERT_INVALID_CMD)
+ *cmd_op = op;
+ else if (op != *cmd_op)
+ return -EINVAL;
+
+ switch (op) {
+ case ERT_START_CU:
+ ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU:
+ ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT_ELF:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
+ break;
+ default:
+ XDNA_INFO(xdna, "Unsupported op %d", op);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
+{
+ if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
+ ndev->exec_msg_ops = &npu_exec_message_ops;
+ else
+ ndev->exec_msg_ops = &legacy_exec_message_ops;
+}
+
+static inline struct amdxdna_gem_obj *
+aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
+{
+ int idx = get_job_idx(job->seq);
+
+ return job->hwctx->priv->cmd_buf[idx];
+}
+
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ union exec_req req;
+ int ret;
+
+ if (!chann)
+ return -ENODEV;
+
+ ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
+ if (ret)
+ return ret;
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+ 0x40, false);
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_cmd_chain *payload;
+ struct xdna_mailbox_msg msg;
+ union exec_chain_req req;
+ u32 payload_len;
+ u32 offset = 0;
+ size_t size;
+ int ret;
+ u32 op;
+ u32 i;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+ if (op != ERT_CMD_CHAIN || !payload ||
+ payload_len < struct_size(payload, data, payload->command_count))
+ return -EINVAL;
+
+ op = ERT_INVALID_CMD;
+ for (i = 0; i < payload->command_count; i++) {
+ u32 boh = (u32)(payload->data[i]);
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
+ if (!abo) {
+ XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
+ return -ENOENT;
+ }
+
+ size = cmdbuf_abo->mem.size - offset;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
+ abo, &size, &op);
+ amdxdna_gem_put_obj(abo);
+ if (ret)
+ return ret;
+
+ offset += size;
+ }
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+
+ /* The offset is the accumulated total size of the cmd buffer */
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ offset, payload->command_count);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ union exec_chain_req req;
+ u32 op = ERT_INVALID_CMD;
+ size_t size;
+ int ret;
+
+ size = cmdbuf_abo->mem.size;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
+ if (ret)
+ return ret;
+
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ size, 1);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct xdna_mailbox_msg msg;
+ struct sync_bo_req req;
+ int ret = 0;
+
+ req.src_addr = 0;
+ req.dst_addr = amdxdna_dev_bo_offset(abo);
+ req.size = abo->mem.size;
+
+ /* Device to Host */
+ req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
+ FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
+
+ XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
+ req.size, req.src_addr, req.dst_addr);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_SYNC_BO;
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct config_debug_bo_req req;
+ struct xdna_mailbox_msg msg;
+
+ if (job->drv_cmd->opcode == ATTACH_DEBUG_BO)
+ req.config = DEBUG_BO_REGISTER;
+ else
+ req.config = DEBUG_BO_UNREGISTER;
+
+ req.offset = amdxdna_dev_bo_offset(abo);
+ req.size = abo->mem.size;
+
+ XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d",
+ req.offset, req.size, req.config);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_CONFIG_DEBUG_BO;
+
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
new file mode 100644
index 000000000000..1c957a6298d3
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -0,0 +1,448 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MSG_PRIV_H_
+#define _AIE2_MSG_PRIV_H_
+
+enum aie2_msg_opcode {
+ MSG_OP_CREATE_CONTEXT = 0x2,
+ MSG_OP_DESTROY_CONTEXT = 0x3,
+ MSG_OP_GET_TELEMETRY = 0x4,
+ MSG_OP_SYNC_BO = 0x7,
+ MSG_OP_EXECUTE_BUFFER_CF = 0xC,
+ MSG_OP_QUERY_COL_STATUS = 0xD,
+ MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
+ MSG_OP_QUERY_AIE_VERSION = 0xF,
+ MSG_OP_EXEC_DPU = 0x10,
+ MSG_OP_CONFIG_CU = 0x11,
+ MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
+ MSG_OP_CHAIN_EXEC_DPU = 0x13,
+ MSG_OP_CONFIG_DEBUG_BO = 0x14,
+ MSG_OP_CHAIN_EXEC_NPU = 0x18,
+ MSG_OP_MAX_XRT_OPCODE,
+ MSG_OP_SUSPEND = 0x101,
+ MSG_OP_RESUME = 0x102,
+ MSG_OP_ASSIGN_MGMT_PASID = 0x103,
+ MSG_OP_INVOKE_SELF_TEST = 0x104,
+ MSG_OP_MAP_HOST_BUFFER = 0x106,
+ MSG_OP_GET_FIRMWARE_VERSION = 0x108,
+ MSG_OP_SET_RUNTIME_CONFIG = 0x10A,
+ MSG_OP_GET_RUNTIME_CONFIG = 0x10B,
+ MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C,
+ MSG_OP_MAX_DRV_OPCODE,
+ MSG_OP_GET_PROTOCOL_VERSION = 0x301,
+ MSG_OP_MAX_OPCODE
+};
+
+enum aie2_msg_status {
+ AIE2_STATUS_SUCCESS = 0x0,
+ /* AIE Error codes */
+ AIE2_STATUS_AIE_SATURATION_ERROR = 0x1000001,
+ AIE2_STATUS_AIE_FP_ERROR = 0x1000002,
+ AIE2_STATUS_AIE_STREAM_ERROR = 0x1000003,
+ AIE2_STATUS_AIE_ACCESS_ERROR = 0x1000004,
+ AIE2_STATUS_AIE_BUS_ERROR = 0x1000005,
+ AIE2_STATUS_AIE_INSTRUCTION_ERROR = 0x1000006,
+ AIE2_STATUS_AIE_ECC_ERROR = 0x1000007,
+ AIE2_STATUS_AIE_LOCK_ERROR = 0x1000008,
+ AIE2_STATUS_AIE_DMA_ERROR = 0x1000009,
+ AIE2_STATUS_AIE_MEM_PARITY_ERROR = 0x100000a,
+ AIE2_STATUS_AIE_PWR_CFG_ERROR = 0x100000b,
+ AIE2_STATUS_AIE_BACKTRACK_ERROR = 0x100000c,
+ AIE2_STATUS_MAX_AIE_STATUS_CODE,
+ /* MGMT ERT Error codes */
+ AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE = 0x2000001,
+ AIE2_STATUS_MGMT_ERT_HASH_MISMATCH,
+ AIE2_STATUS_MGMT_ERT_NOAVAIL,
+ AIE2_STATUS_MGMT_ERT_INVALID_PARAM,
+ AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE,
+ AIE2_STATUS_MGMT_ERT_BUSY,
+ AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE,
+ MAX_MGMT_ERT_STATUS_CODE,
+ /* APP ERT Error codes */
+ AIE2_STATUS_APP_ERT_FIRST_ERROR = 0x3000001,
+ AIE2_STATUS_APP_INVALID_INSTR,
+ AIE2_STATUS_APP_LOAD_PDI_FAIL,
+ MAX_APP_ERT_STATUS_CODE,
+ /* NPU RTOS Error Codes */
+ AIE2_STATUS_INVALID_INPUT_BUFFER = 0x4000001,
+ AIE2_STATUS_INVALID_COMMAND,
+ AIE2_STATUS_INVALID_PARAM,
+ AIE2_STATUS_INVALID_OPERATION = 0x4000006,
+ AIE2_STATUS_ASYNC_EVENT_MSGS_FULL,
+ AIE2_STATUS_MAX_RTOS_STATUS_CODE,
+ MAX_AIE2_STATUS_CODE
+};
+
+struct assign_mgmt_pasid_req {
+ __u16 pasid;
+ __u16 reserved;
+} __packed;
+
+struct assign_mgmt_pasid_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct map_host_buffer_req {
+ __u32 context_id;
+ __u64 buf_addr;
+ __u64 buf_size;
+} __packed;
+
+struct map_host_buffer_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+#define MAX_CQ_PAIRS 2
+struct cq_info {
+ __u32 head_addr;
+ __u32 tail_addr;
+ __u32 buf_addr;
+ __u32 buf_size;
+};
+
+struct cq_pair {
+ struct cq_info x2i_q;
+ struct cq_info i2x_q;
+};
+
+struct create_ctx_req {
+ __u32 aie_type;
+ __u8 start_col;
+ __u8 num_col;
+ __u16 reserved;
+ __u8 num_cq_pairs_requested;
+ __u8 reserved1;
+ __u16 pasid;
+ __u32 pad[2];
+ __u32 sec_comm_target_type;
+ __u32 context_priority;
+} __packed;
+
+struct create_ctx_resp {
+ enum aie2_msg_status status;
+ __u32 context_id;
+ __u16 msix_id;
+ __u8 num_cq_pairs_allocated;
+ __u8 reserved;
+ struct cq_pair cq_pair[MAX_CQ_PAIRS];
+} __packed;
+
+struct destroy_ctx_req {
+ __u32 context_id;
+} __packed;
+
+struct destroy_ctx_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+enum telemetry_type {
+ TELEMETRY_TYPE_DISABLED,
+ TELEMETRY_TYPE_HEALTH,
+ TELEMETRY_TYPE_ERROR_INFO,
+ TELEMETRY_TYPE_PROFILING,
+ TELEMETRY_TYPE_DEBUG,
+ MAX_TELEMETRY_TYPE
+};
+
+struct get_telemetry_req {
+ enum telemetry_type type;
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+struct get_telemetry_resp {
+ __u32 major;
+ __u32 minor;
+ __u32 size;
+ enum aie2_msg_status status;
+} __packed;
+
+struct execute_buffer_req {
+ __u32 cu_idx;
+ __u32 payload[19];
+} __packed;
+
+struct exec_dpu_req {
+ __u64 inst_buf_addr;
+ __u32 inst_size;
+ __u32 inst_prop_cnt;
+ __u32 cu_idx;
+ __u32 payload[35];
+} __packed;
+
+enum exec_npu_type {
+ EXEC_NPU_TYPE_NON_ELF = 0x1,
+ EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
+ EXEC_NPU_TYPE_PREEMPT = 0x3,
+ EXEC_NPU_TYPE_ELF = 0x4,
+};
+
+union exec_req {
+ struct execute_buffer_req ebuf;
+ struct exec_dpu_req dpu_req;
+};
+
+struct execute_buffer_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct aie_tile_info {
+ __u32 size;
+ __u16 major;
+ __u16 minor;
+ __u16 cols;
+ __u16 rows;
+ __u16 core_rows;
+ __u16 mem_rows;
+ __u16 shim_rows;
+ __u16 core_row_start;
+ __u16 mem_row_start;
+ __u16 shim_row_start;
+ __u16 core_dma_channels;
+ __u16 mem_dma_channels;
+ __u16 shim_dma_channels;
+ __u16 core_locks;
+ __u16 mem_locks;
+ __u16 shim_locks;
+ __u16 core_events;
+ __u16 mem_events;
+ __u16 shim_events;
+ __u16 reserved;
+};
+
+struct aie_tile_info_req {
+ __u32 reserved;
+} __packed;
+
+struct aie_tile_info_resp {
+ enum aie2_msg_status status;
+ struct aie_tile_info info;
+} __packed;
+
+struct aie_version_info_req {
+ __u32 reserved;
+} __packed;
+
+struct aie_version_info_resp {
+ enum aie2_msg_status status;
+ __u16 major;
+ __u16 minor;
+} __packed;
+
+struct aie_column_info_req {
+ __u64 dump_buff_addr;
+ __u32 dump_buff_size;
+ __u32 num_cols;
+ __u32 aie_bitmap;
+} __packed;
+
+struct aie_column_info_resp {
+ enum aie2_msg_status status;
+ __u32 size;
+} __packed;
+
+struct suspend_req {
+ __u32 place_holder;
+} __packed;
+
+struct suspend_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct resume_req {
+ __u32 place_holder;
+} __packed;
+
+struct resume_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct check_header_hash_req {
+ __u64 hash_high;
+ __u64 hash_low;
+} __packed;
+
+struct check_header_hash_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct query_error_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 next_row;
+ __u32 next_column;
+ __u32 next_module;
+} __packed;
+
+struct query_error_resp {
+ enum aie2_msg_status status;
+ __u32 num_err;
+ __u32 has_next_err;
+ __u32 next_row;
+ __u32 next_column;
+ __u32 next_module;
+} __packed;
+
+struct protocol_version_req {
+ __u32 reserved;
+} __packed;
+
+struct protocol_version_resp {
+ enum aie2_msg_status status;
+ __u32 major;
+ __u32 minor;
+} __packed;
+
+struct firmware_version_req {
+ __u32 reserved;
+} __packed;
+
+struct firmware_version_resp {
+ enum aie2_msg_status status;
+ __u32 major;
+ __u32 minor;
+ __u32 sub;
+ __u32 build;
+} __packed;
+
+#define MAX_NUM_CUS 32
+#define AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0)
+#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17)
+struct config_cu_req {
+ __u32 num_cus;
+ __u32 cfgs[MAX_NUM_CUS];
+} __packed;
+
+struct config_cu_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct set_runtime_cfg_req {
+ __u32 type;
+ __u64 value;
+} __packed;
+
+struct set_runtime_cfg_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+struct get_runtime_cfg_req {
+ __u32 type;
+} __packed;
+
+struct get_runtime_cfg_resp {
+ enum aie2_msg_status status;
+ __u64 value;
+} __packed;
+
+enum async_event_type {
+ ASYNC_EVENT_TYPE_AIE_ERROR,
+ ASYNC_EVENT_TYPE_EXCEPTION,
+ MAX_ASYNC_EVENT_TYPE
+};
+
+#define ASYNC_BUF_SIZE SZ_8K
+struct async_event_msg_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+struct async_event_msg_resp {
+ enum aie2_msg_status status;
+ enum async_event_type type;
+} __packed;
+
+#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
+
+struct cmd_chain_slot_execbuf_cf {
+ __u32 cu_idx;
+ __u32 arg_cnt;
+ __u32 args[] __counted_by(arg_cnt);
+};
+
+struct cmd_chain_slot_dpu {
+ __u64 inst_buf_addr;
+ __u32 inst_size;
+ __u32 inst_prop_cnt;
+ __u32 cu_idx;
+ __u32 arg_cnt;
+#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32))
+ __u32 args[] __counted_by(arg_cnt);
+};
+
+#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3
+struct cmd_chain_slot_npu {
+ enum exec_npu_type type;
+ u64 inst_buf_addr;
+ u64 save_buf_addr;
+ u64 restore_buf_addr;
+ u32 inst_size;
+ u32 save_size;
+ u32 restore_size;
+ u32 inst_prop_cnt;
+ u32 cu_idx;
+ u32 arg_cnt;
+ u32 args[] __counted_by(arg_cnt);
+} __packed;
+
+struct cmd_chain_req {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 count;
+} __packed;
+
+struct cmd_chain_npu_req {
+ u32 flags;
+ u32 reserved;
+ u64 buf_addr;
+ u32 buf_size;
+ u32 count;
+} __packed;
+
+union exec_chain_req {
+ struct cmd_chain_npu_req npu_req;
+ struct cmd_chain_req req;
+};
+
+struct cmd_chain_resp {
+ enum aie2_msg_status status;
+ __u32 fail_cmd_idx;
+ enum aie2_msg_status fail_cmd_status;
+} __packed;
+
+#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0)
+#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4)
+struct sync_bo_req {
+ __u64 src_addr;
+ __u64 dst_addr;
+ __u32 size;
+#define SYNC_BO_DEV_MEM 0
+#define SYNC_BO_HOST_MEM 2
+ __u32 type;
+} __packed;
+
+struct sync_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
+
+#define DEBUG_BO_UNREGISTER 0
+#define DEBUG_BO_REGISTER 1
+struct config_debug_bo_req {
+ __u64 offset;
+ __u64 size;
+ /*
+ * config operations.
+ * DEBUG_BO_REGISTER: Register debug buffer
+ * DEBUG_BO_UNREGISTER: Unregister debug buffer
+ */
+ __u32 config;
+} __packed;
+
+struct config_debug_bo_resp {
+ enum aie2_msg_status status;
+} __packed;
+#endif /* _AIE2_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
new file mode 100644
index 000000000000..ceef1c502e9e
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/cleanup.h>
+#include <linux/errno.h>
+#include <linux/firmware.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "aie2_msg_priv.h"
+#include "aie2_pci.h"
+#include "aie2_solver.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
+
+static int aie2_max_col = XRS_MAX_COL;
+module_param(aie2_max_col, uint, 0600);
+MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
+
+/*
+ * The management mailbox channel is allocated by firmware.
+ * The related register and ring buffer information is on SRAM BAR.
+ * This struct is the register layout.
+ */
+#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
+struct mgmt_mbox_chann_info {
+ __u32 x2i_tail;
+ __u32 x2i_head;
+ __u32 x2i_buf;
+ __u32 x2i_buf_sz;
+ __u32 i2x_tail;
+ __u32 i2x_head;
+ __u32 i2x_buf;
+ __u32 i2x_buf_sz;
+ __u32 magic;
+ __u32 msi_id;
+ __u32 prot_major;
+ __u32 prot_minor;
+ __u32 rsvd[4];
+};
+
+static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
+{
+ const struct aie2_fw_feature_tbl *feature;
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ /*
+ * The driver supported mailbox behavior is defined by
+ * ndev->priv->protocol_major and protocol_minor.
+ *
+ * When protocol_major and fw_major are different, it means driver
+ * and firmware are incompatible.
+ */
+ if (ndev->priv->protocol_major != fw_major) {
+ XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
+ fw_major, fw_minor);
+ return -EINVAL;
+ }
+
+ /*
+ * When protocol_minor is greater then fw_minor, that means driver
+ * relies on operation the installed firmware does not support.
+ */
+ if (ndev->priv->protocol_minor > fw_minor) {
+ XDNA_ERR(xdna, "Firmware minor version smaller than supported");
+ return -EINVAL;
+ }
+
+ for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
+ feature++) {
+ if (fw_minor < feature->min_minor)
+ continue;
+ if (feature->max_minor > 0 && fw_minor > feature->max_minor)
+ continue;
+
+ set_bit(feature->feature, &ndev->feature_mask);
+ }
+
+ return 0;
+}
+
+static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg);
+ XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg);
+ XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr);
+ XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size);
+ XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg);
+ XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg);
+ XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
+ XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size);
+ XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
+ XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
+ XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
+}
+
+static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
+{
+ struct mgmt_mbox_chann_info info_regs;
+ struct xdna_mailbox_chann_res *i2x;
+ struct xdna_mailbox_chann_res *x2i;
+ u32 addr, off;
+ u32 *reg;
+ int ret;
+ int i;
+
+ /*
+ * Once firmware is alive, it will write management channel
+ * information in SRAM BAR and write the address of that information
+ * at FW_ALIVE_OFF offset in SRMA BAR.
+ *
+ * Read a non-zero value from FW_ALIVE_OFF implies that firmware
+ * is alive.
+ */
+ ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF),
+ addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT);
+ if (ret || !addr)
+ return -ETIME;
+
+ off = AIE2_SRAM_OFF(ndev, addr);
+ reg = (u32 *)&info_regs;
+ for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
+ reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
+
+ if (info_regs.magic != MGMT_MBOX_MAGIC) {
+ XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ i2x = &ndev->mgmt_i2x;
+ x2i = &ndev->mgmt_x2i;
+
+ i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head);
+ i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail);
+ i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf);
+ i2x->rb_size = info_regs.i2x_buf_sz;
+
+ x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head);
+ x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
+ x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
+ x2i->rb_size = info_regs.x2i_buf_sz;
+
+ ndev->mgmt_chan_idx = info_regs.msi_id;
+ ndev->mgmt_prot_major = info_regs.prot_major;
+ ndev->mgmt_prot_minor = info_regs.prot_minor;
+
+ ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
+
+done:
+ aie2_dump_chann_info_debug(ndev);
+
+ /* Must clear address at FW_ALIVE_OFF */
+ writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
+
+ return ret;
+}
+
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val)
+{
+ const struct rt_config *cfg;
+ u32 value;
+ int ret;
+
+ for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
+ if (cfg->category != category)
+ continue;
+
+ if (cfg->feature_mask &&
+ bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX))
+ continue;
+
+ value = val ? *val : cfg->value;
+ ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
+ cfg->type, value);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_suspend_fw(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Suspend firmware failed");
+ return ret;
+ }
+
+ ret = aie2_resume_fw(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Resume firmware failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Runtime config failed");
+ return ret;
+ }
+
+ ret = aie2_assign_mgmt_pasid(ndev, 0);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Can not assign PASID");
+ return ret;
+ }
+
+ ret = aie2_xdna_reset(ndev);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Reset firmware failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "query firmware version failed");
+ return ret;
+ }
+
+ ret = aie2_query_aie_version(ndev, &ndev->version);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Query AIE version failed");
+ return ret;
+ }
+
+ ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Query AIE metadata failed");
+ return ret;
+ }
+
+ ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+
+ return 0;
+}
+
+static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
+{
+ if (aie2_suspend_fw(ndev))
+ XDNA_ERR(ndev->xdna, "Suspend_fw failed");
+ XDNA_DBG(ndev->xdna, "Firmware suspended");
+}
+
+static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action)
+{
+ struct amdxdna_hwctx *hwctx = cb_arg;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ xdna = hwctx->client->xdna;
+
+ hwctx->start_col = action->part.start_col;
+ hwctx->num_col = action->part.ncols;
+ ret = aie2_create_context(xdna->dev_handle, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "create context failed, ret %d", ret);
+
+ return ret;
+}
+
+static int aie2_xrs_unload(void *cb_arg)
+{
+ struct amdxdna_hwctx *hwctx = cb_arg;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ xdna = hwctx->client->xdna;
+
+ ret = aie2_destroy_context(xdna->dev_handle, hwctx);
+ if (ret)
+ XDNA_ERR(xdna, "destroy context failed, ret %d", ret);
+
+ return ret;
+}
+
+static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+ struct amdxdna_dev_hdl *ndev;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ ndev = xdna->dev_handle;
+ ndev->dft_dpm_level = dpm_level;
+ if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
+ return 0;
+
+ return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+}
+
+static struct xrs_action_ops aie2_xrs_actions = {
+ .load = aie2_xrs_load,
+ .unload = aie2_xrs_unload,
+ .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
+};
+
+static void aie2_hw_stop(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+ if (ndev->dev_status <= AIE2_DEV_INIT) {
+ XDNA_ERR(xdna, "device is already stopped");
+ return;
+ }
+
+ aie2_mgmt_fw_fini(ndev);
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ drmm_kfree(&xdna->ddev, ndev->mbox);
+ ndev->mbox = NULL;
+ aie2_psp_stop(ndev->psp_hdl);
+ aie2_smu_fini(ndev);
+ aie2_error_async_events_free(ndev);
+ pci_disable_device(pdev);
+
+ ndev->dev_status = AIE2_DEV_INIT;
+}
+
+static int aie2_hw_start(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ struct xdna_mailbox_res mbox_res;
+ u32 xdna_mailbox_intr_reg;
+ int mgmt_mb_irq, ret;
+
+ if (ndev->dev_status >= AIE2_DEV_START) {
+ XDNA_INFO(xdna, "device is already started");
+ return 0;
+ }
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
+ return ret;
+ }
+ pci_set_master(pdev);
+
+ ret = aie2_smu_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
+ goto disable_dev;
+ }
+
+ ret = aie2_psp_start(ndev->psp_hdl);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
+ goto fini_smu;
+ }
+
+ ret = aie2_get_mgmt_chann_info(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "firmware is not alive");
+ goto stop_psp;
+ }
+
+ mbox_res.ringbuf_base = ndev->sram_base;
+ mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
+ mbox_res.mbox_base = ndev->mbox_base;
+ mbox_res.mbox_size = MBOX_SIZE(ndev);
+ mbox_res.name = "xdna_mailbox";
+ ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
+ if (!ndev->mbox) {
+ XDNA_ERR(xdna, "failed to create mailbox device");
+ ret = -ENODEV;
+ goto stop_psp;
+ }
+
+ mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
+ if (mgmt_mb_irq < 0) {
+ ret = mgmt_mb_irq;
+ XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret);
+ goto stop_psp;
+ }
+
+ xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
+ ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
+ &ndev->mgmt_x2i,
+ &ndev->mgmt_i2x,
+ xdna_mailbox_intr_reg,
+ mgmt_mb_irq);
+ if (!ndev->mgmt_chann) {
+ XDNA_ERR(xdna, "failed to create management mailbox channel");
+ ret = -EINVAL;
+ goto stop_psp;
+ }
+
+ ret = aie2_pm_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_mgmt_fw_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_mgmt_fw_query(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_error_async_events_alloc(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ndev->dev_status = AIE2_DEV_START;
+
+ return 0;
+
+destroy_mgmt_chann:
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+stop_psp:
+ aie2_psp_stop(ndev->psp_hdl);
+fini_smu:
+ aie2_smu_fini(ndev);
+disable_dev:
+ pci_disable_device(pdev);
+
+ return ret;
+}
+
+static int aie2_hw_suspend(struct amdxdna_dev *xdna)
+{
+ struct amdxdna_client *client;
+
+ guard(mutex)(&xdna->dev_lock);
+ list_for_each_entry(client, &xdna->client_list, node)
+ aie2_hwctx_suspend(client);
+
+ aie2_hw_stop(xdna);
+
+ return 0;
+}
+
+static int aie2_hw_resume(struct amdxdna_dev *xdna)
+{
+ struct amdxdna_client *client;
+ int ret;
+
+ ret = aie2_hw_start(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "Start hardware failed, %d", ret);
+ return ret;
+ }
+
+ list_for_each_entry(client, &xdna->client_list, node) {
+ ret = aie2_hwctx_resume(client);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int aie2_init(struct amdxdna_dev *xdna)
+{
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
+ struct init_config xrs_cfg = { 0 };
+ struct amdxdna_dev_hdl *ndev;
+ struct psp_config psp_conf;
+ const struct firmware *fw;
+ unsigned long bars = 0;
+ int i, nvec, ret;
+
+ ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
+ if (!ndev)
+ return -ENOMEM;
+
+ ndev->priv = xdna->dev_info->dev_priv;
+ ndev->xdna = xdna;
+
+ ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
+ ndev->priv->fw_path, ret);
+ return ret;
+ }
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
+ goto release_fw;
+ }
+
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ set_bit(PSP_REG_BAR(ndev, i), &bars);
+
+ set_bit(xdna->dev_info->sram_bar, &bars);
+ set_bit(xdna->dev_info->smu_bar, &bars);
+ set_bit(xdna->dev_info->mbox_bar, &bars);
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ if (!test_bit(i, &bars))
+ continue;
+ tbl[i] = pcim_iomap(pdev, i, 0);
+ if (!tbl[i]) {
+ XDNA_ERR(xdna, "map bar %d failed", i);
+ ret = -ENOMEM;
+ goto release_fw;
+ }
+ }
+
+ ndev->sram_base = tbl[xdna->dev_info->sram_bar];
+ ndev->smu_base = tbl[xdna->dev_info->smu_bar];
+ ndev->mbox_base = tbl[xdna->dev_info->mbox_bar];
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
+ goto release_fw;
+ }
+
+ nvec = pci_msix_vec_count(pdev);
+ if (nvec <= 0) {
+ XDNA_ERR(xdna, "does not get number of interrupt vector");
+ ret = -EINVAL;
+ goto release_fw;
+ }
+
+ ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
+ goto release_fw;
+ }
+
+ psp_conf.fw_size = fw->size;
+ psp_conf.fw_buf = fw->data;
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
+ ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf);
+ if (!ndev->psp_hdl) {
+ XDNA_ERR(xdna, "failed to create psp");
+ ret = -ENOMEM;
+ goto release_fw;
+ }
+ xdna->dev_handle = ndev;
+
+ ret = aie2_hw_start(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "start npu failed, ret %d", ret);
+ goto release_fw;
+ }
+
+ xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+ for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+ xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
+ xrs_cfg.sys_eff_factor = 1;
+ xrs_cfg.ddev = &xdna->ddev;
+ xrs_cfg.actions = &aie2_xrs_actions;
+ xrs_cfg.total_col = ndev->total_col;
+
+ xdna->xrs_hdl = xrsm_init(&xrs_cfg);
+ if (!xdna->xrs_hdl) {
+ XDNA_ERR(xdna, "Initialize resolver failed");
+ ret = -EINVAL;
+ goto stop_hw;
+ }
+
+ release_firmware(fw);
+ aie2_msg_init(ndev);
+ amdxdna_pm_init(xdna);
+ return 0;
+
+stop_hw:
+ aie2_hw_stop(xdna);
+release_fw:
+ release_firmware(fw);
+
+ return ret;
+}
+
+static void aie2_fini(struct amdxdna_dev *xdna)
+{
+ amdxdna_pm_fini(xdna);
+ aie2_hw_stop(xdna);
+}
+
+static int aie2_get_aie_status(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_status status;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret;
+
+ ndev = xdna->dev_handle;
+ if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) {
+ XDNA_ERR(xdna, "Failed to copy AIE request into kernel");
+ return -EFAULT;
+ }
+
+ if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) {
+ XDNA_ERR(xdna, "Invalid buffer size. Given Size: %u. Need Size: %u.",
+ status.buffer_size, ndev->metadata.cols * ndev->metadata.size);
+ return -EINVAL;
+ }
+
+ ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer),
+ status.buffer_size, &status.cols_filled);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to get AIE status info. Ret: %d", ret);
+ return ret;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) {
+ XDNA_ERR(xdna, "Failed to copy AIE request info to user space");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int aie2_get_aie_metadata(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_metadata *meta;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret = 0;
+
+ ndev = xdna->dev_handle;
+ meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+ if (!meta)
+ return -ENOMEM;
+
+ meta->col_size = ndev->metadata.size;
+ meta->cols = ndev->metadata.cols;
+ meta->rows = ndev->metadata.rows;
+
+ meta->version.major = ndev->metadata.version.major;
+ meta->version.minor = ndev->metadata.version.minor;
+
+ meta->core.row_count = ndev->metadata.core.row_count;
+ meta->core.row_start = ndev->metadata.core.row_start;
+ meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
+ meta->core.lock_count = ndev->metadata.core.lock_count;
+ meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
+
+ meta->mem.row_count = ndev->metadata.mem.row_count;
+ meta->mem.row_start = ndev->metadata.mem.row_start;
+ meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
+ meta->mem.lock_count = ndev->metadata.mem.lock_count;
+ meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
+
+ meta->shim.row_count = ndev->metadata.shim.row_count;
+ meta->shim.row_start = ndev->metadata.shim.row_start;
+ meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
+ meta->shim.lock_count = ndev->metadata.shim.lock_count;
+ meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), meta, sizeof(*meta)))
+ ret = -EFAULT;
+
+ kfree(meta);
+ return ret;
+}
+
+static int aie2_get_aie_version(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_version version;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ version.major = ndev->version.major;
+ version.minor = ndev->version.minor;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_firmware_version(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_firmware_version version;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ version.major = xdna->fw_ver.major;
+ version.minor = xdna->fw_ver.minor;
+ version.patch = xdna->fw_ver.sub;
+ version.build = xdna->fw_ver.build;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_power_mode mode = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ mode.power_mode = ndev->pw_mode;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_clock_metadata(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_clock_metadata *clock;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+ int ret = 0;
+
+ ndev = xdna->dev_handle;
+ clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+ if (!clock)
+ return -ENOMEM;
+
+ snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
+ "MP-NPU Clock");
+ clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+ snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
+ clock->h_clock.freq_mhz = ndev->hclk_freq;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
+ ret = -EFAULT;
+
+ kfree(clock);
+ return ret;
+}
+
+static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ struct amdxdna_drm_hwctx_entry *tmp __free(kfree) = NULL;
+ struct amdxdna_drm_get_array *array_args = arg;
+ struct amdxdna_drm_hwctx_entry __user *buf;
+ u32 size;
+
+ if (!array_args->num_element)
+ return -EINVAL;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ tmp->pid = hwctx->client->pid;
+ tmp->context_id = hwctx->id;
+ tmp->start_col = hwctx->start_col;
+ tmp->num_col = hwctx->num_col;
+ tmp->command_submissions = hwctx->priv->seq;
+ tmp->command_completions = hwctx->priv->completed;
+ tmp->pasid = hwctx->client->pasid;
+ tmp->priority = hwctx->qos.priority;
+ tmp->gops = hwctx->qos.gops;
+ tmp->fps = hwctx->qos.fps;
+ tmp->dma_bandwidth = hwctx->qos.dma_bandwidth;
+ tmp->latency = hwctx->qos.latency;
+ tmp->frame_exec_time = hwctx->qos.frame_exec_time;
+ tmp->state = AMDXDNA_HWCTX_STATE_ACTIVE;
+
+ buf = u64_to_user_ptr(array_args->buffer);
+ size = min(sizeof(*tmp), array_args->element_size);
+
+ if (copy_to_user(buf, tmp, size))
+ return -EFAULT;
+
+ array_args->buffer += size;
+ array_args->num_element--;
+
+ return 0;
+}
+
+static int aie2_get_hwctx_status(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_array array_args;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_client *tmp_client;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ array_args.element_size = sizeof(struct amdxdna_drm_query_hwctx);
+ array_args.buffer = args->buffer;
+ array_args.num_element = args->buffer_size / array_args.element_size;
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ ret = amdxdna_hwctx_walk(tmp_client, &array_args,
+ aie2_hwctx_status_cb);
+ if (ret)
+ break;
+ }
+
+ args->buffer_size -= (u32)(array_args.buffer - args->buffer);
+ return 0;
+}
+
+static int aie2_query_resource_info(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_resource_info res_info;
+ const struct amdxdna_dev_priv *priv;
+ struct amdxdna_dev_hdl *ndev;
+ struct amdxdna_dev *xdna;
+
+ xdna = client->xdna;
+ ndev = xdna->dev_handle;
+ priv = ndev->priv;
+
+ res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
+ res_info.npu_tops_max = ndev->max_tops;
+ res_info.npu_task_max = priv->hwctx_limit;
+ res_info.npu_tops_curr = ndev->curr_tops;
+ res_info.npu_task_curr = ndev->hwctx_num;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &res_info, sizeof(res_info)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ u32 *map = arg;
+
+ if (hwctx->fw_ctx_id >= xdna->dev_handle->priv->hwctx_limit) {
+ XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id,
+ xdna->dev_handle->priv->hwctx_limit);
+ return -EINVAL;
+ }
+
+ map[hwctx->fw_ctx_id] = hwctx->id;
+ return 0;
+}
+
+static int aie2_get_telemetry(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL;
+ u32 telemetry_data_sz, header_sz, elem_num;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_client *tmp_client;
+ int ret;
+
+ elem_num = xdna->dev_handle->priv->hwctx_limit;
+ header_sz = struct_size(header, map, elem_num);
+ if (args->buffer_size <= header_sz) {
+ XDNA_ERR(xdna, "Invalid buffer size");
+ return -EINVAL;
+ }
+
+ telemetry_data_sz = args->buffer_size - header_sz;
+ if (telemetry_data_sz > SZ_4M) {
+ XDNA_ERR(xdna, "Buffer size is too big, %d", telemetry_data_sz);
+ return -EINVAL;
+ }
+
+ header = kzalloc(header_sz, GFP_KERNEL);
+ if (!header)
+ return -ENOMEM;
+
+ if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) {
+ XDNA_ERR(xdna, "Failed to copy telemetry header from user");
+ return -EFAULT;
+ }
+
+ header->map_num_elements = elem_num;
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ ret = amdxdna_hwctx_walk(tmp_client, &header->map,
+ aie2_fill_hwctx_map);
+ if (ret)
+ return ret;
+ }
+
+ ret = aie2_query_telemetry(xdna->dev_handle,
+ u64_to_user_ptr(args->buffer + header_sz),
+ telemetry_data_sz, header);
+ if (ret) {
+ XDNA_ERR(xdna, "Query telemetry failed ret %d", ret);
+ return ret;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) {
+ XDNA_ERR(xdna, "Copy header failed");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int aie2_get_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_attribute_state state = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE)
+ state.state = ndev->force_preempt_enabled;
+ else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE)
+ state.state = ndev->frame_boundary_preempt;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_QUERY_AIE_STATUS:
+ ret = aie2_get_aie_status(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_AIE_METADATA:
+ ret = aie2_get_aie_metadata(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_AIE_VERSION:
+ ret = aie2_get_aie_version(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_CLOCK_METADATA:
+ ret = aie2_get_clock_metadata(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
+ ret = aie2_get_hwctx_status(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
+ ret = aie2_get_firmware_version(client, args);
+ break;
+ case DRM_AMDXDNA_GET_POWER_MODE:
+ ret = aie2_get_power_mode(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_TELEMETRY:
+ ret = aie2_get_telemetry(client, args);
+ break;
+ case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
+ ret = aie2_query_resource_info(client, args);
+ break;
+ case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE:
+ case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE:
+ ret = aie2_get_preempt_state(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ }
+
+ amdxdna_pm_suspend_put(xdna);
+ XDNA_DBG(xdna, "Got param %d", args->param);
+
+dev_exit:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+static int aie2_query_ctx_status_array(struct amdxdna_client *client,
+ struct amdxdna_drm_get_array *args)
+{
+ struct amdxdna_drm_get_array array_args;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_client *tmp_client;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (args->element_size > SZ_4K || args->num_element > SZ_1K) {
+ XDNA_DBG(xdna, "Invalid element size %d or number of element %d",
+ args->element_size, args->num_element);
+ return -EINVAL;
+ }
+
+ array_args.element_size = min(args->element_size,
+ sizeof(struct amdxdna_drm_hwctx_entry));
+ array_args.buffer = args->buffer;
+ array_args.num_element = args->num_element * args->element_size /
+ array_args.element_size;
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ ret = amdxdna_hwctx_walk(tmp_client, &array_args,
+ aie2_hwctx_status_cb);
+ if (ret)
+ break;
+ }
+
+ args->element_size = array_args.element_size;
+ args->num_element = (u32)((array_args.buffer - args->buffer) /
+ args->element_size);
+
+ return 0;
+}
+
+static int aie2_get_array(struct amdxdna_client *client,
+ struct amdxdna_drm_get_array *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_HW_CONTEXT_ALL:
+ ret = aie2_query_ctx_status_array(client, args);
+ break;
+ case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+ ret = aie2_get_array_async_error(xdna->dev_handle, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ }
+
+ amdxdna_pm_suspend_put(xdna);
+ XDNA_DBG(xdna, "Got param %d", args->param);
+
+dev_exit:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+static int aie2_set_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_drm_set_power_mode power_state;
+ enum amdxdna_power_mode_type power_mode;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+ sizeof(power_state))) {
+ XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+ return -EFAULT;
+ }
+
+ if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
+ return -EINVAL;
+
+ power_mode = power_state.power_mode;
+ if (power_mode > POWER_MODE_TURBO) {
+ XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+ return -EINVAL;
+ }
+
+ return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+static int aie2_set_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+ struct amdxdna_drm_attribute_state state;
+ u32 val;
+ int ret;
+
+ if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state)))
+ return -EFAULT;
+
+ if (state.state > 1)
+ return -EINVAL;
+
+ if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad)))
+ return -EINVAL;
+
+ if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) {
+ ndev->force_preempt_enabled = state.state;
+ } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) {
+ val = state.state;
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+ &val);
+ if (ret)
+ return ret;
+
+ ndev->frame_boundary_preempt = state.state;
+ }
+
+ return 0;
+}
+
+static int aie2_set_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_SET_POWER_MODE:
+ ret = aie2_set_power_mode(client, args);
+ break;
+ case DRM_AMDXDNA_SET_FORCE_PREEMPT:
+ case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT:
+ ret = aie2_set_preempt_state(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ amdxdna_pm_suspend_put(xdna);
+dev_exit:
+ drm_dev_exit(idx);
+ return ret;
+}
+
+const struct amdxdna_dev_ops aie2_ops = {
+ .init = aie2_init,
+ .fini = aie2_fini,
+ .resume = aie2_hw_resume,
+ .suspend = aie2_hw_suspend,
+ .get_aie_info = aie2_get_info,
+ .set_aie_state = aie2_set_state,
+ .hwctx_init = aie2_hwctx_init,
+ .hwctx_fini = aie2_hwctx_fini,
+ .hwctx_config = aie2_hwctx_config,
+ .hwctx_sync_debug_bo = aie2_hwctx_sync_debug_bo,
+ .cmd_submit = aie2_cmd_submit,
+ .hmm_invalidate = aie2_hmm_invalidate,
+ .get_array = aie2_get_array,
+};
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
new file mode 100644
index 000000000000..a5f9c42155d1
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_PCI_H_
+#define _AIE2_PCI_H_
+
+#include <drm/amdxdna_accel.h>
+#include <linux/semaphore.h>
+
+#include "amdxdna_mailbox.h"
+
+#define AIE2_INTERVAL 20000 /* us */
+#define AIE2_TIMEOUT 1000000 /* us */
+
+/* Firmware determines device memory base address and size */
+#define AIE2_DEVM_BASE 0x4000000
+#define AIE2_DEVM_SIZE SZ_64M
+
+#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))
+
+#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
+#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)
+
+#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
+#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
+#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
+
+#define SMU_REG(ndev, idx) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
+})
+#define SRAM_GET_ADDR(ndev, idx) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
+})
+
+#define CHAN_SLOT_SZ SZ_8K
+#define MBOX_SIZE(ndev) \
+({ \
+ typeof(ndev) _ndev = (ndev); \
+ ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
+ pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
+})
+
+enum aie2_smu_reg_idx {
+ SMU_CMD_REG = 0,
+ SMU_ARG_REG,
+ SMU_INTR_REG,
+ SMU_RESP_REG,
+ SMU_OUT_REG,
+ SMU_MAX_REGS /* Keep this at the end */
+};
+
+enum aie2_sram_reg_idx {
+ MBOX_CHANN_OFF = 0,
+ FW_ALIVE_OFF,
+ SRAM_MAX_INDEX /* Keep this at the end */
+};
+
+enum psp_reg_idx {
+ PSP_CMD_REG = 0,
+ PSP_ARG0_REG,
+ PSP_ARG1_REG,
+ PSP_ARG2_REG,
+ PSP_NUM_IN_REGS, /* number of input registers */
+ PSP_INTR_REG = PSP_NUM_IN_REGS,
+ PSP_STATUS_REG,
+ PSP_RESP_REG,
+ PSP_MAX_REGS /* Keep this at the end */
+};
+
+struct amdxdna_client;
+struct amdxdna_fw_ver;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+struct psp_config {
+ const void *fw_buf;
+ u32 fw_size;
+ void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+struct aie_version {
+ u16 major;
+ u16 minor;
+};
+
+struct aie_tile_metadata {
+ u16 row_count;
+ u16 row_start;
+ u16 dma_channel_count;
+ u16 lock_count;
+ u16 event_reg_count;
+};
+
+struct aie_metadata {
+ u32 size;
+ u16 cols;
+ u16 rows;
+ struct aie_version version;
+ struct aie_tile_metadata core;
+ struct aie_tile_metadata mem;
+ struct aie_tile_metadata shim;
+};
+
+enum rt_config_category {
+ AIE2_RT_CFG_INIT,
+ AIE2_RT_CFG_CLK_GATING,
+ AIE2_RT_CFG_FORCE_PREEMPT,
+ AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+};
+
+struct rt_config {
+ u32 type;
+ u32 value;
+ u32 category;
+ unsigned long feature_mask;
+};
+
+struct dpm_clk_freq {
+ u32 npuclk;
+ u32 hclk;
+};
+
+/*
+ * Define the maximum number of pending commands in a hardware context.
+ * Must be power of 2!
+ */
+#define HWCTX_MAX_CMDS 4
+#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
+struct amdxdna_hwctx_priv {
+ struct amdxdna_gem_obj *heap;
+ void *mbox_chann;
+
+ struct drm_gpu_scheduler sched;
+ struct drm_sched_entity entity;
+
+ struct mutex io_lock; /* protect seq and cmd order */
+ struct wait_queue_head job_free_wq;
+ u32 num_pending;
+ u64 seq;
+ struct semaphore job_sem;
+ bool job_done;
+
+ /* Completed job counter */
+ u64 completed;
+
+ struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
+ struct drm_syncobj *syncobj;
+};
+
+enum aie2_dev_status {
+ AIE2_DEV_UNINIT,
+ AIE2_DEV_INIT,
+ AIE2_DEV_START,
+};
+
+struct aie2_exec_msg_ops {
+ int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
+ int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ u32 (*get_chain_msg_op)(u32 cmd_op);
+};
+
+struct amdxdna_dev_hdl {
+ struct amdxdna_dev *xdna;
+ const struct amdxdna_dev_priv *priv;
+ void __iomem *sram_base;
+ void __iomem *smu_base;
+ void __iomem *mbox_base;
+ struct psp_device *psp_hdl;
+
+ struct xdna_mailbox_chann_res mgmt_x2i;
+ struct xdna_mailbox_chann_res mgmt_i2x;
+ u32 mgmt_chan_idx;
+ u32 mgmt_prot_major;
+ u32 mgmt_prot_minor;
+
+ u32 total_col;
+ struct aie_version version;
+ struct aie_metadata metadata;
+ unsigned long feature_mask;
+ struct aie2_exec_msg_ops *exec_msg_ops;
+
+ /* power management and clock*/
+ enum amdxdna_power_mode_type pw_mode;
+ u32 dpm_level;
+ u32 dft_dpm_level;
+ u32 max_dpm_level;
+ u32 clk_gating;
+ u32 npuclk_freq;
+ u32 hclk_freq;
+ u32 max_tops;
+ u32 curr_tops;
+ u32 force_preempt_enabled;
+ u32 frame_boundary_preempt;
+
+ /* Mailbox and the management channel */
+ struct mailbox *mbox;
+ struct mailbox_channel *mgmt_chann;
+ struct async_events *async_events;
+
+ enum aie2_dev_status dev_status;
+ u32 hwctx_num;
+
+ struct amdxdna_async_error last_async_err;
+};
+
+#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
+ [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}
+
+struct aie2_bar_off_pair {
+ int bar_idx;
+ u32 offset;
+};
+
+struct aie2_hw_ops {
+ int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
+enum aie2_fw_feature {
+ AIE2_NPU_COMMAND,
+ AIE2_PREEMPT,
+ AIE2_FEATURE_MAX
+};
+
+struct aie2_fw_feature_tbl {
+ enum aie2_fw_feature feature;
+ u32 max_minor;
+ u32 min_minor;
+};
+
+#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
+
+struct amdxdna_dev_priv {
+ const char *fw_path;
+ u64 protocol_major;
+ u64 protocol_minor;
+ const struct rt_config *rt_config;
+ const struct dpm_clk_freq *dpm_clk_tbl;
+ const struct aie2_fw_feature_tbl *fw_feature_tbl;
+
+#define COL_ALIGN_NONE 0
+#define COL_ALIGN_NATURE 1
+ u32 col_align;
+ u32 mbox_dev_addr;
+ /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
+ u32 mbox_size;
+ u32 hwctx_limit;
+ u32 sram_dev_addr;
+ struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
+ struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
+ struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
+ struct aie2_hw_ops hw_ops;
+};
+
+extern const struct amdxdna_dev_ops aie2_ops;
+
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
+
+/* aie2_smu.c */
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
+
+/* aie2_psp.c */
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
+int aie2_psp_start(struct psp_device *psp);
+void aie2_psp_stop(struct psp_device *psp);
+
+/* aie2_error.c */
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
+int aie2_error_async_msg_thread(void *data);
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_drm_get_array *args);
+
+/* aie2_message.c */
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+ struct amdxdna_fw_ver *fw_ver);
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header);
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+ void *handle, int (*cb)(void*, void __iomem *, size_t));
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t));
+
+/* aie2_hwctx.c */
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
+void aie2_hwctx_suspend(struct amdxdna_client *client);
+int aie2_hwctx_resume(struct amdxdna_client *client);
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+
+#endif /* _AIE2_PCI_H_ */
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644
index 000000000000..426c38fce848
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE 1
+#define AIE2_CLK_GATING_DISABLE 0
+
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+ int ret;
+
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+ if (ret)
+ return ret;
+
+ ndev->clk_gating = val;
+ return 0;
+}
+
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ if (ndev->dev_status != AIE2_DEV_UNINIT) {
+ /* Resume device */
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+ if (ret)
+ return ret;
+
+ return 0;
+ }
+
+ while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+ ndev->max_dpm_level++;
+ ndev->max_dpm_level--;
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = POWER_MODE_DEFAULT;
+ ndev->dft_dpm_level = ndev->max_dpm_level;
+
+ return 0;
+}
+
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 clk_gating, dpm_level;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (ndev->pw_mode == target)
+ return 0;
+
+ switch (target) {
+ case POWER_MODE_TURBO:
+ if (ndev->hwctx_num) {
+ XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
+ return -EINVAL;
+ }
+
+ clk_gating = AIE2_CLK_GATING_DISABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_HIGH:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_DEFAULT:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->dft_dpm_level;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, clk_gating);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = target;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
new file mode 100644
index 000000000000..f28a060a8810
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+#define PSP_STATUS_READY BIT(31)
+
+/* PSP commands */
+#define PSP_VALIDATE 1
+#define PSP_START 2
+#define PSP_RELEASE_TMR 3
+
+/* PSP special arguments */
+#define PSP_START_COPY_FW 1
+
+/* PSP response error code */
+#define PSP_ERROR_CANCEL 0xFFFF0002
+#define PSP_ERROR_BAD_STATE 0xFFFF0007
+
+#define PSP_FW_ALIGN 0x10000
+#define PSP_POLL_INTERVAL 20000 /* us */
+#define PSP_POLL_TIMEOUT 1000000 /* us */
+
+#define PSP_REG(p, reg) ((p)->psp_regs[reg])
+
+struct psp_device {
+ struct drm_device *ddev;
+ struct psp_config conf;
+ u32 fw_buf_sz;
+ u64 fw_paddr;
+ void *fw_buffer;
+ void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+static int psp_exec(struct psp_device *psp, u32 *reg_vals)
+{
+ u32 resp_code;
+ int ret, i;
+ u32 ready;
+
+ /* Write command and argument registers */
+ for (i = 0; i < PSP_NUM_IN_REGS; i++)
+ writel(reg_vals[i], PSP_REG(psp, i));
+
+ /* clear and set PSP INTR register to kick off */
+ writel(0, PSP_REG(psp, PSP_INTR_REG));
+ writel(1, PSP_REG(psp, PSP_INTR_REG));
+
+ /* PSP should be busy. Wait for ready, so we know task is done. */
+ ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
+ FIELD_GET(PSP_STATUS_READY, ready),
+ PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
+ if (ret) {
+ drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret);
+ return ret;
+ }
+
+ resp_code = readl(PSP_REG(psp, PSP_RESP_REG));
+ if (resp_code) {
+ drm_err(psp->ddev, "fw return error 0x%x", resp_code);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+void aie2_psp_stop(struct psp_device *psp)
+{
+ u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
+ int ret;
+
+ ret = psp_exec(psp, reg_vals);
+ if (ret)
+ drm_err(psp->ddev, "release tmr failed, ret %d", ret);
+}
+
+int aie2_psp_start(struct psp_device *psp)
+{
+ u32 reg_vals[PSP_NUM_IN_REGS];
+ int ret;
+
+ reg_vals[0] = PSP_VALIDATE;
+ reg_vals[1] = lower_32_bits(psp->fw_paddr);
+ reg_vals[2] = upper_32_bits(psp->fw_paddr);
+ reg_vals[3] = psp->fw_buf_sz;
+
+ ret = psp_exec(psp, reg_vals);
+ if (ret) {
+ drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
+ return ret;
+ }
+
+ memset(reg_vals, 0, sizeof(reg_vals));
+ reg_vals[0] = PSP_START;
+ reg_vals[1] = PSP_START_COPY_FW;
+ ret = psp_exec(psp, reg_vals);
+ if (ret) {
+ drm_err(psp->ddev, "failed to start fw, ret %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf)
+{
+ struct psp_device *psp;
+ u64 offset;
+
+ psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
+ if (!psp)
+ return NULL;
+
+ psp->ddev = ddev;
+ memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
+
+ psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
+ psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
+ if (!psp->fw_buffer) {
+ drm_err(ddev, "no memory for fw buffer");
+ return NULL;
+ }
+
+ /*
+ * AMD Platform Security Processor(PSP) requires host physical
+ * address to load NPU firmware.
+ */
+ psp->fw_paddr = virt_to_phys(psp->fw_buffer);
+ offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
+ psp->fw_paddr += offset;
+ memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
+
+ return psp;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
new file mode 100644
index 000000000000..bd94ee96c2bc
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iopoll.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
+
+#define SMU_RESULT_OK 1
+
+/* SMU commands */
+#define AIE2_SMU_POWER_ON 0x3
+#define AIE2_SMU_POWER_OFF 0x4
+#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5
+#define AIE2_SMU_SET_HCLK_FREQ 0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
+
+#define NPU4_DPM_TOPS(ndev, dpm_level) \
+({ \
+ typeof(ndev) _ndev = ndev; \
+ (4096 * (_ndev)->total_col * \
+ (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
+})
+
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+ u32 reg_arg, u32 *out)
+{
+ u32 resp;
+ int ret;
+
+ writel(0, SMU_REG(ndev, SMU_RESP_REG));
+ writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG));
+ writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG));
+
+ /* Clear and set SMU_INTR_REG to kick off */
+ writel(0, SMU_REG(ndev, SMU_INTR_REG));
+ writel(1, SMU_REG(ndev, SMU_INTR_REG));
+
+ ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp,
+ resp, AIE2_INTERVAL, AIE2_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd);
+ return ret;
+ }
+
+ if (out)
+ *out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
+ if (resp != SMU_RESULT_OK) {
+ XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+{
+ u32 freq;
+ int ret;
+
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
+ goto suspend_put;
+ }
+ ndev->npuclk_freq = freq;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
+ goto suspend_put;
+ }
+
+ amdxdna_pm_suspend_put(ndev->xdna);
+ ndev->hclk_freq = freq;
+ ndev->dpm_level = dpm_level;
+ ndev->max_tops = 2 * ndev->total_col;
+ ndev->curr_tops = ndev->max_tops * freq / 1028;
+
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
+
+ return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
+}
+
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+{
+ int ret;
+
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
+ dpm_level, ret);
+ goto suspend_put;
+ }
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+ dpm_level, ret);
+ goto suspend_put;
+ }
+
+ amdxdna_pm_suspend_put(ndev->xdna);
+ ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+ ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+ ndev->dpm_level = dpm_level;
+ ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
+ ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
+
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
+
+ return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ /*
+ * Failing to set power off indicates an unrecoverable hardware or
+ * firmware error.
+ */
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Access power failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ndev->priv->hw_ops.set_dpm(ndev, 0);
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+ if (ret)
+ XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
new file mode 100644
index 000000000000..2013d1f13aae
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
+#include "aie2_solver.h"
+
+struct partition_node {
+ struct list_head list;
+ u32 nshared; /* # shared requests */
+ u32 start_col; /* start column */
+ u32 ncols; /* # columns */
+ bool exclusive; /* can not be shared if set */
+};
+
+struct solver_node {
+ struct list_head list;
+ u64 rid; /* Request ID from consumer */
+
+ struct partition_node *pt_node;
+ void *cb_arg;
+ u32 dpm_level;
+ u32 cols_len;
+ u32 start_cols[] __counted_by(cols_len);
+};
+
+struct solver_rgroup {
+ u32 rgid;
+ u32 nnode;
+ u32 npartition_node;
+
+ DECLARE_BITMAP(resbit, XRS_MAX_COL);
+ struct list_head node_list;
+ struct list_head pt_node_list;
+};
+
+struct solver_state {
+ struct solver_rgroup rgp;
+ struct init_config cfg;
+ struct xrs_action_ops *actions;
+};
+
+static u32 calculate_gops(struct aie_qos *rqos)
+{
+ u32 service_rate = 0;
+
+ if (rqos->latency)
+ service_rate = (1000 / rqos->latency);
+
+ if (rqos->fps > service_rate)
+ return rqos->fps * rqos->gops;
+
+ return service_rate * rqos->gops;
+}
+
+/*
+ * qos_meet() - Check the QOS request can be met.
+ */
+static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
+{
+ u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
+
+ if (request_gops <= cgops)
+ return 0;
+
+ return -EINVAL;
+}
+
+/*
+ * sanity_check() - Do a basic sanity check on allocation request.
+ */
+static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
+{
+ struct cdo_parts *cdop = &req->cdo;
+ struct aie_qos *rqos = &req->rqos;
+ u32 cu_clk_freq;
+
+ if (cdop->ncols > xrs->cfg.total_col)
+ return -EINVAL;
+
+ /*
+ * We can find at least one CDOs groups that meet the
+ * GOPs requirement.
+ */
+ cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1];
+
+ if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+ /*
+ * gops is retrieved from the xmodel, so it's always set
+ * fps and latency are the configurable params from the application
+ */
+ if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0))
+ return true;
+
+ return false;
+}
+
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+ struct solver_rgroup *rgp = &xrs->rgp;
+ struct cdo_parts *cdop = &req->cdo;
+ struct aie_qos *rqos = &req->rqos;
+ u32 freq, max_dpm_level, level;
+ struct solver_node *node;
+
+ max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+ /* If no QoS parameters are passed, set it to the max DPM level */
+ if (!is_valid_qos_dpm_params(rqos)) {
+ level = max_dpm_level;
+ goto set_dpm;
+ }
+
+ /* Find one CDO group that meet the GOPs requirement. */
+ for (level = 0; level < max_dpm_level; level++) {
+ freq = xrs->cfg.clk_list.cu_clk_list[level];
+ if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
+ break;
+ }
+
+ /* set the dpm level which fits all the sessions */
+ list_for_each_entry(node, &rgp->node_list, list) {
+ if (node->dpm_level > level)
+ level = node->dpm_level;
+ }
+
+set_dpm:
+ *dpm_level = level;
+ return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
+static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
+{
+ struct solver_node *node;
+
+ list_for_each_entry(node, &rgp->node_list, list) {
+ if (node->rid == rid)
+ return node;
+ }
+
+ return NULL;
+}
+
+static void remove_partition_node(struct solver_rgroup *rgp,
+ struct partition_node *pt_node)
+{
+ pt_node->nshared--;
+ if (pt_node->nshared > 0)
+ return;
+
+ list_del(&pt_node->list);
+ rgp->npartition_node--;
+
+ bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols);
+ kfree(pt_node);
+}
+
+static void remove_solver_node(struct solver_rgroup *rgp,
+ struct solver_node *node)
+{
+ list_del(&node->list);
+ rgp->nnode--;
+
+ if (node->pt_node)
+ remove_partition_node(rgp, node->pt_node);
+
+ kfree(node);
+}
+
+static int get_free_partition(struct solver_state *xrs,
+ struct solver_node *snode,
+ struct alloc_requests *req)
+{
+ struct partition_node *pt_node;
+ u32 ncols = req->cdo.ncols;
+ u32 col, i;
+
+ for (i = 0; i < snode->cols_len; i++) {
+ col = snode->start_cols[i];
+ if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols)
+ break;
+ }
+
+ if (i == snode->cols_len)
+ return -ENODEV;
+
+ pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL);
+ if (!pt_node)
+ return -ENOMEM;
+
+ pt_node->nshared = 1;
+ pt_node->start_col = col;
+ pt_node->ncols = ncols;
+
+ /*
+ * Always set exclusive to false for now.
+ */
+ pt_node->exclusive = false;
+
+ list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
+ xrs->rgp.npartition_node++;
+ bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols);
+
+ snode->pt_node = pt_node;
+
+ return 0;
+}
+
+static int allocate_partition(struct solver_state *xrs,
+ struct solver_node *snode,
+ struct alloc_requests *req)
+{
+ struct partition_node *pt_node, *rpt_node = NULL;
+ int idx, ret;
+
+ ret = get_free_partition(xrs, snode, req);
+ if (!ret)
+ return ret;
+
+ /* try to get a share-able partition */
+ list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) {
+ if (pt_node->exclusive)
+ continue;
+
+ if (rpt_node && pt_node->nshared >= rpt_node->nshared)
+ continue;
+
+ for (idx = 0; idx < snode->cols_len; idx++) {
+ if (snode->start_cols[idx] != pt_node->start_col)
+ continue;
+
+ if (req->cdo.ncols != pt_node->ncols)
+ continue;
+
+ rpt_node = pt_node;
+ break;
+ }
+ }
+
+ if (!rpt_node)
+ return -ENODEV;
+
+ rpt_node->nshared++;
+ snode->pt_node = rpt_node;
+
+ return 0;
+}
+
+static struct solver_node *create_solver_node(struct solver_state *xrs,
+ struct alloc_requests *req)
+{
+ struct cdo_parts *cdop = &req->cdo;
+ struct solver_node *node;
+ int ret;
+
+ node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->rid = req->rid;
+ node->cols_len = cdop->cols_len;
+ memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32));
+
+ ret = allocate_partition(xrs, node, req);
+ if (ret)
+ goto free_node;
+
+ list_add_tail(&node->list, &xrs->rgp.node_list);
+ xrs->rgp.nnode++;
+ return node;
+
+free_node:
+ kfree(node);
+ return ERR_PTR(ret);
+}
+
+static void fill_load_action(struct solver_state *xrs,
+ struct solver_node *snode,
+ struct xrs_action_load *action)
+{
+ action->rid = snode->rid;
+ action->part.start_col = snode->pt_node->start_col;
+ action->part.ncols = snode->pt_node->ncols;
+}
+
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
+{
+ struct xrs_action_load load_act;
+ struct solver_node *snode;
+ struct solver_state *xrs;
+ u32 dpm_level;
+ int ret;
+
+ xrs = (struct solver_state *)hdl;
+
+ ret = sanity_check(xrs, req);
+ if (ret) {
+ drm_err(xrs->cfg.ddev, "invalid request");
+ return ret;
+ }
+
+ if (rg_search_node(&xrs->rgp, req->rid)) {
+ drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid);
+ return -EEXIST;
+ }
+
+ snode = create_solver_node(xrs, req);
+ if (IS_ERR(snode))
+ return PTR_ERR(snode);
+
+ fill_load_action(xrs, snode, &load_act);
+ ret = xrs->cfg.actions->load(cb_arg, &load_act);
+ if (ret)
+ goto free_node;
+
+ ret = set_dpm_level(xrs, req, &dpm_level);
+ if (ret)
+ goto free_node;
+
+ snode->dpm_level = dpm_level;
+ snode->cb_arg = cb_arg;
+
+ drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
+ snode->pt_node->start_col, snode->pt_node->ncols);
+
+ return 0;
+
+free_node:
+ remove_solver_node(&xrs->rgp, snode);
+
+ return ret;
+}
+
+int xrs_release_resource(void *hdl, u64 rid)
+{
+ struct solver_state *xrs = hdl;
+ struct solver_node *node;
+
+ node = rg_search_node(&xrs->rgp, rid);
+ if (!node) {
+ drm_err(xrs->cfg.ddev, "node not exist");
+ return -ENODEV;
+ }
+
+ xrs->cfg.actions->unload(node->cb_arg);
+ remove_solver_node(&xrs->rgp, node);
+
+ return 0;
+}
+
+void *xrsm_init(struct init_config *cfg)
+{
+ struct solver_rgroup *rgp;
+ struct solver_state *xrs;
+
+ xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL);
+ if (!xrs)
+ return NULL;
+
+ memcpy(&xrs->cfg, cfg, sizeof(*cfg));
+
+ rgp = &xrs->rgp;
+ INIT_LIST_HEAD(&rgp->node_list);
+ INIT_LIST_HEAD(&rgp->pt_node_list);
+
+ return xrs;
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
new file mode 100644
index 000000000000..a2e3c52229e9
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_SOLVER_H
+#define _AIE2_SOLVER_H
+
+#define XRS_MAX_COL 128
+
+/*
+ * Structure used to describe a partition. A partition is column based
+ * allocation unit described by its start column and number of columns.
+ */
+struct aie_part {
+ u32 start_col;
+ u32 ncols;
+};
+
+/*
+ * The QoS capabilities of a given AIE partition.
+ */
+struct aie_qos_cap {
+ u32 opc; /* operations per cycle */
+ u32 dma_bw; /* DMA bandwidth */
+};
+
+/*
+ * QoS requirement of a resource allocation.
+ */
+struct aie_qos {
+ u32 gops; /* Giga operations */
+ u32 fps; /* Frames per second */
+ u32 dma_bw; /* DMA bandwidth */
+ u32 latency; /* Frame response latency */
+ u32 exec_time; /* Frame execution time */
+ u32 priority; /* Request priority */
+};
+
+/*
+ * Structure used to describe a relocatable CDO (Configuration Data Object).
+ */
+struct cdo_parts {
+ u32 *start_cols; /* Start column array */
+ u32 cols_len; /* Length of start column array */
+ u32 ncols; /* # of column */
+ struct aie_qos_cap qos_cap; /* CDO QoS capabilities */
+};
+
+/*
+ * Structure used to describe a request to allocate.
+ */
+struct alloc_requests {
+ u64 rid;
+ struct cdo_parts cdo;
+ struct aie_qos rqos; /* Requested QoS */
+};
+
+/*
+ * Load callback argument
+ */
+struct xrs_action_load {
+ u32 rid;
+ struct aie_part part;
+};
+
+/*
+ * Define the power level available
+ *
+ * POWER_LEVEL_MIN:
+ * Lowest power level. Usually set when all actions are unloaded.
+ *
+ * POWER_LEVEL_n
+ * Power levels 0 - n, is a step increase in system frequencies
+ */
+enum power_level {
+ POWER_LEVEL_MIN = 0x0,
+ POWER_LEVEL_0 = 0x1,
+ POWER_LEVEL_1 = 0x2,
+ POWER_LEVEL_2 = 0x3,
+ POWER_LEVEL_3 = 0x4,
+ POWER_LEVEL_4 = 0x5,
+ POWER_LEVEL_5 = 0x6,
+ POWER_LEVEL_6 = 0x7,
+ POWER_LEVEL_7 = 0x8,
+ POWER_LEVEL_NUM,
+};
+
+/*
+ * Structure used to describe the frequency table.
+ * Resource solver chooses the frequency from the table
+ * to meet the QOS requirements.
+ */
+struct clk_list_info {
+ u32 num_levels; /* available power levels */
+ u32 cu_clk_list[POWER_LEVEL_NUM]; /* available aie clock frequencies in Mhz*/
+};
+
+struct xrs_action_ops {
+ int (*load)(void *cb_arg, struct xrs_action_load *action);
+ int (*unload)(void *cb_arg);
+ int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
+};
+
+/*
+ * Structure used to describe information for solver during initialization.
+ */
+struct init_config {
+ u32 total_col;
+ u32 sys_eff_factor; /* system efficiency factor */
+ u32 latency_adj; /* latency adjustment in ms */
+ struct clk_list_info clk_list; /* List of frequencies available in system */
+ struct drm_device *ddev;
+ struct xrs_action_ops *actions;
+};
+
+/*
+ * xrsm_init() - Register resource solver. Resource solver client needs
+ * to call this function to register itself.
+ *
+ * @cfg: The system metrics for resource solver to use
+ *
+ * Return: A resource solver handle
+ *
+ * Note: We should only create one handle per AIE array to be managed.
+ */
+void *xrsm_init(struct init_config *cfg);
+
+/*
+ * xrs_allocate_resource() - Request to allocate resources for a given context
+ * and a partition metadata. (See struct part_meta)
+ *
+ * @hdl: Resource solver handle obtained from xrs_init()
+ * @req: Input to the Resource solver including request id
+ * and partition metadata.
+ * @cb_arg: callback argument pointer
+ *
+ * Return: 0 when successful.
+ * Or standard error number when failing
+ *
+ * Note:
+ * There is no lock mechanism inside resource solver. So it is
+ * the caller's responsibility to lock down XCLBINs and grab
+ * necessary lock.
+ */
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg);
+
+/*
+ * xrs_release_resource() - Request to free resources for a given context.
+ *
+ * @hdl: Resource solver handle obtained from xrs_init()
+ * @rid: The Request ID to identify the requesting context
+ */
+int xrs_release_resource(void *hdl, u64 rid);
+#endif /* _AIE2_SOLVER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
new file mode 100644
index 000000000000..d17aef89a0ad
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+#define MAX_HWCTX_ID 255
+#define MAX_ARG_COUNT 4095
+
+struct amdxdna_fence {
+ struct dma_fence base;
+ spinlock_t lock; /* for base */
+ struct amdxdna_hwctx *hwctx;
+};
+
+static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
+{
+ return KBUILD_MODNAME;
+}
+
+static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct amdxdna_fence *xdna_fence;
+
+ xdna_fence = container_of(fence, struct amdxdna_fence, base);
+
+ return xdna_fence->hwctx->name;
+}
+
+static const struct dma_fence_ops fence_ops = {
+ .get_driver_name = amdxdna_fence_get_driver_name,
+ .get_timeline_name = amdxdna_fence_get_timeline_name,
+};
+
+static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return NULL;
+
+ fence->hwctx = hwctx;
+ spin_lock_init(&fence->lock);
+ dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
+ return &fence->base;
+}
+
+static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
+ struct srcu_struct *ss)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ synchronize_srcu(ss);
+
+ /* At this point, user is not able to submit new commands */
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+
+ kfree(hwctx->name);
+ kfree(hwctx);
+}
+
+int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
+ int (*walk)(struct amdxdna_hwctx *hwctx, void *arg))
+{
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ int ret = 0, idx;
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ ret = walk(hwctx, arg);
+ if (ret)
+ break;
+ }
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+
+ return ret;
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+ u32 num_masks, count;
+
+ if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+ num_masks = 0;
+ else
+ num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+
+ if (size) {
+ count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
+ if (unlikely(count <= num_masks)) {
+ *size = 0;
+ return NULL;
+ }
+ *size = (count - num_masks) * sizeof(u32);
+ }
+ return &cmd->data[num_masks];
+}
+
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+ u32 num_masks, i;
+ u32 *cu_mask;
+
+ if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+ return INVALID_CU_IDX;
+
+ num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+ cu_mask = cmd->data;
+ for (i = 0; i < num_masks; i++) {
+ if (cu_mask[i])
+ return ffs(cu_mask[i]) - 1;
+ }
+
+ return INVALID_CU_IDX;
+}
+
+/*
+ * This should be called in close() and remove(). DO NOT call in other syscalls.
+ * This guarantee that when hwctx and resources will be released, if user
+ * doesn't call amdxdna_drm_destroy_hwctx_ioctl.
+ */
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
+{
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ XDNA_DBG(client->xdna, "PID %d close HW context %d",
+ client->pid, hwctx->id);
+ xa_erase(&client->hwctx_xa, hwctx->id);
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+ }
+}
+
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_create_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ if (args->ext || args->ext_flags)
+ return -EINVAL;
+
+ hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
+ if (!hwctx)
+ return -ENOMEM;
+
+ if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
+ XDNA_ERR(xdna, "Access QoS info failed");
+ kfree(hwctx);
+ return -EFAULT;
+ }
+
+ hwctx->client = client;
+ hwctx->fw_ctx_id = -1;
+ hwctx->num_tiles = args->num_tiles;
+ hwctx->mem_size = args->mem_size;
+ hwctx->max_opc = args->max_opc;
+
+ guard(mutex)(&xdna->dev_lock);
+
+ if (!drm_dev_enter(dev, &idx)) {
+ ret = -ENODEV;
+ goto free_hwctx;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_init(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ goto dev_exit;
+ }
+
+ hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
+ if (!hwctx->name) {
+ ret = -ENOMEM;
+ goto fini_hwctx;
+ }
+
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+ goto free_name;
+ }
+
+ args->handle = hwctx->id;
+ args->syncobj_handle = hwctx->syncobj_hdl;
+
+ atomic64_set(&hwctx->job_submit_cnt, 0);
+ atomic64_set(&hwctx->job_free_cnt, 0);
+ XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
+ drm_dev_exit(idx);
+ return 0;
+
+free_name:
+ kfree(hwctx->name);
+fini_hwctx:
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+dev_exit:
+ drm_dev_exit(idx);
+free_hwctx:
+ kfree(hwctx);
+ return ret;
+}
+
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_destroy_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret = 0, idx;
+
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
+ if (!drm_dev_enter(dev, &idx))
+ return -ENODEV;
+
+ mutex_lock(&xdna->dev_lock);
+ hwctx = xa_erase(&client->hwctx_xa, args->handle);
+ if (!hwctx) {
+ ret = -EINVAL;
+ XDNA_DBG(xdna, "PID %d HW context %d not exist",
+ client->pid, args->handle);
+ goto out;
+ }
+
+ /*
+ * The pushed jobs are handled by DRM scheduler during destroy.
+ * SRCU to synchronize with exec command ioctls.
+ */
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+
+ XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
+out:
+ mutex_unlock(&xdna->dev_lock);
+ drm_dev_exit(idx);
+ return ret;
+}
+
+int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_config_hwctx *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+ u32 buf_size;
+ void *buf;
+ u64 val;
+
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
+ if (!xdna->dev_info->ops->hwctx_config)
+ return -EOPNOTSUPP;
+
+ val = args->param_val;
+ buf_size = args->param_val_size;
+
+ switch (args->param_type) {
+ case DRM_AMDXDNA_HWCTX_CONFIG_CU:
+ /* For those types that param_val is pointer */
+ if (buf_size > PAGE_SIZE) {
+ XDNA_ERR(xdna, "Config CU param buffer too large");
+ return -E2BIG;
+ }
+
+ /* Hwctx needs to keep buf */
+ buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ break;
+ case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
+ case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
+ /* For those types that param_val is a value */
+ buf = NULL;
+ buf_size = 0;
+ break;
+ default:
+ XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type);
+ return -EINVAL;
+ }
+
+ mutex_lock(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, args->handle);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ mutex_unlock(&xdna->dev_lock);
+ kfree(buf);
+ return ret;
+}
+
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret, idx;
+
+ if (!xdna->dev_info->ops->hwctx_sync_debug_bo)
+ return -EOPNOTSUPP;
+
+ gobj = drm_gem_object_lookup(client->filp, debug_bo_hdl);
+ if (!gobj)
+ return -EINVAL;
+
+ abo = to_xdna_obj(gobj);
+ guard(mutex)(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
+ if (!hwctx) {
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);
+
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
+static void
+amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
+{
+ int i;
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ if (!job->bos[i])
+ break;
+ drm_gem_object_put(job->bos[i]);
+ }
+}
+
+static int
+amdxdna_arg_bos_lookup(struct amdxdna_client *client,
+ struct amdxdna_sched_job *job,
+ u32 *bo_hdls, u32 bo_cnt)
+{
+ struct drm_gem_object *gobj;
+ int i, ret;
+
+ job->bo_cnt = bo_cnt;
+ for (i = 0; i < job->bo_cnt; i++) {
+ struct amdxdna_gem_obj *abo;
+
+ gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
+ if (!gobj) {
+ ret = -ENOENT;
+ goto put_shmem_bo;
+ }
+ abo = to_xdna_obj(gobj);
+
+ mutex_lock(&abo->lock);
+ if (abo->pinned) {
+ mutex_unlock(&abo->lock);
+ job->bos[i] = gobj;
+ continue;
+ }
+
+ ret = amdxdna_gem_pin_nolock(abo);
+ if (ret) {
+ mutex_unlock(&abo->lock);
+ drm_gem_object_put(gobj);
+ goto put_shmem_bo;
+ }
+ abo->pinned = true;
+ mutex_unlock(&abo->lock);
+
+ job->bos[i] = gobj;
+ }
+
+ return 0;
+
+put_shmem_bo:
+ amdxdna_arg_bos_put(job);
+ return ret;
+}
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
+{
+ trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
+ amdxdna_arg_bos_put(job);
+ amdxdna_gem_put_obj(job->cmd_bo);
+ dma_fence_put(job->fence);
+}
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ struct amdxdna_drv_cmd *drv_cmd,
+ u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_sched_job *job;
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
+ job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ job->drv_cmd = drv_cmd;
+
+ if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
+ job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
+ if (!job->cmd_bo) {
+ XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
+ ret = -EINVAL;
+ goto free_job;
+ }
+ }
+
+ ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
+ if (ret) {
+ XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
+ goto cmd_put;
+ }
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
+ client->pid, hwctx_hdl);
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+
+ job->hwctx = hwctx;
+ job->mm = current->mm;
+
+ job->fence = amdxdna_fence_create(hwctx);
+ if (!job->fence) {
+ XDNA_ERR(xdna, "Failed to create fence");
+ ret = -ENOMEM;
+ goto unlock_srcu;
+ }
+ kref_init(&job->refcnt);
+
+ ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
+ if (ret)
+ goto put_fence;
+
+ /*
+ * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated
+ * resource after synchronize_srcu(). The submitted jobs should be
+ * handled by the queue, for example DRM scheduler, in device layer.
+ * For here we can unlock SRCU.
+ */
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");
+
+ return 0;
+
+put_fence:
+ dma_fence_put(job->fence);
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ amdxdna_arg_bos_put(job);
+cmd_put:
+ amdxdna_gem_put_obj(job->cmd_bo);
+free_job:
+ kfree(job);
+ return ret;
+}
+
+/*
+ * The submit command ioctl submits a command to firmware. One firmware command
+ * may contain multiple command BOs for processing as a whole.
+ * The command sequence number is returned which can be used for wait command ioctl.
+ */
+static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
+ struct amdxdna_drm_exec_cmd *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ u32 *arg_bo_hdls = NULL;
+ u32 cmd_bo_hdl;
+ int ret;
+
+ if (args->arg_count > MAX_ARG_COUNT) {
+ XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
+ return -EINVAL;
+ }
+
+ /* Only support single command for now. */
+ if (args->cmd_count != 1) {
+ XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
+ return -EINVAL;
+ }
+
+ cmd_bo_hdl = (u32)args->cmd_handles;
+ if (args->arg_count) {
+ arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+ if (!arg_bo_hdls)
+ return -ENOMEM;
+ ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+ args->arg_count * sizeof(u32));
+ if (ret) {
+ ret = -EFAULT;
+ goto free_cmd_bo_hdls;
+ }
+ }
+
+ ret = amdxdna_cmd_submit(client, NULL, cmd_bo_hdl, arg_bo_hdls,
+ args->arg_count, args->hwctx, &args->seq);
+ if (ret)
+ XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
+
+free_cmd_bo_hdls:
+ kfree(arg_bo_hdls);
+ if (!ret)
+ XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
+ return ret;
+}
+
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_exec_cmd *args = data;
+
+ if (args->ext || args->ext_flags)
+ return -EINVAL;
+
+ switch (args->type) {
+ case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
+ return amdxdna_drm_submit_execbuf(client, args);
+ }
+
+ XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
+ return -EINVAL;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
new file mode 100644
index 000000000000..b6151244d64f
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_CTX_H_
+#define _AMDXDNA_CTX_H_
+
+#include <linux/bitfield.h>
+
+#include "amdxdna_gem.h"
+
+struct amdxdna_hwctx_priv;
+
+enum ert_cmd_opcode {
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
+ ERT_START_NPU_PREEMPT = 21,
+ ERT_START_NPU_PREEMPT_ELF = 22,
+ ERT_INVALID_CMD = ~0U,
+};
+
+enum ert_cmd_state {
+ ERT_CMD_STATE_INVALID,
+ ERT_CMD_STATE_NEW,
+ ERT_CMD_STATE_QUEUED,
+ ERT_CMD_STATE_RUNNING,
+ ERT_CMD_STATE_COMPLETED,
+ ERT_CMD_STATE_ERROR,
+ ERT_CMD_STATE_ABORT,
+ ERT_CMD_STATE_SUBMITTED,
+ ERT_CMD_STATE_TIMEOUT,
+ ERT_CMD_STATE_NORESPONSE,
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_start_npu {
+ u64 buffer; /* instruction buffer address */
+ u32 buffer_size; /* size of buffer in bytes */
+ u32 prop_count; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
+ */
+struct amdxdna_cmd_chain {
+ u32 command_count;
+ u32 submit_index;
+ u32 error_index;
+ u32 reserved[3];
+ u64 data[] __counted_by(command_count);
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_preempt_data {
+ u64 inst_buf; /* instruction buffer address */
+ u64 save_buf; /* save buffer address */
+ u64 restore_buf; /* restore buffer address */
+ u32 inst_size; /* size of instruction buffer in bytes */
+ u32 save_size; /* size of save buffer in bytes */
+ u32 restore_size; /* size of restore buffer in bytes */
+ u32 inst_prop_cnt; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
+/* Exec buffer command header format */
+#define AMDXDNA_CMD_STATE GENMASK(3, 0)
+#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
+#define AMDXDNA_CMD_COUNT GENMASK(22, 12)
+#define AMDXDNA_CMD_OPCODE GENMASK(27, 23)
+struct amdxdna_cmd {
+ u32 header;
+ u32 data[];
+};
+
+#define INVALID_CU_IDX (~0U)
+
+struct amdxdna_hwctx {
+ struct amdxdna_client *client;
+ struct amdxdna_hwctx_priv *priv;
+ char *name;
+
+ u32 id;
+ u32 max_opc;
+ u32 num_tiles;
+ u32 mem_size;
+ u32 fw_ctx_id;
+ u32 col_list_len;
+ u32 *col_list;
+ u32 start_col;
+ u32 num_col;
+#define HWCTX_STAT_INIT 0
+#define HWCTX_STAT_READY 1
+#define HWCTX_STAT_STOP 2
+ u32 status;
+ u32 old_status;
+
+ struct amdxdna_qos_info qos;
+ struct amdxdna_hwctx_param_config_cu *cus;
+ u32 syncobj_hdl;
+
+ atomic64_t job_submit_cnt;
+ atomic64_t job_free_cnt ____cacheline_aligned_in_smp;
+};
+
+#define drm_job_to_xdna_job(j) \
+ container_of(j, struct amdxdna_sched_job, base)
+
+enum amdxdna_job_opcode {
+ SYNC_DEBUG_BO,
+ ATTACH_DEBUG_BO,
+ DETACH_DEBUG_BO,
+};
+
+struct amdxdna_drv_cmd {
+ enum amdxdna_job_opcode opcode;
+ u32 result;
+};
+
+struct amdxdna_sched_job {
+ struct drm_sched_job base;
+ struct kref refcnt;
+ struct amdxdna_hwctx *hwctx;
+ struct mm_struct *mm;
+ /* The fence to notice DRM scheduler that job is done by hardware */
+ struct dma_fence *fence;
+ /* user can wait on this fence */
+ struct dma_fence *out_fence;
+ bool job_done;
+ bool job_timeout;
+ u64 seq;
+ struct amdxdna_drv_cmd *drv_cmd;
+ struct amdxdna_gem_obj *cmd_bo;
+ size_t bo_cnt;
+ struct drm_gem_object *bos[] __counted_by(bo_cnt);
+};
+
+static inline u32
+amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header);
+}
+
+static inline void
+amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ cmd->header &= ~AMDXDNA_CMD_STATE;
+ cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s);
+}
+
+static inline enum ert_cmd_state
+amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header);
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
+int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
+ int (*walk)(struct amdxdna_hwctx *hwctx, void *arg));
+int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl);
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls,
+ u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq);
+
+int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
+ u64 seq, u32 timeout);
+
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+#endif /* _AMDXDNA_CTX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h
new file mode 100644
index 000000000000..c51de86ec12b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_error.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_ERROR_H_
+#define _AMDXDNA_ERROR_H_
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+#define AMDXDNA_ERR_DRV_AIE 4
+#define AMDXDNA_ERR_SEV_CRITICAL 3
+#define AMDXDNA_ERR_CLASS_AIE 2
+
+#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0)
+#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16)
+#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24)
+#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32)
+#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40)
+
+enum amdxdna_error_num {
+ AMDXDNA_ERROR_NUM_AIE_SATURATION = 3,
+ AMDXDNA_ERROR_NUM_AIE_FP,
+ AMDXDNA_ERROR_NUM_AIE_STREAM,
+ AMDXDNA_ERROR_NUM_AIE_ACCESS,
+ AMDXDNA_ERROR_NUM_AIE_BUS,
+ AMDXDNA_ERROR_NUM_AIE_INSTRUCTION,
+ AMDXDNA_ERROR_NUM_AIE_ECC,
+ AMDXDNA_ERROR_NUM_AIE_LOCK,
+ AMDXDNA_ERROR_NUM_AIE_DMA,
+ AMDXDNA_ERROR_NUM_AIE_MEM_PARITY,
+ AMDXDNA_ERROR_NUM_UNKNOWN = 15,
+};
+
+enum amdxdna_error_module {
+ AMDXDNA_ERROR_MODULE_AIE_CORE = 3,
+ AMDXDNA_ERROR_MODULE_AIE_MEMORY,
+ AMDXDNA_ERROR_MODULE_AIE_SHIM,
+ AMDXDNA_ERROR_MODULE_AIE_NOC,
+ AMDXDNA_ERROR_MODULE_AIE_PL,
+ AMDXDNA_ERROR_MODULE_UNKNOWN = 8,
+};
+
+#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \
+ (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \
+ FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \
+ FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE))
+
+#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0)
+#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8)
+
+#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \
+ (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \
+ FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row))
+
+#endif /* _AMDXDNA_ERROR_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
new file mode 100644
index 000000000000..dfa916eeb2d9
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -0,0 +1,972 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direct.h>
+#include <linux/iosys-map.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_ubuf.h"
+
+#define XDNA_MAX_CMD_BO_SIZE SZ_32K
+
+MODULE_IMPORT_NS("DMA_BUF");
+
+static int
+amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_client *client = abo->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_mem *mem = &abo->mem;
+ struct amdxdna_gem_obj *heap;
+ u64 offset;
+ u32 align;
+ int ret;
+
+ mutex_lock(&client->mm_lock);
+
+ heap = client->dev_heap;
+ if (!heap) {
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+
+ if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
+ XDNA_ERR(xdna, "Invalid dev heap userptr");
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+
+ if (mem->size == 0 || mem->size > heap->mem.size) {
+ XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx",
+ mem->size, heap->mem.size);
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+
+ align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
+ ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node,
+ mem->size, align,
+ 0, DRM_MM_INSERT_BEST);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+ goto unlock_out;
+ }
+
+ mem->dev_addr = abo->mm_node.start;
+ offset = mem->dev_addr - heap->mem.dev_addr;
+ mem->userptr = heap->mem.userptr + offset;
+ mem->kva = heap->mem.kva + offset;
+
+ drm_gem_object_get(to_gobj(heap));
+
+unlock_out:
+ mutex_unlock(&client->mm_lock);
+
+ return ret;
+}
+
+static void
+amdxdna_gem_destroy_obj(struct amdxdna_gem_obj *abo)
+{
+ mutex_destroy(&abo->lock);
+ kfree(abo);
+}
+
+static void
+amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_gem_obj *heap;
+
+ mutex_lock(&abo->client->mm_lock);
+
+ drm_mm_remove_node(&abo->mm_node);
+
+ heap = abo->client->dev_heap;
+ drm_gem_object_put(to_gobj(heap));
+
+ mutex_unlock(&abo->client->mm_lock);
+}
+
+static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier);
+ struct amdxdna_gem_obj *abo = mapp->abo;
+ struct amdxdna_dev *xdna;
+
+ xdna = to_xdna_dev(to_gobj(abo)->dev);
+ XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d",
+ mapp->vma->vm_start, mapp->vma->vm_end, abo->type);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ down_write(&xdna->notifier_lock);
+ abo->mem.map_invalid = true;
+ mapp->invalid = true;
+ mmu_interval_set_seq(&mapp->notifier, cur_seq);
+ up_write(&xdna->notifier_lock);
+
+ xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+
+ if (range->event == MMU_NOTIFY_UNMAP) {
+ down_write(&xdna->notifier_lock);
+ if (!mapp->unmapped) {
+ queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+ mapp->unmapped = true;
+ }
+ up_write(&xdna->notifier_lock);
+ }
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
+ .invalidate = amdxdna_hmm_invalidate,
+};
+
+static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct amdxdna_umap *mapp;
+
+ down_read(&xdna->notifier_lock);
+ list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+ if (!vma || mapp->vma == vma) {
+ if (!mapp->unmapped) {
+ queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+ mapp->unmapped = true;
+ }
+ if (vma)
+ break;
+ }
+ }
+ up_read(&xdna->notifier_lock);
+}
+
+static void amdxdna_umap_release(struct kref *ref)
+{
+ struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt);
+ struct vm_area_struct *vma = mapp->vma;
+ struct amdxdna_dev *xdna;
+
+ mmu_interval_notifier_remove(&mapp->notifier);
+ if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping)
+ mapping_clear_unevictable(vma->vm_file->f_mapping);
+
+ xdna = to_xdna_dev(to_gobj(mapp->abo)->dev);
+ down_write(&xdna->notifier_lock);
+ list_del(&mapp->node);
+ up_write(&xdna->notifier_lock);
+
+ kvfree(mapp->range.hmm_pfns);
+ kfree(mapp);
+}
+
+void amdxdna_umap_put(struct amdxdna_umap *mapp)
+{
+ kref_put(&mapp->refcnt, amdxdna_umap_release);
+}
+
+static void amdxdna_hmm_unreg_work(struct work_struct *work)
+{
+ struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap,
+ hmm_unreg_work);
+
+ amdxdna_umap_put(mapp);
+}
+
+static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ unsigned long len = vma->vm_end - vma->vm_start;
+ unsigned long addr = vma->vm_start;
+ struct amdxdna_umap *mapp;
+ u32 nr_pages;
+ int ret;
+
+ if (!xdna->dev_info->ops->hmm_invalidate)
+ return 0;
+
+ mapp = kzalloc(sizeof(*mapp), GFP_KERNEL);
+ if (!mapp)
+ return -ENOMEM;
+
+ nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
+ mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns),
+ GFP_KERNEL);
+ if (!mapp->range.hmm_pfns) {
+ ret = -ENOMEM;
+ goto free_map;
+ }
+
+ ret = mmu_interval_notifier_insert_locked(&mapp->notifier,
+ current->mm,
+ addr,
+ len,
+ &amdxdna_hmm_ops);
+ if (ret) {
+ XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
+ goto free_pfns;
+ }
+
+ mapp->range.notifier = &mapp->notifier;
+ mapp->range.start = vma->vm_start;
+ mapp->range.end = vma->vm_end;
+ mapp->range.default_flags = HMM_PFN_REQ_FAULT;
+ mapp->vma = vma;
+ mapp->abo = abo;
+ kref_init(&mapp->refcnt);
+
+ if (abo->mem.userptr == AMDXDNA_INVALID_ADDR)
+ abo->mem.userptr = addr;
+ INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work);
+ if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping)
+ mapping_set_unevictable(vma->vm_file->f_mapping);
+
+ down_write(&xdna->notifier_lock);
+ list_add_tail(&mapp->node, &abo->mem.umap_list);
+ up_write(&xdna->notifier_lock);
+
+ return 0;
+
+free_pfns:
+ kvfree(mapp->range.hmm_pfns);
+free_map:
+ kfree(mapp);
+ return ret;
+}
+
+static void amdxdna_gem_dev_obj_free(struct drm_gem_object *gobj)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+ XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+ if (abo->pinned)
+ amdxdna_gem_unpin(abo);
+
+ amdxdna_gem_heap_free(abo);
+ drm_gem_object_release(gobj);
+ amdxdna_gem_destroy_obj(abo);
+}
+
+static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ unsigned long num_pages = vma_pages(vma);
+ unsigned long offset = 0;
+ int ret;
+
+ if (!is_import_bo(abo)) {
+ ret = drm_gem_shmem_mmap(&abo->base, vma);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed shmem mmap %d", ret);
+ return ret;
+ }
+
+ /* The buffer is based on memory pages. Fix the flag. */
+ vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+ ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+ &num_pages);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed insert pages %d", ret);
+ vma->vm_ops->close(vma);
+ return ret;
+ }
+
+ return 0;
+ }
+
+ vma->vm_private_data = NULL;
+ vma->vm_ops = NULL;
+ ret = dma_buf_mmap(abo->dma_buf, vma, 0);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret);
+ return ret;
+ }
+
+ do {
+ vm_fault_t fault_ret;
+
+ fault_ret = handle_mm_fault(vma, vma->vm_start + offset,
+ FAULT_FLAG_WRITE, NULL);
+ if (fault_ret & VM_FAULT_ERROR) {
+ vma->vm_ops->close(vma);
+ XDNA_ERR(xdna, "Fault in page failed");
+ return -EFAULT;
+ }
+
+ offset += PAGE_SIZE;
+ } while (--num_pages);
+
+ /* Drop the reference drm_gem_mmap_obj() acquired.*/
+ drm_gem_object_put(to_gobj(abo));
+
+ return 0;
+}
+
+static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
+ struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ int ret;
+
+ ret = amdxdna_hmm_register(abo, vma);
+ if (ret)
+ return ret;
+
+ ret = amdxdna_insert_pages(abo, vma);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed insert pages, ret %d", ret);
+ goto hmm_unreg;
+ }
+
+ XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx",
+ drm_vma_node_offset_addr(&gobj->vma_node), abo->type,
+ vma->vm_start, gobj->size);
+ return 0;
+
+hmm_unreg:
+ amdxdna_hmm_unregister(abo, vma);
+ return ret;
+}
+
+static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+ struct drm_gem_object *gobj = dma_buf->priv;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ unsigned long num_pages = vma_pages(vma);
+ int ret;
+
+ vma->vm_ops = &drm_gem_shmem_vm_ops;
+ vma->vm_private_data = gobj;
+
+ drm_gem_object_get(gobj);
+ ret = drm_gem_shmem_mmap(&abo->base, vma);
+ if (ret)
+ goto put_obj;
+
+ /* The buffer is based on memory pages. Fix the flag. */
+ vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+ ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+ &num_pages);
+ if (ret)
+ goto close_vma;
+
+ return 0;
+
+close_vma:
+ vma->vm_ops->close(vma);
+put_obj:
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
+static const struct dma_buf_ops amdxdna_dmabuf_ops = {
+ .attach = drm_gem_map_attach,
+ .detach = drm_gem_map_detach,
+ .map_dma_buf = drm_gem_map_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .release = drm_gem_dmabuf_release,
+ .mmap = amdxdna_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
+
+static int amdxdna_gem_obj_vmap(struct amdxdna_gem_obj *abo, void **vaddr)
+{
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
+ int ret;
+
+ if (is_import_bo(abo))
+ ret = dma_buf_vmap_unlocked(abo->dma_buf, &map);
+ else
+ ret = drm_gem_vmap(to_gobj(abo), &map);
+
+ *vaddr = map.vaddr;
+ return ret;
+}
+
+static void amdxdna_gem_obj_vunmap(struct amdxdna_gem_obj *abo)
+{
+ struct iosys_map map;
+
+ if (!abo->mem.kva)
+ return;
+
+ iosys_map_set_vaddr(&map, abo->mem.kva);
+
+ if (is_import_bo(abo))
+ dma_buf_vunmap_unlocked(abo->dma_buf, &map);
+ else
+ drm_gem_vunmap(to_gobj(abo), &map);
+}
+
+static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
+{
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ if (abo->dma_buf) {
+ get_dma_buf(abo->dma_buf);
+ return abo->dma_buf;
+ }
+
+ exp_info.ops = &amdxdna_dmabuf_ops;
+ exp_info.size = gobj->size;
+ exp_info.flags = flags;
+ exp_info.priv = gobj;
+ exp_info.resv = gobj->resv;
+
+ return drm_gem_dmabuf_export(gobj->dev, &exp_info);
+}
+
+static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
+{
+ dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL);
+ dma_buf_detach(abo->dma_buf, abo->attach);
+ dma_buf_put(abo->dma_buf);
+ drm_gem_object_release(to_gobj(abo));
+ kfree(abo);
+}
+
+static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+ XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+
+ amdxdna_hmm_unregister(abo, NULL);
+ flush_workqueue(xdna->notifier_wq);
+
+ if (abo->pinned)
+ amdxdna_gem_unpin(abo);
+
+ if (abo->type == AMDXDNA_BO_DEV_HEAP)
+ drm_mm_takedown(&abo->mm);
+
+ amdxdna_gem_obj_vunmap(abo);
+ mutex_destroy(&abo->lock);
+
+ if (is_import_bo(abo)) {
+ amdxdna_imported_obj_free(abo);
+ return;
+ }
+
+ drm_gem_shmem_free(&abo->base);
+}
+
+static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
+ .free = amdxdna_gem_dev_obj_free,
+};
+
+static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
+ .free = amdxdna_gem_obj_free,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = amdxdna_gem_obj_mmap,
+ .vm_ops = &drm_gem_shmem_vm_ops,
+ .export = amdxdna_gem_prime_export,
+};
+
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
+{
+ struct amdxdna_gem_obj *abo;
+
+ abo = kzalloc(sizeof(*abo), GFP_KERNEL);
+ if (!abo)
+ return ERR_PTR(-ENOMEM);
+
+ abo->pinned = false;
+ abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
+ mutex_init(&abo->lock);
+
+ abo->mem.userptr = AMDXDNA_INVALID_ADDR;
+ abo->mem.dev_addr = AMDXDNA_INVALID_ADDR;
+ abo->mem.size = size;
+ INIT_LIST_HEAD(&abo->mem.umap_list);
+
+ return abo;
+}
+
+/* For drm_driver->gem_create_object callback */
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
+{
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_create_obj(dev, size);
+ if (IS_ERR(abo))
+ return ERR_CAST(abo);
+
+ to_gobj(abo)->funcs = &amdxdna_gem_shmem_funcs;
+
+ return to_gobj(abo);
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_shmem_object(struct drm_device *dev, size_t size)
+{
+ struct drm_gem_shmem_object *shmem = drm_gem_shmem_create(dev, size);
+
+ if (IS_ERR(shmem))
+ return ERR_CAST(shmem);
+
+ shmem->map_wc = false;
+ return to_xdna_obj(&shmem->base);
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ enum amdxdna_ubuf_flag flags = 0;
+ struct amdxdna_drm_va_tbl va_tbl;
+ struct drm_gem_object *gobj;
+ struct dma_buf *dma_buf;
+
+ if (copy_from_user(&va_tbl, u64_to_user_ptr(args->vaddr), sizeof(va_tbl))) {
+ XDNA_DBG(xdna, "Access va table failed");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (va_tbl.num_entries) {
+ if (args->type == AMDXDNA_BO_CMD)
+ flags |= AMDXDNA_UBUF_FLAG_MAP_DMA;
+
+ dma_buf = amdxdna_get_ubuf(dev, flags, va_tbl.num_entries,
+ u64_to_user_ptr(args->vaddr + sizeof(va_tbl)));
+ } else {
+ dma_buf = dma_buf_get(va_tbl.dmabuf_fd);
+ }
+
+ if (IS_ERR(dma_buf))
+ return ERR_CAST(dma_buf);
+
+ gobj = amdxdna_gem_prime_import(dev, dma_buf);
+ if (IS_ERR(gobj)) {
+ dma_buf_put(dma_buf);
+ return ERR_CAST(gobj);
+ }
+
+ dma_buf_put(dma_buf);
+
+ return to_xdna_obj(gobj);
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_object(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args)
+{
+ size_t aligned_sz = PAGE_ALIGN(args->size);
+
+ if (args->vaddr)
+ return amdxdna_gem_create_ubuf_object(dev, args);
+
+ return amdxdna_gem_create_shmem_object(dev, aligned_sz);
+}
+
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attach;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ struct sg_table *sgt;
+ int ret;
+
+ get_dma_buf(dma_buf);
+
+ attach = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attach)) {
+ ret = PTR_ERR(attach);
+ goto put_buf;
+ }
+
+ sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto fail_detach;
+ }
+
+ gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+ if (IS_ERR(gobj)) {
+ ret = PTR_ERR(gobj);
+ goto fail_unmap;
+ }
+
+ abo = to_xdna_obj(gobj);
+ abo->attach = attach;
+ abo->dma_buf = dma_buf;
+
+ return gobj;
+
+fail_unmap:
+ dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
+fail_detach:
+ dma_buf_detach(dma_buf, attach);
+put_buf:
+ dma_buf_put(dma_buf);
+
+ return ERR_PTR(ret);
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_drm_alloc_shmem(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_create_object(dev, args);
+ if (IS_ERR(abo))
+ return ERR_CAST(abo);
+
+ abo->client = client;
+ abo->type = AMDXDNA_BO_SHMEM;
+
+ return abo;
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_drm_create_dev_heap(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->size > xdna->dev_info->dev_mem_size) {
+ XDNA_DBG(xdna, "Invalid dev heap size 0x%llx, limit 0x%lx",
+ args->size, xdna->dev_info->dev_mem_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mutex_lock(&client->mm_lock);
+ if (client->dev_heap) {
+ XDNA_DBG(client->xdna, "dev heap is already created");
+ ret = -EBUSY;
+ goto mm_unlock;
+ }
+
+ abo = amdxdna_gem_create_object(dev, args);
+ if (IS_ERR(abo)) {
+ ret = PTR_ERR(abo);
+ goto mm_unlock;
+ }
+
+ abo->type = AMDXDNA_BO_DEV_HEAP;
+ abo->client = client;
+ abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
+ drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
+
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
+ if (ret) {
+ XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret);
+ goto release_obj;
+ }
+
+ client->dev_heap = abo;
+ drm_gem_object_get(to_gobj(abo));
+ mutex_unlock(&client->mm_lock);
+
+ return abo;
+
+release_obj:
+ drm_gem_object_put(to_gobj(abo));
+mm_unlock:
+ mutex_unlock(&client->mm_lock);
+ return ERR_PTR(ret);
+}
+
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ size_t aligned_sz = PAGE_ALIGN(args->size);
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz);
+ if (IS_ERR(abo))
+ return abo;
+
+ to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs;
+ abo->type = AMDXDNA_BO_DEV;
+ abo->client = client;
+
+ ret = amdxdna_gem_heap_alloc(abo);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+ amdxdna_gem_destroy_obj(abo);
+ return ERR_PTR(ret);
+ }
+
+ drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz);
+
+ return abo;
+}
+
+static struct amdxdna_gem_obj *
+amdxdna_drm_create_cmd_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->size > XDNA_MAX_CMD_BO_SIZE) {
+ XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (args->size < sizeof(struct amdxdna_cmd)) {
+ XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ abo = amdxdna_gem_create_object(dev, args);
+ if (IS_ERR(abo))
+ return ERR_CAST(abo);
+
+ abo->type = AMDXDNA_BO_CMD;
+ abo->client = filp->driver_priv;
+
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
+ if (ret) {
+ XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
+ goto release_obj;
+ }
+
+ return abo;
+
+release_obj:
+ drm_gem_object_put(to_gobj(abo));
+ return ERR_PTR(ret);
+}
+
+int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_create_bo *args = data;
+ struct amdxdna_gem_obj *abo;
+ int ret;
+
+ if (args->flags)
+ return -EINVAL;
+
+ XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx",
+ args->type, args->vaddr, args->size, args->flags);
+ switch (args->type) {
+ case AMDXDNA_BO_SHMEM:
+ abo = amdxdna_drm_alloc_shmem(dev, args, filp);
+ break;
+ case AMDXDNA_BO_DEV_HEAP:
+ abo = amdxdna_drm_create_dev_heap(dev, args, filp);
+ break;
+ case AMDXDNA_BO_DEV:
+ abo = amdxdna_drm_alloc_dev_bo(dev, args, filp);
+ break;
+ case AMDXDNA_BO_CMD:
+ abo = amdxdna_drm_create_cmd_bo(dev, args, filp);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (IS_ERR(abo))
+ return PTR_ERR(abo);
+
+ /* ready to publish object to userspace */
+ ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
+ if (ret) {
+ XDNA_ERR(xdna, "Create handle failed");
+ goto put_obj;
+ }
+
+ XDNA_DBG(xdna, "BO hdl %d type %d userptr 0x%llx xdna_addr 0x%llx size 0x%lx",
+ args->handle, args->type, abo->mem.userptr,
+ abo->mem.dev_addr, abo->mem.size);
+put_obj:
+ /* Dereference object reference. Handle holds it now. */
+ drm_gem_object_put(to_gobj(abo));
+ return ret;
+}
+
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ int ret;
+
+ if (abo->type == AMDXDNA_BO_DEV)
+ abo = abo->client->dev_heap;
+
+ if (is_import_bo(abo))
+ return 0;
+
+ ret = drm_gem_shmem_pin(&abo->base);
+
+ XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret);
+ return ret;
+}
+
+int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
+{
+ int ret;
+
+ mutex_lock(&abo->lock);
+ ret = amdxdna_gem_pin_nolock(abo);
+ mutex_unlock(&abo->lock);
+
+ return ret;
+}
+
+void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
+{
+ if (abo->type == AMDXDNA_BO_DEV)
+ abo = abo->client->dev_heap;
+
+ if (is_import_bo(abo))
+ return;
+
+ mutex_lock(&abo->lock);
+ drm_gem_shmem_unpin(&abo->base);
+ mutex_unlock(&abo->lock);
+}
+
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+ u32 bo_hdl, u8 bo_type)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+
+ gobj = drm_gem_object_lookup(client->filp, bo_hdl);
+ if (!gobj) {
+ XDNA_DBG(xdna, "Can not find bo %d", bo_hdl);
+ return NULL;
+ }
+
+ abo = to_xdna_obj(gobj);
+ if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type)
+ return abo;
+
+ drm_gem_object_put(gobj);
+ return NULL;
+}
+
+int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_drm_get_bo_info *args = data;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret = 0;
+
+ if (args->ext || args->ext_flags || args->pad)
+ return -EINVAL;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj) {
+ XDNA_DBG(xdna, "Lookup GEM object %d failed", args->handle);
+ return -ENOENT;
+ }
+
+ abo = to_xdna_obj(gobj);
+ args->vaddr = abo->mem.userptr;
+ args->xdna_addr = abo->mem.dev_addr;
+
+ if (abo->type != AMDXDNA_BO_DEV)
+ args->map_offset = drm_vma_node_offset_addr(&gobj->vma_node);
+ else
+ args->map_offset = AMDXDNA_INVALID_ADDR;
+
+ XDNA_DBG(xdna, "BO hdl %d map_offset 0x%llx vaddr 0x%llx xdna_addr 0x%llx",
+ args->handle, args->map_offset, args->vaddr, args->xdna_addr);
+
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
+/*
+ * The sync bo ioctl is to make sure the CPU cache is in sync with memory.
+ * This is required because NPU is not cache coherent device. CPU cache
+ * flushing/invalidation is expensive so it is best to handle this outside
+ * of the command submission path. This ioctl allows explicit cache
+ * flushing/invalidation outside of the critical path.
+ */
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_sync_bo *args = data;
+ struct amdxdna_gem_obj *abo;
+ struct drm_gem_object *gobj;
+ int ret;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj) {
+ XDNA_ERR(xdna, "Lookup GEM object failed");
+ return -ENOENT;
+ }
+ abo = to_xdna_obj(gobj);
+
+ ret = amdxdna_gem_pin(abo);
+ if (ret) {
+ XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret);
+ goto put_obj;
+ }
+
+ if (is_import_bo(abo))
+ drm_clflush_sg(abo->base.sgt);
+ else if (abo->mem.kva)
+ drm_clflush_virt_range(abo->mem.kva + args->offset, args->size);
+ else if (abo->base.pages)
+ drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
+ else
+ drm_WARN(&xdna->ddev, 1, "Can not get flush memory");
+
+ amdxdna_gem_unpin(abo);
+
+ XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
+ args->handle, args->offset, args->size);
+
+ if (args->direction == SYNC_DIRECT_FROM_DEVICE)
+ ret = amdxdna_hwctx_sync_debug_bo(abo->client, args->handle);
+
+put_obj:
+ drm_gem_object_put(gobj);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
new file mode 100644
index 000000000000..f79fc7f3c93b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_GEM_H_
+#define _AMDXDNA_GEM_H_
+
+#include <linux/hmm.h>
+#include "amdxdna_pci_drv.h"
+
+struct amdxdna_umap {
+ struct vm_area_struct *vma;
+ struct mmu_interval_notifier notifier;
+ struct hmm_range range;
+ struct work_struct hmm_unreg_work;
+ struct amdxdna_gem_obj *abo;
+ struct list_head node;
+ struct kref refcnt;
+ bool invalid;
+ bool unmapped;
+};
+
+struct amdxdna_mem {
+ u64 userptr;
+ void *kva;
+ u64 dev_addr;
+ size_t size;
+ struct page **pages;
+ u32 nr_pages;
+ struct list_head umap_list;
+ bool map_invalid;
+};
+
+struct amdxdna_gem_obj {
+ struct drm_gem_shmem_object base;
+ struct amdxdna_client *client;
+ u8 type;
+ bool pinned;
+ struct mutex lock; /* Protects: pinned */
+ struct amdxdna_mem mem;
+
+ /* Below members is uninitialized when needed */
+ struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
+ struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */
+ u32 assigned_hwctx;
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *attach;
+};
+
+#define to_gobj(obj) (&(obj)->base.base)
+#define is_import_bo(obj) ((obj)->attach)
+
+static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
+{
+ return container_of(gobj, struct amdxdna_gem_obj, base.base);
+}
+
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+ u32 bo_hdl, u8 bo_type);
+static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
+{
+ drm_gem_object_put(to_gobj(abo));
+}
+
+static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo)
+{
+ return abo->mem.dev_addr - abo->client->dev_heap->mem.dev_addr;
+}
+
+void amdxdna_umap_put(struct amdxdna_umap *mapp);
+
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+ struct amdxdna_drm_create_bo *args,
+ struct drm_file *filp);
+
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
+int amdxdna_gem_pin(struct amdxdna_gem_obj *abo);
+void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
+
+int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+#endif /* _AMDXDNA_GEM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
new file mode 100644
index 000000000000..858df97cd3fb
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <linux/bitfield.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_mailbox.h"
+
+#define MB_ERR(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_err((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+#define MB_DBG(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_dbg((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+#define MB_WARN_ONCE(chann, fmt, args...) \
+({ \
+ typeof(chann) _chann = chann; \
+ dev_warn_once((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
+ (_chann)->msix_irq, ##args); \
+})
+
+#define MAGIC_VAL 0x1D000000U
+#define MAGIC_VAL_MASK 0xFF000000
+#define MAX_MSG_ID_ENTRIES 256
+#define MSG_RX_TIMER 200 /* milliseconds */
+#define MAILBOX_NAME "xdna_mailbox"
+
+enum channel_res_type {
+ CHAN_RES_X2I,
+ CHAN_RES_I2X,
+ CHAN_RES_NUM
+};
+
+struct mailbox {
+ struct device *dev;
+ struct xdna_mailbox_res res;
+};
+
+struct mailbox_channel {
+ struct mailbox *mb;
+ struct xdna_mailbox_chann_res res[CHAN_RES_NUM];
+ int msix_irq;
+ u32 iohub_int_addr;
+ struct xarray chan_xa;
+ u32 next_msgid;
+ u32 x2i_tail;
+
+ /* Received msg related fields */
+ struct workqueue_struct *work_q;
+ struct work_struct rx_work;
+ u32 i2x_head;
+ bool bad_state;
+};
+
+#define MSG_BODY_SZ GENMASK(10, 0)
+#define MSG_PROTO_VER GENMASK(23, 16)
+struct xdna_msg_header {
+ __u32 total_size;
+ __u32 sz_ver;
+ __u32 id;
+ __u32 opcode;
+} __packed;
+
+static_assert(sizeof(struct xdna_msg_header) == 16);
+
+struct mailbox_pkg {
+ struct xdna_msg_header header;
+ __u32 payload[];
+};
+
+/* The protocol version. */
+#define MSG_PROTOCOL_VERSION 0x1
+/* The tombstone value. */
+#define TOMBSTONE 0xDEADFACE
+
+struct mailbox_msg {
+ void *handle;
+ int (*notify_cb)(void *handle, void __iomem *data, size_t size);
+ size_t pkg_size; /* package size in bytes */
+ struct mailbox_pkg pkg;
+};
+
+static void mailbox_reg_write(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 data)
+{
+ struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+ void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+
+ writel(data, ringbuf_addr);
+}
+
+static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg)
+{
+ struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+ void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+
+ return readl(ringbuf_addr);
+}
+
+static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val)
+{
+ struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
+ void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
+ int ret, value;
+
+ /* Poll till value is not zero */
+ ret = readx_poll_timeout(readl, ringbuf_addr, value,
+ value, 1 /* us */, 100);
+ if (ret < 0)
+ return ret;
+
+ *val = value;
+ return 0;
+}
+
+static inline void
+mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val)
+{
+ mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_head_ptr_reg, headptr_val);
+ mb_chann->i2x_head = headptr_val;
+}
+
+static inline void
+mailbox_set_tailptr(struct mailbox_channel *mb_chann, u32 tailptr_val)
+{
+ mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_X2I].mb_tail_ptr_reg, tailptr_val);
+ mb_chann->x2i_tail = tailptr_val;
+}
+
+static inline u32
+mailbox_get_headptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+ return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_head_ptr_reg);
+}
+
+static inline u32
+mailbox_get_tailptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+ return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_tail_ptr_reg);
+}
+
+static inline u32
+mailbox_get_ringbuf_size(struct mailbox_channel *mb_chann, enum channel_res_type type)
+{
+ return mb_chann->res[type].rb_size;
+}
+
+static inline int mailbox_validate_msgid(int msg_id)
+{
+ return (msg_id & MAGIC_VAL_MASK) == MAGIC_VAL;
+}
+
+static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
+{
+ u32 msg_id;
+ int ret;
+
+ ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg,
+ XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1),
+ &mb_chann->next_msgid, GFP_NOWAIT);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Add MAGIC_VAL to the higher bits.
+ */
+ msg_id |= MAGIC_VAL;
+ return msg_id;
+}
+
+static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id)
+{
+ msg_id &= ~MAGIC_VAL_MASK;
+ xa_erase_irq(&mb_chann->chan_xa, msg_id);
+}
+
+static void mailbox_release_msg(struct mailbox_channel *mb_chann,
+ struct mailbox_msg *mb_msg)
+{
+ MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
+ mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
+ if (mb_msg->notify_cb)
+ mb_msg->notify_cb(mb_msg->handle, NULL, 0);
+ kfree(mb_msg);
+}
+
+static int
+mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
+{
+ void __iomem *write_addr;
+ u32 ringbuf_size;
+ u32 head, tail;
+ u32 start_addr;
+ u32 tmp_tail;
+
+ head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I);
+ tail = mb_chann->x2i_tail;
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I);
+ start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr;
+ tmp_tail = tail + mb_msg->pkg_size;
+
+ if (tail < head && tmp_tail >= head)
+ goto no_space;
+
+ if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) &&
+ mb_msg->pkg_size >= head))
+ goto no_space;
+
+ if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) {
+ write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
+ writel(TOMBSTONE, write_addr);
+
+ /* tombstone is set. Write from the start of the ringbuf */
+ tail = 0;
+ }
+
+ write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
+ memcpy_toio(write_addr, &mb_msg->pkg, mb_msg->pkg_size);
+ mailbox_set_tailptr(mb_chann, tail + mb_msg->pkg_size);
+
+ trace_mbox_set_tail(MAILBOX_NAME, mb_chann->msix_irq,
+ mb_msg->pkg.header.opcode,
+ mb_msg->pkg.header.id);
+
+ return 0;
+
+no_space:
+ return -ENOSPC;
+}
+
+static int
+mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header,
+ void __iomem *data)
+{
+ struct mailbox_msg *mb_msg;
+ int msg_id;
+ int ret = 0;
+
+ msg_id = header->id;
+ if (!mailbox_validate_msgid(msg_id)) {
+ MB_ERR(mb_chann, "Bad message ID 0x%x", msg_id);
+ return -EINVAL;
+ }
+
+ msg_id &= ~MAGIC_VAL_MASK;
+ mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id);
+ if (!mb_msg) {
+ MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id);
+ return -EINVAL;
+ }
+
+ MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
+ header->opcode, header->total_size, header->id);
+ if (mb_msg->notify_cb) {
+ ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
+ if (unlikely(ret))
+ MB_ERR(mb_chann, "Message callback ret %d", ret);
+ }
+
+ kfree(mb_msg);
+ return ret;
+}
+
+static int mailbox_get_msg(struct mailbox_channel *mb_chann)
+{
+ struct xdna_msg_header header;
+ void __iomem *read_addr;
+ u32 msg_size, rest;
+ u32 ringbuf_size;
+ u32 head, tail;
+ u32 start_addr;
+ int ret;
+
+ if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail))
+ return -EINVAL;
+ head = mb_chann->i2x_head;
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X);
+ start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr;
+
+ if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) {
+ MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail);
+ return -EINVAL;
+ }
+
+ /* ringbuf empty */
+ if (head == tail)
+ return -ENOENT;
+
+ if (head == ringbuf_size)
+ head = 0;
+
+ /* Peek size of the message or TOMBSTONE */
+ read_addr = mb_chann->mb->res.ringbuf_base + start_addr + head;
+ header.total_size = readl(read_addr);
+ /* size is TOMBSTONE, set next read from 0 */
+ if (header.total_size == TOMBSTONE) {
+ if (head < tail) {
+ MB_WARN_ONCE(mb_chann, "Tombstone, head 0x%x tail 0x%x",
+ head, tail);
+ return -EINVAL;
+ }
+ mailbox_set_headptr(mb_chann, 0);
+ return 0;
+ }
+
+ if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) {
+ MB_WARN_ONCE(mb_chann, "Invalid total size 0x%x", header.total_size);
+ return -EINVAL;
+ }
+ msg_size = sizeof(header) + header.total_size;
+
+ if (msg_size > ringbuf_size - head || msg_size > tail - head) {
+ MB_WARN_ONCE(mb_chann, "Invalid message size %d, tail %d, head %d",
+ msg_size, tail, head);
+ return -EINVAL;
+ }
+
+ rest = sizeof(header) - sizeof(u32);
+ read_addr += sizeof(u32);
+ memcpy_fromio((u32 *)&header + 1, read_addr, rest);
+ read_addr += rest;
+
+ ret = mailbox_get_resp(mb_chann, &header, read_addr);
+
+ mailbox_set_headptr(mb_chann, head + msg_size);
+ /* After update head, it can equal to ringbuf_size. This is expected. */
+ trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq,
+ header.opcode, header.id);
+
+ return ret;
+}
+
+static irqreturn_t mailbox_irq_handler(int irq, void *p)
+{
+ struct mailbox_channel *mb_chann = p;
+
+ trace_mbox_irq_handle(MAILBOX_NAME, irq);
+ /* Schedule a rx_work to call the callback functions */
+ queue_work(mb_chann->work_q, &mb_chann->rx_work);
+
+ return IRQ_HANDLED;
+}
+
+static void mailbox_rx_worker(struct work_struct *rx_work)
+{
+ struct mailbox_channel *mb_chann;
+ int ret;
+
+ mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+
+ if (READ_ONCE(mb_chann->bad_state)) {
+ MB_ERR(mb_chann, "Channel in bad state, work aborted");
+ return;
+ }
+
+again:
+ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
+
+ while (1) {
+ /*
+ * If return is 0, keep consuming next message, until there is
+ * no messages or an error happened.
+ */
+ ret = mailbox_get_msg(mb_chann);
+ if (ret == -ENOENT)
+ break;
+
+ /* Other error means device doesn't look good, disable irq. */
+ if (unlikely(ret)) {
+ MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret);
+ WRITE_ONCE(mb_chann->bad_state, true);
+ return;
+ }
+ }
+
+ /*
+ * The hardware will not generate interrupt if firmware creates a new
+ * response right after driver clears interrupt register. Check
+ * the interrupt register to make sure there is not any new response
+ * before exiting.
+ */
+ if (mailbox_reg_read(mb_chann, mb_chann->iohub_int_addr))
+ goto again;
+}
+
+int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout)
+{
+ struct xdna_msg_header *header;
+ struct mailbox_msg *mb_msg;
+ size_t pkg_size;
+ int ret;
+
+ pkg_size = sizeof(*header) + msg->send_size;
+ if (pkg_size > mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I)) {
+ MB_ERR(mb_chann, "Message size larger than ringbuf size");
+ return -EINVAL;
+ }
+
+ if (unlikely(!IS_ALIGNED(msg->send_size, 4))) {
+ MB_ERR(mb_chann, "Message must be 4 bytes align");
+ return -EINVAL;
+ }
+
+ /* The fist word in payload can NOT be TOMBSTONE */
+ if (unlikely(((u32 *)msg->send_data)[0] == TOMBSTONE)) {
+ MB_ERR(mb_chann, "Tomb stone in data");
+ return -EINVAL;
+ }
+
+ if (READ_ONCE(mb_chann->bad_state)) {
+ MB_ERR(mb_chann, "Channel in bad state");
+ return -EPIPE;
+ }
+
+ mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL);
+ if (!mb_msg)
+ return -ENOMEM;
+
+ mb_msg->handle = msg->handle;
+ mb_msg->notify_cb = msg->notify_cb;
+ mb_msg->pkg_size = pkg_size;
+
+ header = &mb_msg->pkg.header;
+ /*
+ * Hardware use total_size and size to split huge message.
+ * We do not support it here. Thus the values are the same.
+ */
+ header->total_size = msg->send_size;
+ header->sz_ver = FIELD_PREP(MSG_BODY_SZ, msg->send_size) |
+ FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION);
+ header->opcode = msg->opcode;
+ memcpy(mb_msg->pkg.payload, msg->send_data, msg->send_size);
+
+ ret = mailbox_acquire_msgid(mb_chann, mb_msg);
+ if (unlikely(ret < 0)) {
+ MB_ERR(mb_chann, "mailbox_acquire_msgid failed");
+ goto msg_id_failed;
+ }
+ header->id = ret;
+
+ MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
+ header->opcode, header->total_size, header->id);
+
+ ret = mailbox_send_msg(mb_chann, mb_msg);
+ if (ret) {
+ MB_DBG(mb_chann, "Error in mailbox send msg, ret %d", ret);
+ goto release_id;
+ }
+
+ return 0;
+
+release_id:
+ mailbox_release_msgid(mb_chann, header->id);
+msg_id_failed:
+ kfree(mb_msg);
+ return ret;
+}
+
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mb,
+ const struct xdna_mailbox_chann_res *x2i,
+ const struct xdna_mailbox_chann_res *i2x,
+ u32 iohub_int_addr,
+ int mb_irq)
+{
+ struct mailbox_channel *mb_chann;
+ int ret;
+
+ if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) {
+ pr_err("Ring buf size must be power of 2");
+ return NULL;
+ }
+
+ mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL);
+ if (!mb_chann)
+ return NULL;
+
+ mb_chann->mb = mb;
+ mb_chann->msix_irq = mb_irq;
+ mb_chann->iohub_int_addr = iohub_int_addr;
+ memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
+ memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x));
+
+ xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+ mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
+ mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);
+
+ INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker);
+ mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME);
+ if (!mb_chann->work_q) {
+ MB_ERR(mb_chann, "Create workqueue failed");
+ goto free_and_out;
+ }
+
+ /* Everything look good. Time to enable irq handler */
+ ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann);
+ if (ret) {
+ MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret);
+ goto destroy_wq;
+ }
+
+ mb_chann->bad_state = false;
+ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
+
+ MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
+ return mb_chann;
+
+destroy_wq:
+ destroy_workqueue(mb_chann->work_q);
+free_and_out:
+ kfree(mb_chann);
+ return NULL;
+}
+
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
+{
+ struct mailbox_msg *mb_msg;
+ unsigned long msg_id;
+
+ MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+ free_irq(mb_chann->msix_irq, mb_chann);
+ destroy_workqueue(mb_chann->work_q);
+ /* We can clean up and release resources */
+
+ xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
+ mailbox_release_msg(mb_chann, mb_msg);
+
+ xa_destroy(&mb_chann->chan_xa);
+
+ MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
+ kfree(mb_chann);
+ return 0;
+}
+
+void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
+{
+ /* Disable an irq and wait. This might sleep. */
+ disable_irq(mb_chann->msix_irq);
+
+ /* Cancel RX work and wait for it to finish */
+ cancel_work_sync(&mb_chann->rx_work);
+ MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+}
+
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+ const struct xdna_mailbox_res *res)
+{
+ struct mailbox *mb;
+
+ mb = drmm_kzalloc(ddev, sizeof(*mb), GFP_KERNEL);
+ if (!mb)
+ return NULL;
+ mb->dev = ddev->dev;
+
+ /* mailbox and ring buf base and size information */
+ memcpy(&mb->res, res, sizeof(*res));
+
+ return mb;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h
new file mode 100644
index 000000000000..ea367f2fb738
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MAILBOX_H_
+#define _AIE2_MAILBOX_H_
+
+struct mailbox;
+struct mailbox_channel;
+
+/*
+ * xdna_mailbox_msg - message struct
+ *
+ * @opcode: opcode for firmware
+ * @handle: handle used for the notify callback
+ * @notify_cb: callback function to notify the sender when there is response
+ * @send_data: pointing to sending data
+ * @send_size: size of the sending data
+ *
+ * The mailbox will split the sending data in to multiple firmware message if
+ * the size of the data is too big. This is transparent to the sender. The
+ * sender will receive one notification.
+ */
+struct xdna_mailbox_msg {
+ u32 opcode;
+ void *handle;
+ int (*notify_cb)(void *handle, void __iomem *data, size_t size);
+ u8 *send_data;
+ size_t send_size;
+};
+
+/*
+ * xdna_mailbox_res - mailbox hardware resource
+ *
+ * @ringbuf_base: ring buffer base address
+ * @ringbuf_size: ring buffer size
+ * @mbox_base: mailbox base address
+ * @mbox_size: mailbox size
+ */
+struct xdna_mailbox_res {
+ void __iomem *ringbuf_base;
+ size_t ringbuf_size;
+ void __iomem *mbox_base;
+ size_t mbox_size;
+ const char *name;
+};
+
+/*
+ * xdna_mailbox_chann_res - resources
+ *
+ * @rb_start_addr: ring buffer start address
+ * @rb_size: ring buffer size
+ * @mb_head_ptr_reg: mailbox head pointer register
+ * @mb_tail_ptr_reg: mailbox tail pointer register
+ */
+struct xdna_mailbox_chann_res {
+ u32 rb_start_addr;
+ u32 rb_size;
+ u32 mb_head_ptr_reg;
+ u32 mb_tail_ptr_reg;
+};
+
+/*
+ * xdna_mailbox_create() -- create mailbox subsystem and initialize
+ *
+ * @ddev: device pointer
+ * @res: SRAM and mailbox resources
+ *
+ * Return: If success, return a handle of mailbox subsystem.
+ * Otherwise, return NULL pointer.
+ */
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+ const struct xdna_mailbox_res *res);
+
+/*
+ * xdna_mailbox_create_channel() -- Create a mailbox channel instance
+ *
+ * @mailbox: the handle return from xdna_mailbox_create()
+ * @x2i: host to firmware mailbox resources
+ * @i2x: firmware to host mailbox resources
+ * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt
+ * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index
+ *
+ * Return: If success, return a handle of mailbox channel. Otherwise, return NULL.
+ */
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mailbox,
+ const struct xdna_mailbox_chann_res *x2i,
+ const struct xdna_mailbox_chann_res *i2x,
+ u32 xdna_mailbox_intr_reg,
+ int mb_irq);
+
+/*
+ * xdna_mailbox_destroy_channel() -- destroy mailbox channel
+ *
+ * @mailbox_chann: the handle return from xdna_mailbox_create_channel()
+ *
+ * Return: if success, return 0. otherwise return error code
+ */
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_stop_channel() -- stop mailbox channel
+ *
+ * @mailbox_chann: the handle return from xdna_mailbox_create_channel()
+ *
+ * Return: if success, return 0. otherwise return error code
+ */
+void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_send_msg() -- Send a message
+ *
+ * @mailbox_chann: Mailbox channel handle
+ * @msg: message struct for message information
+ * @tx_timeout: the timeout value for sending the message in ms.
+ *
+ * Return: If success return 0, otherwise, return error code
+ */
+int xdna_mailbox_send_msg(struct mailbox_channel *mailbox_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout);
+
+#endif /* _AIE2_MAILBOX_ */
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
new file mode 100644
index 000000000000..6d0c24513476
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/completion.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size)
+{
+ struct xdna_notify *cb_arg = handle;
+ int ret;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(cb_arg->size != size)) {
+ cb_arg->error = -EINVAL;
+ goto out;
+ }
+
+ memcpy_fromio(cb_arg->data, data, cb_arg->size);
+ print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
+ 16, 4, cb_arg->data, cb_arg->size, true);
+out:
+ ret = cb_arg->error;
+ complete(&cb_arg->comp);
+ return ret;
+}
+
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg)
+{
+ struct xdna_notify *hdl = msg->handle;
+ int ret;
+
+ ret = xdna_mailbox_send_msg(chann, msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = wait_for_completion_timeout(&hdl->comp,
+ msecs_to_jiffies(RX_TIMEOUT));
+ if (!ret) {
+ XDNA_ERR(xdna, "Wait for completion timeout");
+ return -ETIME;
+ }
+
+ return hdl->error;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
new file mode 100644
index 000000000000..556c712cad0a
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_MAILBOX_HELPER_H
+#define _AMDXDNA_MAILBOX_HELPER_H
+
+#define TX_TIMEOUT 2000 /* milliseconds */
+#define RX_TIMEOUT 5000 /* milliseconds */
+
+struct amdxdna_dev;
+
+struct xdna_notify {
+ struct completion comp;
+ u32 *data;
+ size_t size;
+ int error;
+ u32 *status;
+};
+
+#define DECLARE_XDNA_MSG_COMMON(name, op, s) \
+ struct name##_req req = { 0 }; \
+ struct name##_resp resp = { .status = s }; \
+ struct xdna_notify hdl = { \
+ .error = 0, \
+ .data = (u32 *)&resp, \
+ .size = sizeof(resp), \
+ .comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ .status = (u32 *)&resp.status, \
+ }; \
+ struct xdna_mailbox_msg msg = { \
+ .send_data = (u8 *)&req, \
+ .send_size = sizeof(req), \
+ .handle = &hdl, \
+ .opcode = op, \
+ .notify_cb = xdna_msg_cb, \
+ }
+
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size);
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg);
+
+#endif /* _AMDXDNA_MAILBOX_HELPER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
new file mode 100644
index 000000000000..1973ab67721b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
+
+MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+
+/*
+ * 0.0: Initial version
+ * 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY
+ * 0.2: Support getting last error hardware error
+ * 0.3: Support firmware debug buffer
+ * 0.4: Support getting resource information
+ * 0.5: Support getting telemetry data
+ * 0.6: Support preemption
+ */
+#define AMDXDNA_DRIVER_MAJOR 0
+#define AMDXDNA_DRIVER_MINOR 6
+
+/*
+ * Bind the driver base on (vendor_id, device_id) pair and later use the
+ * (device_id, rev_id) pair as a key to select the devices. The devices with
+ * same device_id have very similar interface to host driver.
+ */
+static const struct pci_device_id pci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
+ {0}
+};
+
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+static const struct amdxdna_device_id amdxdna_ids[] = {
+ { 0x1502, 0x0, &dev_npu1_info },
+ { 0x17f0, 0x0, &dev_npu2_info },
+ { 0x17f0, 0x10, &dev_npu4_info },
+ { 0x17f0, 0x11, &dev_npu5_info },
+ { 0x17f0, 0x20, &dev_npu6_info },
+ {0}
+};
+
+static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+ struct amdxdna_client *client;
+ int ret;
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (!client)
+ return -ENOMEM;
+
+ client->pid = pid_nr(rcu_access_pointer(filp->pid));
+ client->xdna = xdna;
+
+ client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
+ if (IS_ERR(client->sva)) {
+ ret = PTR_ERR(client->sva);
+ XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
+ goto failed;
+ }
+ client->pasid = iommu_sva_get_pasid(client->sva);
+ if (client->pasid == IOMMU_PASID_INVALID) {
+ XDNA_ERR(xdna, "SVA get pasid failed");
+ ret = -ENODEV;
+ goto unbind_sva;
+ }
+ init_srcu_struct(&client->hwctx_srcu);
+ xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
+ mutex_init(&client->mm_lock);
+
+ mutex_lock(&xdna->dev_lock);
+ list_add_tail(&client->node, &xdna->client_list);
+ mutex_unlock(&xdna->dev_lock);
+
+ filp->driver_priv = client;
+ client->filp = filp;
+
+ XDNA_DBG(xdna, "pid %d opened", client->pid);
+ return 0;
+
+unbind_sva:
+ iommu_sva_unbind_device(client->sva);
+failed:
+ kfree(client);
+
+ return ret;
+}
+
+static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+
+ XDNA_DBG(xdna, "closing pid %d", client->pid);
+
+ xa_destroy(&client->hwctx_xa);
+ cleanup_srcu_struct(&client->hwctx_srcu);
+ mutex_destroy(&client->mm_lock);
+ if (client->dev_heap)
+ drm_gem_object_put(to_gobj(client->dev_heap));
+
+ iommu_sva_unbind_device(client->sva);
+
+ XDNA_DBG(xdna, "pid %d closed", client->pid);
+ kfree(client);
+}
+
+static int amdxdna_flush(struct file *f, fl_owner_t id)
+{
+ struct drm_file *filp = f->private_data;
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = client->xdna;
+ int idx;
+
+ XDNA_DBG(xdna, "PID %d flushing...", client->pid);
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return 0;
+
+ mutex_lock(&xdna->dev_lock);
+ list_del_init(&client->node);
+ amdxdna_hwctx_remove_all(client);
+ mutex_unlock(&xdna->dev_lock);
+
+ drm_dev_exit(idx);
+ return 0;
+}
+
+static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_get_info *args = data;
+ int ret;
+
+ if (!xdna->dev_info->ops->get_aie_info)
+ return -EOPNOTSUPP;
+
+ XDNA_DBG(xdna, "Request parameter %u", args->param);
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->get_aie_info(client, args);
+ mutex_unlock(&xdna->dev_lock);
+ return ret;
+}
+
+static int amdxdna_drm_get_array_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_get_array *args = data;
+
+ if (!xdna->dev_info->ops->get_array)
+ return -EOPNOTSUPP;
+
+ if (args->pad || !args->num_element || !args->element_size)
+ return -EINVAL;
+
+ guard(mutex)(&xdna->dev_lock);
+ return xdna->dev_info->ops->get_array(client, args);
+}
+
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_set_state *args = data;
+ int ret;
+
+ if (!xdna->dev_info->ops->set_aie_state)
+ return -EOPNOTSUPP;
+
+ XDNA_DBG(xdna, "Request parameter %u", args->param);
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->set_aie_state(client, args);
+ mutex_unlock(&xdna->dev_lock);
+
+ return ret;
+}
+
+static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
+ /* Context */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0),
+ /* BO */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
+ /* Execution */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+ /* AIE hardware */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
+};
+
+static const struct file_operations amdxdna_fops = {
+ .owner = THIS_MODULE,
+ .open = accel_open,
+ .release = drm_release,
+ .flush = amdxdna_flush,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = noop_llseek,
+ .mmap = drm_gem_mmap,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+const struct drm_driver amdxdna_drm_drv = {
+ .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL |
+ DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
+ .fops = &amdxdna_fops,
+ .name = "amdxdna_accel_driver",
+ .desc = "AMD XDNA DRM implementation",
+ .major = AMDXDNA_DRIVER_MAJOR,
+ .minor = AMDXDNA_DRIVER_MINOR,
+ .open = amdxdna_drm_open,
+ .postclose = amdxdna_drm_close,
+ .ioctls = amdxdna_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
+
+ .gem_create_object = amdxdna_gem_create_object_cb,
+ .gem_prime_import = amdxdna_gem_prime_import,
+};
+
+static const struct amdxdna_dev_info *
+amdxdna_get_dev_info(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(amdxdna_ids); i++) {
+ if (pdev->device == amdxdna_ids[i].device &&
+ pdev->revision == amdxdna_ids[i].revision)
+ return amdxdna_ids[i].dev_info;
+ }
+ return NULL;
+}
+
+static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct amdxdna_dev *xdna;
+ int ret;
+
+ xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
+ if (IS_ERR(xdna))
+ return PTR_ERR(xdna);
+
+ xdna->dev_info = amdxdna_get_dev_info(pdev);
+ if (!xdna->dev_info)
+ return -ENODEV;
+
+ drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
+ init_rwsem(&xdna->notifier_lock);
+ INIT_LIST_HEAD(&xdna->client_list);
+ pci_set_drvdata(pdev, xdna);
+
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&xdna->notifier_lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
+ xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0);
+ if (!xdna->notifier_wq)
+ return -ENOMEM;
+
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->init(xdna);
+ mutex_unlock(&xdna->dev_lock);
+ if (ret) {
+ XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
+ goto destroy_notifier_wq;
+ }
+
+ ret = amdxdna_sysfs_init(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret);
+ goto failed_dev_fini;
+ }
+
+ ret = drm_dev_register(&xdna->ddev, 0);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
+ goto failed_sysfs_fini;
+ }
+
+ return 0;
+
+failed_sysfs_fini:
+ amdxdna_sysfs_fini(xdna);
+failed_dev_fini:
+ mutex_lock(&xdna->dev_lock);
+ xdna->dev_info->ops->fini(xdna);
+ mutex_unlock(&xdna->dev_lock);
+destroy_notifier_wq:
+ destroy_workqueue(xdna->notifier_wq);
+ return ret;
+}
+
+static void amdxdna_remove(struct pci_dev *pdev)
+{
+ struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
+ struct amdxdna_client *client;
+
+ destroy_workqueue(xdna->notifier_wq);
+
+ drm_dev_unplug(&xdna->ddev);
+ amdxdna_sysfs_fini(xdna);
+
+ mutex_lock(&xdna->dev_lock);
+ client = list_first_entry_or_null(&xdna->client_list,
+ struct amdxdna_client, node);
+ while (client) {
+ list_del_init(&client->node);
+ amdxdna_hwctx_remove_all(client);
+
+ client = list_first_entry_or_null(&xdna->client_list,
+ struct amdxdna_client, node);
+ }
+
+ xdna->dev_info->ops->fini(xdna);
+ mutex_unlock(&xdna->dev_lock);
+}
+
+static const struct dev_pm_ops amdxdna_pm_ops = {
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
+ RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
+};
+
+static struct pci_driver amdxdna_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = pci_ids,
+ .probe = amdxdna_probe,
+ .remove = amdxdna_remove,
+ .driver.pm = &amdxdna_pm_ops,
+};
+
+module_pci_driver(amdxdna_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
+MODULE_DESCRIPTION("amdxdna driver");
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
new file mode 100644
index 000000000000..c99477f5e454
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PCI_DRV_H_
+#define _AMDXDNA_PCI_DRV_H_
+
+#include <drm/drm_print.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+
+#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args)
+#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
+#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
+#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args)
+#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)
+
+#define XDNA_MBZ_DBG(xdna, ptr, sz) \
+ ({ \
+ int __i; \
+ int __ret = 0; \
+ u8 *__ptr = (u8 *)(ptr); \
+ for (__i = 0; __i < (sz); __i++) { \
+ if (__ptr[__i]) { \
+ XDNA_DBG(xdna, "MBZ check failed"); \
+ __ret = -EINVAL; \
+ break; \
+ } \
+ } \
+ __ret; \
+ })
+
+#define to_xdna_dev(drm_dev) \
+ ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
+
+extern const struct drm_driver amdxdna_drm_drv;
+
+struct amdxdna_client;
+struct amdxdna_dev;
+struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
+struct amdxdna_gem_obj;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+/*
+ * struct amdxdna_dev_ops - Device hardware operation callbacks
+ */
+struct amdxdna_dev_ops {
+ int (*init)(struct amdxdna_dev *xdna);
+ void (*fini)(struct amdxdna_dev *xdna);
+ int (*resume)(struct amdxdna_dev *xdna);
+ int (*suspend)(struct amdxdna_dev *xdna);
+ int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
+ void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
+ int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+ int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
+ void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+ int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+ int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+ int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
+ int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
+};
+
+/*
+ * struct amdxdna_dev_info - Device hardware information
+ * Record device static information, like reg, mbox, PSP, SMU bar index
+ */
+struct amdxdna_dev_info {
+ int reg_bar;
+ int mbox_bar;
+ int sram_bar;
+ int psp_bar;
+ int smu_bar;
+ int device_type;
+ int first_col;
+ u32 dev_mem_buf_shift;
+ u64 dev_mem_base;
+ size_t dev_mem_size;
+ char *vbnv;
+ const struct amdxdna_dev_priv *dev_priv;
+ const struct amdxdna_dev_ops *ops;
+};
+
+struct amdxdna_fw_ver {
+ u32 major;
+ u32 minor;
+ u32 sub;
+ u32 build;
+};
+
+struct amdxdna_dev {
+ struct drm_device ddev;
+ struct amdxdna_dev_hdl *dev_handle;
+ const struct amdxdna_dev_info *dev_info;
+ void *xrs_hdl;
+
+ struct mutex dev_lock; /* per device lock */
+ struct list_head client_list;
+ struct amdxdna_fw_ver fw_ver;
+ struct rw_semaphore notifier_lock; /* for mmu notifier*/
+ struct workqueue_struct *notifier_wq;
+ bool rpm_on;
+};
+
+/*
+ * struct amdxdna_device_id - PCI device info
+ */
+struct amdxdna_device_id {
+ unsigned short device;
+ u8 revision;
+ const struct amdxdna_dev_info *dev_info;
+};
+
+/*
+ * struct amdxdna_client - amdxdna client
+ * A per fd data structure for managing context and other user process stuffs.
+ */
+struct amdxdna_client {
+ struct list_head node;
+ pid_t pid;
+ struct srcu_struct hwctx_srcu;
+ struct xarray hwctx_xa;
+ u32 next_hwctxid;
+ struct amdxdna_dev *xdna;
+ struct drm_file *filp;
+
+ struct mutex mm_lock; /* protect memory related */
+ struct amdxdna_gem_obj *dev_heap;
+
+ struct iommu_sva *sva;
+ int pasid;
+};
+
+#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
+ xa_for_each(&(client)->hwctx_xa, hwctx_id, entry)
+
+/* Add device info below */
+extern const struct amdxdna_dev_info dev_npu1_info;
+extern const struct amdxdna_dev_info dev_npu2_info;
+extern const struct amdxdna_dev_info dev_npu4_info;
+extern const struct amdxdna_dev_info dev_npu5_info;
+extern const struct amdxdna_dev_info dev_npu6_info;
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna);
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PCI_DRV_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c
new file mode 100644
index 000000000000..fa38e65d617c
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_drv.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_pm.h"
+
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+int amdxdna_pm_suspend(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->suspend) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->suspend(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Suspend done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->resume) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->resume(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Resume done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+ int ret;
+
+ if (!xdna->rpm_on)
+ return 0;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Resume failed: %d", ret);
+ pm_runtime_set_suspended(dev);
+ }
+
+ return ret;
+}
+
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ if (!xdna->rpm_on)
+ return;
+
+ pm_runtime_put_autosuspend(dev);
+}
+
+void amdxdna_pm_init(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ pm_runtime_set_active(dev);
+ pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_allow(dev);
+ pm_runtime_put_autosuspend(dev);
+ xdna->rpm_on = true;
+}
+
+void amdxdna_pm_fini(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ xdna->rpm_on = false;
+ pm_runtime_get_noresume(dev);
+ pm_runtime_forbid(dev);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h
new file mode 100644
index 000000000000..77b2d6e45570
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PM_H_
+#define _AMDXDNA_PM_H_
+
+#include "amdxdna_pci_drv.h"
+
+int amdxdna_pm_suspend(struct device *dev);
+int amdxdna_pm_resume(struct device *dev);
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
+void amdxdna_pm_init(struct amdxdna_dev *xdna);
+void amdxdna_pm_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c
new file mode 100644
index 000000000000..f27e4ee960a0
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_sysfs.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", xdna->dev_info->vbnv);
+}
+static DEVICE_ATTR_RO(vbnv);
+
+static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", xdna->dev_info->device_type);
+}
+static DEVICE_ATTR_RO(device_type);
+
+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d.%d.%d.%d\n", xdna->fw_ver.major,
+ xdna->fw_ver.minor, xdna->fw_ver.sub,
+ xdna->fw_ver.build);
+}
+static DEVICE_ATTR_RO(fw_version);
+
+static struct attribute *amdxdna_attrs[] = {
+ &dev_attr_device_type.attr,
+ &dev_attr_vbnv.attr,
+ &dev_attr_fw_version.attr,
+ NULL,
+};
+
+static struct attribute_group amdxdna_attr_group = {
+ .attrs = amdxdna_attrs,
+};
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna)
+{
+ int ret;
+
+ ret = sysfs_create_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
+ if (ret)
+ XDNA_ERR(xdna, "Create attr group failed");
+
+ return ret;
+}
+
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna)
+{
+ sysfs_remove_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c
new file mode 100644
index 000000000000..077b2261cf2a
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ubuf.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <linux/dma-buf.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_ubuf.h"
+
+struct amdxdna_ubuf_priv {
+ struct page **pages;
+ u64 nr_pages;
+ enum amdxdna_ubuf_flag flags;
+ struct mm_struct *mm;
+};
+
+static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction direction)
+{
+ struct amdxdna_ubuf_priv *ubuf = attach->dmabuf->priv;
+ struct sg_table *sg;
+ int ret;
+
+ sg = kzalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg)
+ return ERR_PTR(-ENOMEM);
+
+ ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0,
+ ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) {
+ ret = dma_map_sgtable(attach->dev, sg, direction, 0);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return sg;
+}
+
+static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *sg,
+ enum dma_data_direction direction)
+{
+ struct amdxdna_ubuf_priv *ubuf = attach->dmabuf->priv;
+
+ if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA)
+ dma_unmap_sgtable(attach->dev, sg, direction, 0);
+
+ sg_free_table(sg);
+ kfree(sg);
+}
+
+static void amdxdna_ubuf_release(struct dma_buf *dbuf)
+{
+ struct amdxdna_ubuf_priv *ubuf = dbuf->priv;
+
+ unpin_user_pages(ubuf->pages, ubuf->nr_pages);
+ kvfree(ubuf->pages);
+ atomic64_sub(ubuf->nr_pages, &ubuf->mm->pinned_vm);
+ mmdrop(ubuf->mm);
+ kfree(ubuf);
+}
+
+static vm_fault_t amdxdna_ubuf_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct amdxdna_ubuf_priv *ubuf;
+ unsigned long pfn;
+ pgoff_t pgoff;
+
+ ubuf = vma->vm_private_data;
+ pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+
+ pfn = page_to_pfn(ubuf->pages[pgoff]);
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static const struct vm_operations_struct amdxdna_ubuf_vm_ops = {
+ .fault = amdxdna_ubuf_vm_fault,
+};
+
+static int amdxdna_ubuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
+{
+ struct amdxdna_ubuf_priv *ubuf = dbuf->priv;
+
+ vma->vm_ops = &amdxdna_ubuf_vm_ops;
+ vma->vm_private_data = ubuf;
+ vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
+ return 0;
+}
+
+static int amdxdna_ubuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+ struct amdxdna_ubuf_priv *ubuf = dbuf->priv;
+ void *kva;
+
+ kva = vmap(ubuf->pages, ubuf->nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!kva)
+ return -EINVAL;
+
+ iosys_map_set_vaddr(map, kva);
+ return 0;
+}
+
+static void amdxdna_ubuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+ vunmap(map->vaddr);
+}
+
+static const struct dma_buf_ops amdxdna_ubuf_dmabuf_ops = {
+ .map_dma_buf = amdxdna_ubuf_map,
+ .unmap_dma_buf = amdxdna_ubuf_unmap,
+ .release = amdxdna_ubuf_release,
+ .mmap = amdxdna_ubuf_mmap,
+ .vmap = amdxdna_ubuf_vmap,
+ .vunmap = amdxdna_ubuf_vunmap,
+};
+
+struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev,
+ enum amdxdna_ubuf_flag flags,
+ u32 num_entries, void __user *va_entries)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ unsigned long lock_limit, new_pinned;
+ struct amdxdna_drm_va_entry *va_ent;
+ struct amdxdna_ubuf_priv *ubuf;
+ u32 npages, start = 0;
+ struct dma_buf *dbuf;
+ int i, ret;
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ if (!can_do_mlock())
+ return ERR_PTR(-EPERM);
+
+ ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
+ if (!ubuf)
+ return ERR_PTR(-ENOMEM);
+
+ ubuf->flags = flags;
+ ubuf->mm = current->mm;
+ mmgrab(ubuf->mm);
+
+ va_ent = kvcalloc(num_entries, sizeof(*va_ent), GFP_KERNEL);
+ if (!va_ent) {
+ ret = -ENOMEM;
+ goto free_ubuf;
+ }
+
+ if (copy_from_user(va_ent, va_entries, sizeof(*va_ent) * num_entries)) {
+ XDNA_DBG(xdna, "Access va entries failed");
+ ret = -EINVAL;
+ goto free_ent;
+ }
+
+ for (i = 0, exp_info.size = 0; i < num_entries; i++) {
+ if (!IS_ALIGNED(va_ent[i].vaddr, PAGE_SIZE) ||
+ !IS_ALIGNED(va_ent[i].len, PAGE_SIZE)) {
+ XDNA_ERR(xdna, "Invalid address or len %llx, %llx",
+ va_ent[i].vaddr, va_ent[i].len);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+
+ exp_info.size += va_ent[i].len;
+ }
+
+ ubuf->nr_pages = exp_info.size >> PAGE_SHIFT;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ new_pinned = atomic64_add_return(ubuf->nr_pages, &ubuf->mm->pinned_vm);
+ if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
+ XDNA_DBG(xdna, "New pin %ld, limit %ld, cap %d",
+ new_pinned, lock_limit, capable(CAP_IPC_LOCK));
+ ret = -ENOMEM;
+ goto sub_pin_cnt;
+ }
+
+ ubuf->pages = kvmalloc_array(ubuf->nr_pages, sizeof(*ubuf->pages), GFP_KERNEL);
+ if (!ubuf->pages) {
+ ret = -ENOMEM;
+ goto sub_pin_cnt;
+ }
+
+ for (i = 0; i < num_entries; i++) {
+ npages = va_ent[i].len >> PAGE_SHIFT;
+
+ ret = pin_user_pages_fast(va_ent[i].vaddr, npages,
+ FOLL_WRITE | FOLL_LONGTERM,
+ &ubuf->pages[start]);
+ if (ret < 0 || ret != npages) {
+ ret = -ENOMEM;
+ XDNA_ERR(xdna, "Failed to pin pages ret %d", ret);
+ goto destroy_pages;
+ }
+
+ start += ret;
+ }
+
+ exp_info.ops = &amdxdna_ubuf_dmabuf_ops;
+ exp_info.priv = ubuf;
+ exp_info.flags = O_RDWR | O_CLOEXEC;
+
+ dbuf = dma_buf_export(&exp_info);
+ if (IS_ERR(dbuf)) {
+ ret = PTR_ERR(dbuf);
+ goto destroy_pages;
+ }
+ kvfree(va_ent);
+
+ return dbuf;
+
+destroy_pages:
+ if (start)
+ unpin_user_pages(ubuf->pages, start);
+ kvfree(ubuf->pages);
+sub_pin_cnt:
+ atomic64_sub(ubuf->nr_pages, &ubuf->mm->pinned_vm);
+free_ent:
+ kvfree(va_ent);
+free_ubuf:
+ mmdrop(ubuf->mm);
+ kfree(ubuf);
+ return ERR_PTR(ret);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.h b/drivers/accel/amdxdna/amdxdna_ubuf.h
new file mode 100644
index 000000000000..e5cb3bdb3ec9
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ubuf.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+#ifndef _AMDXDNA_UBUF_H_
+#define _AMDXDNA_UBUF_H_
+
+#include <drm/drm_device.h>
+#include <linux/dma-buf.h>
+
+enum amdxdna_ubuf_flag {
+ AMDXDNA_UBUF_FLAG_MAP_DMA = 1,
+};
+
+struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev,
+ enum amdxdna_ubuf_flag flags,
+ u32 num_entries, void __user *va_entries);
+
+#endif /* _AMDXDNA_UBUF_H_ */
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
new file mode 100644
index 000000000000..ec407f3b48fc
--- /dev/null
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* Address definition from NPU1 docs */
+#define MPNPU_PUB_SEC_INTR 0x3010090
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010094
+#define MPNPU_PUB_SCRATCH2 0x30100A0
+#define MPNPU_PUB_SCRATCH3 0x30100A4
+#define MPNPU_PUB_SCRATCH4 0x30100A8
+#define MPNPU_PUB_SCRATCH5 0x30100AC
+#define MPNPU_PUB_SCRATCH6 0x30100B0
+#define MPNPU_PUB_SCRATCH7 0x30100B4
+#define MPNPU_PUB_SCRATCH9 0x30100BC
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x30A0000
+#define MPNPU_SRAM_X2I_MAILBOX_1 0x30A2000
+#define MPNPU_SRAM_I2X_MAILBOX_15 0x30BF000
+
+#define MPNPU_APERTURE0_BASE 0x3000000
+#define MPNPU_APERTURE1_BASE 0x3080000
+#define MPNPU_APERTURE2_BASE 0x30C0000
+
+/* PCIe BAR Index for NPU1 */
+#define NPU1_REG_BAR_INDEX 0
+#define NPU1_MBOX_BAR_INDEX 4
+#define NPU1_PSP_BAR_INDEX 0
+#define NPU1_SMU_BAR_INDEX 0
+#define NPU1_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU1_REG_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_MBOX_BAR_BASE MPNPU_APERTURE2_BASE
+#define NPU1_PSP_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE
+#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
+
+const struct rt_config npu1_default_rt_cfg[] = {
+ { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 4, 1, AIE2_RT_CFG_INIT }, /* Debug BO */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
+
+const struct dpm_clk_freq npu1_dpm_clk_table[] = {
+ {400, 800},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {720, 1309},
+ {720, 1309},
+ {847, 1600},
+ { 0 }
+};
+
+static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
+ { 0 }
+};
+
+static const struct amdxdna_dev_priv npu1_dev_priv = {
+ .fw_path = "amdnpu/1502_00/npu.sbin",
+ .protocol_major = 0x5,
+ .protocol_minor = 0x7,
+ .rt_config = npu1_default_rt_cfg,
+ .dpm_clk_tbl = npu1_dpm_clk_table,
+ .fw_feature_tbl = npu1_fw_feature_table,
+ .col_align = COL_ALIGN_NONE,
+ .mbox_dev_addr = NPU1_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU1_SRAM_BAR_BASE,
+ .hwctx_limit = 6,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU1_PSP, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU1_PSP, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
+ },
+ .hw_ops = {
+ .set_dpm = npu1_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu1_info = {
+ .reg_bar = NPU1_REG_BAR_INDEX,
+ .mbox_bar = NPU1_MBOX_BAR_INDEX,
+ .sram_bar = NPU1_SRAM_BAR_INDEX,
+ .psp_bar = NPU1_PSP_BAR_INDEX,
+ .smu_bar = NPU1_SMU_BAR_INDEX,
+ .first_col = 1,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu1",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu1_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
new file mode 100644
index 000000000000..86f87d0d1354
--- /dev/null
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU2 */
+#define NPU2_REG_BAR_INDEX 0
+#define NPU2_MBOX_BAR_INDEX 0
+#define NPU2_PSP_BAR_INDEX 4
+#define NPU2_SMU_BAR_INDEX 5
+#define NPU2_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU2_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU2_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU2_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu2_dev_priv = {
+ .fw_path = "amdnpu/17f0_00/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 0x6,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU2_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU2_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu2_info = {
+ .reg_bar = NPU2_REG_BAR_INDEX,
+ .mbox_bar = NPU2_MBOX_BAR_INDEX,
+ .sram_bar = NPU2_SRAM_BAR_INDEX,
+ .psp_bar = NPU2_PSP_BAR_INDEX,
+ .smu_bar = NPU2_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu2",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu2_dev_priv,
+ .ops = &aie2_ops, /* NPU2 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
new file mode 100644
index 000000000000..986a5f28ba24
--- /dev/null
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU4 */
+#define NPU4_REG_BAR_INDEX 0
+#define NPU4_MBOX_BAR_INDEX 0
+#define NPU4_PSP_BAR_INDEX 4
+#define NPU4_SMU_BAR_INDEX 5
+#define NPU4_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU4_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU4_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU4_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+const struct rt_config npu4_default_rt_cfg[] = {
+ { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
+ { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
+ { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
+ { 0 },
+};
+
+const struct dpm_clk_freq npu4_dpm_clk_table[] = {
+ {396, 792},
+ {600, 1056},
+ {792, 1152},
+ {975, 1267},
+ {975, 1267},
+ {1056, 1408},
+ {1152, 1584},
+ {1267, 1800},
+ { 0 }
+};
+
+const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+ { .feature = AIE2_PREEMPT, .min_minor = 12 },
+ { 0 }
+};
+
+static const struct amdxdna_dev_priv npu4_dev_priv = {
+ .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU4_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu4_info = {
+ .reg_bar = NPU4_REG_BAR_INDEX,
+ .mbox_bar = NPU4_MBOX_BAR_INDEX,
+ .sram_bar = NPU4_SRAM_BAR_INDEX,
+ .psp_bar = NPU4_PSP_BAR_INDEX,
+ .smu_bar = NPU4_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu4",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu4_dev_priv,
+ .ops = &aie2_ops, /* NPU4 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
new file mode 100644
index 000000000000..75ad97f0b937
--- /dev/null
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU5 */
+#define NPU5_REG_BAR_INDEX 0
+#define NPU5_MBOX_BAR_INDEX 0
+#define NPU5_PSP_BAR_INDEX 4
+#define NPU5_SMU_BAR_INDEX 5
+#define NPU5_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU5_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU5_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU5_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu5_dev_priv = {
+ .fw_path = "amdnpu/17f0_11/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU5_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU5_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU5_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU5_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU5_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU5_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU5_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+};
+
+const struct amdxdna_dev_info dev_npu5_info = {
+ .reg_bar = NPU5_REG_BAR_INDEX,
+ .mbox_bar = NPU5_MBOX_BAR_INDEX,
+ .sram_bar = NPU5_SRAM_BAR_INDEX,
+ .psp_bar = NPU5_PSP_BAR_INDEX,
+ .smu_bar = NPU5_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu5",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu5_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
new file mode 100644
index 000000000000..758dc013fe13
--- /dev/null
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU6 */
+#define NPU6_REG_BAR_INDEX 0
+#define NPU6_MBOX_BAR_INDEX 0
+#define NPU6_PSP_BAR_INDEX 4
+#define NPU6_SMU_BAR_INDEX 5
+#define NPU6_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+static const struct amdxdna_dev_priv npu6_dev_priv = {
+ .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU6_SRAM_BAR_BASE,
+ .hwctx_limit = 16,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+
+};
+
+const struct amdxdna_dev_info dev_npu6_info = {
+ .reg_bar = NPU6_REG_BAR_INDEX,
+ .mbox_bar = NPU6_MBOX_BAR_INDEX,
+ .sram_bar = NPU6_SRAM_BAR_INDEX,
+ .psp_bar = NPU6_PSP_BAR_INDEX,
+ .smu_bar = NPU6_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu6",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu6_dev_priv,
+ .ops = &aie2_ops,
+};
diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 16c3edb8c46e..ca3357acd127 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -8,7 +8,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <drm/drm_accel.h>
#include <drm/drm_auth.h>
@@ -18,10 +18,7 @@
#include <drm/drm_ioctl.h>
#include <drm/drm_print.h>
-static DEFINE_SPINLOCK(accel_minor_lock);
-static struct idr accel_minors_idr;
-
-static struct dentry *accel_debugfs_root;
+DEFINE_XARRAY_ALLOC(accel_minors_xa);
static const struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
@@ -75,17 +72,6 @@ static const struct drm_info_list accel_debugfs_list[] = {
#define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
/**
- * accel_debugfs_init() - Initialize debugfs for device
- * @dev: Pointer to the device instance.
- *
- * This function creates a root directory for the device in debugfs.
- */
-void accel_debugfs_init(struct drm_device *dev)
-{
- drm_debugfs_dev_init(dev, accel_debugfs_root);
-}
-
-/**
* accel_debugfs_register() - Register debugfs for device
* @dev: Pointer to the device instance.
*
@@ -118,99 +104,6 @@ void accel_set_device_instance_params(struct device *kdev, int index)
}
/**
- * accel_minor_alloc() - Allocates a new accel minor
- *
- * This function access the accel minors idr and allocates from it
- * a new id to represent a new accel minor
- *
- * Return: A new id on success or error code in case idr_alloc failed
- */
-int accel_minor_alloc(void)
-{
- unsigned long flags;
- int r;
-
- spin_lock_irqsave(&accel_minor_lock, flags);
- r = idr_alloc(&accel_minors_idr, NULL, 0, ACCEL_MAX_MINORS, GFP_NOWAIT);
- spin_unlock_irqrestore(&accel_minor_lock, flags);
-
- return r;
-}
-
-/**
- * accel_minor_remove() - Remove an accel minor
- * @index: The minor id to remove.
- *
- * This function access the accel minors idr and removes from
- * it the member with the id that is passed to this function.
- */
-void accel_minor_remove(int index)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&accel_minor_lock, flags);
- idr_remove(&accel_minors_idr, index);
- spin_unlock_irqrestore(&accel_minor_lock, flags);
-}
-
-/**
- * accel_minor_replace() - Replace minor pointer in accel minors idr.
- * @minor: Pointer to the new minor.
- * @index: The minor id to replace.
- *
- * This function access the accel minors idr structure and replaces the pointer
- * that is associated with an existing id. Because the minor pointer can be
- * NULL, we need to explicitly pass the index.
- *
- * Return: 0 for success, negative value for error
- */
-void accel_minor_replace(struct drm_minor *minor, int index)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&accel_minor_lock, flags);
- idr_replace(&accel_minors_idr, minor, index);
- spin_unlock_irqrestore(&accel_minor_lock, flags);
-}
-
-/*
- * Looks up the given minor-ID and returns the respective DRM-minor object. The
- * refence-count of the underlying device is increased so you must release this
- * object with accel_minor_release().
- *
- * The object can be only a drm_minor that represents an accel device.
- *
- * As long as you hold this minor, it is guaranteed that the object and the
- * minor->dev pointer will stay valid! However, the device may get unplugged and
- * unregistered while you hold the minor.
- */
-static struct drm_minor *accel_minor_acquire(unsigned int minor_id)
-{
- struct drm_minor *minor;
- unsigned long flags;
-
- spin_lock_irqsave(&accel_minor_lock, flags);
- minor = idr_find(&accel_minors_idr, minor_id);
- if (minor)
- drm_dev_get(minor->dev);
- spin_unlock_irqrestore(&accel_minor_lock, flags);
-
- if (!minor) {
- return ERR_PTR(-ENODEV);
- } else if (drm_dev_is_unplugged(minor->dev)) {
- drm_dev_put(minor->dev);
- return ERR_PTR(-ENODEV);
- }
-
- return minor;
-}
-
-static void accel_minor_release(struct drm_minor *minor)
-{
- drm_dev_put(minor->dev);
-}
-
-/**
* accel_open - open method for ACCEL file
* @inode: device inode
* @filp: file pointer.
@@ -227,7 +120,7 @@ int accel_open(struct inode *inode, struct file *filp)
struct drm_minor *minor;
int retcode;
- minor = accel_minor_acquire(iminor(inode));
+ minor = drm_minor_acquire(&accel_minors_xa, iminor(inode));
if (IS_ERR(minor))
return PTR_ERR(minor);
@@ -246,7 +139,7 @@ int accel_open(struct inode *inode, struct file *filp)
err_undo:
atomic_dec(&dev->open_count);
- accel_minor_release(minor);
+ drm_minor_release(minor);
return retcode;
}
EXPORT_SYMBOL_GPL(accel_open);
@@ -257,7 +150,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp)
struct drm_minor *minor;
int err;
- minor = accel_minor_acquire(iminor(inode));
+ minor = drm_minor_acquire(&accel_minors_xa, iminor(inode));
if (IS_ERR(minor))
return PTR_ERR(minor);
@@ -274,7 +167,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp)
err = 0;
out:
- accel_minor_release(minor);
+ drm_minor_release(minor);
return err;
}
@@ -288,25 +181,20 @@ static const struct file_operations accel_stub_fops = {
void accel_core_exit(void)
{
unregister_chrdev(ACCEL_MAJOR, "accel");
- debugfs_remove(accel_debugfs_root);
accel_sysfs_destroy();
- idr_destroy(&accel_minors_idr);
+ WARN_ON(!xa_empty(&accel_minors_xa));
}
int __init accel_core_init(void)
{
int ret;
- idr_init(&accel_minors_idr);
-
ret = accel_sysfs_init();
if (ret < 0) {
DRM_ERROR("Cannot create ACCEL class: %d\n", ret);
goto error;
}
- accel_debugfs_root = debugfs_create_dir("accel", NULL);
-
ret = register_chrdev(ACCEL_MAJOR, "accel", &accel_stub_fops);
if (ret < 0)
DRM_ERROR("Cannot register ACCEL major: %d\n", ret);
diff --git a/drivers/accel/ethosu/Kconfig b/drivers/accel/ethosu/Kconfig
new file mode 100644
index 000000000000..d25f9b3eb317
--- /dev/null
+++ b/drivers/accel/ethosu/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_ARM_ETHOSU
+ tristate "Arm Ethos-U65/U85 NPU"
+ depends on HAS_IOMEM
+ depends on DRM_ACCEL
+ select DRM_GEM_DMA_HELPER
+ select DRM_SCHED
+ select GENERIC_ALLOCATOR
+ help
+ Enables driver for Arm Ethos-U65/U85 NPUs
diff --git a/drivers/accel/ethosu/Makefile b/drivers/accel/ethosu/Makefile
new file mode 100644
index 000000000000..17db5a600416
--- /dev/null
+++ b/drivers/accel/ethosu/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) := ethosu.o
+ethosu-y += ethosu_drv.o ethosu_gem.o ethosu_job.o
diff --git a/drivers/accel/ethosu/ethosu_device.h b/drivers/accel/ethosu/ethosu_device.h
new file mode 100644
index 000000000000..b189fa783d6a
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_device.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0-only or MIT */
+/* Copyright 2025 Arm, Ltd. */
+
+#ifndef __ETHOSU_DEVICE_H__
+#define __ETHOSU_DEVICE_H__
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+
+#include <drm/ethosu_accel.h>
+
+struct clk;
+struct gen_pool;
+
+#define NPU_REG_ID 0x0000
+#define NPU_REG_STATUS 0x0004
+#define NPU_REG_CMD 0x0008
+#define NPU_REG_RESET 0x000c
+#define NPU_REG_QBASE 0x0010
+#define NPU_REG_QBASE_HI 0x0014
+#define NPU_REG_QREAD 0x0018
+#define NPU_REG_QCONFIG 0x001c
+#define NPU_REG_QSIZE 0x0020
+#define NPU_REG_PROT 0x0024
+#define NPU_REG_CONFIG 0x0028
+#define NPU_REG_REGIONCFG 0x003c
+#define NPU_REG_AXILIMIT0 0x0040 // U65
+#define NPU_REG_AXILIMIT1 0x0044 // U65
+#define NPU_REG_AXILIMIT2 0x0048 // U65
+#define NPU_REG_AXILIMIT3 0x004c // U65
+#define NPU_REG_MEM_ATTR0 0x0040 // U85
+#define NPU_REG_MEM_ATTR1 0x0044 // U85
+#define NPU_REG_MEM_ATTR2 0x0048 // U85
+#define NPU_REG_MEM_ATTR3 0x004c // U85
+#define NPU_REG_AXI_SRAM 0x0050 // U85
+#define NPU_REG_AXI_EXT 0x0054 // U85
+
+#define NPU_REG_BASEP(x) (0x0080 + (x) * 8)
+#define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8)
+#define NPU_BASEP_REGION_MAX 8
+
+#define ID_ARCH_MAJOR_MASK GENMASK(31, 28)
+#define ID_ARCH_MINOR_MASK GENMASK(27, 20)
+#define ID_ARCH_PATCH_MASK GENMASK(19, 16)
+#define ID_VER_MAJOR_MASK GENMASK(11, 8)
+#define ID_VER_MINOR_MASK GENMASK(7, 4)
+
+#define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0)
+#define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4)
+
+#define STATUS_STATE_RUNNING BIT(0)
+#define STATUS_IRQ_RAISED BIT(1)
+#define STATUS_BUS_STATUS BIT(2)
+#define STATUS_RESET_STATUS BIT(3)
+#define STATUS_CMD_PARSE_ERR BIT(4)
+#define STATUS_CMD_END_REACHED BIT(5)
+
+#define CMD_CLEAR_IRQ BIT(1)
+#define CMD_TRANSITION_TO_RUN BIT(0)
+
+#define RESET_PENDING_CSL BIT(1)
+#define RESET_PENDING_CPL BIT(0)
+
+#define PROT_ACTIVE_CSL BIT(1)
+
+enum ethosu_cmds {
+ NPU_OP_CONV = 0x2,
+ NPU_OP_DEPTHWISE = 0x3,
+ NPU_OP_POOL = 0x5,
+ NPU_OP_ELEMENTWISE = 0x6,
+ NPU_OP_RESIZE = 0x7, // U85 only
+ NPU_OP_DMA_START = 0x10,
+ NPU_SET_IFM_PAD_TOP = 0x100,
+ NPU_SET_IFM_PAD_LEFT = 0x101,
+ NPU_SET_IFM_PAD_RIGHT = 0x102,
+ NPU_SET_IFM_PAD_BOTTOM = 0x103,
+ NPU_SET_IFM_DEPTH_M1 = 0x104,
+ NPU_SET_IFM_PRECISION = 0x105,
+ NPU_SET_IFM_BROADCAST = 0x108,
+ NPU_SET_IFM_WIDTH0_M1 = 0x10a,
+ NPU_SET_IFM_HEIGHT0_M1 = 0x10b,
+ NPU_SET_IFM_HEIGHT1_M1 = 0x10c,
+ NPU_SET_IFM_REGION = 0x10f,
+ NPU_SET_OFM_WIDTH_M1 = 0x111,
+ NPU_SET_OFM_HEIGHT_M1 = 0x112,
+ NPU_SET_OFM_DEPTH_M1 = 0x113,
+ NPU_SET_OFM_PRECISION = 0x114,
+ NPU_SET_OFM_WIDTH0_M1 = 0x11a,
+ NPU_SET_OFM_HEIGHT0_M1 = 0x11b,
+ NPU_SET_OFM_HEIGHT1_M1 = 0x11c,
+ NPU_SET_OFM_REGION = 0x11f,
+ NPU_SET_KERNEL_WIDTH_M1 = 0x120,
+ NPU_SET_KERNEL_HEIGHT_M1 = 0x121,
+ NPU_SET_KERNEL_STRIDE = 0x122,
+ NPU_SET_WEIGHT_REGION = 0x128,
+ NPU_SET_SCALE_REGION = 0x129,
+ NPU_SET_DMA0_SRC_REGION = 0x130,
+ NPU_SET_DMA0_DST_REGION = 0x131,
+ NPU_SET_DMA0_SIZE0 = 0x132,
+ NPU_SET_DMA0_SIZE1 = 0x133,
+ NPU_SET_IFM2_BROADCAST = 0x180,
+ NPU_SET_IFM2_PRECISION = 0x185,
+ NPU_SET_IFM2_WIDTH0_M1 = 0x18a,
+ NPU_SET_IFM2_HEIGHT0_M1 = 0x18b,
+ NPU_SET_IFM2_HEIGHT1_M1 = 0x18c,
+ NPU_SET_IFM2_REGION = 0x18f,
+ NPU_SET_IFM_BASE0 = 0x4000,
+ NPU_SET_IFM_BASE1 = 0x4001,
+ NPU_SET_IFM_BASE2 = 0x4002,
+ NPU_SET_IFM_BASE3 = 0x4003,
+ NPU_SET_IFM_STRIDE_X = 0x4004,
+ NPU_SET_IFM_STRIDE_Y = 0x4005,
+ NPU_SET_IFM_STRIDE_C = 0x4006,
+ NPU_SET_OFM_BASE0 = 0x4010,
+ NPU_SET_OFM_BASE1 = 0x4011,
+ NPU_SET_OFM_BASE2 = 0x4012,
+ NPU_SET_OFM_BASE3 = 0x4013,
+ NPU_SET_OFM_STRIDE_X = 0x4014,
+ NPU_SET_OFM_STRIDE_Y = 0x4015,
+ NPU_SET_OFM_STRIDE_C = 0x4016,
+ NPU_SET_WEIGHT_BASE = 0x4020,
+ NPU_SET_WEIGHT_LENGTH = 0x4021,
+ NPU_SET_SCALE_BASE = 0x4022,
+ NPU_SET_SCALE_LENGTH = 0x4023,
+ NPU_SET_DMA0_SRC = 0x4030,
+ NPU_SET_DMA0_DST = 0x4031,
+ NPU_SET_DMA0_LEN = 0x4032,
+ NPU_SET_DMA0_SRC_STRIDE0 = 0x4033,
+ NPU_SET_DMA0_SRC_STRIDE1 = 0x4034,
+ NPU_SET_DMA0_DST_STRIDE0 = 0x4035,
+ NPU_SET_DMA0_DST_STRIDE1 = 0x4036,
+ NPU_SET_IFM2_BASE0 = 0x4080,
+ NPU_SET_IFM2_BASE1 = 0x4081,
+ NPU_SET_IFM2_BASE2 = 0x4082,
+ NPU_SET_IFM2_BASE3 = 0x4083,
+ NPU_SET_IFM2_STRIDE_X = 0x4084,
+ NPU_SET_IFM2_STRIDE_Y = 0x4085,
+ NPU_SET_IFM2_STRIDE_C = 0x4086,
+ NPU_SET_WEIGHT1_BASE = 0x4090,
+ NPU_SET_WEIGHT1_LENGTH = 0x4091,
+ NPU_SET_SCALE1_BASE = 0x4092,
+ NPU_SET_WEIGHT2_BASE = 0x4092,
+ NPU_SET_SCALE1_LENGTH = 0x4093,
+ NPU_SET_WEIGHT2_LENGTH = 0x4093,
+ NPU_SET_WEIGHT3_BASE = 0x4094,
+ NPU_SET_WEIGHT3_LENGTH = 0x4095,
+};
+
+#define ETHOSU_SRAM_REGION 2 /* Matching Vela compiler */
+
+/**
+ * struct ethosu_device - Ethosu device
+ */
+struct ethosu_device {
+ /** @base: Base drm_device. */
+ struct drm_device base;
+
+ /** @iomem: CPU mapping of the registers. */
+ void __iomem *regs;
+
+ void __iomem *sram;
+ struct gen_pool *srampool;
+ dma_addr_t sramphys;
+
+ struct clk_bulk_data *clks;
+ int num_clks;
+ int irq;
+
+ struct drm_ethosu_npu_info npu_info;
+
+ struct ethosu_job *in_flight_job;
+ /* For in_flight_job and ethosu_job_hw_submit() */
+ struct mutex job_lock;
+
+ /* For dma_fence */
+ spinlock_t fence_lock;
+
+ struct drm_gpu_scheduler sched;
+ /* For ethosu_job_do_push() */
+ struct mutex sched_lock;
+ u64 fence_context;
+ u64 emit_seqno;
+};
+
+#define to_ethosu_device(drm_dev) \
+ ((struct ethosu_device *)container_of(drm_dev, struct ethosu_device, base))
+
+static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev)
+{
+ return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1;
+}
+
+#endif
diff --git a/drivers/accel/ethosu/ethosu_drv.c b/drivers/accel/ethosu/ethosu_drv.c
new file mode 100644
index 000000000000..e05a69bf5574
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_drv.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only or MIT
+// Copyright (C) 2025 Arm, Ltd.
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_utils.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_accel.h>
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_drv.h"
+#include "ethosu_device.h"
+#include "ethosu_gem.h"
+#include "ethosu_job.h"
+
+static int ethosu_ioctl_dev_query(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct ethosu_device *ethosudev = to_ethosu_device(ddev);
+ struct drm_ethosu_dev_query *args = data;
+
+ if (!args->pointer) {
+ switch (args->type) {
+ case DRM_ETHOSU_DEV_QUERY_NPU_INFO:
+ args->size = sizeof(ethosudev->npu_info);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (args->type) {
+ case DRM_ETHOSU_DEV_QUERY_NPU_INFO:
+ if (args->size < offsetofend(struct drm_ethosu_npu_info, sram_size))
+ return -EINVAL;
+ return copy_struct_to_user(u64_to_user_ptr(args->pointer),
+ args->size,
+ &ethosudev->npu_info,
+ sizeof(ethosudev->npu_info), NULL);
+ default:
+ return -EINVAL;
+ }
+}
+
+#define ETHOSU_BO_FLAGS DRM_ETHOSU_BO_NO_MMAP
+
+static int ethosu_ioctl_bo_create(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_bo_create *args = data;
+ int cookie, ret;
+
+ if (!drm_dev_enter(ddev, &cookie))
+ return -ENODEV;
+
+ if (!args->size || (args->flags & ~ETHOSU_BO_FLAGS)) {
+ ret = -EINVAL;
+ goto out_dev_exit;
+ }
+
+ ret = ethosu_gem_create_with_handle(file, ddev, &args->size,
+ args->flags, &args->handle);
+
+out_dev_exit:
+ drm_dev_exit(cookie);
+ return ret;
+}
+
+static int ethosu_ioctl_bo_wait(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_bo_wait *args = data;
+ int cookie, ret;
+ unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+
+ if (args->pad)
+ return -EINVAL;
+
+ if (!drm_dev_enter(ddev, &cookie))
+ return -ENODEV;
+
+ ret = drm_gem_dma_resv_wait(file, args->handle, true, timeout);
+
+ drm_dev_exit(cookie);
+ return ret;
+}
+
+static int ethosu_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_bo_mmap_offset *args = data;
+ struct drm_gem_object *obj;
+
+ if (args->pad)
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ args->offset = drm_vma_node_offset_addr(&obj->vma_node);
+ drm_gem_object_put(obj);
+ return 0;
+}
+
+static int ethosu_ioctl_cmdstream_bo_create(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_ethosu_cmdstream_bo_create *args = data;
+ int cookie, ret;
+
+ if (!drm_dev_enter(ddev, &cookie))
+ return -ENODEV;
+
+ if (!args->size || !args->data || args->pad || args->flags) {
+ ret = -EINVAL;
+ goto out_dev_exit;
+ }
+
+ args->flags |= DRM_ETHOSU_BO_NO_MMAP;
+
+ ret = ethosu_gem_cmdstream_create(file, ddev, args->size, args->data,
+ args->flags, &args->handle);
+
+out_dev_exit:
+ drm_dev_exit(cookie);
+ return ret;
+}
+
+static int ethosu_open(struct drm_device *ddev, struct drm_file *file)
+{
+ int ret = 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ struct ethosu_file_priv __free(kfree) *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto err_put_mod;
+ }
+ priv->edev = to_ethosu_device(ddev);
+
+ ret = ethosu_job_open(priv);
+ if (ret)
+ goto err_put_mod;
+
+ file->driver_priv = no_free_ptr(priv);
+ return 0;
+
+err_put_mod:
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static void ethosu_postclose(struct drm_device *ddev, struct drm_file *file)
+{
+ ethosu_job_close(file->driver_priv);
+ kfree(file->driver_priv);
+ module_put(THIS_MODULE);
+}
+
+static const struct drm_ioctl_desc ethosu_drm_driver_ioctls[] = {
+#define ETHOSU_IOCTL(n, func, flags) \
+ DRM_IOCTL_DEF_DRV(ETHOSU_##n, ethosu_ioctl_##func, flags)
+
+ ETHOSU_IOCTL(DEV_QUERY, dev_query, 0),
+ ETHOSU_IOCTL(BO_CREATE, bo_create, 0),
+ ETHOSU_IOCTL(BO_WAIT, bo_wait, 0),
+ ETHOSU_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, 0),
+ ETHOSU_IOCTL(CMDSTREAM_BO_CREATE, cmdstream_bo_create, 0),
+ ETHOSU_IOCTL(SUBMIT, submit, 0),
+};
+
+DEFINE_DRM_ACCEL_FOPS(ethosu_drm_driver_fops);
+
+/*
+ * Ethosu driver version:
+ * - 1.0 - initial interface
+ */
+static const struct drm_driver ethosu_drm_driver = {
+ .driver_features = DRIVER_COMPUTE_ACCEL | DRIVER_GEM,
+ .open = ethosu_open,
+ .postclose = ethosu_postclose,
+ .ioctls = ethosu_drm_driver_ioctls,
+ .num_ioctls = ARRAY_SIZE(ethosu_drm_driver_ioctls),
+ .fops = &ethosu_drm_driver_fops,
+ .name = "ethosu",
+ .desc = "Arm Ethos-U Accel driver",
+ .major = 1,
+ .minor = 0,
+
+ .gem_create_object = ethosu_gem_create_object,
+};
+
+#define U65_DRAM_AXI_LIMIT_CFG 0x1f3f0002
+#define U65_SRAM_AXI_LIMIT_CFG 0x1f3f00b0
+#define U85_AXI_EXT_CFG 0x00021f3f
+#define U85_AXI_SRAM_CFG 0x00021f3f
+#define U85_MEM_ATTR0_CFG 0x00000000
+#define U85_MEM_ATTR2_CFG 0x000000b7
+
+static int ethosu_reset(struct ethosu_device *ethosudev)
+{
+ int ret;
+ u32 reg;
+
+ writel_relaxed(RESET_PENDING_CSL, ethosudev->regs + NPU_REG_RESET);
+ ret = readl_poll_timeout(ethosudev->regs + NPU_REG_STATUS, reg,
+ !FIELD_GET(STATUS_RESET_STATUS, reg),
+ USEC_PER_MSEC, USEC_PER_SEC);
+ if (ret)
+ return ret;
+
+ if (!FIELD_GET(PROT_ACTIVE_CSL, readl_relaxed(ethosudev->regs + NPU_REG_PROT))) {
+ dev_warn(ethosudev->base.dev, "Could not reset to non-secure mode (PROT = %x)\n",
+ readl_relaxed(ethosudev->regs + NPU_REG_PROT));
+ }
+
+ /*
+ * Assign region 2 (SRAM) to AXI M0 (AXILIMIT0),
+ * everything else to AXI M1 (AXILIMIT2)
+ */
+ writel_relaxed(0x0000aa8a, ethosudev->regs + NPU_REG_REGIONCFG);
+ if (ethosu_is_u65(ethosudev)) {
+ writel_relaxed(U65_SRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT0);
+ writel_relaxed(U65_DRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT2);
+ } else {
+ writel_relaxed(U85_AXI_SRAM_CFG, ethosudev->regs + NPU_REG_AXI_SRAM);
+ writel_relaxed(U85_AXI_EXT_CFG, ethosudev->regs + NPU_REG_AXI_EXT);
+ writel_relaxed(U85_MEM_ATTR0_CFG, ethosudev->regs + NPU_REG_MEM_ATTR0); // SRAM
+ writel_relaxed(U85_MEM_ATTR2_CFG, ethosudev->regs + NPU_REG_MEM_ATTR2); // DRAM
+ }
+
+ if (ethosudev->sram)
+ memset_io(ethosudev->sram, 0, ethosudev->npu_info.sram_size);
+
+ return 0;
+}
+
+static int ethosu_device_resume(struct device *dev)
+{
+ struct ethosu_device *ethosudev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_bulk_prepare_enable(ethosudev->num_clks, ethosudev->clks);
+ if (ret)
+ return ret;
+
+ ret = ethosu_reset(ethosudev);
+ if (!ret)
+ return 0;
+
+ clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks);
+ return ret;
+}
+
+static int ethosu_device_suspend(struct device *dev)
+{
+ struct ethosu_device *ethosudev = dev_get_drvdata(dev);
+
+ clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks);
+ return 0;
+}
+
+static int ethosu_sram_init(struct ethosu_device *ethosudev)
+{
+ ethosudev->npu_info.sram_size = 0;
+
+ ethosudev->srampool = of_gen_pool_get(ethosudev->base.dev->of_node, "sram", 0);
+ if (!ethosudev->srampool)
+ return 0;
+
+ ethosudev->npu_info.sram_size = gen_pool_size(ethosudev->srampool);
+
+ ethosudev->sram = (void __iomem *)gen_pool_dma_alloc(ethosudev->srampool,
+ ethosudev->npu_info.sram_size,
+ &ethosudev->sramphys);
+ if (!ethosudev->sram) {
+ dev_err(ethosudev->base.dev, "failed to allocate from SRAM pool\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int ethosu_init(struct ethosu_device *ethosudev)
+{
+ int ret;
+ u32 id, config;
+
+ ret = ethosu_device_resume(ethosudev->base.dev);
+ if (ret)
+ return ret;
+
+ pm_runtime_set_autosuspend_delay(ethosudev->base.dev, 50);
+ pm_runtime_use_autosuspend(ethosudev->base.dev);
+ ret = devm_pm_runtime_set_active_enabled(ethosudev->base.dev);
+ if (ret)
+ return ret;
+ pm_runtime_get_noresume(ethosudev->base.dev);
+
+ ethosudev->npu_info.id = id = readl_relaxed(ethosudev->regs + NPU_REG_ID);
+ ethosudev->npu_info.config = config = readl_relaxed(ethosudev->regs + NPU_REG_CONFIG);
+
+ ethosu_sram_init(ethosudev);
+
+ dev_info(ethosudev->base.dev,
+ "Ethos-U NPU, arch v%ld.%ld.%ld, rev r%ldp%ld, cmd stream ver%ld, %d MACs, %dKB SRAM\n",
+ FIELD_GET(ID_ARCH_MAJOR_MASK, id),
+ FIELD_GET(ID_ARCH_MINOR_MASK, id),
+ FIELD_GET(ID_ARCH_PATCH_MASK, id),
+ FIELD_GET(ID_VER_MAJOR_MASK, id),
+ FIELD_GET(ID_VER_MINOR_MASK, id),
+ FIELD_GET(CONFIG_CMD_STREAM_VER_MASK, config),
+ 1 << FIELD_GET(CONFIG_MACS_PER_CC_MASK, config),
+ ethosudev->npu_info.sram_size / 1024);
+
+ return 0;
+}
+
+static int ethosu_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct ethosu_device *ethosudev;
+
+ ethosudev = devm_drm_dev_alloc(&pdev->dev, &ethosu_drm_driver,
+ struct ethosu_device, base);
+ if (IS_ERR(ethosudev))
+ return -ENOMEM;
+ platform_set_drvdata(pdev, ethosudev);
+
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+
+ ethosudev->regs = devm_platform_ioremap_resource(pdev, 0);
+
+ ethosudev->num_clks = devm_clk_bulk_get_all(&pdev->dev, &ethosudev->clks);
+ if (ethosudev->num_clks < 0)
+ return ethosudev->num_clks;
+
+ ret = ethosu_job_init(ethosudev);
+ if (ret)
+ return ret;
+
+ ret = ethosu_init(ethosudev);
+ if (ret)
+ return ret;
+
+ ret = drm_dev_register(&ethosudev->base, 0);
+ if (ret)
+ pm_runtime_dont_use_autosuspend(ethosudev->base.dev);
+
+ pm_runtime_put_autosuspend(ethosudev->base.dev);
+ return ret;
+}
+
+static void ethosu_remove(struct platform_device *pdev)
+{
+ struct ethosu_device *ethosudev = dev_get_drvdata(&pdev->dev);
+
+ drm_dev_unregister(&ethosudev->base);
+ ethosu_job_fini(ethosudev);
+ if (ethosudev->sram)
+ gen_pool_free(ethosudev->srampool, (unsigned long)ethosudev->sram,
+ ethosudev->npu_info.sram_size);
+}
+
+static const struct of_device_id dt_match[] = {
+ { .compatible = "arm,ethos-u65" },
+ { .compatible = "arm,ethos-u85" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static DEFINE_RUNTIME_DEV_PM_OPS(ethosu_pm_ops,
+ ethosu_device_suspend,
+ ethosu_device_resume,
+ NULL);
+
+static struct platform_driver ethosu_driver = {
+ .probe = ethosu_probe,
+ .remove = ethosu_remove,
+ .driver = {
+ .name = "ethosu",
+ .pm = pm_ptr(&ethosu_pm_ops),
+ .of_match_table = dt_match,
+ },
+};
+module_platform_driver(ethosu_driver);
+
+MODULE_AUTHOR("Rob Herring <robh@kernel.org>");
+MODULE_DESCRIPTION("Arm Ethos-U Accel Driver");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/accel/ethosu/ethosu_drv.h b/drivers/accel/ethosu/ethosu_drv.h
new file mode 100644
index 000000000000..9e21dfe94184
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_drv.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2025 Arm, Ltd. */
+#ifndef __ETHOSU_DRV_H__
+#define __ETHOSU_DRV_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct ethosu_device;
+
+struct ethosu_file_priv {
+ struct ethosu_device *edev;
+ struct drm_sched_entity sched_entity;
+};
+
+#endif
diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
new file mode 100644
index 000000000000..473b5f5d7514
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-only or MIT
+/* Copyright 2025 Arm, Ltd. */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_device.h"
+#include "ethosu_gem.h"
+
+static void ethosu_gem_free_object(struct drm_gem_object *obj)
+{
+ struct ethosu_gem_object *bo = to_ethosu_bo(obj);
+
+ kfree(bo->info);
+ drm_gem_free_mmap_offset(&bo->base.base);
+ drm_gem_dma_free(&bo->base);
+}
+
+static int ethosu_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct ethosu_gem_object *bo = to_ethosu_bo(obj);
+
+ /* Don't allow mmap on objects that have the NO_MMAP flag set. */
+ if (bo->flags & DRM_ETHOSU_BO_NO_MMAP)
+ return -EINVAL;
+
+ return drm_gem_dma_object_mmap(obj, vma);
+}
+
+static const struct drm_gem_object_funcs ethosu_gem_funcs = {
+ .free = ethosu_gem_free_object,
+ .print_info = drm_gem_dma_object_print_info,
+ .get_sg_table = drm_gem_dma_object_get_sg_table,
+ .vmap = drm_gem_dma_object_vmap,
+ .mmap = ethosu_gem_mmap,
+ .vm_ops = &drm_gem_dma_vm_ops,
+};
+
+/**
+ * ethosu_gem_create_object - Implementation of driver->gem_create_object.
+ * @ddev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * This lets the GEM helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t size)
+{
+ struct ethosu_gem_object *obj;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ obj->base.base.funcs = &ethosu_gem_funcs;
+ return &obj->base.base;
+}
+
+/**
+ * ethosu_gem_create_with_handle() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @size: Size of the GEM object to allocate.
+ * @flags: Combination of drm_ethosu_bo_flags flags.
+ * @handle: Pointer holding the handle pointing to the new GEM object.
+ *
+ * Return: Zero on success
+ */
+int ethosu_gem_create_with_handle(struct drm_file *file,
+ struct drm_device *ddev,
+ u64 *size, u32 flags, u32 *handle)
+{
+ struct drm_gem_dma_object *mem;
+ struct ethosu_gem_object *bo;
+ int ret;
+
+ mem = drm_gem_dma_create(ddev, *size);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ bo = to_ethosu_bo(&mem->base);
+ bo->flags = flags;
+
+ /*
+ * Allocate an id of idr table where the obj is registered
+ * and handle has the id what user can see.
+ */
+ ret = drm_gem_handle_create(file, &mem->base, handle);
+ if (!ret)
+ *size = bo->base.base.size;
+
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_put(&mem->base);
+
+ return ret;
+}
+
+struct dma {
+ s8 region;
+ u64 len;
+ u64 offset;
+ s64 stride[2];
+};
+
+struct dma_state {
+ u16 size0;
+ u16 size1;
+ s8 mode;
+ struct dma src;
+ struct dma dst;
+};
+
+struct buffer {
+ u64 base;
+ u32 length;
+ s8 region;
+};
+
+struct feat_matrix {
+ u64 base[4];
+ s64 stride_x;
+ s64 stride_y;
+ s64 stride_c;
+ s8 region;
+ u8 broadcast;
+ u16 stride_kernel;
+ u16 precision;
+ u16 depth;
+ u16 width;
+ u16 width0;
+ u16 height[3];
+ u8 pad_top;
+ u8 pad_left;
+ u8 pad_bottom;
+ u8 pad_right;
+};
+
+struct cmd_state {
+ struct dma_state dma;
+ struct buffer scale[2];
+ struct buffer weight[4];
+ struct feat_matrix ofm;
+ struct feat_matrix ifm;
+ struct feat_matrix ifm2;
+};
+
+static void cmd_state_init(struct cmd_state *st)
+{
+ /* Initialize to all 1s to detect missing setup */
+ memset(st, 0xff, sizeof(*st));
+}
+
+static u64 cmd_to_addr(u32 *cmd)
+{
+ return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1];
+}
+
+static u64 dma_length(struct ethosu_validated_cmdstream_info *info,
+ struct dma_state *dma_st, struct dma *dma)
+{
+ s8 mode = dma_st->mode;
+ u64 len = dma->len;
+
+ if (mode >= 1) {
+ len += dma->stride[0];
+ len *= dma_st->size0;
+ }
+ if (mode == 2) {
+ len += dma->stride[1];
+ len *= dma_st->size1;
+ }
+ if (dma->region >= 0)
+ info->region_size[dma->region] = max(info->region_size[dma->region],
+ len + dma->offset);
+
+ return len;
+}
+
+static u64 feat_matrix_length(struct ethosu_validated_cmdstream_info *info,
+ struct feat_matrix *fm,
+ u32 x, u32 y, u32 c)
+{
+ u32 element_size, storage = fm->precision >> 14;
+ int tile = 0;
+ u64 addr;
+
+ if (fm->region < 0)
+ return U64_MAX;
+
+ switch (storage) {
+ case 0:
+ if (x >= fm->width0 + 1) {
+ x -= fm->width0 + 1;
+ tile += 1;
+ }
+ if (y >= fm->height[tile] + 1) {
+ y -= fm->height[tile] + 1;
+ tile += 2;
+ }
+ break;
+ case 1:
+ if (y >= fm->height[1] + 1) {
+ y -= fm->height[1] + 1;
+ tile = 2;
+ } else if (y >= fm->height[0] + 1) {
+ y -= fm->height[0] + 1;
+ tile = 1;
+ }
+ break;
+ }
+ if (fm->base[tile] == U64_MAX)
+ return U64_MAX;
+
+ addr = fm->base[tile] + y * fm->stride_y;
+
+ switch ((fm->precision >> 6) & 0x3) { // format
+ case 0: //nhwc:
+ addr += x * fm->stride_x + c;
+ break;
+ case 1: //nhcwb16:
+ element_size = BIT((fm->precision >> 1) & 0x3);
+
+ addr += (c / 16) * fm->stride_c + (16 * x + (c & 0xf)) * element_size;
+ break;
+ }
+
+ info->region_size[fm->region] = max(info->region_size[fm->region], addr + 1);
+
+ return addr;
+}
+
+static int calc_sizes(struct drm_device *ddev,
+ struct ethosu_validated_cmdstream_info *info,
+ u16 op, struct cmd_state *st,
+ bool ifm, bool ifm2, bool weight, bool scale)
+{
+ u64 len;
+
+ if (ifm) {
+ if (st->ifm.stride_kernel == U16_MAX)
+ return -EINVAL;
+ u32 stride_y = ((st->ifm.stride_kernel >> 8) & 0x2) +
+ ((st->ifm.stride_kernel >> 1) & 0x1) + 1;
+ u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) +
+ (st->ifm.stride_kernel & 0x1) + 1;
+ u32 ifm_height = st->ofm.height[2] * stride_y +
+ st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom);
+ u32 ifm_width = st->ofm.width * stride_x +
+ st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right);
+
+ len = feat_matrix_length(info, &st->ifm, ifm_width,
+ ifm_height, st->ifm.depth);
+ dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
+ op, st->ifm.region, st->ifm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ if (ifm2) {
+ len = feat_matrix_length(info, &st->ifm2, st->ifm.depth,
+ 0, st->ofm.depth);
+ dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
+ op, st->ifm2.region, st->ifm2.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ if (weight) {
+ dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n",
+ op, st->weight[0].region, st->weight[0].base,
+ st->weight[0].base + st->weight[0].length - 1);
+ if (st->weight[0].region < 0 || st->weight[0].base == U64_MAX ||
+ st->weight[0].length == U32_MAX)
+ return -EINVAL;
+ info->region_size[st->weight[0].region] =
+ max(info->region_size[st->weight[0].region],
+ st->weight[0].base + st->weight[0].length);
+ }
+
+ if (scale) {
+ dev_dbg(ddev->dev, "op %d: S:%d:0x%llx-0x%llx\n",
+ op, st->scale[0].region, st->scale[0].base,
+ st->scale[0].base + st->scale[0].length - 1);
+ if (st->scale[0].region < 0 || st->scale[0].base == U64_MAX ||
+ st->scale[0].length == U32_MAX)
+ return -EINVAL;
+ info->region_size[st->scale[0].region] =
+ max(info->region_size[st->scale[0].region],
+ st->scale[0].base + st->scale[0].length);
+ }
+
+ len = feat_matrix_length(info, &st->ofm, st->ofm.width,
+ st->ofm.height[2], st->ofm.depth);
+ dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
+ op, st->ofm.region, st->ofm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ info->output_region[st->ofm.region] = true;
+
+ return 0;
+}
+
+static int calc_sizes_elemwise(struct drm_device *ddev,
+ struct ethosu_validated_cmdstream_info *info,
+ u16 op, struct cmd_state *st,
+ bool ifm, bool ifm2)
+{
+ u32 height, width, depth;
+ u64 len;
+
+ if (ifm) {
+ height = st->ifm.broadcast & 0x1 ? 0 : st->ofm.height[2];
+ width = st->ifm.broadcast & 0x2 ? 0 : st->ofm.width;
+ depth = st->ifm.broadcast & 0x4 ? 0 : st->ofm.depth;
+
+ len = feat_matrix_length(info, &st->ifm, width,
+ height, depth);
+ dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
+ op, st->ifm.region, st->ifm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ if (ifm2) {
+ height = st->ifm2.broadcast & 0x1 ? 0 : st->ofm.height[2];
+ width = st->ifm2.broadcast & 0x2 ? 0 : st->ofm.width;
+ depth = st->ifm2.broadcast & 0x4 ? 0 : st->ofm.depth;
+
+ len = feat_matrix_length(info, &st->ifm2, width,
+ height, depth);
+ dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
+ op, st->ifm2.region, st->ifm2.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ }
+
+ len = feat_matrix_length(info, &st->ofm, st->ofm.width,
+ st->ofm.height[2], st->ofm.depth);
+ dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
+ op, st->ofm.region, st->ofm.base[0], len);
+ if (len == U64_MAX)
+ return -EINVAL;
+ info->output_region[st->ofm.region] = true;
+
+ return 0;
+}
+
+static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev,
+ u32 __user *ucmds,
+ struct ethosu_gem_object *bo,
+ u32 size)
+{
+ struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc(sizeof(*info), GFP_KERNEL);
+ struct ethosu_device *edev = to_ethosu_device(ddev);
+ u32 *bocmds = bo->base.vaddr;
+ struct cmd_state st;
+ int i, ret;
+
+ if (!info)
+ return -ENOMEM;
+ info->cmd_size = size;
+
+ cmd_state_init(&st);
+
+ for (i = 0; i < size / 4; i++) {
+ bool use_ifm, use_ifm2, use_scale;
+ u64 dstlen, srclen;
+ u16 cmd, param;
+ u32 cmds[2];
+ u64 addr;
+
+ if (get_user(cmds[0], ucmds++))
+ return -EFAULT;
+
+ bocmds[i] = cmds[0];
+
+ cmd = cmds[0];
+ param = cmds[0] >> 16;
+
+ if (cmd & 0x4000) {
+ if (get_user(cmds[1], ucmds++))
+ return -EFAULT;
+
+ i++;
+ bocmds[i] = cmds[1];
+ addr = cmd_to_addr(cmds);
+ }
+
+ switch (cmd) {
+ case NPU_OP_DMA_START:
+ srclen = dma_length(info, &st.dma, &st.dma.src);
+ dstlen = dma_length(info, &st.dma, &st.dma.dst);
+
+ if (st.dma.dst.region >= 0)
+ info->output_region[st.dma.dst.region] = true;
+ dev_dbg(ddev->dev, "cmd: DMA SRC:%d:0x%llx+0x%llx DST:%d:0x%llx+0x%llx\n",
+ st.dma.src.region, st.dma.src.offset, srclen,
+ st.dma.dst.region, st.dma.dst.offset, dstlen);
+ break;
+ case NPU_OP_CONV:
+ case NPU_OP_DEPTHWISE:
+ use_ifm2 = param & 0x1; // weights_ifm2
+ use_scale = !(st.ofm.precision & 0x100);
+ ret = calc_sizes(ddev, info, cmd, &st, true, use_ifm2,
+ !use_ifm2, use_scale);
+ if (ret)
+ return ret;
+ break;
+ case NPU_OP_POOL:
+ use_ifm = param != 0x4; // pooling mode
+ use_scale = !(st.ofm.precision & 0x100);
+ ret = calc_sizes(ddev, info, cmd, &st, use_ifm, false,
+ false, use_scale);
+ if (ret)
+ return ret;
+ break;
+ case NPU_OP_ELEMENTWISE:
+ use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) ||
+ (param == 6) || (param == 7) || (param == 0x24));
+ use_ifm = st.ifm.broadcast != 8;
+ ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2);
+ if (ret)
+ return ret;
+ break;
+ case NPU_OP_RESIZE: // U85 only
+ WARN_ON(1); // TODO
+ break;
+ case NPU_SET_KERNEL_WIDTH_M1:
+ st.ifm.width = param;
+ break;
+ case NPU_SET_KERNEL_HEIGHT_M1:
+ st.ifm.height[2] = param;
+ break;
+ case NPU_SET_KERNEL_STRIDE:
+ st.ifm.stride_kernel = param;
+ break;
+ case NPU_SET_IFM_PAD_TOP:
+ st.ifm.pad_top = param & 0x7f;
+ break;
+ case NPU_SET_IFM_PAD_LEFT:
+ st.ifm.pad_left = param & 0x7f;
+ break;
+ case NPU_SET_IFM_PAD_RIGHT:
+ st.ifm.pad_right = param & 0xff;
+ break;
+ case NPU_SET_IFM_PAD_BOTTOM:
+ st.ifm.pad_bottom = param & 0xff;
+ break;
+ case NPU_SET_IFM_DEPTH_M1:
+ st.ifm.depth = param;
+ break;
+ case NPU_SET_IFM_PRECISION:
+ st.ifm.precision = param;
+ break;
+ case NPU_SET_IFM_BROADCAST:
+ st.ifm.broadcast = param;
+ break;
+ case NPU_SET_IFM_REGION:
+ st.ifm.region = param & 0x7f;
+ break;
+ case NPU_SET_IFM_WIDTH0_M1:
+ st.ifm.width0 = param;
+ break;
+ case NPU_SET_IFM_HEIGHT0_M1:
+ st.ifm.height[0] = param;
+ break;
+ case NPU_SET_IFM_HEIGHT1_M1:
+ st.ifm.height[1] = param;
+ break;
+ case NPU_SET_IFM_BASE0:
+ case NPU_SET_IFM_BASE1:
+ case NPU_SET_IFM_BASE2:
+ case NPU_SET_IFM_BASE3:
+ st.ifm.base[cmd & 0x3] = addr;
+ break;
+ case NPU_SET_IFM_STRIDE_X:
+ st.ifm.stride_x = addr;
+ break;
+ case NPU_SET_IFM_STRIDE_Y:
+ st.ifm.stride_y = addr;
+ break;
+ case NPU_SET_IFM_STRIDE_C:
+ st.ifm.stride_c = addr;
+ break;
+
+ case NPU_SET_OFM_WIDTH_M1:
+ st.ofm.width = param;
+ break;
+ case NPU_SET_OFM_HEIGHT_M1:
+ st.ofm.height[2] = param;
+ break;
+ case NPU_SET_OFM_DEPTH_M1:
+ st.ofm.depth = param;
+ break;
+ case NPU_SET_OFM_PRECISION:
+ st.ofm.precision = param;
+ break;
+ case NPU_SET_OFM_REGION:
+ st.ofm.region = param & 0x7;
+ break;
+ case NPU_SET_OFM_WIDTH0_M1:
+ st.ofm.width0 = param;
+ break;
+ case NPU_SET_OFM_HEIGHT0_M1:
+ st.ofm.height[0] = param;
+ break;
+ case NPU_SET_OFM_HEIGHT1_M1:
+ st.ofm.height[1] = param;
+ break;
+ case NPU_SET_OFM_BASE0:
+ case NPU_SET_OFM_BASE1:
+ case NPU_SET_OFM_BASE2:
+ case NPU_SET_OFM_BASE3:
+ st.ofm.base[cmd & 0x3] = addr;
+ break;
+ case NPU_SET_OFM_STRIDE_X:
+ st.ofm.stride_x = addr;
+ break;
+ case NPU_SET_OFM_STRIDE_Y:
+ st.ofm.stride_y = addr;
+ break;
+ case NPU_SET_OFM_STRIDE_C:
+ st.ofm.stride_c = addr;
+ break;
+
+ case NPU_SET_IFM2_BROADCAST:
+ st.ifm2.broadcast = param;
+ break;
+ case NPU_SET_IFM2_PRECISION:
+ st.ifm2.precision = param;
+ break;
+ case NPU_SET_IFM2_REGION:
+ st.ifm2.region = param & 0x7;
+ break;
+ case NPU_SET_IFM2_WIDTH0_M1:
+ st.ifm2.width0 = param;
+ break;
+ case NPU_SET_IFM2_HEIGHT0_M1:
+ st.ifm2.height[0] = param;
+ break;
+ case NPU_SET_IFM2_HEIGHT1_M1:
+ st.ifm2.height[1] = param;
+ break;
+ case NPU_SET_IFM2_BASE0:
+ case NPU_SET_IFM2_BASE1:
+ case NPU_SET_IFM2_BASE2:
+ case NPU_SET_IFM2_BASE3:
+ st.ifm2.base[cmd & 0x3] = addr;
+ break;
+ case NPU_SET_IFM2_STRIDE_X:
+ st.ifm2.stride_x = addr;
+ break;
+ case NPU_SET_IFM2_STRIDE_Y:
+ st.ifm2.stride_y = addr;
+ break;
+ case NPU_SET_IFM2_STRIDE_C:
+ st.ifm2.stride_c = addr;
+ break;
+
+ case NPU_SET_WEIGHT_REGION:
+ st.weight[0].region = param & 0x7;
+ break;
+ case NPU_SET_SCALE_REGION:
+ st.scale[0].region = param & 0x7;
+ break;
+ case NPU_SET_WEIGHT_BASE:
+ st.weight[0].base = addr;
+ break;
+ case NPU_SET_WEIGHT_LENGTH:
+ st.weight[0].length = cmds[1];
+ break;
+ case NPU_SET_SCALE_BASE:
+ st.scale[0].base = addr;
+ break;
+ case NPU_SET_SCALE_LENGTH:
+ st.scale[0].length = cmds[1];
+ break;
+ case NPU_SET_WEIGHT1_BASE:
+ st.weight[1].base = addr;
+ break;
+ case NPU_SET_WEIGHT1_LENGTH:
+ st.weight[1].length = cmds[1];
+ break;
+ case NPU_SET_SCALE1_BASE: // NPU_SET_WEIGHT2_BASE (U85)
+ if (ethosu_is_u65(edev))
+ st.scale[1].base = addr;
+ else
+ st.weight[2].base = addr;
+ break;
+ case NPU_SET_SCALE1_LENGTH: // NPU_SET_WEIGHT2_LENGTH (U85)
+ if (ethosu_is_u65(edev))
+ st.scale[1].length = cmds[1];
+ else
+ st.weight[1].length = cmds[1];
+ break;
+ case NPU_SET_WEIGHT3_BASE:
+ st.weight[3].base = addr;
+ break;
+ case NPU_SET_WEIGHT3_LENGTH:
+ st.weight[3].length = cmds[1];
+ break;
+
+ case NPU_SET_DMA0_SRC_REGION:
+ if (param & 0x100)
+ st.dma.src.region = -1;
+ else
+ st.dma.src.region = param & 0x7;
+ st.dma.mode = (param >> 9) & 0x3;
+ break;
+ case NPU_SET_DMA0_DST_REGION:
+ if (param & 0x100)
+ st.dma.dst.region = -1;
+ else
+ st.dma.dst.region = param & 0x7;
+ break;
+ case NPU_SET_DMA0_SIZE0:
+ st.dma.size0 = param;
+ break;
+ case NPU_SET_DMA0_SIZE1:
+ st.dma.size1 = param;
+ break;
+ case NPU_SET_DMA0_SRC_STRIDE0:
+ st.dma.src.stride[0] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_SRC_STRIDE1:
+ st.dma.src.stride[1] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_DST_STRIDE0:
+ st.dma.dst.stride[0] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_DST_STRIDE1:
+ st.dma.dst.stride[1] = ((s64)addr << 24) >> 24;
+ break;
+ case NPU_SET_DMA0_SRC:
+ st.dma.src.offset = addr;
+ break;
+ case NPU_SET_DMA0_DST:
+ st.dma.dst.offset = addr;
+ break;
+ case NPU_SET_DMA0_LEN:
+ st.dma.src.len = st.dma.dst.len = addr;
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (i = 0; i < NPU_BASEP_REGION_MAX; i++) {
+ if (!info->region_size[i])
+ continue;
+ dev_dbg(ddev->dev, "region %d max size: 0x%llx\n",
+ i, info->region_size[i]);
+ }
+
+ bo->info = no_free_ptr(info);
+ return 0;
+}
+
+/**
+ * ethosu_gem_cmdstream_create() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared.
+ * @size: Size of the GEM object to allocate.
+ * @flags: Combination of drm_ethosu_bo_flags flags.
+ * @handle: Pointer holding the handle pointing to the new GEM object.
+ *
+ * Return: Zero on success
+ */
+int ethosu_gem_cmdstream_create(struct drm_file *file,
+ struct drm_device *ddev,
+ u32 size, u64 data, u32 flags, u32 *handle)
+{
+ int ret;
+ struct drm_gem_dma_object *mem;
+ struct ethosu_gem_object *bo;
+
+ mem = drm_gem_dma_create(ddev, size);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ bo = to_ethosu_bo(&mem->base);
+ bo->flags = flags;
+
+ ret = ethosu_gem_cmdstream_copy_and_validate(ddev,
+ (void __user *)(uintptr_t)data,
+ bo, size);
+ if (ret)
+ goto fail;
+
+ /*
+ * Allocate an id of idr table where the obj is registered
+ * and handle has the id what user can see.
+ */
+ ret = drm_gem_handle_create(file, &mem->base, handle);
+
+fail:
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_put(&mem->base);
+
+ return ret;
+}
diff --git a/drivers/accel/ethosu/ethosu_gem.h b/drivers/accel/ethosu/ethosu_gem.h
new file mode 100644
index 000000000000..3922895a60fb
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_gem.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2025 Arm, Ltd. */
+
+#ifndef __ETHOSU_GEM_H__
+#define __ETHOSU_GEM_H__
+
+#include "ethosu_device.h"
+#include <drm/drm_gem_dma_helper.h>
+
+struct ethosu_validated_cmdstream_info {
+ u32 cmd_size;
+ u64 region_size[NPU_BASEP_REGION_MAX];
+ bool output_region[NPU_BASEP_REGION_MAX];
+};
+
+/**
+ * struct ethosu_gem_object - Driver specific GEM object.
+ */
+struct ethosu_gem_object {
+ /** @base: Inherit from drm_gem_shmem_object. */
+ struct drm_gem_dma_object base;
+
+ struct ethosu_validated_cmdstream_info *info;
+
+ /** @flags: Combination of drm_ethosu_bo_flags flags. */
+ u32 flags;
+};
+
+static inline
+struct ethosu_gem_object *to_ethosu_bo(struct drm_gem_object *obj)
+{
+ return container_of(to_drm_gem_dma_obj(obj), struct ethosu_gem_object, base);
+}
+
+struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev,
+ size_t size);
+
+int ethosu_gem_create_with_handle(struct drm_file *file,
+ struct drm_device *ddev,
+ u64 *size, u32 flags, uint32_t *handle);
+
+int ethosu_gem_cmdstream_create(struct drm_file *file,
+ struct drm_device *ddev,
+ u32 size, u64 data, u32 flags, u32 *handle);
+
+#endif /* __ETHOSU_GEM_H__ */
diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c
new file mode 100644
index 000000000000..26e7a2f64d71
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_job.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+/* Copyright 2025 Arm, Ltd. */
+
+#include <linux/bitfield.h>
+#include <linux/genalloc.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_print.h>
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_device.h"
+#include "ethosu_drv.h"
+#include "ethosu_gem.h"
+#include "ethosu_job.h"
+
+#define JOB_TIMEOUT_MS 500
+
+static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
+{
+ return container_of(sched_job, struct ethosu_job, base);
+}
+
+static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "ethosu";
+}
+
+static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
+{
+ return "ethosu-npu";
+}
+
+static const struct dma_fence_ops ethosu_fence_ops = {
+ .get_driver_name = ethosu_fence_get_driver_name,
+ .get_timeline_name = ethosu_fence_get_timeline_name,
+};
+
+static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
+{
+ struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
+ struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;
+
+ for (int i = 0; i < job->region_cnt; i++) {
+ struct drm_gem_dma_object *bo;
+ int region = job->region_bo_num[i];
+
+ bo = to_drm_gem_dma_obj(job->region_bo[i]);
+ writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
+ writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
+ dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
+ }
+
+ if (job->sram_size) {
+ writel_relaxed(lower_32_bits(dev->sramphys),
+ dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
+ writel_relaxed(upper_32_bits(dev->sramphys),
+ dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
+ dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
+ ETHOSU_SRAM_REGION, &dev->sramphys);
+ }
+
+ writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
+ writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
+ writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);
+
+ writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);
+
+ dev_dbg(dev->base.dev,
+ "Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
+}
+
+static int ethosu_acquire_object_fences(struct ethosu_job *job)
+{
+ int i, ret;
+ struct drm_gem_object **bos = job->region_bo;
+ struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
+
+ for (i = 0; i < job->region_cnt; i++) {
+ bool is_write;
+
+ if (!bos[i])
+ break;
+
+ ret = dma_resv_reserve_fences(bos[i]->resv, 1);
+ if (ret)
+ return ret;
+
+ is_write = info->output_region[job->region_bo_num[i]];
+ ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
+ is_write);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void ethosu_attach_object_fences(struct ethosu_job *job)
+{
+ int i;
+ struct dma_fence *fence = job->inference_done_fence;
+ struct drm_gem_object **bos = job->region_bo;
+ struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
+
+ for (i = 0; i < job->region_cnt; i++)
+ if (info->output_region[job->region_bo_num[i]])
+ dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
+}
+
+static int ethosu_job_push(struct ethosu_job *job)
+{
+ struct ww_acquire_ctx acquire_ctx;
+ int ret;
+
+ ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
+ if (ret)
+ return ret;
+
+ ret = ethosu_acquire_object_fences(job);
+ if (ret)
+ goto out;
+
+ ret = pm_runtime_resume_and_get(job->dev->base.dev);
+ if (!ret) {
+ guard(mutex)(&job->dev->sched_lock);
+
+ drm_sched_job_arm(&job->base);
+ job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
+ kref_get(&job->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&job->base);
+ ethosu_attach_object_fences(job);
+ }
+
+out:
+ drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
+ return ret;
+}
+
+static void ethosu_job_cleanup(struct kref *ref)
+{
+ struct ethosu_job *job = container_of(ref, struct ethosu_job,
+ refcount);
+ unsigned int i;
+
+ pm_runtime_put_autosuspend(job->dev->base.dev);
+
+ dma_fence_put(job->done_fence);
+ dma_fence_put(job->inference_done_fence);
+
+ for (i = 0; i < job->region_cnt; i++)
+ drm_gem_object_put(job->region_bo[i]);
+
+ drm_gem_object_put(job->cmd_bo);
+
+ kfree(job);
+}
+
+static void ethosu_job_put(struct ethosu_job *job)
+{
+ kref_put(&job->refcount, ethosu_job_cleanup);
+}
+
+static void ethosu_job_free(struct drm_sched_job *sched_job)
+{
+ struct ethosu_job *job = to_ethosu_job(sched_job);
+
+ drm_sched_job_cleanup(sched_job);
+ ethosu_job_put(job);
+}
+
+static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
+{
+ struct ethosu_job *job = to_ethosu_job(sched_job);
+ struct ethosu_device *dev = job->dev;
+ struct dma_fence *fence = job->done_fence;
+
+ if (unlikely(job->base.s_fence->finished.error))
+ return NULL;
+
+ dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
+ dev->fence_context, ++dev->emit_seqno);
+ dma_fence_get(fence);
+
+ scoped_guard(mutex, &dev->job_lock) {
+ dev->in_flight_job = job;
+ ethosu_job_hw_submit(dev, job);
+ }
+
+ return fence;
+}
+
+static void ethosu_job_handle_irq(struct ethosu_device *dev)
+{
+ u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+
+ if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
+ dev_err(dev->base.dev, "Error IRQ - %x\n", status);
+ drm_sched_fault(&dev->sched);
+ return;
+ }
+
+ scoped_guard(mutex, &dev->job_lock) {
+ if (dev->in_flight_job) {
+ dma_fence_signal(dev->in_flight_job->done_fence);
+ dev->in_flight_job = NULL;
+ }
+ }
+}
+
+static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
+{
+ struct ethosu_device *dev = data;
+
+ ethosu_job_handle_irq(dev);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
+{
+ struct ethosu_device *dev = data;
+ u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+
+ if (!(status & STATUS_IRQ_RAISED))
+ return IRQ_NONE;
+
+ writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
+ return IRQ_WAKE_THREAD;
+}
+
+static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
+{
+ struct ethosu_job *job = to_ethosu_job(bad);
+ struct ethosu_device *dev = job->dev;
+ bool running;
+ u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
+ u32 cmdaddr;
+
+ cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
+ running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));
+
+ if (running) {
+ int ret;
+ u32 reg;
+
+ ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
+ reg,
+ reg != cmdaddr,
+ USEC_PER_MSEC, 100 * USEC_PER_MSEC);
+
+ /* If still running and progress is being made, just return */
+ if (!ret)
+ return DRM_GPU_SCHED_STAT_NO_HANG;
+ }
+
+ dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
+ running ? "running" : "stopped",
+ cmdaddr, bocmds[cmdaddr / 4]);
+
+ drm_sched_stop(&dev->sched, bad);
+
+ scoped_guard(mutex, &dev->job_lock)
+ dev->in_flight_job = NULL;
+
+ /* Proceed with reset now. */
+ pm_runtime_force_suspend(dev->base.dev);
+ pm_runtime_force_resume(dev->base.dev);
+
+ /* Restart the scheduler */
+ drm_sched_start(&dev->sched, 0);
+
+ return DRM_GPU_SCHED_STAT_RESET;
+}
+
+static const struct drm_sched_backend_ops ethosu_sched_ops = {
+ .run_job = ethosu_job_run,
+ .timedout_job = ethosu_job_timedout,
+ .free_job = ethosu_job_free
+};
+
+int ethosu_job_init(struct ethosu_device *edev)
+{
+ struct device *dev = edev->base.dev;
+ struct drm_sched_init_args args = {
+ .ops = &ethosu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .credit_limit = 1,
+ .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+ .name = dev_name(dev),
+ .dev = dev,
+ };
+ int ret;
+
+ spin_lock_init(&edev->fence_lock);
+ ret = devm_mutex_init(dev, &edev->job_lock);
+ if (ret)
+ return ret;
+ ret = devm_mutex_init(dev, &edev->sched_lock);
+ if (ret)
+ return ret;
+
+ edev->irq = platform_get_irq(to_platform_device(dev), 0);
+ if (edev->irq < 0)
+ return edev->irq;
+
+ ret = devm_request_threaded_irq(dev, edev->irq,
+ ethosu_job_irq_handler,
+ ethosu_job_irq_handler_thread,
+ IRQF_SHARED, KBUILD_MODNAME,
+ edev);
+ if (ret) {
+ dev_err(dev, "failed to request irq\n");
+ return ret;
+ }
+
+ edev->fence_context = dma_fence_context_alloc(1);
+
+ ret = drm_sched_init(&edev->sched, &args);
+ if (ret) {
+ dev_err(dev, "Failed to create scheduler: %d\n", ret);
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ drm_sched_fini(&edev->sched);
+ return ret;
+}
+
+void ethosu_job_fini(struct ethosu_device *dev)
+{
+ drm_sched_fini(&dev->sched);
+}
+
+int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
+{
+ struct ethosu_device *dev = ethosu_priv->edev;
+ struct drm_gpu_scheduler *sched = &dev->sched;
+ int ret;
+
+ ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
+ DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ return WARN_ON(ret);
+}
+
+void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
+{
+ struct drm_sched_entity *entity = &ethosu_priv->sched_entity;
+
+ drm_sched_entity_destroy(entity);
+}
+
+static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
+ struct drm_ethosu_job *job)
+{
+ struct ethosu_device *edev = to_ethosu_device(dev);
+ struct ethosu_file_priv *file_priv = file->driver_priv;
+ struct ethosu_job *ejob = NULL;
+ struct ethosu_validated_cmdstream_info *cmd_info;
+ int ret = 0;
+
+ /* BO region 2 is reserved if SRAM is used */
+ if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
+ return -EINVAL;
+
+ if (edev->npu_info.sram_size < job->sram_size)
+ return -EINVAL;
+
+ ejob = kzalloc(sizeof(*ejob), GFP_KERNEL);
+ if (!ejob)
+ return -ENOMEM;
+
+ kref_init(&ejob->refcount);
+
+ ejob->dev = edev;
+ ejob->sram_size = job->sram_size;
+
+ ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL);
+ if (!ejob->done_fence) {
+ ret = -ENOMEM;
+ goto out_cleanup_job;
+ }
+
+ ret = drm_sched_job_init(&ejob->base,
+ &file_priv->sched_entity,
+ 1, NULL, file->client_id);
+ if (ret)
+ goto out_put_job;
+
+ ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
+ if (!ejob->cmd_bo) {
+ ret = -ENOENT;
+ goto out_cleanup_job;
+ }
+ cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
+ if (!cmd_info) {
+ ret = -EINVAL;
+ goto out_cleanup_job;
+ }
+
+ for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
+ struct drm_gem_object *gem;
+
+ /* Can only omit a BO handle if the region is not used or used for SRAM */
+ if (!job->region_bo_handles[i] &&
+ (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
+ continue;
+
+ if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
+ dev_err(dev->dev,
+ "Cmdstream BO handle %d set for unused region %d\n",
+ job->region_bo_handles[i], i);
+ ret = -EINVAL;
+ goto out_cleanup_job;
+ }
+
+ gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
+ if (!gem) {
+ dev_err(dev->dev,
+ "Invalid BO handle %d for region %d\n",
+ job->region_bo_handles[i], i);
+ ret = -ENOENT;
+ goto out_cleanup_job;
+ }
+
+ ejob->region_bo[ejob->region_cnt] = gem;
+ ejob->region_bo_num[ejob->region_cnt] = i;
+ ejob->region_cnt++;
+
+ if (to_ethosu_bo(gem)->info) {
+ dev_err(dev->dev,
+ "Cmdstream BO handle %d used for region %d\n",
+ job->region_bo_handles[i], i);
+ ret = -EINVAL;
+ goto out_cleanup_job;
+ }
+
+ /* Verify the command stream doesn't have accesses outside the BO */
+ if (cmd_info->region_size[i] > gem->size) {
+ dev_err(dev->dev,
+ "cmd stream region %d size greater than BO size (%llu > %zu)\n",
+ i, cmd_info->region_size[i], gem->size);
+ ret = -EOVERFLOW;
+ goto out_cleanup_job;
+ }
+ }
+ ret = ethosu_job_push(ejob);
+
+out_cleanup_job:
+ if (ret)
+ drm_sched_job_cleanup(&ejob->base);
+out_put_job:
+ ethosu_job_put(ejob);
+
+ return ret;
+}
+
+int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ethosu_submit *args = data;
+ int ret = 0;
+ unsigned int i = 0;
+
+ if (args->pad) {
+ drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
+ return -EINVAL;
+ }
+
+ struct drm_ethosu_job __free(kvfree) *jobs =
+ kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
+ if (!jobs)
+ return -ENOMEM;
+
+ if (copy_from_user(jobs,
+ (void __user *)(uintptr_t)args->jobs,
+ args->job_count * sizeof(*jobs))) {
+ drm_dbg(dev, "Failed to copy incoming job array\n");
+ return -EFAULT;
+ }
+
+ for (i = 0; i < args->job_count; i++) {
+ ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/accel/ethosu/ethosu_job.h b/drivers/accel/ethosu/ethosu_job.h
new file mode 100644
index 000000000000..ff1cf448d094
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_job.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+/* Copyright 2025 Arm, Ltd. */
+
+#ifndef __ETHOSU_JOB_H__
+#define __ETHOSU_JOB_H__
+
+#include <linux/kref.h>
+#include <drm/gpu_scheduler.h>
+
+struct ethosu_device;
+struct ethosu_file_priv;
+
+struct ethosu_job {
+ struct drm_sched_job base;
+ struct ethosu_device *dev;
+
+ struct drm_gem_object *cmd_bo;
+ struct drm_gem_object *region_bo[NPU_BASEP_REGION_MAX];
+ u8 region_bo_num[NPU_BASEP_REGION_MAX];
+ u8 region_cnt;
+ u32 sram_size;
+
+ /* Fence to be signaled by drm-sched once its done with the job */
+ struct dma_fence *inference_done_fence;
+
+ /* Fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *done_fence;
+
+ struct kref refcount;
+};
+
+int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file);
+
+int ethosu_job_init(struct ethosu_device *dev);
+void ethosu_job_fini(struct ethosu_device *dev);
+int ethosu_job_open(struct ethosu_file_priv *ethosu_priv);
+void ethosu_job_close(struct ethosu_file_priv *ethosu_priv);
+
+#endif
diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig
index be85336107f9..6d1506acbd72 100644
--- a/drivers/accel/habanalabs/Kconfig
+++ b/drivers/accel/habanalabs/Kconfig
@@ -6,7 +6,7 @@
config DRM_ACCEL_HABANALABS
tristate "HabanaLabs AI accelerators"
depends on DRM_ACCEL
- depends on X86_64
+ depends on X86 && X86_64
depends on PCI && HAS_IOMEM
select GENERIC_ALLOCATOR
select HWMON
@@ -27,3 +27,26 @@ config DRM_ACCEL_HABANALABS
To compile this driver as a module, choose M here: the
module will be called habanalabs.
+
+if DRM_ACCEL_HABANALABS
+
+config HL_HLDIO
+ bool "Habanalabs NVMe Direct I/O (HLDIO)"
+ depends on PCI_P2PDMA
+ depends on BLOCK
+ help
+ Enable NVMe peer-to-peer direct I/O support for Habanalabs AI
+ accelerators.
+
+ This allows direct data transfers between NVMe storage devices
+ and Habanalabs accelerators without involving system memory,
+ using PCI peer-to-peer DMA capabilities.
+
+ Requirements:
+ - CONFIG_PCI_P2PDMA=y
+ - NVMe device and Habanalabs accelerator under same PCI root complex
+ - IOMMU disabled or in passthrough mode
+ - Hardware supporting PCI P2P DMA
+
+ If unsure, say N
+endif # DRM_ACCEL_HABANALABS
diff --git a/drivers/accel/habanalabs/common/Makefile b/drivers/accel/habanalabs/common/Makefile
index e6abffea9f87..b6d00de09db5 100644
--- a/drivers/accel/habanalabs/common/Makefile
+++ b/drivers/accel/habanalabs/common/Makefile
@@ -13,3 +13,8 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_submission.o common/firmware_if.o \
common/security.o common/state_dump.o \
common/memory_mgr.o common/decoder.o
+
+# Conditionally add HLDIO support
+ifdef CONFIG_HL_HLDIO
+HL_COMMON_FILES += common/hldio.o
+endif \ No newline at end of file
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 39e23d625a3c..dee487724918 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -2586,7 +2586,7 @@ int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
cs_seq = args->in.seq;
timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
- ? msecs_to_jiffies(args->in.timeout * 1000)
+ ? secs_to_jiffies(args->in.timeout)
: hpriv->hdev->timeout_jiffies;
switch (cs_type) {
@@ -3284,12 +3284,6 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
/* In case the node already registered, need to unregister first then re-use */
if (req_offset_record->ts_reg_info.in_use) {
- dev_dbg(data->buf->mmg->dev,
- "Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
- req_offset_record,
- req_offset_record->ts_reg_info.interrupt->interrupt_id,
- req_offset_record->ts_reg_info.timestamp_kernel_addr,
- data->interrupt->interrupt_id);
/*
* Since interrupt here can be different than the one the node currently registered
* on, and we don't want to lock two lists while we're doing unregister, so
@@ -3345,10 +3339,6 @@ static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx
goto put_cq_cb;
}
- dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
- data->interrupt->interrupt_id, data->ts_handle,
- data->ts_offset, data->cq_offset);
-
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
if (!data->buf) {
rc = -EINVAL;
@@ -3370,9 +3360,6 @@ static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx
if (*pend->cq_kernel_addr >= data->target_value) {
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
- dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
- pend, data->ts_offset, data->interrupt->interrupt_id);
-
pend->ts_reg_info.in_use = 0;
*status = HL_WAIT_CS_STATUS_COMPLETED;
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c
index b83141f58319..9f212b17611a 100644
--- a/drivers/accel/habanalabs/common/context.c
+++ b/drivers/accel/habanalabs/common/context.c
@@ -199,7 +199,6 @@ out_err:
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
- char task_comm[TASK_COMM_LEN];
int rc = 0, i;
ctx->hdev = hdev;
@@ -272,7 +271,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
- get_task_comm(task_comm, current), ctx->asid);
+ current->comm, ctx->asid);
}
return 0;
diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index b1c88d1837d9..5f0820b19ccb 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -6,6 +6,7 @@
*/
#include "habanalabs.h"
+#include "hldio.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include <linux/pci.h>
@@ -42,9 +43,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.i2c_reg = i2c_reg;
pkt.i2c_len = i2c_len;
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, val);
- if (rc)
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, val);
+ if (rc && rc != -EAGAIN)
dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
return rc;
@@ -75,10 +75,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.i2c_len = i2c_len;
pkt.value = cpu_to_le64(val);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, NULL);
-
- if (rc)
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+ if (rc && rc != -EAGAIN)
dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
return rc;
@@ -99,10 +97,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
pkt.led_index = cpu_to_le32(led);
pkt.value = cpu_to_le64(state);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, NULL);
-
- if (rc)
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+ if (rc && rc != -EAGAIN)
dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
}
@@ -607,6 +603,198 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
+#ifdef CONFIG_HL_HLDIO
+/* DIO debugfs functions following the standard pattern */
+static int dio_ssd2hl_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+
+ if (!hdev->asic_prop.supports_nvme) {
+ seq_puts(s, "NVMe Direct I/O not supported\\n");
+ return 0;
+ }
+
+ seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n");
+ seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read);
+ seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n");
+
+ return 0;
+}
+
+static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ struct seq_file *s = file->private_data;
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+ struct hl_ctx *ctx = hdev->kernel_ctx;
+ char kbuf[128];
+ u64 device_va = 0, off_bytes = 0, len_bytes = 0;
+ u32 fd = 0;
+ size_t len_read = 0;
+ int rc, parsed;
+
+ if (!hdev->asic_prop.supports_nvme)
+ return -EOPNOTSUPP;
+
+ if (count >= sizeof(kbuf))
+ return -EINVAL;
+
+ if (copy_from_user(kbuf, buf, count))
+ return -EFAULT;
+
+ kbuf[count] = 0;
+
+ /* Parse: fd=N va=0xADDR off=N len=N */
+ parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu",
+ &fd, &device_va, &off_bytes, &len_bytes);
+ if (parsed != 4) {
+ dev_err(hdev->dev, "Invalid format. Expected: fd=N va=0xADDR off=N len=N\\n");
+ return -EINVAL;
+ }
+
+ /* Validate file descriptor */
+ if (fd == 0) {
+ dev_err(hdev->dev, "Invalid file descriptor: %u\\n", fd);
+ return -EINVAL;
+ }
+
+ /* Validate alignment requirements */
+ if (!IS_ALIGNED(device_va, PAGE_SIZE) ||
+ !IS_ALIGNED(off_bytes, PAGE_SIZE) ||
+ !IS_ALIGNED(len_bytes, PAGE_SIZE)) {
+ dev_err(hdev->dev,
+ "All parameters must be page-aligned (4KB)\\n");
+ return -EINVAL;
+ }
+
+ /* Validate transfer size */
+ if (len_bytes == 0 || len_bytes > SZ_1G) {
+ dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\\n",
+ len_bytes);
+ return -EINVAL;
+ }
+
+ dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\\n",
+ fd, device_va, off_bytes, len_bytes);
+
+ rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read);
+ if (rc < 0) {
+ dev_entry->dio_stats.failed_ops++;
+ dev_err(hdev->dev, "SSD2HL operation failed: %d\\n", rc);
+ return rc;
+ }
+
+ /* Update statistics */
+ dev_entry->dio_stats.total_ops++;
+ dev_entry->dio_stats.successful_ops++;
+ dev_entry->dio_stats.bytes_transferred += len_read;
+ dev_entry->dio_stats.last_len_read = len_read;
+
+ dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\\n", len_read);
+
+ return count;
+}
+
+static int dio_hl2ssd_show(struct seq_file *s, void *data)
+{
+ seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\\n");
+ return 0;
+}
+
+static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ struct seq_file *s = file->private_data;
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+
+ if (!hdev->asic_prop.supports_nvme)
+ return -EOPNOTSUPP;
+
+ dev_dbg(hdev->dev, "HL2SSD operation not implemented\\n");
+ return -EOPNOTSUPP;
+}
+
+static int dio_stats_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+ struct hl_dio_stats *stats = &dev_entry->dio_stats;
+ u64 avg_bytes_per_op = 0, success_rate = 0;
+
+ if (!hdev->asic_prop.supports_nvme) {
+ seq_puts(s, "NVMe Direct I/O not supported\\n");
+ return 0;
+ }
+
+ if (stats->successful_ops > 0)
+ avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops;
+
+ if (stats->total_ops > 0)
+ success_rate = (stats->successful_ops * 100) / stats->total_ops;
+
+ seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n");
+ seq_printf(s, "Total operations: %llu\\n", stats->total_ops);
+ seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops);
+ seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops);
+ seq_printf(s, "Success rate: %llu%%\\n", success_rate);
+ seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred);
+ seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op);
+ seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read);
+
+ return 0;
+}
+
+static int dio_reset_show(struct seq_file *s, void *data)
+{
+ seq_puts(s, "Write '1' to reset DIO statistics\\n");
+ return 0;
+}
+
+static ssize_t dio_reset_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ struct seq_file *s = file->private_data;
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+ char kbuf[8];
+ unsigned long val;
+ int rc;
+
+ if (!hdev->asic_prop.supports_nvme)
+ return -EOPNOTSUPP;
+
+ if (count >= sizeof(kbuf))
+ return -EINVAL;
+
+ if (copy_from_user(kbuf, buf, count))
+ return -EFAULT;
+
+ kbuf[count] = 0;
+
+ rc = kstrtoul(kbuf, 0, &val);
+ if (rc)
+ return rc;
+
+ if (val == 1) {
+ memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
+ dev_dbg(hdev->dev, "DIO statistics reset\\n");
+ } else {
+ dev_err(hdev->dev, "Write '1' to reset statistics\\n");
+ return -EINVAL;
+ }
+
+ return count;
+}
+#endif
+
static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -793,6 +981,113 @@ static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,
}
}
+static void dump_cfg_access_entry(struct hl_device *hdev,
+ struct hl_debugfs_cfg_access_entry *entry)
+{
+ char *access_type = "";
+ struct tm tm;
+
+ switch (entry->debugfs_type) {
+ case DEBUGFS_READ32:
+ access_type = "READ32 from";
+ break;
+ case DEBUGFS_WRITE32:
+ access_type = "WRITE32 to";
+ break;
+ case DEBUGFS_READ64:
+ access_type = "READ64 from";
+ break;
+ case DEBUGFS_WRITE64:
+ access_type = "WRITE64 to";
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type);
+ return;
+ }
+
+ time64_to_tm(entry->seconds_since_epoch, 0, &tm);
+ dev_info(hdev->dev,
+ "%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1,
+ tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr);
+}
+
+void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
+{
+ struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses;
+ u32 i, head, count = 0;
+ time64_t entry_time, now;
+ unsigned long flags;
+
+ now = ktime_get_real_seconds();
+
+ spin_lock_irqsave(&dbgfs->lock, flags);
+ head = dbgfs->head;
+ if (head == 0)
+ i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
+ else
+ i = head - 1;
+
+ /* Walk back until timeout or invalid entry */
+ while (dbgfs->cfg_access_list[i].valid) {
+ entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch;
+ /* Stop when entry is older than timeout */
+ if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC)
+ break;
+
+ /* print single entry under lock */
+ {
+ struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i];
+ /*
+ * We copy the entry out under lock and then print after
+ * releasing the lock to minimize time under lock.
+ */
+ spin_unlock_irqrestore(&dbgfs->lock, flags);
+ dump_cfg_access_entry(hdev, &entry);
+ spin_lock_irqsave(&dbgfs->lock, flags);
+ }
+
+ /* mark consumed */
+ dbgfs->cfg_access_list[i].valid = false;
+
+ if (i == 0)
+ i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
+ else
+ i--;
+ count++;
+ if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN)
+ break;
+ }
+ spin_unlock_irqrestore(&dbgfs->lock, flags);
+}
+
+static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size,
+ enum debugfs_access_type access_type)
+{
+ struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses;
+ struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG];
+ struct hl_debugfs_cfg_access_entry *new_entry;
+ unsigned long flags;
+
+ /* Check if address is in config memory */
+ if (addr >= mem_reg->region_base &&
+ mem_reg->region_size >= access_size &&
+ addr <= mem_reg->region_base + mem_reg->region_size - access_size) {
+
+ spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags);
+
+ new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head];
+ new_entry->seconds_since_epoch = ktime_get_real_seconds();
+ new_entry->addr = addr;
+ new_entry->debugfs_type = access_type;
+ new_entry->valid = true;
+ dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1)
+ % HL_DBGFS_CFG_ACCESS_HIST_LEN;
+
+ spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags);
+
+ }
+}
+
static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type)
{
@@ -810,6 +1105,7 @@ static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
return rc;
}
+ check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type);
rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
if (rc) {
dev_err(hdev->dev,
@@ -1408,7 +1704,7 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
return rc;
if (value)
- hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+ hdev->timeout_jiffies = secs_to_jiffies(value);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
@@ -1530,6 +1826,13 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"mmu", mmu_show, mmu_asid_va_write},
{"mmu_error", mmu_ack_error, mmu_ack_error_value_write},
{"engines", engines_show, NULL},
+#ifdef CONFIG_HL_HLDIO
+ /* DIO entries - only created if NVMe is supported */
+ {"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write},
+ {"dio_stats", dio_stats_show, NULL},
+ {"dio_reset", dio_reset_show, dio_reset_write},
+ {"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write},
+#endif
};
static int hl_debugfs_open(struct inode *inode, struct file *file)
@@ -1722,7 +2025,17 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
root,
&hdev->device_release_watchdog_timeout_sec);
+ debugfs_create_u16("server_type",
+ 0444,
+ root,
+ &hdev->asic_prop.server_type);
+
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
+ /* Skip DIO entries if NVMe is not supported */
+ if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 &&
+ !hdev->asic_prop.supports_nvme)
+ continue;
+
debugfs_create_file(hl_debugfs_list[i].name,
0644,
root,
@@ -1762,6 +2075,14 @@ int hl_debugfs_device_init(struct hl_device *hdev)
spin_lock_init(&dev_entry->userptr_spinlock);
mutex_init(&dev_entry->ctx_mem_hash_mutex);
+ spin_lock_init(&hdev->debugfs_cfg_accesses.lock);
+ hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */
+
+#ifdef CONFIG_HL_HLDIO
+ /* Initialize DIO statistics */
+ memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
+#endif
+
return 0;
}
@@ -1780,6 +2101,7 @@ void hl_debugfs_device_fini(struct hl_device *hdev)
vfree(entry->state_dump[i]);
kfree(entry->entry_arr);
+
}
void hl_debugfs_add_device(struct hl_device *hdev)
@@ -1792,6 +2114,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry, dev_entry->root);
+
}
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
@@ -1924,3 +2247,4 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
up_write(&dev_entry->state_dump_sem);
}
+
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 8f92445c5a90..999c92d7036e 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -30,6 +30,8 @@ enum dma_alloc_type {
#define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788
+static void hl_device_heartbeat(struct work_struct *work);
+
/*
* hl_set_dram_bar- sets the bar to allow later access to address
*
@@ -130,8 +132,8 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t
}
if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
- trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size,
- caller);
+ trace_habanalabs_dma_alloc(&(hdev)->pdev->dev, (u64) (uintptr_t) ptr, *dma_handle,
+ size, caller);
return ptr;
}
@@ -152,7 +154,7 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
break;
}
- trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller);
+ trace_habanalabs_dma_free(&(hdev)->pdev->dev, store_cpu_addr, dma_handle, size, caller);
}
void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
@@ -204,15 +206,15 @@ int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
return 0;
for_each_sgtable_dma_sg(sgt, sg, i)
- trace_habanalabs_dma_map_page(hdev->dev,
- page_to_phys(sg_page(sg)),
- sg->dma_address - prop->device_dma_offset_for_host_access,
+ trace_habanalabs_dma_map_page(&(hdev)->pdev->dev,
+ page_to_phys(sg_page(sg)),
+ sg->dma_address - prop->device_dma_offset_for_host_access,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length,
+ sg->dma_length,
#else
- sg->length,
+ sg->length,
#endif
- dir, caller);
+ dir, caller);
return 0;
}
@@ -247,7 +249,8 @@ void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
if (trace_habanalabs_dma_unmap_page_enabled()) {
for_each_sgtable_dma_sg(sgt, sg, i)
- trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)),
+ trace_habanalabs_dma_unmap_page(&(hdev)->pdev->dev,
+ page_to_phys(sg_page(sg)),
sg->dma_address - prop->device_dma_offset_for_host_access,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->dma_length,
@@ -439,16 +442,19 @@ static void print_idle_status_mask(struct hl_device *hdev, const char *message,
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE])
{
if (idle_mask[3])
- dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n",
- message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
+ dev_err(hdev->dev, "%s %s (mask %#llx_%016llx_%016llx_%016llx)\n",
+ dev_name(&hdev->pdev->dev), message,
+ idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
else if (idle_mask[2])
- dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n",
- message, idle_mask[2], idle_mask[1], idle_mask[0]);
+ dev_err(hdev->dev, "%s %s (mask %#llx_%016llx_%016llx)\n",
+ dev_name(&hdev->pdev->dev), message,
+ idle_mask[2], idle_mask[1], idle_mask[0]);
else if (idle_mask[1])
- dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n",
- message, idle_mask[1], idle_mask[0]);
+ dev_err(hdev->dev, "%s %s (mask %#llx_%016llx)\n",
+ dev_name(&hdev->pdev->dev), message, idle_mask[1], idle_mask[0]);
else
- dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]);
+ dev_err(hdev->dev, "%s %s (mask %#llx)\n", dev_name(&hdev->pdev->dev), message,
+ idle_mask[0]);
}
static void hpriv_release(struct kref *ref)
@@ -545,7 +551,8 @@ int hl_hpriv_put(struct hl_fpriv *hpriv)
return kref_put(&hpriv->refcount, hpriv_release);
}
-static void print_device_in_use_info(struct hl_device *hdev, const char *message)
+static void print_device_in_use_info(struct hl_device *hdev,
+ struct hl_mem_mgr_fini_stats *mm_fini_stats, const char *message)
{
u32 active_cs_num, dmabuf_export_cnt;
bool unknown_reason = true;
@@ -569,6 +576,12 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message
dmabuf_export_cnt);
}
+ if (mm_fini_stats->n_busy_cb) {
+ unknown_reason = false;
+ offset += scnprintf(buf + offset, size - offset, " [%u live CB handles]",
+ mm_fini_stats->n_busy_cb);
+ }
+
if (unknown_reason)
scnprintf(buf + offset, size - offset, " [unknown reason]");
@@ -586,6 +599,7 @@ void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = to_hl_device(ddev);
+ struct hl_mem_mgr_fini_stats mm_fini_stats;
if (!hdev) {
pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
@@ -597,12 +611,13 @@ void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
/* Memory buffers might be still in use at this point and thus the handles IDR destruction
* is postponed to hpriv_release().
*/
- hl_mem_mgr_fini(&hpriv->mem_mgr);
+ hl_mem_mgr_fini(&hpriv->mem_mgr, &mm_fini_stats);
hdev->compute_ctx_in_release = 1;
if (!hl_hpriv_put(hpriv)) {
- print_device_in_use_info(hdev, "User process closed FD but device still in use");
+ print_device_in_use_info(hdev, &mm_fini_stats,
+ "User process closed FD but device still in use");
hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
@@ -802,7 +817,7 @@ static void device_hard_reset_pending(struct work_struct *work)
}
queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
- msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
+ secs_to_jiffies(HL_PENDING_RESET_PER_SEC));
}
}
@@ -858,6 +873,10 @@ static int device_early_init(struct hl_device *hdev)
gaudi2_set_asic_funcs(hdev);
strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name));
break;
+ case ASIC_GAUDI2D:
+ gaudi2_set_asic_funcs(hdev);
+ strscpy(hdev->asic_name, "GAUDI2D", sizeof(hdev->asic_name));
+ break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
hdev->asic_type);
@@ -946,6 +965,8 @@ static int device_early_init(struct hl_device *hdev)
goto free_cb_mgr;
}
+ INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+
INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending);
hdev->device_reset_work.hdev = hdev;
hdev->device_fini_pending = 0;
@@ -968,7 +989,7 @@ static int device_early_init(struct hl_device *hdev)
return 0;
free_cb_mgr:
- hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
+ hl_mem_mgr_fini(&hdev->kernel_mem_mgr, NULL);
hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr);
free_chip_info:
kfree(hdev->hl_chip_info);
@@ -1012,7 +1033,7 @@ static void device_early_fini(struct hl_device *hdev)
mutex_destroy(&hdev->clk_throttling.lock);
- hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
+ hl_mem_mgr_fini(&hdev->kernel_mem_mgr, NULL);
hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr);
kfree(hdev->hl_chip_info);
@@ -1045,21 +1066,36 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
return (device_id == hdev->pdev->device);
}
-static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
+static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
{
+ struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info;
+ u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1;
struct asic_fixed_properties *prop = &hdev->asic_prop;
if (!prop->cpucp_info.eq_health_check_supported)
- return 0;
+ return true;
- if (hdev->eq_heartbeat_received) {
- hdev->eq_heartbeat_received = false;
- } else {
+ if (!hdev->eq_heartbeat_received) {
dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
- return -EIO;
+
+ dev_err(hdev->dev,
+ "EQ: {CI %u, HB counter %u, last HB time: %ptTs}, PQ: {PI: %u, CI: %u (%u), last HB time: %ptTs}\n",
+ hdev->event_queue.ci,
+ heartbeat_debug_info->heartbeat_event_counter,
+ &hdev->heartbeat_debug_info.last_eq_heartbeat_ts,
+ hdev->kernel_queues[cpu_q_id].pi,
+ atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
+ atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask,
+ &hdev->heartbeat_debug_info.last_pq_heartbeat_ts);
+
+ hl_eq_dump(hdev, &hdev->event_queue);
+
+ return false;
}
- return 0;
+ hdev->eq_heartbeat_received = false;
+
+ return true;
}
static void hl_device_heartbeat(struct work_struct *work)
@@ -1078,7 +1114,7 @@ static void hl_device_heartbeat(struct work_struct *work)
* in order to validate the eq is working.
* Only if both the EQ is healthy and we managed to send the next heartbeat reschedule.
*/
- if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
+ if (hl_device_eq_heartbeat_received(hdev) && (!hdev->asic_funcs->send_heartbeat(hdev)))
goto reschedule;
if (hl_device_operational(hdev, NULL))
@@ -1132,21 +1168,6 @@ static int device_late_init(struct hl_device *hdev)
}
hdev->high_pll = hdev->asic_prop.high_pll;
-
- if (hdev->heartbeat) {
- /*
- * Before scheduling the heartbeat driver will check if eq event has received.
- * for the first schedule we need to set the indication as true then for the next
- * one this indication will be true only if eq event was sent by FW.
- */
- hdev->eq_heartbeat_received = true;
-
- INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
-
- schedule_delayed_work(&hdev->work_heartbeat,
- usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
- }
-
hdev->late_init_done = true;
return 0;
@@ -1163,9 +1184,6 @@ static void device_late_fini(struct hl_device *hdev)
if (!hdev->late_init_done)
return;
- if (hdev->heartbeat)
- cancel_delayed_work_sync(&hdev->work_heartbeat);
-
if (hdev->asic_funcs->late_fini)
hdev->asic_funcs->late_fini(hdev);
@@ -1266,8 +1284,12 @@ static void hl_abort_waiting_for_completions(struct hl_device *hdev)
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
bool skip_wq_flush)
{
- if (hard_reset)
+ if (hard_reset) {
+ if (hdev->heartbeat)
+ cancel_delayed_work_sync(&hdev->work_heartbeat);
+
device_late_fini(hdev);
+ }
/*
* Halt the engines and disable interrupts so we won't get any more
@@ -1495,15 +1517,14 @@ static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
* of heartbeat, the device CPU is marked as disable
* so this message won't be sent
*/
- if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) {
- dev_warn(hdev->dev, "Failed to disable FW's PCI access\n");
+ if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
return;
- }
- /* verify that last EQs are handled before disabled is set */
+ /* disable_irq also generates sync irq, this verifies that last EQs are handled
+ * before disabled is set. The IRQ will be enabled again in request_irq call.
+ */
if (hdev->cpu_queues_enable)
- synchronize_irq(pci_irq_vector(hdev->pdev,
- hdev->asic_prop.eq_interrupt_id));
+ disable_irq(pci_irq_vector(hdev->pdev, hdev->asic_prop.eq_interrupt_id));
}
}
@@ -1547,6 +1568,31 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
}
}
+static void reset_heartbeat_debug_info(struct hl_device *hdev)
+{
+ hdev->heartbeat_debug_info.last_pq_heartbeat_ts = 0;
+ hdev->heartbeat_debug_info.last_eq_heartbeat_ts = 0;
+ hdev->heartbeat_debug_info.heartbeat_event_counter = 0;
+}
+
+static inline void device_heartbeat_schedule(struct hl_device *hdev)
+{
+ if (!hdev->heartbeat)
+ return;
+
+ reset_heartbeat_debug_info(hdev);
+
+ /*
+ * Before scheduling the heartbeat driver will check if eq event has received.
+ * for the first schedule we need to set the indication as true then for the next
+ * one this indication will be true only if eq event was sent by FW.
+ */
+ hdev->eq_heartbeat_received = true;
+
+ schedule_delayed_work(&hdev->work_heartbeat,
+ usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+}
+
/*
* hl_device_reset - reset the device
*
@@ -1584,6 +1630,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release;
+ if (hdev->cpld_shutdown) {
+ dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n");
+ return -EIO;
+ }
+
if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) {
dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n");
return 0;
@@ -1916,6 +1967,8 @@ kill_processes:
if (hard_reset) {
hdev->reset_info.hard_reset_cnt++;
+ device_heartbeat_schedule(hdev);
+
/* After reset is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events
* and if those events are fatal, we won't know about it and
@@ -2024,7 +2077,7 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
hdev->device_release_watchdog_timeout_sec);
schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
- msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
+ secs_to_jiffies(hdev->device_release_watchdog_timeout_sec));
hdev->reset_info.watchdog_active = 1;
out:
spin_unlock(&hdev->reset_info.lock);
@@ -2350,6 +2403,12 @@ int hl_device_init(struct hl_device *hdev)
goto out_disabled;
}
+ /* Scheduling the EQ heartbeat thread must come after driver is done with all
+ * initializations, as we want to make sure the FW gets enough time to be prepared
+ * to respond to heartbeat packets.
+ */
+ device_heartbeat_schedule(hdev);
+
dev_notice(hdev->dev,
"Successfully added device %s to habanalabs driver\n",
dev_name(&(hdev)->pdev->dev));
@@ -2522,6 +2581,14 @@ void hl_device_fini(struct hl_device *hdev)
if (rc)
dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);
+ /* Reset the H/W (if it accessible). It will be in idle state after this returns */
+ if (!hdev->cpld_shutdown) {
+ rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+ if (rc)
+ dev_err(hdev->dev,
+ "hw_fini failed in device fini while removing device %d\n", rc);
+ }
+
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
/* Release kernel context */
@@ -2592,7 +2659,7 @@ inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
u32 val = readl(hdev->rmmio + reg);
if (unlikely(trace_habanalabs_rreg32_enabled()))
- trace_habanalabs_rreg32(hdev->dev, reg, val);
+ trace_habanalabs_rreg32(&(hdev)->pdev->dev, reg, val);
return val;
}
@@ -2610,7 +2677,7 @@ inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
if (unlikely(trace_habanalabs_wreg32_enabled()))
- trace_habanalabs_wreg32(hdev->dev, reg, val);
+ trace_habanalabs_wreg32(&(hdev)->pdev->dev, reg, val);
writel(val, hdev->rmmio + reg);
}
@@ -2836,3 +2903,66 @@ void hl_set_irq_affinity(struct hl_device *hdev, int irq)
if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask))
dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
}
+
+void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
+{
+ hdev->heartbeat_debug_info.heartbeat_event_counter++;
+ hdev->heartbeat_debug_info.last_eq_heartbeat_ts = ktime_get_real_seconds();
+ hdev->eq_heartbeat_received = true;
+}
+
+void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask)
+{
+ struct hl_clk_throttle *clk_throttle = &hdev->clk_throttling;
+ ktime_t zero_time = ktime_set(0, 0);
+
+ mutex_lock(&clk_throttle->lock);
+
+ switch (event_type) {
+ case EQ_EVENT_POWER_EVT_START:
+ clk_throttle->current_reason |= HL_CLK_THROTTLE_POWER;
+ clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_POWER;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
+ dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
+ break;
+
+ case EQ_EVENT_POWER_EVT_END:
+ clk_throttle->current_reason &= ~HL_CLK_THROTTLE_POWER;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
+ dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
+ break;
+
+ case EQ_EVENT_THERMAL_EVT_START:
+ clk_throttle->current_reason |= HL_CLK_THROTTLE_THERMAL;
+ clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+ dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
+ break;
+
+ case EQ_EVENT_THERMAL_EVT_END:
+ clk_throttle->current_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+ dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
+ break;
+
+ default:
+ dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
+ break;
+ }
+
+ mutex_unlock(&clk_throttle->lock);
+}
+
+void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask)
+{
+ hl_handle_critical_hw_err(hdev, event_id, event_mask);
+ *event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
+
+ /* Avoid any new accesses to the H/W */
+ hdev->disabled = true;
+ hdev->cpld_shutdown = true;
+}
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 4bd02778a970..eeb6b2a80fc7 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -8,6 +8,7 @@
#include "habanalabs.h"
#include <linux/habanalabs/hl_boot_if.h>
+#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/crc32.h>
#include <linux/slab.h>
@@ -40,6 +41,31 @@ static char *comms_sts_str_arr[COMMS_STS_INVLD_LAST] = {
[COMMS_STS_TIMEOUT_ERR] = __stringify(COMMS_STS_TIMEOUT_ERR),
};
+/**
+ * hl_fw_version_cmp() - compares the FW version to a specific version
+ *
+ * @hdev: pointer to hl_device structure
+ * @major: major number of a reference version
+ * @minor: minor number of a reference version
+ * @subminor: sub-minor number of a reference version
+ *
+ * Return 1 if FW version greater than the reference version, -1 if it's
+ * smaller and 0 if versions are identical.
+ */
+int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor)
+{
+ if (hdev->fw_sw_major_ver != major)
+ return (hdev->fw_sw_major_ver > major) ? 1 : -1;
+
+ if (hdev->fw_sw_minor_ver != minor)
+ return (hdev->fw_sw_minor_ver > minor) ? 1 : -1;
+
+ if (hdev->fw_sw_sub_minor_ver != subminor)
+ return (hdev->fw_sw_sub_minor_ver > subminor) ? 1 : -1;
+
+ return 0;
+}
+
static char *extract_fw_ver_from_str(const char *fw_str)
{
char *str, *fw_ver, *whitespace;
@@ -345,43 +371,63 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value)
{
struct cpucp_packet pkt = {};
+ int rc;
pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(value);
- return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+ if (rc)
+ dev_err(hdev->dev, "Failed to disable FW's PCI access\n");
+
+ return rc;
}
+/**
+ * hl_fw_send_cpu_message() - send CPU message to the device.
+ *
+ * @hdev: pointer to hl_device structure.
+ * @hw_queue_id: HW queue ID
+ * @msg: raw data of the message/packet
+ * @size: size of @msg in bytes
+ * @timeout_us: timeout in usec to wait for CPU reply on the message
+ * @result: return code reported by FW
+ *
+ * send message to the device CPU.
+ *
+ * Return: 0 on success, non-zero for failure.
+ * -ENOMEM: memory allocation failure
+ * -EAGAIN: CPU is disabled (try again when enabled)
+ * -ETIMEDOUT: timeout waiting for FW response
+ * -EIO: protocol error
+ */
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
- u16 len, u32 timeout, u64 *result)
+ u16 size, u32 timeout_us, u64 *result)
{
struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 tmp, expected_ack_val, pi, opcode;
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
struct hl_bd *sent_bd;
- u32 tmp, expected_ack_val, pi, opcode;
- int rc;
+ int rc = 0, fw_rc;
- pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
+ pkt = hl_cpu_accessible_dma_pool_alloc(hdev, size, &pkt_dma_addr);
if (!pkt) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for packet to CPU\n");
+ dev_err(hdev->dev, "Failed to allocate DMA memory for packet to CPU\n");
return -ENOMEM;
}
- memcpy(pkt, msg, len);
+ memcpy(pkt, msg, size);
mutex_lock(&hdev->send_cpu_message_lock);
/* CPU-CP messages can be sent during soft-reset */
- if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
- rc = 0;
+ if (hdev->disabled && !hdev->reset_info.in_compute_reset)
goto out;
- }
if (hdev->device_cpu_disabled) {
- rc = -EIO;
+ rc = -EAGAIN;
goto out;
}
@@ -397,7 +443,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
* Which means that we don't need to lock the access to the entire H/W
* queues module when submitting a JOB to the CPU queue.
*/
- hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), len, pkt_dma_addr);
+ hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), size, pkt_dma_addr);
if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
expected_ack_val = queue->pi;
@@ -406,7 +452,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
(tmp == expected_ack_val), 1000,
- timeout, true);
+ timeout_us, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
@@ -414,19 +460,27 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
/* If FW performed reset just before sending it a packet, we will get a timeout.
* This is expected behavior, hence no need for error message.
*/
- if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset)
+ if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) {
dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
tmp);
- else
- dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n", tmp);
+ } else {
+ struct hl_bd *bd = queue->kernel_address;
+
+ bd += hl_pi_2_offset(pi);
+
+ dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n"
+ "Pkt info[%u]: dma_addr: 0x%llx, kernel_addr: %p, len:0x%x, ctl: 0x%x, ptr:0x%llx, dram_bd:%u\n",
+ tmp, pi, pkt_dma_addr, (void *)pkt, bd->len, bd->ctl, bd->ptr,
+ queue->dram_bd);
+ }
hdev->device_cpu_disabled = true;
goto out;
}
tmp = le32_to_cpu(pkt->ctl);
- rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
- if (rc) {
+ fw_rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
+ if (fw_rc) {
opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
if (!prop->supports_advanced_cpucp_rc) {
@@ -435,7 +489,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto scrub_descriptor;
}
- switch (rc) {
+ switch (fw_rc) {
case cpucp_packet_invalid:
dev_err(hdev->dev,
"CPU packet %d is not supported by F/W\n", opcode);
@@ -460,7 +514,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
/* propagate the return code from the f/w to the callers who want to check it */
if (result)
- *result = rc;
+ *result = fw_rc;
rc = -EIO;
@@ -480,7 +534,7 @@ scrub_descriptor:
out:
mutex_unlock(&hdev->send_cpu_message_lock);
- hl_cpu_accessible_dma_pool_free(hdev, len, pkt);
+ hl_cpu_accessible_dma_pool_free(hdev, size, pkt);
return rc;
}
@@ -550,7 +604,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
int hl_fw_test_cpu_queue(struct hl_device *hdev)
{
struct cpucp_packet test_pkt = {};
- u64 result;
+ u64 result = 0;
int rc;
test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
@@ -623,16 +677,14 @@ int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
int hl_fw_send_heartbeat(struct hl_device *hdev)
{
struct cpucp_packet hb_pkt;
- u64 result;
+ u64 result = 0;
int rc;
memset(&hb_pkt, 0, sizeof(hb_pkt));
- hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
- CPUCP_PKT_CTL_OPCODE_SHIFT);
+ hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << CPUCP_PKT_CTL_OPCODE_SHIFT);
hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
- sizeof(hb_pkt), 0, &result);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, sizeof(hb_pkt), 0, &result);
if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
return -EIO;
@@ -643,6 +695,8 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
rc = -EIO;
}
+ hdev->heartbeat_debug_info.last_pq_heartbeat_ts = ktime_get_real_seconds();
+
return rc;
}
@@ -885,7 +939,7 @@ static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
{
struct cpucp_array_data_packet *pkt;
size_t total_pkt_size, data_size;
- u64 result;
+ u64 result = 0;
int rc;
/* skip sending this info for unsupported ASICs */
@@ -976,11 +1030,10 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
-
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CPU-CP EEPROM packet, error %d\n",
- rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP EEPROM packet, error %d\n", rc);
goto out;
}
@@ -1021,7 +1074,9 @@ int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
goto out;
}
@@ -1055,8 +1110,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
return rc;
}
counters->rx_throughput = result;
@@ -1070,8 +1126,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
return rc;
}
counters->tx_throughput = result;
@@ -1084,8 +1141,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
return rc;
}
counters->replay_cnt = (u32) result;
@@ -1105,9 +1163,9 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CpuCP total energy pkt, error %d\n",
- rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CpuCP total energy pkt, error %d\n", rc);
return rc;
}
@@ -1183,7 +1241,8 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
return rc;
}
@@ -1210,7 +1269,8 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
return rc;
}
@@ -1247,8 +1307,9 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc);
goto out;
}
@@ -1273,7 +1334,8 @@ int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num)
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
if (rc) {
- dev_err(hdev->dev,
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
"Failed to handle CPU-CP pending rows info pkt, error %d\n", rc);
goto out;
}
@@ -1428,7 +1490,7 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
{
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
u32 status = 0, timeout;
- int rc, tries = 1;
+ int rc, tries = 1, fw_err = 0;
bool preboot_still_runs;
/* Need to check two possible scenarios:
@@ -1468,18 +1530,18 @@ retry:
}
}
- if (rc) {
+ /* If we read all FF, then something is totally wrong, no point
+ * of reading specific errors
+ */
+ if (status != -1)
+ fw_err = fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
+ pre_fw_load->boot_err1_reg,
+ pre_fw_load->sts_boot_dev_sts0_reg,
+ pre_fw_load->sts_boot_dev_sts1_reg);
+ if (rc || fw_err) {
detect_cpu_boot_status(hdev, status);
- dev_err(hdev->dev, "CPU boot ready timeout (status = %d)\n", status);
-
- /* If we read all FF, then something is totally wrong, no point
- * of reading specific errors
- */
- if (status != -1)
- fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
- pre_fw_load->boot_err1_reg,
- pre_fw_load->sts_boot_dev_sts0_reg,
- pre_fw_load->sts_boot_dev_sts1_reg);
+ dev_err(hdev->dev, "CPU boot %s (status = %d)\n",
+ fw_err ? "failed due to an error" : "ready timeout", status);
return -EIO;
}
@@ -1750,7 +1812,7 @@ static void hl_fw_dynamic_send_cmd(struct hl_device *hdev,
val = FIELD_PREP(COMMS_COMMAND_CMD_MASK, cmd);
val |= FIELD_PREP(COMMS_COMMAND_SIZE_MASK, size);
- trace_habanalabs_comms_send_cmd(hdev->dev, comms_cmd_str_arr[cmd]);
+ trace_habanalabs_comms_send_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]);
WREG32(le32_to_cpu(dyn_regs->kmd_msg_to_cpu), val);
}
@@ -1808,7 +1870,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
- trace_habanalabs_comms_wait_status(hdev->dev, comms_sts_str_arr[expected_status]);
+ trace_habanalabs_comms_wait_status(&hdev->pdev->dev, comms_sts_str_arr[expected_status]);
/* Wait for expected status */
rc = hl_poll_timeout(
@@ -1825,7 +1887,8 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
return -EIO;
}
- trace_habanalabs_comms_wait_status_done(hdev->dev, comms_sts_str_arr[expected_status]);
+ trace_habanalabs_comms_wait_status_done(&hdev->pdev->dev,
+ comms_sts_str_arr[expected_status]);
/*
* skip storing FW response for NOOP to preserve the actual desired
@@ -1899,7 +1962,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
{
int rc;
- trace_habanalabs_comms_protocol_cmd(hdev->dev, comms_cmd_str_arr[cmd]);
+ trace_habanalabs_comms_protocol_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]);
/* first send clear command to clean former commands */
rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
@@ -2038,7 +2101,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
* note that no alignment/stride address issues here as all structures
* are 64 bit padded.
*/
- data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header);
+ data_ptr = (u8 *)fw_desc + sizeof(struct comms_msg_header);
data_size = le16_to_cpu(fw_desc->header.size);
data_crc32 = hl_fw_compat_crc32(data_ptr, data_size);
@@ -2192,11 +2255,11 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc));
fw_data_size = le16_to_cpu(fw_desc->header.size);
- temp_fw_desc = vzalloc(sizeof(struct comms_desc_header) + fw_data_size);
+ temp_fw_desc = vzalloc(sizeof(struct comms_msg_header) + fw_data_size);
if (!temp_fw_desc)
return -ENOMEM;
- memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_desc_header) + fw_data_size);
+ memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_msg_header) + fw_data_size);
rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader,
(struct lkd_fw_comms_desc *) temp_fw_desc);
@@ -3122,10 +3185,10 @@ long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
-
if (rc) {
- dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
- used_pll_idx, rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
+ used_pll_idx, rc);
return rc;
}
@@ -3149,8 +3212,7 @@ void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
pkt.value = cpu_to_le64(freq);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
-
- if (rc)
+ if (rc && rc != -EAGAIN)
dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
used_pll_idx, rc);
}
@@ -3166,9 +3228,9 @@ long hl_fw_get_max_power(struct hl_device *hdev)
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
-
if (rc) {
- dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
return rc;
}
@@ -3190,8 +3252,7 @@ void hl_fw_set_max_power(struct hl_device *hdev)
pkt.value = cpu_to_le64(hdev->max_power);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
-
- if (rc)
+ if (rc && rc != -EAGAIN)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
@@ -3217,11 +3278,11 @@ static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void
pkt.data_max_size = cpu_to_le32(size);
pkt.nonce = cpu_to_le32(nonce);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- timeout, NULL);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), timeout, NULL);
if (rc) {
- dev_err(hdev->dev,
- "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
goto out;
}
@@ -3263,10 +3324,12 @@ int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
- if (rc)
- dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n");
- else
+ if (rc) {
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n");
+ } else {
dev_dbg(hdev->dev, "generic pkt was successful, result: 0x%llx\n", result);
+ }
*size = (u32)result;
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 48f0f3eea1ef..d94c2ba22a6a 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -71,7 +71,7 @@ struct hl_fpriv;
#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */
-#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */
+#define HL_HEARTBEAT_PER_USEC 10000000 /* 10 s */
#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */
@@ -90,7 +90,9 @@ struct hl_fpriv;
#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF
#define HL_COMMON_DEC_INTERRUPT_ID 0xFFE
-#define HL_STATE_DUMP_HIST_LEN 5
+#define HL_STATE_DUMP_HIST_LEN 5
+#define HL_DBGFS_CFG_ACCESS_HIST_LEN 20
+#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 2 /* 2s */
/* Default value for device reset trigger , an invalid value */
#define HL_RESET_TRIGGER_DEFAULT 0xFF
@@ -651,6 +653,8 @@ struct hl_hints_range {
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @reserved_fw_mem_size: size of dram memory reserved for FW.
+ * @fw_event_queue_size: queue size for events from CPU-CP.
+ * A value of 0 means using the default HL_EQ_SIZE_IN_BYTES value.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@@ -700,6 +704,7 @@ struct hl_hints_range {
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
* @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
+ * @supports_nvme: indicates whether the asic supports NVMe P2P DMA.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -782,6 +787,7 @@ struct asic_fixed_properties {
u32 glbl_err_max_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
+ u32 fw_event_queue_size;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@@ -819,6 +825,7 @@ struct asic_fixed_properties {
u8 supports_advanced_cpucp_rc;
u8 supports_engine_modes;
u8 support_dynamic_resereved_fw_size;
+ u8 supports_nvme;
};
/**
@@ -902,6 +909,18 @@ struct hl_mem_mgr {
};
/**
+ * struct hl_mem_mgr_fini_stats - describes statistics returned during memory manager teardown.
+ * @n_busy_cb: the amount of CB handles that could not be removed
+ * @n_busy_ts: the amount of TS handles that could not be removed
+ * @n_busy_other: the amount of any other type of handles that could not be removed
+ */
+struct hl_mem_mgr_fini_stats {
+ u32 n_busy_cb;
+ u32 n_busy_ts;
+ u32 n_busy_other;
+};
+
+/**
* struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior
* @topic: string identifier used for logging
* @mem_id: memory type identifier, embedded in the handle and used to identify
@@ -1229,6 +1248,7 @@ struct hl_user_pending_interrupt {
* @hdev: pointer to the device structure
* @kernel_address: holds the queue's kernel virtual address
* @bus_address: holds the queue's DMA address
+ * @size: the event queue size
* @ci: ci inside the queue
* @prev_eqe_index: the index of the previous event queue entry. The index of
* the current entry's index must be +1 of the previous one.
@@ -1240,6 +1260,7 @@ struct hl_eq {
struct hl_device *hdev;
void *kernel_address;
dma_addr_t bus_address;
+ u32 size;
u32 ci;
u32 prev_eqe_index;
bool check_eqe_index;
@@ -1268,15 +1289,18 @@ struct hl_dec {
* @ASIC_GAUDI2: Gaudi2 device.
* @ASIC_GAUDI2B: Gaudi2B device.
* @ASIC_GAUDI2C: Gaudi2C device.
+ * @ASIC_GAUDI2D: Gaudi2D device.
*/
enum hl_asic_type {
ASIC_INVALID,
+
ASIC_GOYA,
ASIC_GAUDI,
ASIC_GAUDI_SEC,
ASIC_GAUDI2,
ASIC_GAUDI2B,
ASIC_GAUDI2C,
+ ASIC_GAUDI2D,
};
struct hl_cs_parser;
@@ -2254,6 +2278,9 @@ struct hl_vm {
u8 init_done;
};
+#ifdef CONFIG_HL_HLDIO
+#include "hldio.h"
+#endif
/*
* DEBUG, PROFILING STRUCTURE
@@ -2324,7 +2351,6 @@ struct hl_fpriv {
struct mutex ctx_lock;
};
-
/*
* DebugFS
*/
@@ -2352,6 +2378,7 @@ struct hl_debugfs_entry {
struct hl_dbg_device_entry *dev_entry;
};
+
/**
* struct hl_dbg_device_entry - ASIC specific debugfs manager.
* @root: root dentry.
@@ -2383,6 +2410,7 @@ struct hl_debugfs_entry {
* @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
* @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
* @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read.
+ * @dio_stats: Direct I/O statistics
*/
struct hl_dbg_device_entry {
struct dentry *root;
@@ -2414,6 +2442,35 @@ struct hl_dbg_device_entry {
u8 i2c_addr;
u8 i2c_reg;
u8 i2c_len;
+#ifdef CONFIG_HL_HLDIO
+ struct hl_dio_stats dio_stats;
+#endif
+};
+
+/**
+ * struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of
+ * hl_debugfs_cfg_access.
+ * @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons.
+ * @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64.
+ * @addr: the requested address to access.
+ * @valid: if set, this entry has valid data for dumping at interrupt time.
+ */
+struct hl_debugfs_cfg_access_entry {
+ ktime_t seconds_since_epoch;
+ enum debugfs_access_type debugfs_type;
+ u64 addr;
+ bool valid;
+};
+
+/**
+ * struct hl_debugfs_cfg_access - saves debugfs config region access requests history.
+ * @cfg_access_list: list of objects describing config region access requests.
+ * @head: next valid index to add new entry to in cfg_access_list.
+ */
+struct hl_debugfs_cfg_access {
+ struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN];
+ u32 head;
+ spinlock_t lock; /* protects head and entries */
};
/**
@@ -2709,11 +2766,16 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
* updated directly by the device. If false, the host memory being polled will
* be updated by host CPU. Required so host knows whether or not the memory
* might need to be byte-swapped before returning value to caller.
+ *
+ * On the first 4 polling iterations the macro goes to sleep for short period of
+ * time that gradually increases and reaches sleep_us on the fifth iteration.
*/
#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
mem_written_by_device) \
({ \
+ u64 __sleep_step_us; \
ktime_t __timeout; \
+ u8 __step = 8; \
\
__timeout = ktime_add_us(ktime_get(), timeout_us); \
might_sleep_if(sleep_us); \
@@ -2731,8 +2793,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(val) = le32_to_cpu(*(__le32 *) &(val)); \
break; \
} \
- if (sleep_us) \
- usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ __sleep_step_us = sleep_us >> __step; \
+ if (__sleep_step_us) \
+ usleep_range((__sleep_step_us >> 2) + 1, __sleep_step_us); \
+ __step >>= 1; \
} \
(cond) ? 0 : -ETIMEDOUT; \
})
@@ -3175,6 +3239,21 @@ struct hl_reset_info {
};
/**
+ * struct eq_heartbeat_debug_info - stores debug info to be used upon heartbeat failure.
+ * @last_pq_heartbeat_ts: timestamp of the last test packet that was sent to FW.
+ * This packet is the trigger in FW to send the EQ heartbeat event.
+ * @last_eq_heartbeat_ts: timestamp of the last EQ heartbeat event that was received from FW.
+ * @heartbeat_event_counter: number of heartbeat events received.
+ * @cpu_queue_id: used to read the queue pi/ci
+ */
+struct eq_heartbeat_debug_info {
+ time64_t last_pq_heartbeat_ts;
+ time64_t last_eq_heartbeat_ts;
+ u32 heartbeat_event_counter;
+ u32 cpu_queue_id;
+};
+
+/**
* struct hl_device - habanalabs device structure.
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
* @pcie_bar_phys: array of available PCIe bars physical addresses.
@@ -3239,6 +3318,7 @@ struct hl_reset_info {
* @hl_chip_info: ASIC's sensors information.
* @device_status_description: device status description.
* @hl_debugfs: device's debugfs manager.
+ * @debugfs_cfg_accesses: list of last debugfs config region accesses.
* @cb_pool: list of pre allocated CBs.
* @cb_pool_lock: protects the CB pool.
* @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
@@ -3262,6 +3342,8 @@ struct hl_reset_info {
* @clk_throttling: holds information about current/previous clock throttling events
* @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
+ * @heartbeat_debug_info: counters used to debug heartbeat failures.
+ * @hldio: describes habanalabs direct storage interaction interface.
* @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_inner_major_ver: the major of current loaded preboot inner version.
@@ -3314,6 +3396,7 @@ struct hl_reset_info {
* addresses.
* @is_in_dram_scrub: true if dram scrub operation is on going.
* @disabled: is device disabled.
+ * @cpld_shutdown: is cpld shutdown.
* @late_init_done: is late init stage was done during initialization.
* @hwmon_initialized: is H/W monitor sensors was initialized.
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
@@ -3418,6 +3501,7 @@ struct hl_device {
struct hwmon_chip_info *hl_chip_info;
struct hl_dbg_device_entry hl_debugfs;
+ struct hl_debugfs_cfg_access debugfs_cfg_accesses;
struct list_head cb_pool;
spinlock_t cb_pool_lock;
@@ -3452,6 +3536,10 @@ struct hl_device {
struct hl_reset_info reset_info;
+ struct eq_heartbeat_debug_info heartbeat_debug_info;
+#ifdef CONFIG_HL_HLDIO
+ struct hl_dio hldio;
+#endif
cpumask_t irq_affinity_mask;
u32 *stream_master_qid_arr;
@@ -3487,6 +3575,7 @@ struct hl_device {
u16 cpu_pci_msb_addr;
u8 is_in_dram_scrub;
u8 disabled;
+ u8 cpld_shutdown;
u8 late_init_done;
u8 hwmon_initialized;
u8 reset_on_lockup;
@@ -3596,25 +3685,6 @@ struct hl_ioctl_desc {
hl_ioctl_t *func;
};
-static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major, u32 fw_sw_minor)
-{
- if (hdev->fw_sw_major_ver < fw_sw_major)
- return true;
- if (hdev->fw_sw_major_ver > fw_sw_major)
- return false;
- if (hdev->fw_sw_minor_ver < fw_sw_minor)
- return true;
- return false;
-}
-
-static inline bool hl_is_fw_sw_ver_equal_or_greater(struct hl_device *hdev, u32 fw_sw_major,
- u32 fw_sw_minor)
-{
- return (hdev->fw_sw_major_ver > fw_sw_major ||
- (hdev->fw_sw_major_ver == fw_sw_major &&
- hdev->fw_sw_minor_ver >= fw_sw_minor));
-}
-
/*
* Kernel module functions that can be accessed by entire module
*/
@@ -3740,6 +3810,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
+void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
@@ -3919,6 +3990,7 @@ void hl_mmu_dr_flush(struct hl_ctx *ctx);
int hl_mmu_dr_init(struct hl_device *hdev);
void hl_mmu_dr_fini(struct hl_device *hdev);
+int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value);
@@ -4033,7 +4105,7 @@ char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg);
-void hl_mem_mgr_fini(struct hl_mem_mgr *mmg);
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats);
void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg);
int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
void *args);
@@ -4059,6 +4131,9 @@ void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_coun
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
void hl_init_cpu_for_irq(struct hl_device *hdev);
void hl_set_irq_affinity(struct hl_device *hdev, int irq);
+void hl_eq_heartbeat_event_handle(struct hl_device *hdev);
+void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask);
+void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask);
#ifdef CONFIG_DEBUG_FS
@@ -4080,6 +4155,7 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
unsigned long length);
+void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev);
#else
@@ -4155,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
{
}
+static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
+{
+}
+
#endif
/* Security */
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index e542fd40e16c..0035748f3228 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -101,7 +101,6 @@ static const struct drm_driver hl_driver = {
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
- .date = "20190505",
.fops = &hl_fops,
.open = hl_device_open,
@@ -144,6 +143,9 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev)
case REV_ID_C:
asic_type = ASIC_GAUDI2C;
break;
+ case REV_ID_D:
+ asic_type = ASIC_GAUDI2D;
+ break;
default:
break;
}
@@ -260,7 +262,7 @@ int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
- hl_mem_mgr_fini(&hpriv->mem_mgr);
+ hl_mem_mgr_fini(&hpriv->mem_mgr, NULL);
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
mutex_destroy(&hpriv->ctx_lock);
@@ -359,8 +361,7 @@ static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
* a different default timeout for Gaudi
*/
if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
- hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
- MSEC_PER_SEC);
+ hdev->timeout_jiffies = secs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED);
hdev->reset_upon_device_release = 0;
break;
@@ -385,7 +386,7 @@ static int fixup_device_params(struct hl_device *hdev)
hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
if (tmp_timeout)
- hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
+ hdev->timeout_jiffies = secs_to_jiffies(tmp_timeout);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 1dd6e23172ca..fdfdabc85e54 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,8 +17,6 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <asm/msr.h>
-
/* make sure there is space for all the signed info */
static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
@@ -963,6 +961,12 @@ static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *
case HL_PASSTHROUGH_VERSIONS:
need_input_buff = false;
break;
+ case HL_GET_ERR_COUNTERS_CMD:
+ need_input_buff = true;
+ break;
+ case HL_GET_P_STATE:
+ need_input_buff = false;
+ break;
default:
return -EINVAL;
}
@@ -1279,13 +1283,10 @@ static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long ar
retcode = -EFAULT;
out_err:
- if (retcode) {
- char task_comm[TASK_COMM_LEN];
-
+ if (retcode)
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
- task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
- }
+ task_pid_nr(current), current->comm, cmd, nr);
if (kdata != stack_kdata)
kfree(kdata);
@@ -1308,11 +1309,9 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else {
- char task_comm[TASK_COMM_LEN];
-
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
- task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
+ task_pid_nr(current), current->comm, cmd, nr);
return -ENOTTY;
}
diff --git a/drivers/accel/habanalabs/common/hldio.c b/drivers/accel/habanalabs/common/hldio.c
new file mode 100644
index 000000000000..083ae5610875
--- /dev/null
+++ b/drivers/accel/habanalabs/common/hldio.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2024 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "hldio.h"
+#include <generated/uapi/linux/version.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+
+/*
+ * NVMe Direct I/O implementation for habanalabs driver
+ *
+ * ASSUMPTIONS
+ * ===========
+ * 1. No IOMMU (well, technically it can work with IOMMU, but it is *almost useless).
+ * 2. Only READ operations (can extend in the future).
+ * 3. No sparse files (can overcome this in the future).
+ * 4. Kernel version >= 6.9
+ * 5. Requiring page alignment is OK (I don't see a solution to this one right,
+ * now, how do we read partial pages?)
+ * 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel.
+ * Theoretically I have a slight idea on how this could be solvable, but it
+ * is probably inacceptable for the upstream. Also may not work in the end.
+ * 7. Either make sure our cards and disks are under the same PCI bridge, or
+ * compile a custom kernel to hack around this.
+ */
+
+#define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */
+
+/*
+ * This struct contains all the useful data I could milk out of the file handle
+ * provided by the user.
+ * @TODO: right now it is retrieved on each IO, but can be done once with some
+ * dedicated IOCTL, call it for example HL_REGISTER_HANDLE.
+ */
+struct hl_dio_fd {
+ /* Back pointer in case we need it in async completion */
+ struct hl_ctx *ctx;
+ /* Associated fd struct */
+ struct file *filp;
+};
+
+/*
+ * This is a single IO descriptor
+ */
+struct hl_direct_io {
+ struct hl_dio_fd f;
+ struct kiocb kio;
+ struct bio_vec *bv;
+ struct iov_iter iter;
+ u64 device_va;
+ u64 off_bytes;
+ u64 len_bytes;
+ u32 type;
+};
+
+bool hl_device_supports_nvme(struct hl_device *hdev)
+{
+ return hdev->asic_prop.supports_nvme;
+}
+
+static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct block_device *bd;
+ struct super_block *sb;
+ struct inode *inode;
+ struct gendisk *gd;
+ struct device *disk_dev;
+ int rc;
+
+ f->filp = fget(fd);
+ if (!f->filp) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ if (!(f->filp->f_flags & O_DIRECT)) {
+ dev_err(hdev->dev, "file is not in the direct mode\n");
+ rc = -EINVAL;
+ goto fput;
+ }
+
+ if (!f->filp->f_op->read_iter) {
+ dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n");
+ rc = -EINVAL;
+ goto fput;
+ }
+
+ inode = file_inode(f->filp);
+ sb = inode->i_sb;
+ bd = sb->s_bdev;
+ gd = bd->bd_disk;
+
+ if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) {
+ dev_err(hdev->dev, "sparse files are not currently supported\n");
+ rc = -EINVAL;
+ goto fput;
+ }
+
+ if (!bd || !gd) {
+ dev_err(hdev->dev, "invalid block device\n");
+ rc = -ENODEV;
+ goto fput;
+ }
+ /* Get the underlying device from the block device */
+ disk_dev = disk_to_dev(gd);
+ if (!dma_pci_p2pdma_supported(disk_dev)) {
+ dev_err(hdev->dev, "device does not support PCI P2P DMA\n");
+ rc = -EOPNOTSUPP;
+ goto fput;
+ }
+
+ /*
+ * @TODO: Maybe we need additional checks here
+ */
+
+ f->ctx = ctx;
+ rc = 0;
+
+ goto out;
+fput:
+ fput(f->filp);
+out:
+ return rc;
+}
+
+static void hl_dio_fd_unregister(struct hl_dio_fd *f)
+{
+ fput(f->filp);
+}
+
+static long hl_dio_count_io(struct hl_device *hdev)
+{
+ s64 sum = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ sum += per_cpu(*hdev->hldio.inflight_ios, i);
+
+ return sum;
+}
+
+static bool hl_dio_get_iopath(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+
+ if (hdev->hldio.io_enabled) {
+ this_cpu_inc(*hdev->hldio.inflight_ios);
+
+ /* Avoid race conditions */
+ if (!hdev->hldio.io_enabled) {
+ this_cpu_dec(*hdev->hldio.inflight_ios);
+ return false;
+ }
+
+ hl_ctx_get(ctx);
+
+ return true;
+ }
+
+ return false;
+}
+
+static void hl_dio_put_iopath(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+
+ hl_ctx_put(ctx);
+ this_cpu_dec(*hdev->hldio.inflight_ios);
+}
+
+static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled)
+{
+ hdev->hldio.io_enabled = enabled;
+}
+
+static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io)
+{
+ if ((u64)io->device_va & ~PAGE_MASK) {
+ dev_dbg(hdev->dev, "device address must be 4K aligned\n");
+ return false;
+ }
+
+ if (io->len_bytes & ~PAGE_MASK) {
+ dev_dbg(hdev->dev, "IO length must be 4K aligned\n");
+ return false;
+ }
+
+ if (io->off_bytes & ~PAGE_MASK) {
+ dev_dbg(hdev->dev, "IO offset must be 4K aligned\n");
+ return false;
+ }
+
+ return true;
+}
+
+static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va)
+{
+ struct hl_dio *hldio = &hdev->hldio;
+ u64 device_pa;
+ int rc, i;
+
+ rc = hl_mmu_va_to_pa(ctx, device_va, &device_pa);
+ if (rc) {
+ dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)",
+ device_va, rc);
+ return NULL;
+ }
+
+ for (i = 0 ; i < hldio->np2prs ; ++i) {
+ if (device_pa >= hldio->p2prs[i].device_pa &&
+ device_pa < hldio->p2prs[i].device_pa + hldio->p2prs[i].size)
+ return hldio->p2prs[i].p2ppages[(device_pa - hldio->p2prs[i].device_pa) >>
+ PAGE_SHIFT];
+ }
+
+ return NULL;
+}
+
+static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io)
+{
+ u64 npages, device_va;
+ ssize_t rc;
+ int i;
+
+ if (!hl_dio_validate_io(hdev, io))
+ return -EINVAL;
+
+ if (!hl_dio_get_iopath(io->f.ctx)) {
+ dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n");
+ return -ESHUTDOWN;
+ }
+
+ init_sync_kiocb(&io->kio, io->f.filp);
+ io->kio.ki_pos = io->off_bytes;
+
+ npages = (io->len_bytes >> PAGE_SHIFT);
+
+ /* @TODO: this can be implemented smarter, vmalloc in iopath is not
+ * ideal. Maybe some variation of genpool. Number of pages may differ
+ * greatly, so maybe even use pools of different sizes and chose the
+ * closest one.
+ */
+ io->bv = vzalloc(npages * sizeof(struct bio_vec));
+ if (!io->bv)
+ return -ENOMEM;
+
+ for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) {
+ io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va);
+ if (!io->bv[i].bv_page) {
+ dev_err(hdev->dev, "error getting page struct for device va %#llx",
+ device_va);
+ rc = -EFAULT;
+ goto cleanup;
+ }
+ io->bv[i].bv_offset = 0;
+ io->bv[i].bv_len = PAGE_SIZE;
+ }
+
+ iov_iter_bvec(&io->iter, io->type, io->bv, 1, io->len_bytes);
+ if (io->f.filp->f_op && io->f.filp->f_op->read_iter)
+ rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter);
+ else
+ rc = -EINVAL;
+
+cleanup:
+ vfree(io->bv);
+ hl_dio_put_iopath(io->f.ctx);
+
+ dev_dbg(hdev->dev, "IO ended with %ld\n", rc);
+
+ return rc;
+}
+
+/*
+ * @TODO: This function can be used as a callback for io completion under
+ * kio->ki_complete in order to implement async IO.
+ * Note that on more recent kernels there is no ret2.
+ */
+__maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2)
+{
+ struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio);
+
+ dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret);
+
+ /* Do something to copy result to user / notify completion */
+
+ hl_dio_put_iopath(io->f.ctx);
+
+ hl_dio_fd_unregister(&io->f);
+}
+
+/*
+ * DMA disk to ASIC, wait for results. Must be invoked from the user context
+ */
+int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+ u64 device_va, off_t off_bytes, size_t len_bytes,
+ size_t *len_read)
+{
+ struct hl_direct_io *io;
+ ssize_t rc;
+
+ dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#lx\n", fd, device_va, len_bytes);
+
+ io = kzalloc(sizeof(*io), GFP_KERNEL);
+ if (!io) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ *io = (struct hl_direct_io){
+ .device_va = device_va,
+ .len_bytes = len_bytes,
+ .off_bytes = off_bytes,
+ .type = READ,
+ };
+
+ rc = hl_dio_fd_register(ctx, fd, &io->f);
+ if (rc)
+ goto kfree_io;
+
+ rc = hl_direct_io(hdev, io);
+ if (rc >= 0) {
+ *len_read = rc;
+ rc = 0;
+ }
+
+ /* This shall be called only in the case of a sync IO */
+ hl_dio_fd_unregister(&io->f);
+kfree_io:
+ kfree(io);
+out:
+ return rc;
+}
+
+static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{
+ if (p2pr->p2ppages) {
+ vfree(p2pr->p2ppages);
+ p2pr->p2ppages = NULL;
+ }
+
+ if (p2pr->p2pmem) {
+ dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n",
+ p2pr->p2pmem, p2pr->size);
+ pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size);
+ p2pr->p2pmem = NULL;
+ }
+}
+
+void hl_p2p_region_fini_all(struct hl_device *hdev)
+{
+ int i;
+
+ for (i = 0 ; i < hdev->hldio.np2prs ; ++i)
+ hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]);
+
+ kvfree(hdev->hldio.p2prs);
+ hdev->hldio.p2prs = NULL;
+ hdev->hldio.np2prs = 0;
+}
+
+int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{
+ void *addr;
+ int rc, i;
+
+ /* Start by publishing our p2p memory */
+ rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, p2pr->bar_offset);
+ if (rc) {
+ dev_err(hdev->dev, "error adding p2p resource: %d\n", rc);
+ goto err;
+ }
+
+ /* Alloc all p2p mem */
+ p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size);
+ if (!p2pr->p2pmem) {
+ dev_err(hdev->dev, "error allocating p2p memory\n");
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *));
+ if (!p2pr->p2ppages) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) {
+ p2pr->p2ppages[i] = virt_to_page(addr);
+ if (!p2pr->p2ppages[i]) {
+ rc = -EFAULT;
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ hl_p2p_region_fini(hdev, p2pr);
+ return rc;
+}
+
+int hl_dio_start(struct hl_device *hdev)
+{
+ dev_dbg(hdev->dev, "initializing HLDIO\n");
+
+ /* Initialize the IO counter and enable IO */
+ hdev->hldio.inflight_ios = alloc_percpu(s64);
+ if (!hdev->hldio.inflight_ios)
+ return -ENOMEM;
+
+ hl_dio_set_io_enabled(hdev, true);
+
+ return 0;
+}
+
+void hl_dio_stop(struct hl_device *hdev)
+{
+ dev_dbg(hdev->dev, "deinitializing HLDIO\n");
+
+ if (hdev->hldio.io_enabled) {
+ /* Wait for all the IO to finish */
+ hl_dio_set_io_enabled(hdev, false);
+ hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT);
+ }
+
+ if (hdev->hldio.inflight_ios) {
+ free_percpu(hdev->hldio.inflight_ios);
+ hdev->hldio.inflight_ios = NULL;
+ }
+}
diff --git a/drivers/accel/habanalabs/common/hldio.h b/drivers/accel/habanalabs/common/hldio.h
new file mode 100644
index 000000000000..2874388f2851
--- /dev/null
+++ b/drivers/accel/habanalabs/common/hldio.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver
+ *
+ * This feature requires specific hardware setup and must not be built
+ * under COMPILE_TEST.
+ */
+
+#ifndef __HL_HLDIO_H__
+#define __HL_HLDIO_H__
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/ktime.h> /* ktime functions */
+#include <linux/delay.h> /* usleep_range */
+#include <linux/kernel.h> /* might_sleep_if */
+#include <linux/errno.h> /* error codes */
+
+/* Forward declarations */
+struct hl_device;
+struct file;
+
+/* Enable only if Kconfig selected */
+#ifdef CONFIG_HL_HLDIO
+/**
+ * struct hl_p2p_region - describes a single P2P memory region
+ * @p2ppages: array of page structs for the P2P memory
+ * @p2pmem: virtual address of the P2P memory region
+ * @device_pa: physical address on the device
+ * @bar_offset: offset within the BAR
+ * @size: size of the region in bytes
+ * @bar: BAR number containing this region
+ */
+struct hl_p2p_region {
+ struct page **p2ppages;
+ void *p2pmem;
+ u64 device_pa;
+ u64 bar_offset;
+ u64 size;
+ int bar;
+};
+
+/**
+ * struct hl_dio_stats - Direct I/O statistics
+ * @total_ops: total number of operations attempted
+ * @successful_ops: number of successful operations
+ * @failed_ops: number of failed operations
+ * @bytes_transferred: total bytes successfully transferred
+ * @last_len_read: length of the last read operation
+ */
+struct hl_dio_stats {
+ u64 total_ops;
+ u64 successful_ops;
+ u64 failed_ops;
+ u64 bytes_transferred;
+ size_t last_len_read;
+};
+
+/**
+ * struct hl_dio - describes habanalabs direct storage interaction interface
+ * @p2prs: array of p2p regions
+ * @inflight_ios: percpu counter for inflight ios
+ * @np2prs: number of elements in p2prs
+ * @io_enabled: 1 if io is enabled 0 otherwise
+ */
+struct hl_dio {
+ struct hl_p2p_region *p2prs;
+ s64 __percpu *inflight_ios;
+ u8 np2prs;
+ u8 io_enabled;
+};
+
+int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+ u64 device_va, off_t off_bytes, size_t len_bytes,
+ size_t *len_read);
+void hl_p2p_region_fini_all(struct hl_device *hdev);
+int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr);
+int hl_dio_start(struct hl_device *hdev);
+void hl_dio_stop(struct hl_device *hdev);
+
+/* Init/teardown */
+int hl_hldio_init(struct hl_device *hdev);
+void hl_hldio_fini(struct hl_device *hdev);
+
+/* File operations */
+long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+
+/* DebugFS hooks */
+#ifdef CONFIG_DEBUG_FS
+void hl_hldio_debugfs_init(struct hl_device *hdev);
+void hl_hldio_debugfs_fini(struct hl_device *hdev);
+#else
+static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
+static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
+#endif
+
+#else /* !CONFIG_HL_HLDIO */
+
+struct hl_p2p_region;
+/* Stubs when HLDIO is disabled */
+static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+ u64 device_va, off_t off_bytes, size_t len_bytes,
+ size_t *len_read)
+{ return -EOPNOTSUPP; }
+static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {}
+static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{ return -EOPNOTSUPP; }
+static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; }
+static inline void hl_dio_stop(struct hl_device *hdev) {}
+
+static inline int hl_hldio_init(struct hl_device *hdev) { return 0; }
+static inline void hl_hldio_fini(struct hl_device *hdev) { }
+static inline long hl_hldio_ioctl(struct file *f, unsigned int c,
+ unsigned long a)
+{ return -ENOTTY; }
+static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
+static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
+
+#endif /* CONFIG_HL_HLDIO */
+
+/* Simplified polling macro for HLDIO (no simulator support) */
+#define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \
+({ \
+ ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ might_sleep_if(sleep_us); \
+ (void)(hdev); /* keep signature consistent, hdev unused */ \
+ for (;;) { \
+ mb(); /* ensure ordering of memory operations */ \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
+ break; \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
+
+#ifdef CONFIG_HL_HLDIO
+bool hl_device_supports_nvme(struct hl_device *hdev);
+#else
+static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; }
+#endif
+
+#endif /* __HL_HLDIO_H__ */
diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 36b951b5f503..52d1e6bf10dc 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -585,9 +585,10 @@ int hl_get_temperature(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to get temperature from sensor %d, error %d\n",
- sensor_index, rc);
+ if (rc != -EAGAIN)
+ dev_err_ratelimited(hdev->dev,
+ "Failed to get temperature from sensor %d, error %d\n",
+ sensor_index, rc);
*value = 0;
}
@@ -610,8 +611,7 @@ int hl_set_temperature(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
-
- if (rc)
+ if (rc && rc != -EAGAIN)
dev_err_ratelimited(hdev->dev,
"Failed to set temperature of sensor %d, error %d\n",
sensor_index, rc);
@@ -639,9 +639,10 @@ int hl_get_voltage(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to get voltage from sensor %d, error %d\n",
- sensor_index, rc);
+ if (rc != -EAGAIN)
+ dev_err_ratelimited(hdev->dev,
+ "Failed to get voltage from sensor %d, error %d\n",
+ sensor_index, rc);
*value = 0;
}
@@ -668,9 +669,10 @@ int hl_get_current(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to get current from sensor %d, error %d\n",
- sensor_index, rc);
+ if (rc != -EAGAIN)
+ dev_err_ratelimited(hdev->dev,
+ "Failed to get current from sensor %d, error %d\n",
+ sensor_index, rc);
*value = 0;
}
@@ -697,9 +699,10 @@ int hl_get_fan_speed(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to get fan speed from sensor %d, error %d\n",
- sensor_index, rc);
+ if (rc != -EAGAIN)
+ dev_err_ratelimited(hdev->dev,
+ "Failed to get fan speed from sensor %d, error %d\n",
+ sensor_index, rc);
*value = 0;
}
@@ -726,9 +729,10 @@ int hl_get_pwm_info(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to get pwm info from sensor %d, error %d\n",
- sensor_index, rc);
+ if (rc != -EAGAIN)
+ dev_err_ratelimited(hdev->dev,
+ "Failed to get pwm info from sensor %d, error %d\n",
+ sensor_index, rc);
*value = 0;
}
@@ -751,8 +755,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
-
- if (rc)
+ if (rc && rc != -EAGAIN)
dev_err_ratelimited(hdev->dev,
"Failed to set pwm info to sensor %d, error %d\n",
sensor_index, rc);
@@ -774,8 +777,7 @@ int hl_set_voltage(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
-
- if (rc)
+ if (rc && rc != -EAGAIN)
dev_err_ratelimited(hdev->dev,
"Failed to set voltage of sensor %d, error %d\n",
sensor_index, rc);
@@ -797,10 +799,8 @@ int hl_set_current(struct hl_device *hdev,
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, NULL);
-
- if (rc)
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+ if (rc && rc != -EAGAIN)
dev_err_ratelimited(hdev->dev,
"Failed to set current of sensor %d, error %d\n",
sensor_index, rc);
@@ -830,8 +830,7 @@ int hl_set_power(struct hl_device *hdev,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
-
- if (rc)
+ if (rc && rc != -EAGAIN)
dev_err_ratelimited(hdev->dev,
"Failed to set power of sensor %d, error %d\n",
sensor_index, rc);
@@ -859,9 +858,10 @@ int hl_get_power(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to get power of sensor %d, error %d\n",
- sensor_index, rc);
+ if (rc != -EAGAIN)
+ dev_err_ratelimited(hdev->dev,
+ "Failed to get power of sensor %d, error %d\n",
+ sensor_index, rc);
*value = 0;
}
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index 978b7f4d5eeb..7c9f2f6a2870 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -652,14 +652,16 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
*/
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
{
+ u32 size = hdev->asic_prop.fw_event_queue_size ? : HL_EQ_SIZE_IN_BYTES;
void *p;
- p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_EQ_SIZE_IN_BYTES, &q->bus_address);
+ p = hl_cpu_accessible_dma_pool_alloc(hdev, size, &q->bus_address);
if (!p)
return -ENOMEM;
q->hdev = hdev;
q->kernel_address = p;
+ q->size = size;
q->ci = 0;
q->prev_eqe_index = 0;
@@ -678,7 +680,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
flush_workqueue(hdev->eq_wq);
- hl_cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, q->kernel_address);
+ hl_cpu_accessible_dma_pool_free(hdev, q->size, q->kernel_address);
}
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
@@ -693,5 +695,30 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
* when the device is operational again
*/
- memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
+ memset(q->kernel_address, 0, q->size);
+}
+
+void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q)
+{
+ u32 eq_length, eqe_size, ctl, ready, mode, type, index;
+ struct hl_eq_header *hdr;
+ u8 *ptr;
+ int i;
+
+ eq_length = HL_EQ_LENGTH;
+ eqe_size = q->size / HL_EQ_LENGTH;
+
+ dev_info(hdev->dev, "Contents of EQ entries headers:\n");
+
+ for (i = 0, ptr = q->kernel_address ; i < eq_length ; ++i, ptr += eqe_size) {
+ hdr = (struct hl_eq_header *) ptr;
+ ctl = le32_to_cpu(hdr->ctl);
+ ready = FIELD_GET(EQ_CTL_READY_MASK, ctl);
+ mode = FIELD_GET(EQ_CTL_EVENT_MODE_MASK, ctl);
+ type = FIELD_GET(EQ_CTL_EVENT_TYPE_MASK, ctl);
+ index = FIELD_GET(EQ_CTL_INDEX_MASK, ctl);
+
+ dev_info(hdev->dev, "%02u: %#010x [ready: %u, mode %u, type %04u, index %05u]\n",
+ i, ctl, ready, mode, type, index);
+ }
}
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 3348ad12c237..633db4bff46f 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -14,7 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/pci-p2pdma.h>
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
#define HL_MMU_DEBUG 0
@@ -1829,9 +1829,6 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
struct hl_ctx *ctx;
- if (!hl_dmabuf)
- return;
-
ctx = hl_dmabuf->ctx;
if (hl_dmabuf->memhash_hnode)
@@ -1840,7 +1837,12 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
atomic_dec(&ctx->hdev->dmabuf_export_cnt);
hl_ctx_put(ctx);
- /* Paired with get_file() in export_dmabuf() */
+ /*
+ * Paired with get_file() in export_dmabuf().
+ * 'ctx' can be still used here to get the file pointer, even after hl_ctx_put() was called,
+ * because releasing the compute device file involves another reference decrement, and it
+ * would be possible only after calling fput().
+ */
fput(ctx->hpriv->file_priv->filp);
kfree(hl_dmabuf);
@@ -1859,7 +1861,12 @@ static int export_dmabuf(struct hl_ctx *ctx,
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct hl_device *hdev = ctx->hdev;
- int rc, fd;
+ CLASS(get_unused_fd, fd)(flags);
+
+ if (fd < 0) {
+ dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd);
+ return fd;
+ }
exp_info.ops = &habanalabs_dmabuf_ops;
exp_info.size = total_size;
@@ -1872,13 +1879,6 @@ static int export_dmabuf(struct hl_ctx *ctx,
return PTR_ERR(hl_dmabuf->dmabuf);
}
- fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
- if (fd < 0) {
- dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd);
- rc = fd;
- goto err_dma_buf_put;
- }
-
hl_dmabuf->ctx = ctx;
hl_ctx_get(hl_dmabuf->ctx);
atomic_inc(&ctx->hdev->dmabuf_export_cnt);
@@ -1890,13 +1890,9 @@ static int export_dmabuf(struct hl_ctx *ctx,
get_file(ctx->hpriv->file_priv->filp);
*dmabuf_fd = fd;
+ fd_install(take_fd(fd), hl_dmabuf->dmabuf->file);
return 0;
-
-err_dma_buf_put:
- hl_dmabuf->dmabuf->priv = NULL;
- dma_buf_put(hl_dmabuf->dmabuf);
- return rc;
}
static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
@@ -2341,7 +2337,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
if (rc < 0)
goto destroy_pages;
npages = rc;
- rc = -EFAULT;
+ rc = -ENOMEM;
goto put_pages;
}
userptr->npages = npages;
diff --git a/drivers/accel/habanalabs/common/memory_mgr.c b/drivers/accel/habanalabs/common/memory_mgr.c
index c4d84df355b0..4401beb99e42 100644
--- a/drivers/accel/habanalabs/common/memory_mgr.c
+++ b/drivers/accel/habanalabs/common/memory_mgr.c
@@ -259,13 +259,8 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
goto put_mem;
}
-#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
- if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
- user_mem_size)) {
-#else
if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
user_mem_size)) {
-#endif
dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
buf->behavior->topic, vma->vm_start);
@@ -318,28 +313,61 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
idr_init(&mmg->handles);
}
+static void hl_mem_mgr_fini_stats_reset(struct hl_mem_mgr_fini_stats *stats)
+{
+ if (!stats)
+ return;
+
+ memset(stats, 0, sizeof(*stats));
+}
+
+static void hl_mem_mgr_fini_stats_inc(u64 mem_id, struct hl_mem_mgr_fini_stats *stats)
+{
+ if (!stats)
+ return;
+
+ switch (mem_id) {
+ case HL_MMAP_TYPE_CB:
+ ++stats->n_busy_cb;
+ break;
+ case HL_MMAP_TYPE_TS_BUFF:
+ ++stats->n_busy_ts;
+ break;
+ default:
+ /* we currently store only CB/TS so this shouldn't happen */
+ ++stats->n_busy_other;
+ }
+}
+
/**
* hl_mem_mgr_fini - release unified memory manager
*
* @mmg: parent unified memory manager
+ * @stats: if non-NULL, will return some counters for handles that could not be removed.
*
* Release the unified memory manager. Shall be called from an interrupt context.
*/
-void hl_mem_mgr_fini(struct hl_mem_mgr *mmg)
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats)
{
struct hl_mmap_mem_buf *buf;
struct idr *idp;
const char *topic;
+ u64 mem_id;
u32 id;
+ hl_mem_mgr_fini_stats_reset(stats);
+
idp = &mmg->handles;
idr_for_each_entry(idp, buf, id) {
topic = buf->behavior->topic;
- if (hl_mmap_mem_buf_put(buf) != 1)
+ mem_id = buf->behavior->mem_id;
+ if (hl_mmap_mem_buf_put(buf) != 1) {
dev_err(mmg->dev,
"%s: Buff handle %u for CTX is still alive\n",
topic, id);
+ hl_mem_mgr_fini_stats_inc(mem_id, stats);
+ }
}
}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c
index d3eaab908457..79823facce7f 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu.c
@@ -6,6 +6,7 @@
*/
#include <linux/slab.h>
+#include <linux/pci.h>
#include "../habanalabs.h"
@@ -262,7 +263,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu
mmu_funcs->flush(ctx);
if (trace_habanalabs_mmu_unmap_enabled() && !rc)
- trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
+ trace_habanalabs_mmu_unmap(&hdev->pdev->dev, virt_addr, 0, page_size, flush_pte);
return rc;
}
@@ -349,7 +350,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s
if (flush_pte)
mmu_funcs->flush(ctx);
- trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
+ trace_habanalabs_mmu_map(&hdev->pdev->dev, virt_addr, phys_addr, page_size, flush_pte);
return 0;
@@ -599,6 +600,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
case ASIC_GAUDI2:
case ASIC_GAUDI2B:
case ASIC_GAUDI2C:
+ case ASIC_GAUDI2D:
hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
if (prop->pmmu.host_resident)
hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
@@ -644,7 +646,8 @@ int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
if (rc)
dev_err_ratelimited(hdev->dev,
- "%s cache invalidation failed, rc=%d\n",
+ "%s: %s cache invalidation failed, rc=%d\n",
+ dev_name(&hdev->pdev->dev),
flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc);
return rc;
@@ -659,8 +662,9 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
asid, va, size);
if (rc)
dev_err_ratelimited(hdev->dev,
- "%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d",
- flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc);
+ "%s: %s cache range invalidation failed: va=%#llx, size=%llu, rc=%d",
+ dev_name(&hdev->pdev->dev), flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU",
+ va, size, rc);
return rc;
}
diff --git a/drivers/accel/habanalabs/common/pci/pci.c b/drivers/accel/habanalabs/common/pci/pci.c
index 191e0e3cf3a5..81cbd8697d4c 100644
--- a/drivers/accel/habanalabs/common/pci/pci.c
+++ b/drivers/accel/habanalabs/common/pci/pci.c
@@ -123,7 +123,7 @@ int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data)
pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
if (unlikely(trace_habanalabs_elbi_read_enabled()))
- trace_habanalabs_elbi_read(hdev->dev, (u32) addr, val);
+ trace_habanalabs_elbi_read(&hdev->pdev->dev, (u32) addr, val);
return 0;
}
@@ -186,7 +186,7 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) {
if (unlikely(trace_habanalabs_elbi_write_enabled()))
- trace_habanalabs_elbi_write(hdev->dev, (u32) addr, val);
+ trace_habanalabs_elbi_write(&hdev->pdev->dev, (u32) addr, val);
return 0;
}
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 8a9f98832157..8f55ba3b4e73 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -96,14 +96,21 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
infineon_second_stage_third_instance =
(infineon_second_stage_version >> 16) & mask;
- if (cpucp_info->infineon_second_stage_version)
+ if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
le32_to_cpu(cpucp_info->infineon_version),
infineon_second_stage_first_instance,
infineon_second_stage_second_instance,
infineon_second_stage_third_instance);
- else
+ else if (cpucp_info->infineon_second_stage_version)
+ return sprintf(buf, "%#04x:%#04x:%#04x\n",
+ infineon_second_stage_first_instance,
+ infineon_second_stage_second_instance,
+ infineon_second_stage_third_instance);
+ else if (cpucp_info->infineon_version)
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
+
+ return 0;
}
static DEVICE_ATTR_RO(vrm_ver);
@@ -142,8 +149,9 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
{
struct hl_device *hdev = dev_get_drvdata(dev);
- return sprintf(buf, "0x%08x\n",
- le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version));
+ return sprintf(buf, "0x%08x%08x\n",
+ le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_timestamp),
+ le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version));
}
static ssize_t cpucp_kernel_ver_show(struct device *dev,
@@ -270,6 +278,9 @@ static ssize_t device_type_show(struct device *dev,
case ASIC_GAUDI2C:
str = "GAUDI2C";
break;
+ case ASIC_GAUDI2D:
+ str = "GAUDI2D";
+ break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
hdev->asic_type);
@@ -364,7 +375,7 @@ out:
}
static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf, loff_t offset,
+ const struct bin_attribute *attr, char *buf, loff_t offset,
size_t max_size)
{
struct device *dev = kobj_to_dev(kobj);
@@ -439,7 +450,7 @@ static DEVICE_ATTR_RO(security_enabled);
static DEVICE_ATTR_RO(module_id);
static DEVICE_ATTR_RO(parent_device);
-static struct bin_attribute bin_attr_eeprom = {
+static const struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
.size = PAGE_SIZE,
.read = eeprom_read_handler
@@ -468,7 +479,7 @@ static struct attribute *hl_dev_attrs[] = {
NULL,
};
-static struct bin_attribute *hl_dev_bin_attrs[] = {
+static const struct bin_attribute *const hl_dev_bin_attrs[] = {
&bin_attr_eeprom,
NULL
};
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index f2b04ffb0ecb..34771d75da9d 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -1639,10 +1639,8 @@ static int gaudi_late_init(struct hl_device *hdev)
}
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
- if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ if (rc)
return rc;
- }
/* Scrub both SRAM and DRAM */
rc = hdev->asic_funcs->scrub_device_mem(hdev);
@@ -4154,13 +4152,7 @@ skip_reset:
static int gaudi_suspend(struct hl_device *hdev)
{
- int rc;
-
- rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
- if (rc)
- dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
-
- return rc;
+ return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
static int gaudi_resume(struct hl_device *hdev)
@@ -4176,10 +4168,29 @@ static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE);
+#ifdef _HAS_DMA_MMAP_COHERENT
+ /*
+ * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
+ * so vm_insert_page() can handle it safely. Without this, the kernel
+ * may BUG_ON due to VM_PFNMAP.
+ */
+ if (is_vmalloc_addr(cpu_addr))
+ vm_flags_set(vma, VM_MIXEDMAP);
+
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
(dma_addr - HOST_PHYS_BASE), size);
if (rc)
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
+#else
+
+ rc = remap_pfn_range(vma, vma->vm_start,
+ virt_to_phys(cpu_addr) >> PAGE_SHIFT,
+ size, vma->vm_page_prot);
+ if (rc)
+ dev_err(hdev->dev, "remap_pfn_range error %d", rc);
+
+ #endif
+
return rc;
}
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index fa1c4feb9f89..b8c0689dba64 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -728,6 +728,354 @@ static const int gaudi2_dma_core_async_event_id[] = {
[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};
+const char *gaudi2_engine_id_str[] = {
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_6),
+ __stringify(GAUDI2_ENGINE_ID_PDMA_0),
+ __stringify(GAUDI2_ENGINE_ID_PDMA_1),
+ __stringify(GAUDI2_ENGINE_ID_ROT_0),
+ __stringify(GAUDI2_ENGINE_ID_ROT_1),
+ __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC0_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC0_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC1_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC1_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC2_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC2_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC3_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC3_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC4_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC4_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC5_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC5_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC6_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC6_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC7_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC7_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC8_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC8_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC9_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC9_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC10_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC10_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC11_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC11_1),
+ __stringify(GAUDI2_ENGINE_ID_PCIE),
+ __stringify(GAUDI2_ENGINE_ID_PSOC),
+ __stringify(GAUDI2_ENGINE_ID_ARC_FARM),
+ __stringify(GAUDI2_ENGINE_ID_KDMA),
+ __stringify(GAUDI2_ENGINE_ID_SIZE),
+};
+
+const char *gaudi2_queue_id_str[] = {
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_3),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_0),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_1),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_2),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_3),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_0),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_1),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_2),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_3),
+ __stringify(GAUDI2_QUEUE_ID_CPU_PQ),
+ __stringify(GAUDI2_QUEUE_ID_SIZE),
+};
+
static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
"qman sei intr",
"arc sei intr"
@@ -2601,6 +2949,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
+ prop->supports_advanced_cpucp_rc = true;
+
return 0;
free_qprops:
@@ -3148,7 +3498,6 @@ static int gaudi2_early_init(struct hl_device *hdev)
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
- /* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
@@ -3160,6 +3509,13 @@ static int gaudi2_early_init(struct hl_device *hdev)
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
goto pci_fini;
}
+
+ rc = hl_fw_read_preboot_status(hdev);
+ if (rc) {
+ if (hdev->reset_on_preboot_fail)
+ hdev->asic_funcs->hw_fini(hdev, true, false);
+ goto pci_fini;
+ }
}
return 0;
@@ -3308,14 +3664,10 @@ static int gaudi2_late_init(struct hl_device *hdev)
struct gaudi2_device *gaudi2 = hdev->asic_specific;
int rc;
- hdev->asic_prop.supports_advanced_cpucp_rc = true;
-
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
gaudi2->virt_msix_db_dma_addr);
- if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ if (rc)
return rc;
- }
rc = gaudi2_fetch_psoc_frequency(hdev);
if (rc) {
@@ -3783,7 +4135,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
prop->supports_compute_reset = true;
/* Event queue sanity check added in FW version 1.11 */
- if (hl_is_fw_sw_ver_below(hdev, 1, 11))
+ if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0)
hdev->event_queue.check_eqe_index = false;
else
hdev->event_queue.check_eqe_index = true;
@@ -3798,6 +4150,8 @@ static int gaudi2_sw_init(struct hl_device *hdev)
if (rc)
goto special_blocks_free;
+ hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ;
+
return 0;
special_blocks_free:
@@ -4836,7 +5190,7 @@ static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw
else
wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
- if (fw_reset)
+ if (fw_reset || hdev->cpld_shutdown)
goto skip_engines;
gaudi2_stop_dma_qmans(hdev);
@@ -6314,26 +6668,6 @@ static void gaudi2_execute_hard_reset(struct hl_device *hdev)
WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
}
-static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
-{
- int i, rc = 0;
- u32 reg_val;
-
- for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
- rc = hl_poll_timeout(
- hdev,
- mmCPU_RST_STATUS_TO_HOST,
- reg_val,
- reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
- 1000,
- poll_timeout_us);
-
- if (rc)
- dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
- reg_val);
- return rc;
-}
-
/**
* gaudi2_execute_soft_reset - execute soft reset by driver/FW
*
@@ -6346,23 +6680,8 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
u32 poll_timeout_us)
{
- int rc;
-
- if (!driver_performs_reset) {
- if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
- /* set SP to indicate reset request sent to FW */
- WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
-
- WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
- gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
-
- /* wait for f/w response */
- rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
- } else {
- rc = hl_fw_send_soft_reset(hdev);
- }
- return rc;
- }
+ if (!driver_performs_reset)
+ return hl_fw_send_soft_reset(hdev);
/* Block access to engines, QMANs and SM during reset, these
* RRs will be reconfigured after soft reset.
@@ -6502,13 +6821,7 @@ skip_reset:
static int gaudi2_suspend(struct hl_device *hdev)
{
- int rc;
-
- rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
- if (rc)
- dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
-
- return rc;
+ return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
static int gaudi2_resume(struct hl_device *hdev)
@@ -6525,6 +6838,13 @@ static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
VM_DONTCOPY | VM_NORESERVE);
#ifdef _HAS_DMA_MMAP_COHERENT
+ /*
+ * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
+ * so vm_insert_page() can handle it safely. Without this, the kernel
+ * may BUG_ON due to VM_PFNMAP.
+ */
+ if (is_vmalloc_addr(cpu_addr))
+ vm_flags_set(vma, VM_MIXEDMAP);
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
if (rc)
@@ -6815,7 +7135,8 @@ static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parse
struct gaudi2_device *gaudi2 = hdev->asic_specific;
if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
- dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
+ dev_err(hdev->dev, "h/w queue %s is disabled\n",
+ GAUDI2_QUEUE_ID_TO_STR(parser->hw_queue_id));
return -EINVAL;
}
@@ -7067,7 +7388,8 @@ static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
if (rc)
dev_err(hdev->dev,
- "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
+ "Failed to send msg_short packet to H/W queue %s\n",
+ GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
return rc;
}
@@ -7093,8 +7415,8 @@ static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queu
timeout_usec);
if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
- hw_queue_id, tmp);
+ dev_err(hdev->dev, "H/W queue %s test failed (SOB_OBJ_0 == 0x%x)\n",
+ GAUDI2_QUEUE_ID_TO_STR(hw_queue_id), tmp);
rc = -EIO;
}
@@ -7914,7 +8236,7 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
bool has_block_id = false;
u16 block_id;
- if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
+ if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0)
has_block_id = true;
ecc_address = le64_to_cpu(ecc_data->ecc_address);
@@ -8165,13 +8487,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
}
hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
-
- if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
- !hdev->asic_prop.fw_security_enabled &&
- ((module_idx == 0) || (module_idx == 1)))
- lbw_rtr_id = DCORE0_RTR0;
- else
- lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
+ lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
break;
case RAZWI_MME:
sprintf(initiator_name, "MME_%u", module_idx);
@@ -9310,8 +9626,8 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type,
static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
{
+ bool require_hard_reset = false;
u32 addr, beat, beat_shift;
- bool rc = false;
dev_err_ratelimited(hdev->dev,
"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
@@ -9343,7 +9659,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
beat,
le32_to_cpu(rd_err_data->dbg_rd_err_dm),
le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
- rc |= true;
+ require_hard_reset = true;
}
beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
@@ -9356,7 +9672,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
- rc |= true;
+ require_hard_reset = true;
}
dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
@@ -9366,7 +9682,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
}
- return rc;
+ return require_hard_reset;
}
static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
@@ -9650,8 +9966,8 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
gaudi2_print_event(hdev, event_type, true,
- "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
- engine_id, intr_type, q->queue_index);
+ "ARC DCCM Full event: Eng: %s, Intr_type: %u, Qidx: %u",
+ GAUDI2_ENG_ID_TO_STR(engine_id), intr_type, q->queue_index);
return 1;
default:
gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
@@ -9824,11 +10140,6 @@ static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
return U16_MAX;
}
-static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
-{
- hdev->eq_heartbeat_received = true;
-}
-
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -10050,6 +10361,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
reset_required = true;
+ is_critical = eq_entry->sei_data.hdr.is_critical;
}
error_count++;
break;
@@ -10070,7 +10382,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
- if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
+ if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0)
is_critical = true;
break;
@@ -10223,7 +10535,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
le64_to_cpu(eq_entry->data[0]));
error_count = GAUDI2_NA_EVENT_CAUSE;
- event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+ hl_eq_cpld_shutdown_event_handle(hdev, event_type, &event_mask);
break;
case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
@@ -10281,8 +10593,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
gaudi2_print_event(hdev, event_type, true,
"No error cause for H/W event %u", event_type);
- if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
- reset_required) {
+ if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
if (reset_required ||
(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
reset_flags |= HL_DRV_RESET_HARD;
@@ -10312,6 +10623,7 @@ reset_device:
if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
hl_handle_critical_hw_err(hdev, event_type, &event_mask);
+ hl_debugfs_cfg_access_history_dump(hdev);
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
hl_device_cond_reset(hdev, reset_flags, event_mask);
}
@@ -10348,8 +10660,8 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
if (rc)
- dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
- hw_queue_id);
+ dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n",
+ GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
return rc;
}
@@ -10489,7 +10801,7 @@ end:
(u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
WREG32(sob_addr, 0);
- kfree(lin_dma_pkts_arr);
+ kvfree(lin_dma_pkts_arr);
return rc;
}
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index eee41387b269..bdf5c1bd2d63 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -240,6 +240,15 @@
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
+extern const char *gaudi2_engine_id_str[];
+extern const char *gaudi2_queue_id_str[];
+
+#define GAUDI2_ENG_ID_TO_STR(initiator) ((initiator) >= GAUDI2_ENGINE_ID_SIZE ? "not found" : \
+ gaudi2_engine_id_str[initiator])
+
+#define GAUDI2_QUEUE_ID_TO_STR(initiator) ((initiator) >= GAUDI2_QUEUE_ID_SIZE ? "not found" : \
+ gaudi2_queue_id_str[initiator])
+
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =
@@ -384,7 +393,7 @@ enum gaudi2_edma_id {
/* User interrupt count is aligned with HW CQ count.
* We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode
*/
-#define GAUDI2_NUM_USER_INTERRUPTS 255
+#define GAUDI2_NUM_USER_INTERRUPTS 64
#define GAUDI2_NUM_RESERVED_INTERRUPTS 1
#define GAUDI2_TOTAL_USER_INTERRUPTS (GAUDI2_NUM_USER_INTERRUPTS + GAUDI2_NUM_RESERVED_INTERRUPTS)
@@ -416,11 +425,11 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_EQ_ERROR,
+ GAUDI2_IRQ_NUM_USER_FIRST,
+ GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_RESERVED_FIRST,
- GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
+ GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_RESERVED_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
- GAUDI2_IRQ_NUM_USER_FIRST = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR + 1,
- GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1)
};
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
index 2423620ff358..bc3c57bda5cd 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
@@ -2426,7 +2426,7 @@ static int gaudi2_config_bmon(struct hl_device *hdev, struct hl_debug_params *pa
WREG32(base_reg + mmBMON_ADDRH_E3_OFFSET, 0);
WREG32(base_reg + mmBMON_REDUCTION_OFFSET, 0);
WREG32(base_reg + mmBMON_STM_TRC_OFFSET, 0x7 | (0xA << 8));
- WREG32(base_reg + mmBMON_CR_OFFSET, 0x77 | 0xf << 24);
+ WREG32(base_reg + mmBMON_CR_OFFSET, 0x41);
}
return 0;
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
index 34bf80c5a44b..307ccb912ccd 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
@@ -479,6 +479,7 @@ static const u32 gaudi2_pb_dcr0_edma0_unsecured_regs[] = {
mmDCORE0_EDMA0_CORE_CTX_TE_NUMROWS,
mmDCORE0_EDMA0_CORE_CTX_IDX,
mmDCORE0_EDMA0_CORE_CTX_IDX_INC,
+ mmDCORE0_EDMA0_CORE_WR_COMP_MAX_OUTSTAND,
mmDCORE0_EDMA0_CORE_RD_LBW_RATE_LIM_CFG,
mmDCORE0_EDMA0_QM_CQ_CFG0_0,
mmDCORE0_EDMA0_QM_CQ_CFG0_1,
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index 5a359c3bdc78..84768e306269 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -893,11 +893,8 @@ int goya_late_init(struct hl_device *hdev)
WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to enable PCI access from CPU %d\n", rc);
+ if (rc)
return rc;
- }
/* force setting to low frequency */
goya->curr_pll_profile = PLL_LOW;
@@ -2864,13 +2861,7 @@ static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
int goya_suspend(struct hl_device *hdev)
{
- int rc;
-
- rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
- if (rc)
- dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
-
- return rc;
+ return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
int goya_resume(struct hl_device *hdev)
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h
index 0231d6c55b4a..753d46a2836b 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h
@@ -63,9 +63,9 @@
#define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START 0xFFF0F80000000000ull
#define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END 0xFFF0FFFFFFFFFFFFull
-#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 256
+#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 127
-#define GAUDI2_MSIX_ENTRIES 512
+#define GAUDI2_MSIX_ENTRIES 128
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
index b2dbe1f64430..82d639990cca 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
@@ -330,9 +330,9 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
{ .fc_id = 149, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "EDMA7_ECC_SERR" },
{ .fc_id = 150, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
- .name = "HDMA4_ECC_SERR" },
+ .name = "EDMA4_ECC_SERR" },
{ .fc_id = 151, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
- .name = "HDMA5_ECC_SERR" },
+ .name = "EDMA5_ECC_SERR" },
{ .fc_id = 152, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "EDMA2_ECC_DERR" },
{ .fc_id = 153, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
@@ -856,55 +856,55 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
{ .fc_id = 412, .cpu_id = 84, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PCIE_ADDR_DEC_ERR" },
{ .fc_id = 413, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC0_AXI_ERR_RSP" },
+ .name = "DCORE0_TPC0_AXI_ERR_RSP" },
{ .fc_id = 414, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC1_AXI_ERR_RSP" },
+ .name = "DCORE0_TPC1_AXI_ERR_RSP" },
{ .fc_id = 415, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC2_AXI_ERR_RSP" },
+ .name = "DCORE0_TPC2_AXI_ERR_RSP" },
{ .fc_id = 416, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC3_AXI_ERR_RSP" },
+ .name = "DCORE0_TPC3_AXI_ERR_RSP" },
{ .fc_id = 417, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC4_AXI_ERR_RSP" },
+ .name = "DCORE0_TPC4_AXI_ERR_RSP" },
{ .fc_id = 418, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC5_AXI_ERR_RSP" },
+ .name = "DCORE0_TPC5_AXI_ERR_RSP" },
{ .fc_id = 419, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC6_AXI_ERR_RSP" },
+ .name = "DCORE1_TPC0_AXI_ERR_RSP" },
{ .fc_id = 420, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC7_AXI_ERR_RSP" },
+ .name = "DCORE1_TPC1_AXI_ERR_RSP" },
{ .fc_id = 421, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC8_AXI_ERR_RSP" },
+ .name = "DCORE1_TPC2_AXI_ERR_RSP" },
{ .fc_id = 422, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC9_AXI_ERR_RSP" },
+ .name = "DCORE1_TPC3_AXI_ERR_RSP" },
{ .fc_id = 423, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC10_AXI_ERR_RSP" },
+ .name = "DCORE1_TPC4_AXI_ERR_RSP" },
{ .fc_id = 424, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC11_AXI_ERR_RSP" },
+ .name = "DCORE1_TPC5_AXI_ERR_RSP" },
{ .fc_id = 425, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC12_AXI_ERR_RSP" },
+ .name = "DCORE2_TPC0_AXI_ERR_RSP" },
{ .fc_id = 426, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC13_AXI_ERR_RSP" },
+ .name = "DCORE2_TPC1_AXI_ERR_RSP" },
{ .fc_id = 427, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC14_AXI_ERR_RSP" },
+ .name = "DCORE2_TPC2_AXI_ERR_RSP" },
{ .fc_id = 428, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC15_AXI_ERR_RSP" },
+ .name = "DCORE2_TPC3_AXI_ERR_RSP" },
{ .fc_id = 429, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC16_AXI_ERR_RSP" },
+ .name = "DCORE2_TPC4_AXI_ERR_RSP" },
{ .fc_id = 430, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC17_AXI_ERR_RSP" },
+ .name = "DCORE2_TPC5_AXI_ERR_RSP" },
{ .fc_id = 431, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC18_AXI_ERR_RSP" },
+ .name = "DCORE3_TPC0_AXI_ERR_RSP" },
{ .fc_id = 432, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC19_AXI_ERR_RSP" },
+ .name = "DCORE3_TPC1_AXI_ERR_RSP" },
{ .fc_id = 433, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC20_AXI_ERR_RSP" },
+ .name = "DCORE3_TPC2_AXI_ERR_RSP" },
{ .fc_id = 434, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC21_AXI_ERR_RSP" },
+ .name = "DCORE3_TPC3_AXI_ERR_RSP" },
{ .fc_id = 435, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC22_AXI_ERR_RSP" },
+ .name = "DCORE3_TPC4_AXI_ERR_RSP" },
{ .fc_id = 436, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC23_AXI_ERR_RSP" },
+ .name = "DCORE3_TPC5_AXI_ERR_RSP" },
{ .fc_id = 437, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC24_AXI_ERR_RSP" },
+ .name = "DCORE4_TPC0_AXI_ERR_RSP" },
{ .fc_id = 438, .cpu_id = 86, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "AXI_ECC" },
{ .fc_id = 439, .cpu_id = 87, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
@@ -965,73 +965,73 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "MME3_CTRL_AXI_ERROR_RESPONSE" },
{ .fc_id = 467, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
.name = "MME3_QMAN_SW_ERROR" },
- { .fc_id = 468, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 468, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PSOC_MME_PLL_LOCK_ERR" },
- { .fc_id = 469, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 469, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PSOC_CPU_PLL_LOCK_ERR" },
- { .fc_id = 470, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 470, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_TPC_PLL_LOCK_ERR" },
- { .fc_id = 471, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 471, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_NIC_PLL_LOCK_ERR" },
- { .fc_id = 472, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 472, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_XBAR_MMU_PLL_LOCK_ERR" },
- { .fc_id = 473, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 473, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_XBAR_DMA_PLL_LOCK_ERR" },
- { .fc_id = 474, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 474, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_XBAR_IF_PLL_LOCK_ERR" },
- { .fc_id = 475, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 475, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_XBAR_BANK_PLL_LOCK_ERR" },
- { .fc_id = 476, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 476, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_XBAR_MMU_PLL_LOCK_ERR" },
- { .fc_id = 477, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 477, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_XBAR_DMA_PLL_LOCK_ERR" },
- { .fc_id = 478, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 478, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_XBAR_IF_PLL_LOCK_ERR" },
- { .fc_id = 479, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 479, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_XBAR_MESH_PLL_LOCK_ERR" },
- { .fc_id = 480, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 480, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_TPC_PLL_LOCK_ERR" },
- { .fc_id = 481, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 481, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_NIC_PLL_LOCK_ERR" },
- { .fc_id = 482, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 482, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PMMU_MME_PLL_LOCK_ERR" },
- { .fc_id = 483, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 483, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_TPC_PLL_LOCK_ERR" },
- { .fc_id = 484, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 484, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_PCI_PLL_LOCK_ERR" },
- { .fc_id = 485, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 485, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_XBAR_MMU_PLL_LOCK_ERR" },
- { .fc_id = 486, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 486, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_XBAR_DMA_PLL_LOCK_ERR" },
- { .fc_id = 487, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 487, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_XBAR_IF_PLL_LOCK_ERR" },
- { .fc_id = 488, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 488, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_XBAR_MESH_PLL_LOCK_ERR" },
- { .fc_id = 489, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 489, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_XBAR_MMU_PLL_LOCK_ERR" },
- { .fc_id = 490, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 490, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_XBAR_DMA_PLL_LOCK_ERR" },
- { .fc_id = 491, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 491, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_XBAR_IF_PLL_LOCK_ERR" },
- { .fc_id = 492, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 492, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_XBAR_BANK_PLL_LOCK_ERR" },
- { .fc_id = 493, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 493, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_TPC_PLL_LOCK_ERR" },
- { .fc_id = 494, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 494, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PSOC_VID_PLL_LOCK_ERR" },
- { .fc_id = 495, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 495, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PMMU_VID_PLL_LOCK_ERR" },
- { .fc_id = 496, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 496, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE3_HBM_PLL_LOCK_ERR" },
- { .fc_id = 497, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 497, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_XBAR_HBM_PLL_LOCK_ERR" },
- { .fc_id = 498, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 498, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE1_HBM_PLL_LOCK_ERR" },
- { .fc_id = 499, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 499, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE0_HBM_PLL_LOCK_ERR" },
- { .fc_id = 500, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 500, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_XBAR_HBM_PLL_LOCK_ERR" },
- { .fc_id = 501, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ { .fc_id = 501, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "DCORE2_HBM_PLL_LOCK_ERR" },
{ .fc_id = 502, .cpu_id = 93, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "CPU_AXI_ERR_RSP" },
@@ -1298,103 +1298,103 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC0_BMON_SPMU" },
{ .fc_id = 634, .cpu_id = 131, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC0_KERNEL_ERR" },
+ .name = "DCORE0_TPC0_KERNEL_ERR" },
{ .fc_id = 635, .cpu_id = 132, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC1_BMON_SPMU" },
{ .fc_id = 636, .cpu_id = 133, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC1_KERNEL_ERR" },
+ .name = "DCORE0_TPC1_KERNEL_ERR" },
{ .fc_id = 637, .cpu_id = 134, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC2_BMON_SPMU" },
{ .fc_id = 638, .cpu_id = 135, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC2_KERNEL_ERR" },
+ .name = "DCORE0_TPC2_KERNEL_ERR" },
{ .fc_id = 639, .cpu_id = 136, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC3_BMON_SPMU" },
{ .fc_id = 640, .cpu_id = 137, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC3_KERNEL_ERR" },
+ .name = "DCORE0_TPC3_KERNEL_ERR" },
{ .fc_id = 641, .cpu_id = 138, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC4_BMON_SPMU" },
{ .fc_id = 642, .cpu_id = 139, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC4_KERNEL_ERR" },
+ .name = "DCORE0_TPC4_KERNEL_ERR" },
{ .fc_id = 643, .cpu_id = 140, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC5_BMON_SPMU" },
{ .fc_id = 644, .cpu_id = 141, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC5_KERNEL_ERR" },
+ .name = "DCORE0_TPC5_KERNEL_ERR" },
{ .fc_id = 645, .cpu_id = 150, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC6_BMON_SPMU" },
{ .fc_id = 646, .cpu_id = 151, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC6_KERNEL_ERR" },
+ .name = "DCORE1_TPC0_KERNEL_ERR" },
{ .fc_id = 647, .cpu_id = 152, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC7_BMON_SPMU" },
{ .fc_id = 648, .cpu_id = 153, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC7_KERNEL_ERR" },
+ .name = "DCORE1_TPC1_KERNEL_ERR" },
{ .fc_id = 649, .cpu_id = 146, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC8_BMON_SPMU" },
{ .fc_id = 650, .cpu_id = 147, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC8_KERNEL_ERR" },
+ .name = "DCORE1_TPC2_KERNEL_ERR" },
{ .fc_id = 651, .cpu_id = 148, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC9_BMON_SPMU" },
{ .fc_id = 652, .cpu_id = 149, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC9_KERNEL_ERR" },
+ .name = "DCORE1_TPC3_KERNEL_ERR" },
{ .fc_id = 653, .cpu_id = 142, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC10_BMON_SPMU" },
{ .fc_id = 654, .cpu_id = 143, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC10_KERNEL_ERR" },
+ .name = "DCORE1_TPC4_KERNEL_ERR" },
{ .fc_id = 655, .cpu_id = 144, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC11_BMON_SPMU" },
{ .fc_id = 656, .cpu_id = 145, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC11_KERNEL_ERR" },
+ .name = "DCORE1_TPC5_KERNEL_ERR" },
{ .fc_id = 657, .cpu_id = 162, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC12_BMON_SPMU" },
{ .fc_id = 658, .cpu_id = 163, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC12_KERNEL_ERR" },
+ .name = "DCORE2_TPC0_KERNEL_ERR" },
{ .fc_id = 659, .cpu_id = 164, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC13_BMON_SPMU" },
{ .fc_id = 660, .cpu_id = 165, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC13_KERNEL_ERR" },
+ .name = "DCORE2_TPC1_KERNEL_ERR" },
{ .fc_id = 661, .cpu_id = 158, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC14_BMON_SPMU" },
{ .fc_id = 662, .cpu_id = 159, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC14_KERNEL_ERR" },
+ .name = "DCORE2_TPC2_KERNEL_ERR" },
{ .fc_id = 663, .cpu_id = 160, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC15_BMON_SPMU" },
{ .fc_id = 664, .cpu_id = 161, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC15_KERNEL_ERR" },
+ .name = "DCORE2_TPC3_KERNEL_ERR" },
{ .fc_id = 665, .cpu_id = 154, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC16_BMON_SPMU" },
{ .fc_id = 666, .cpu_id = 155, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC16_KERNEL_ERR" },
+ .name = "DCORE2_TPC4_KERNEL_ERR" },
{ .fc_id = 667, .cpu_id = 156, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC17_BMON_SPMU" },
{ .fc_id = 668, .cpu_id = 157, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC17_KERNEL_ERR" },
+ .name = "DCORE2_TPC5_KERNEL_ERR" },
{ .fc_id = 669, .cpu_id = 166, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC18_BMON_SPMU" },
{ .fc_id = 670, .cpu_id = 167, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC18_KERNEL_ERR" },
+ .name = "DCORE3_TPC0_KERNEL_ERR" },
{ .fc_id = 671, .cpu_id = 168, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC19_BMON_SPMU" },
{ .fc_id = 672, .cpu_id = 169, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC19_KERNEL_ERR" },
+ .name = "DCORE3_TPC1_KERNEL_ERR" },
{ .fc_id = 673, .cpu_id = 170, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC20_BMON_SPMU" },
{ .fc_id = 674, .cpu_id = 171, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC20_KERNEL_ERR" },
+ .name = "DCORE3_TPC2_KERNEL_ERR" },
{ .fc_id = 675, .cpu_id = 172, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC21_BMON_SPMU" },
{ .fc_id = 676, .cpu_id = 173, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC21_KERNEL_ERR" },
+ .name = "DCORE3_TPC3_KERNEL_ERR" },
{ .fc_id = 677, .cpu_id = 174, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC22_BMON_SPMU" },
{ .fc_id = 678, .cpu_id = 175, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC22_KERNEL_ERR" },
+ .name = "DCORE3_TPC4_KERNEL_ERR" },
{ .fc_id = 679, .cpu_id = 176, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC23_BMON_SPMU" },
{ .fc_id = 680, .cpu_id = 177, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC23_KERNEL_ERR" },
+ .name = "DCORE3_TPC5_KERNEL_ERR" },
{ .fc_id = 681, .cpu_id = 178, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC24_BMON_SPMU" },
{ .fc_id = 682, .cpu_id = 179, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC24_KERNEL_ERR" },
+ .name = "DCORE4_TPC0_KERNEL_ERR" },
{ .fc_id = 683, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "" },
{ .fc_id = 684, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
@@ -1827,8 +1827,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "DEC0_BMON_SPMU" },
{ .fc_id = 898, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
.name = "DEC1_SPI" },
- { .fc_id = 899, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "DEC1_SPI" },
+ { .fc_id = 899, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+ .name = "DEC1_BMON_SPMU" },
{ .fc_id = 900, .cpu_id = 331, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
.name = "DEC2_SPI" },
{ .fc_id = 901, .cpu_id = 331, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
@@ -2377,8 +2377,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "" },
{ .fc_id = 1173, .cpu_id = 479, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "" },
- { .fc_id = 1174, .cpu_id = 480, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
- .name = "" },
+ { .fc_id = 1174, .cpu_id = 480, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+ .name = "PSOC_DMA_QM" },
{ .fc_id = 1175, .cpu_id = 481, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "" },
{ .fc_id = 1176, .cpu_id = 482, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
@@ -2442,55 +2442,55 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
{ .fc_id = 1205, .cpu_id = 511, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "" },
{ .fc_id = 1206, .cpu_id = 512, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC0_QM" },
+ .name = "DCORE0_TPC0_QM" },
{ .fc_id = 1207, .cpu_id = 513, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC1_QM" },
+ .name = "DCORE0_TPC1_QM" },
{ .fc_id = 1208, .cpu_id = 514, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC2_QM" },
+ .name = "DCORE0_TPC2_QM" },
{ .fc_id = 1209, .cpu_id = 515, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC3_QM" },
+ .name = "DCORE0_TPC3_QM" },
{ .fc_id = 1210, .cpu_id = 516, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC4_QM" },
+ .name = "DCORE0_TPC4_QM" },
{ .fc_id = 1211, .cpu_id = 517, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC5_QM" },
+ .name = "DCORE0_TPC5_QM" },
{ .fc_id = 1212, .cpu_id = 518, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC6_QM" },
+ .name = "DCORE1_TPC0_QM" },
{ .fc_id = 1213, .cpu_id = 519, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC7_QM" },
+ .name = "DCORE1_TPC1_QM" },
{ .fc_id = 1214, .cpu_id = 520, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC8_QM" },
+ .name = "DCORE1_TPC2_QM" },
{ .fc_id = 1215, .cpu_id = 521, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC9_QM" },
+ .name = "DCORE1_TPC3_QM" },
{ .fc_id = 1216, .cpu_id = 522, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC10_QM" },
+ .name = "DCORE1_TPC4_QM" },
{ .fc_id = 1217, .cpu_id = 523, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC11_QM" },
+ .name = "DCORE1_TPC5_QM" },
{ .fc_id = 1218, .cpu_id = 524, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC12_QM" },
+ .name = "DCORE2_TPC0_QM" },
{ .fc_id = 1219, .cpu_id = 525, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC13_QM" },
+ .name = "DCORE2_TPC1_QM" },
{ .fc_id = 1220, .cpu_id = 526, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC14_QM" },
+ .name = "DCORE2_TPC2_QM" },
{ .fc_id = 1221, .cpu_id = 527, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC15_QM" },
+ .name = "DCORE2_TPC3_QM" },
{ .fc_id = 1222, .cpu_id = 528, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC16_QM" },
+ .name = "DCORE2_TPC4_QM" },
{ .fc_id = 1223, .cpu_id = 529, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC17_QM" },
+ .name = "DCORE2_TPC5_QM" },
{ .fc_id = 1224, .cpu_id = 530, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC18_QM" },
+ .name = "DCORE3_TPC0_QM" },
{ .fc_id = 1225, .cpu_id = 531, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC19_QM" },
+ .name = "DCORE3_TPC1_QM" },
{ .fc_id = 1226, .cpu_id = 532, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC20_QM" },
+ .name = "DCORE3_TPC2_QM" },
{ .fc_id = 1227, .cpu_id = 533, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC21_QM" },
+ .name = "DCORE3_TPC3_QM" },
{ .fc_id = 1228, .cpu_id = 534, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC22_QM" },
+ .name = "DCORE3_TPC4_QM" },
{ .fc_id = 1229, .cpu_id = 535, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC23_QM" },
+ .name = "DCORE3_TPC5_QM" },
{ .fc_id = 1230, .cpu_id = 536, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
- .name = "TPC24_QM" },
+ .name = "DCORE4_TPC0_QM" },
{ .fc_id = 1231, .cpu_id = 537, .valid = 0, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "" },
{ .fc_id = 1232, .cpu_id = 538, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE,
@@ -2674,19 +2674,19 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
.name = "DEV_RESET_REQ" },
{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_PWR_BRK_ENTRY" },
+ .name = "PWR_BRK_ENTRY" },
{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_PWR_BRK_EXT" },
+ .name = "PWR_BRK_EXT" },
{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_PWR_RD_MODE0" },
+ .name = "PWR_RD_MODE0" },
{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_PWR_RD_MODE1" },
+ .name = "PWR_RD_MODE1" },
{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_PWR_RD_MODE2" },
+ .name = "PWR_RD_MODE2" },
{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_PWR_RD_MODE3" },
+ .name = "PWR_RD_MODE3" },
{ .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
- .name = "ARC_EQ_HEARTBEAT" },
+ .name = "EQ_HEARTBEAT" },
};
#endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h
index 18ca147b1c86..6ea936c9594e 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h
@@ -45,6 +45,13 @@
#define GAUDI2_ARM_RX_MB_OFFSET (GAUDI2_ARM_RX_MB_ADDR - \
GAUDI2_SP_SRAM_BASE_ADDR)
+#define POWER_MODE_LEVELS { \
+ 150000, /* 00 */ \
+ 250000, /* 01 */ \
+ 400000, /* 10 */ \
+ /* 11: Normal mode */ \
+}
+
enum gaudi2_fw_status {
GAUDI2_PID_STATUS_UP = 0x1, /* PID on ARC0 is up */
GAUDI2_ARM_STATUS_UP = 0x2, /* ARM Linux Boot complete */
@@ -52,26 +59,6 @@ enum gaudi2_fw_status {
GAUDI2_STATUS_LAST = 0xFF
};
-struct gaudi2_cold_rst_data {
- union {
- struct {
- u32 recovery_flag: 1;
- u32 validation_flag: 1;
- u32 efuse_read_flag: 1;
- u32 spsram_init_done : 1;
- u32 fake_security_enable : 1;
- u32 fake_sig_validation_en : 1;
- u32 bist_skip_enable : 1;
- u32 reserved1 : 1;
- u32 fake_bis_compliant : 1;
- u32 wd_rst_cause_arm : 1;
- u32 wd_rst_cause_arcpid : 1;
- u32 reserved : 21;
- };
- __le32 data;
- };
-};
-
enum gaudi2_rst_src {
HL_COLD_RST = 1,
HL_MANUAL_RST = 2,
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h
index f3eaeb6d9b7e..1e9c056e437d 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h
@@ -58,4 +58,12 @@
#define mmWD_GPIO_DATAOUT_REG mmPSOC_GPIO3_DATAOUT
#define mmSTM_PROFILER_SPE_REG mmPSOC_STM_STMSPER
+/* Registers below are used to pass the boot_if data between ARM and ARC1 */
+#define mmARM_MSG_BOOT_ERR_SET mmCPU_IF_SPECIAL_GLBL_SPARE_0
+#define mmARM_MSG_BOOT_ERR_CLR mmCPU_IF_SPECIAL_GLBL_SPARE_1
+#define mmARM_MSG_BOOT_DEV_STS_SET mmCPU_IF_SPECIAL_GLBL_SPARE_2
+#define mmARM_MSG_BOOT_DEV_STS_CLR mmCPU_IF_SPECIAL_GLBL_SPARE_3
+#define mmMGMT_MSG_BOOT_ERR mmCPU_MSTR_IF_SPECIAL_GLBL_SPARE_0
+#define mmMGMT_MSG_BOOT_DEV_STS mmCPU_MSTR_IF_SPECIAL_GLBL_SPARE_1
+
#endif /* GAUDI2_REG_MAP_H_ */
diff --git a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
index 4f951cada077..a75faa00197f 100644
--- a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
+++ b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
@@ -25,7 +25,8 @@ enum hl_revision_id {
REV_ID_INVALID = 0x00,
REV_ID_A = 0x01,
REV_ID_B = 0x02,
- REV_ID_C = 0x03
+ REV_ID_C = 0x03,
+ REV_ID_D = 0x04
};
#endif /* INCLUDE_PCI_GENERAL_H_ */
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 682c53245286..9e055b5ce03d 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -8,6 +8,7 @@ config DRM_ACCEL_IVPU
select FW_LOADER
select DRM_GEM_SHMEM_HELPER
select GENERIC_ALLOCATOR
+ select WANT_DEV_COREDUMP
help
Choose this option if you have a system with an 14th generation
Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
@@ -15,3 +16,12 @@ config DRM_ACCEL_IVPU
and Deep Learning applications.
If "M" is selected, the module will be called intel_vpu.
+
+config DRM_ACCEL_IVPU_DEBUG
+ bool "Intel NPU debug mode"
+ depends on DRM_ACCEL_IVPU
+ help
+ Choose this option to enable additional
+ debug features for the Intel NPU driver:
+ - Always print debug messages regardless of dyndbg config,
+ - Enable unsafe module params.
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index 95ff7ad16338..dbf76b8a5b4c 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -1,20 +1,30 @@
# SPDX-License-Identifier: GPL-2.0-only
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2024 Intel Corporation
intel_vpu-y := \
ivpu_drv.o \
ivpu_fw.o \
ivpu_fw_log.o \
ivpu_gem.o \
- ivpu_hw_37xx.o \
- ivpu_hw_40xx.o \
+ ivpu_gem_userptr.o \
+ ivpu_hw.o \
+ ivpu_hw_btrs.o \
+ ivpu_hw_ip.o \
ivpu_ipc.o \
ivpu_job.o \
ivpu_jsm_msg.o \
ivpu_mmu.o \
ivpu_mmu_context.o \
- ivpu_pm.o
+ ivpu_ms.o \
+ ivpu_pm.o \
+ ivpu_sysfs.o \
+ ivpu_trace_points.o
intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
+intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o
obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
+
+subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG
+
+CFLAGS_ivpu_trace_points.o = -I$(src)
diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c
new file mode 100644
index 000000000000..16ad0c30818c
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_coredump.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#include <linux/devcoredump.h>
+#include <linux/firmware.h>
+
+#include "ivpu_coredump.h"
+#include "ivpu_fw.h"
+#include "ivpu_gem.h"
+#include "vpu_boot_api.h"
+
+#define CRASH_DUMP_HEADER "Intel NPU crash dump"
+#define CRASH_DUMP_HEADERS_SIZE SZ_4K
+
+void ivpu_dev_coredump(struct ivpu_device *vdev)
+{
+ struct drm_print_iterator pi = {};
+ struct drm_printer p;
+ size_t coredump_size;
+ char *coredump;
+
+ coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE +
+ ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb);
+ coredump = vmalloc(coredump_size);
+ if (!coredump)
+ return;
+
+ pi.data = coredump;
+ pi.remain = coredump_size;
+ p = drm_coredump_printer(&pi);
+
+ drm_printf(&p, "%s\n", CRASH_DUMP_HEADER);
+ drm_printf(&p, "FW version: %s\n", vdev->fw->version);
+ ivpu_fw_log_print(vdev, false, &p);
+
+ dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL);
+}
diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h
new file mode 100644
index 000000000000..8efb09d02441
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_coredump.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_COREDUMP_H__
+#define __IVPU_COREDUMP_H__
+
+#include <drm/drm_print.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_fw_log.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+void ivpu_dev_coredump(struct ivpu_device *vdev);
+#else
+static inline void ivpu_dev_coredump(struct ivpu_device *vdev)
+{
+ struct drm_printer p = drm_info_printer(vdev->drm.dev);
+
+ ivpu_fw_log_print(vdev, false, &p);
+}
+#endif
+
+#endif /* __IVPU_COREDUMP_H__ */
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index e07e447d08d1..3bd85ee6c26b 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_file.h>
@@ -45,6 +46,14 @@ static int fw_name_show(struct seq_file *s, void *v)
return 0;
}
+static int fw_version_show(struct seq_file *s, void *v)
+{
+ struct ivpu_device *vdev = seq_to_ivpu(s);
+
+ seq_printf(s, "%s\n", vdev->fw->version);
+ return 0;
+}
+
static int fw_trace_capability_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = seq_to_ivpu(s);
@@ -108,41 +117,66 @@ static int reset_pending_show(struct seq_file *s, void *v)
return 0;
}
+static int firewall_irq_counter_show(struct seq_file *s, void *v)
+{
+ struct ivpu_device *vdev = seq_to_ivpu(s);
+
+ seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter));
+ return 0;
+}
+
static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"bo_list", bo_list_show, 0},
{"fw_name", fw_name_show, 0},
+ {"fw_version", fw_version_show, 0},
{"fw_trace_capability", fw_trace_capability_show, 0},
{"fw_trace_config", fw_trace_config_show, 0},
{"last_bootmode", last_bootmode_show, 0},
{"reset_counter", reset_counter_show, 0},
{"reset_pending", reset_pending_show, 0},
+ {"firewall_irq_counter", firewall_irq_counter_show, 0},
};
+static int dvfs_mode_get(void *data, u64 *dvfs_mode)
+{
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+ *dvfs_mode = vdev->fw->dvfs_mode;
+ return 0;
+}
+
+static int dvfs_mode_set(void *data, u64 dvfs_mode)
+{
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+ vdev->fw->dvfs_mode = (u32)dvfs_mode;
+ return pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n");
+
static ssize_t
-dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
- struct ivpu_fw_info *fw = vdev->fw;
- u32 dvfs_mode;
+ char buffer[VPU_DYNDBG_CMD_MAX_LEN] = {};
int ret;
- ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
- if (ret < 0)
- return ret;
-
- fw->dvfs_mode = dvfs_mode;
+ if (size >= VPU_DYNDBG_CMD_MAX_LEN)
+ return -EINVAL;
- ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
- if (ret)
+ ret = strncpy_from_user(buffer, user_buf, size);
+ if (ret < 0)
return ret;
+ ivpu_jsm_dyndbg_control(vdev, buffer, size);
return size;
}
-static const struct file_operations dvfs_mode_fops = {
+static const struct file_operations fw_dyndbg_fops = {
.owner = THIS_MODULE,
.open = simple_open,
- .write = dvfs_mode_fops_write,
+ .write = fw_dyndbg_fops_write,
};
static int fw_log_show(struct seq_file *s, void *v)
@@ -168,7 +202,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l
if (!size)
return -EINVAL;
- ivpu_fw_log_clear(vdev);
+ ivpu_fw_log_mark_read(vdev);
return size;
}
@@ -298,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
return -EINVAL;
ret = ivpu_rpm_get(vdev);
- if (ret)
+ if (ret < 0)
return ret;
ivpu_pm_trigger_recovery(vdev, "debugfs");
@@ -313,26 +347,127 @@ static const struct file_operations ivpu_force_recovery_fops = {
.write = ivpu_force_recovery_fn,
};
+static int ivpu_reset_engine_fn(void *data, u64 val)
+{
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+ return ivpu_jsm_reset_engine(vdev, (u32)val);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, ivpu_reset_engine_fn, "0x%02llx\n");
+
+static int ivpu_resume_engine_fn(void *data, u64 val)
+{
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+ return ivpu_jsm_hws_resume_engine(vdev, (u32)val);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n");
+
+static int dct_active_get(void *data, u64 *active_percent)
+{
+ struct ivpu_device *vdev = data;
+
+ *active_percent = vdev->pm->dct_active_percent;
+
+ return 0;
+}
+
+static int dct_active_set(void *data, u64 active_percent)
+{
+ struct ivpu_device *vdev = data;
+ int ret;
+
+ if (active_percent > 100)
+ return -EINVAL;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ if (active_percent)
+ ret = ivpu_pm_dct_enable(vdev, active_percent);
+ else
+ ret = ivpu_pm_dct_disable(vdev);
+
+ ivpu_rpm_put(vdev);
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n");
+
+static void print_priority_band(struct seq_file *s, struct ivpu_hw_info *hw,
+ int band, const char *name)
+{
+ seq_printf(s, "%-9s: grace_period %9u process_grace_period %9u process_quantum %9u\n",
+ name,
+ hw->hws.grace_period[band],
+ hw->hws.process_grace_period[band],
+ hw->hws.process_quantum[band]);
+}
+
+static int priority_bands_show(struct seq_file *s, void *v)
+{
+ struct ivpu_device *vdev = s->private;
+ struct ivpu_hw_info *hw = vdev->hw;
+
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE, "Idle");
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL, "Normal");
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS, "Focus");
+ print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME, "Realtime");
+
+ return 0;
+}
+
+static int priority_bands_fops_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, priority_bands_show, inode->i_private);
+}
+
static ssize_t
-ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
- struct ivpu_device *vdev = file->private_data;
+ struct seq_file *s = file->private_data;
+ struct ivpu_device *vdev = s->private;
+ char buf[64];
+ u32 grace_period;
+ u32 process_grace_period;
+ u32 process_quantum;
+ u32 band;
+ int ret;
- if (!size)
+ if (size >= sizeof(buf))
return -EINVAL;
- if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
- return -ENODEV;
- if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
- return -ENODEV;
+ ret = simple_write_to_buffer(buf, sizeof(buf) - 1, pos, user_buf, size);
+ if (ret < 0)
+ return ret;
+
+ buf[ret] = '\0';
+ ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
+ &process_quantum);
+ if (ret != 4)
+ return -EINVAL;
+
+ if (band >= VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT)
+ return -EINVAL;
+
+ vdev->hw->hws.grace_period[band] = grace_period;
+ vdev->hw->hws.process_grace_period[band] = process_grace_period;
+ vdev->hw->hws.process_quantum[band] = process_quantum;
return size;
}
-static const struct file_operations ivpu_reset_engine_fops = {
+static const struct file_operations ivpu_hws_priority_bands_fops = {
.owner = THIS_MODULE,
- .open = simple_open,
- .write = ivpu_reset_engine_fn,
+ .open = priority_bands_fops_open,
+ .write = priority_bands_fops_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
void ivpu_debugfs_init(struct ivpu_device *vdev)
@@ -344,9 +479,11 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
- debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+ debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev,
&dvfs_mode_fops);
+ debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev,
+ &fw_dyndbg_fops);
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
&fw_log_fops);
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
@@ -355,11 +492,21 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
&fw_trace_hw_comp_mask_fops);
debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
&fw_trace_level_fops);
+ debugfs_create_file("hws_priority_bands", 0200, debugfs_root, vdev,
+ &ivpu_hws_priority_bands_fops);
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
+ debugfs_create_file("resume_engine", 0200, debugfs_root, vdev,
+ &ivpu_resume_engine_fops);
- if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
+ if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX) {
debugfs_create_file("fw_profiling_freq_drive", 0200,
debugfs_root, vdev, &fw_profiling_freq_fops);
+ debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops);
+ }
+
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("fail_hw", debugfs_root, &ivpu_hw_failure);
+#endif
}
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 51d3f1a55d02..3d6fccdefdd6 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
+#include <linux/workqueue.h>
+#include <generated/utsrelease.h>
#include <drm/drm_accel.h>
#include <drm/drm_file.h>
@@ -14,7 +16,7 @@
#include <drm/drm_ioctl.h>
#include <drm/drm_prime.h>
-#include "vpu_boot_api.h"
+#include "ivpu_coredump.h"
#include "ivpu_debugfs.h"
#include "ivpu_drv.h"
#include "ivpu_fw.h"
@@ -26,22 +28,24 @@
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
+#include "ivpu_ms.h"
#include "ivpu_pm.h"
+#include "ivpu_sysfs.h"
+#include "vpu_boot_api.h"
#ifndef DRIVER_VERSION_STR
-#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \
- __stringify(DRM_IVPU_DRIVER_MINOR) "."
+#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE
#endif
-static struct lock_class_key submitted_jobs_xa_lock_class_key;
-
int ivpu_dbg_mask;
module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
int ivpu_test_mode;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
+#endif
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@@ -51,10 +55,18 @@ u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
+int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO;
+module_param_named(sched_mode, ivpu_sched_mode, int, 0444);
+MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler (supported on 27XX - 50XX), 1 - Use HW scheduler");
+
bool ivpu_disable_mmu_cont_pages;
-module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0644);
+module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444);
MODULE_PARM_DESC(disable_mmu_cont_pages, "Disable MMU contiguous pages optimization");
+bool ivpu_force_snoop;
+module_param_named(force_snoop, ivpu_force_snoop, bool, 0444);
+MODULE_PARM_DESC(force_snoop, "Force snooping for NPU host memory access");
+
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
{
struct ivpu_device *vdev = file_priv->vdev;
@@ -74,9 +86,8 @@ static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *fi
ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id);
ivpu_cmdq_release_all_locked(file_priv);
- ivpu_jsm_context_release(vdev, file_priv->ctx.id);
ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx);
- ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+ ivpu_mmu_context_fini(vdev, &file_priv->ctx);
file_priv->bound = false;
drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id));
}
@@ -94,9 +105,12 @@ static void file_priv_release(struct kref *ref)
pm_runtime_get_sync(vdev->drm.dev);
mutex_lock(&vdev->context_list_lock);
file_priv_unbind(vdev, file_priv);
+ drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa));
+ xa_destroy(&file_priv->cmdq_xa);
mutex_unlock(&vdev->context_list_lock);
pm_runtime_put_autosuspend(vdev->drm.dev);
+ mutex_destroy(&file_priv->ms_lock);
mutex_destroy(&file_priv->lock);
kfree(file_priv);
}
@@ -106,8 +120,6 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
struct ivpu_file_priv *file_priv = *link;
struct ivpu_device *vdev = file_priv->vdev;
- drm_WARN_ON(&vdev->drm, !file_priv);
-
ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n",
file_priv->ctx.id, kref_read(&file_priv->ref));
@@ -115,20 +127,20 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
kref_put(&file_priv->ref, file_priv_release);
}
-static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param *args)
+bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability)
{
- switch (args->index) {
+ switch (capability) {
case DRM_IVPU_CAP_METRIC_STREAMER:
- args->value = 0;
- break;
+ return true;
case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
- args->value = 1;
- break;
+ return true;
+ case DRM_IVPU_CAP_BO_CREATE_FROM_USERPTR:
+ return true;
+ case DRM_IVPU_CAP_MANAGE_CMDQ:
+ return vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW;
default:
- return -EINVAL;
+ return false;
}
-
- return 0;
}
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -154,7 +166,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
+ args->value = ivpu_hw_dpu_max_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -188,7 +200,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->hw->sku;
break;
case DRM_IVPU_PARAM_CAPABILITIES:
- ret = ivpu_get_capabilities(vdev, args);
+ args->value = ivpu_is_capable(vdev, args->index);
+ break;
+ case DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE:
+ args->value = ivpu_fw_preempt_buf_size(vdev);
break;
default:
ret = -EINVAL;
@@ -228,10 +243,13 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
goto err_dev_exit;
}
+ INIT_LIST_HEAD(&file_priv->ms_instance_list);
+
file_priv->vdev = vdev;
file_priv->bound = true;
kref_init(&file_priv->ref);
mutex_init(&file_priv->lock);
+ mutex_init(&file_priv->ms_lock);
mutex_lock(&vdev->context_list_lock);
@@ -242,9 +260,14 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
goto err_unlock;
}
- ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
- if (ret)
- goto err_xa_erase;
+ ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id);
+
+ file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
+ file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK;
+
+ xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1);
+ file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID;
+ file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID;
mutex_unlock(&vdev->context_list_lock);
drm_dev_exit(idx);
@@ -256,10 +279,9 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
return 0;
-err_xa_erase:
- xa_erase_irq(&vdev->context_xa, ctx_id);
err_unlock:
mutex_unlock(&vdev->context_list_lock);
+ mutex_destroy(&file_priv->ms_lock);
mutex_destroy(&file_priv->lock);
kfree(file_priv);
err_dev_exit:
@@ -275,6 +297,7 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file)
ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n",
file_priv->ctx.id, current->comm, task_pid_nr(current));
+ ivpu_ms_cleanup(file_priv);
ivpu_file_priv_put(&file_priv);
}
@@ -285,6 +308,14 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_START, ivpu_ms_start_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_CMDQ_CREATE, ivpu_cmdq_create_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_CMDQ_DESTROY, ivpu_cmdq_destroy_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_CMDQ_SUBMIT, ivpu_cmdq_submit_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE_FROM_USERPTR, ivpu_bo_create_from_userptr_ioctl, 0),
};
static int ivpu_wait_for_ready(struct ivpu_device *vdev)
@@ -301,7 +332,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
while (1) {
- ivpu_ipc_irq_handler(vdev, NULL);
+ ivpu_ipc_irq_handler(vdev);
ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
break;
@@ -323,6 +354,21 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
return ret;
}
+static int ivpu_hw_sched_init(struct ivpu_device *vdev)
+{
+ int ret = 0;
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_jsm_hws_setup_priority_bands(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
/**
* ivpu_boot() - Start VPU firmware
* @vdev: VPU device
@@ -334,8 +380,10 @@ int ivpu_boot(struct ivpu_device *vdev)
{
int ret;
- /* Update boot params located at first 4KB of FW memory */
- ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
+ drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter));
+ drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+
+ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem_bp));
ret = ivpu_hw_boot_fw(vdev);
if (ret) {
@@ -346,23 +394,44 @@ int ivpu_boot(struct ivpu_device *vdev)
ret = ivpu_wait_for_ready(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
- ivpu_hw_diagnose_failure(vdev);
- ivpu_mmu_evtq_dump(vdev);
- ivpu_fw_log_dump(vdev);
- return ret;
+ goto err_diagnose_failure;
}
ivpu_hw_irq_clear(vdev);
enable_irq(vdev->irq);
ivpu_hw_irq_enable(vdev);
ivpu_ipc_enable(vdev);
+
+ if (ivpu_fw_is_cold_boot(vdev)) {
+ ret = ivpu_pm_dct_init(vdev);
+ if (ret)
+ goto err_disable_ipc;
+
+ ret = ivpu_hw_sched_init(vdev);
+ if (ret)
+ goto err_disable_ipc;
+ }
+
return 0;
+
+err_disable_ipc:
+ ivpu_ipc_disable(vdev);
+ ivpu_hw_irq_disable(vdev);
+ disable_irq(vdev->irq);
+err_diagnose_failure:
+ ivpu_hw_diagnose_failure(vdev);
+ ivpu_mmu_evtq_dump(vdev);
+ ivpu_dev_coredump(vdev);
+ return ret;
}
void ivpu_prepare_for_reset(struct ivpu_device *vdev)
{
ivpu_hw_irq_disable(vdev);
disable_irq(vdev->irq);
+ flush_work(&vdev->irq_ipc_work);
+ flush_work(&vdev->irq_dct_work);
+ flush_work(&vdev->context_abort_work);
ivpu_ipc_disable(vdev);
ivpu_mmu_disable(vdev);
}
@@ -386,6 +455,9 @@ int ivpu_shutdown(struct ivpu_device *vdev)
static const struct file_operations ivpu_fops = {
.owner = THIS_MODULE,
DRM_ACCEL_FOPS,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
};
static const struct drm_driver driver = {
@@ -395,25 +467,20 @@ static const struct drm_driver driver = {
.postclose = ivpu_postclose,
.gem_create_object = ivpu_gem_create_object,
- .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
+ .gem_prime_import = ivpu_gem_prime_import,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
.fops = &ivpu_fops,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_memory_stats,
+#endif
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
- .major = DRM_IVPU_DRIVER_MAJOR,
- .minor = DRM_IVPU_DRIVER_MINOR,
-};
-
-static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
-{
- struct ivpu_device *vdev = arg;
- return ivpu_ipc_irq_thread_handler(vdev);
-}
+ .major = 1,
+};
static int ivpu_irq_init(struct ivpu_device *vdev)
{
@@ -426,10 +493,16 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
return ret;
}
+ INIT_WORK(&vdev->irq_ipc_work, ivpu_ipc_irq_work_fn);
+ INIT_WORK(&vdev->irq_dct_work, ivpu_pm_irq_dct_work_fn);
+ INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_work_fn);
+
+ ivpu_irq_handlers_init(vdev);
+
vdev->irq = pci_irq_vector(pdev, 0);
- ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
- ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+ ret = devm_request_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler,
+ IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
if (ret)
ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
@@ -505,28 +578,32 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
if (!vdev->pm)
return -ENOMEM;
- if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX) {
- vdev->hw->ops = &ivpu_hw_40xx_ops;
+ if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX)
vdev->hw->dma_bits = 48;
- } else {
- vdev->hw->ops = &ivpu_hw_37xx_ops;
+ else
vdev->hw->dma_bits = 38;
- }
vdev->platform = IVPU_PLATFORM_INVALID;
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
+ atomic_set(&vdev->job_timeout_counter, 0);
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
- lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
+ vdev->db_limit.min = IVPU_MIN_DB;
+ vdev->db_limit.max = IVPU_MAX_DB;
+
ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
if (ret)
goto err_xa_destroy;
+ ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
+ if (ret)
+ goto err_xa_destroy;
+
ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
if (ret)
goto err_xa_destroy;
@@ -540,7 +617,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
goto err_xa_destroy;
/* Init basic HW info based on buttress registers which are accessible before power up */
- ret = ivpu_hw_info_init(vdev);
+ ret = ivpu_hw_init(vdev);
if (ret)
goto err_xa_destroy;
@@ -549,9 +626,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
if (ret)
goto err_shutdown;
- ret = ivpu_mmu_global_context_init(vdev);
- if (ret)
- goto err_shutdown;
+ ivpu_mmu_global_context_init(vdev);
ret = ivpu_mmu_init(vdev);
if (ret)
@@ -612,13 +687,14 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
+ ivpu_jobs_abort_all(vdev);
+ ivpu_pm_disable_recovery(vdev);
ivpu_pm_disable(vdev);
ivpu_prepare_for_reset(vdev);
ivpu_shutdown(vdev);
- ivpu_jobs_abort_all(vdev);
+ ivpu_ms_cleanup_all(vdev);
ivpu_job_done_consumer_fini(vdev);
- ivpu_pm_cancel_recovery(vdev);
ivpu_bo_unbind_all_user_contexts(vdev);
ivpu_ipc_fini(vdev);
@@ -638,6 +714,9 @@ static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_WCL) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_NVL) },
{ }
};
MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
@@ -658,6 +737,7 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return ret;
ivpu_debugfs_init(vdev);
+ ivpu_sysfs_init(vdev);
ret = drm_dev_register(&vdev->drm, 0);
if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index bb4374d0eaec..5b34b6f50e69 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
@@ -21,14 +21,23 @@
#define DRIVER_NAME "intel_vpu"
#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
-#define DRIVER_DATE "20230117"
-#define PCI_DEVICE_ID_MTL 0x7d1d
-#define PCI_DEVICE_ID_ARL 0xad1d
-#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_MTL 0x7d1d
+#define PCI_DEVICE_ID_ARL 0xad1d
+#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_PTL_P 0xb03e
+#define PCI_DEVICE_ID_WCL 0xfd3e
+#define PCI_DEVICE_ID_NVL 0xd71d
-#define IVPU_HW_37XX 37
-#define IVPU_HW_40XX 40
+#define IVPU_HW_IP_37XX 37
+#define IVPU_HW_IP_40XX 40
+#define IVPU_HW_IP_50XX 50
+#define IVPU_HW_IP_60XX 60
+
+#define IVPU_HW_IP_REV_LNL_B0 4
+
+#define IVPU_HW_BTRS_MTL 1
+#define IVPU_HW_BTRS_LNL 2
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
/* SSID 1 is used by the VPU to represent reserved context */
@@ -39,13 +48,23 @@
#define IVPU_MIN_DB 1
#define IVPU_MAX_DB 255
-#define IVPU_NUM_ENGINES 2
+#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0)
+#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8)
+
+#define IVPU_NUM_PRIORITIES 4
+#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES)
+
+#define IVPU_CMDQ_MIN_ID 1
+#define IVPU_CMDQ_MAX_ID 255
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
#define IVPU_PLATFORM_FPGA 3
+#define IVPU_PLATFORM_HSLE 4
#define IVPU_PLATFORM_INVALID 8
+#define IVPU_SCHED_MODE_AUTO -1
+
#define IVPU_DBG_REG BIT(0)
#define IVPU_DBG_IRQ BIT(1)
#define IVPU_DBG_MMU BIT(2)
@@ -60,6 +79,7 @@
#define IVPU_DBG_KREF BIT(11)
#define IVPU_DBG_RPM BIT(12)
#define IVPU_DBG_MMU_MAP BIT(13)
+#define IVPU_DBG_IOCTL BIT(14)
#define ivpu_err(vdev, fmt, ...) \
drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
@@ -93,6 +113,8 @@ struct ivpu_wa_table {
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
bool disable_d0i3_msg;
+ bool wp0_during_power_up;
+ bool disable_d0i2;
};
struct ivpu_hw_info;
@@ -122,22 +144,34 @@ struct ivpu_device {
struct xa_limit context_xa_limit;
struct xarray db_xa;
+ struct xa_limit db_limit;
+ u32 db_next;
+
+ struct work_struct irq_ipc_work;
+ struct work_struct irq_dct_work;
+ struct work_struct context_abort_work;
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
+ struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
struct xarray submitted_jobs_xa;
struct ivpu_ipc_consumer job_done_consumer;
+ atomic_t job_timeout_counter;
atomic64_t unique_id_counter;
+ ktime_t busy_start_ts;
+ ktime_t busy_time;
+
struct {
int boot;
int jsm;
int tdr;
- int reschedule_suspend;
+ int inference;
int autosuspend;
int d0i3_entry_msg;
+ int state_dump_msg;
} timeout;
};
@@ -149,22 +183,39 @@ struct ivpu_file_priv {
struct kref ref;
struct ivpu_device *vdev;
struct mutex lock; /* Protects cmdq */
- struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES];
+ struct xarray cmdq_xa;
struct ivpu_mmu_context ctx;
+ struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */
+ struct list_head ms_instance_list;
+ struct ivpu_bo *ms_info_bo;
+ struct xa_limit job_limit;
+ u32 job_id_next;
+ struct xa_limit cmdq_limit;
+ u32 cmdq_id_next;
bool has_mmu_faults;
bool bound;
+ bool aborted;
};
extern int ivpu_dbg_mask;
extern u8 ivpu_pll_min_ratio;
extern u8 ivpu_pll_max_ratio;
+extern int ivpu_sched_mode;
extern bool ivpu_disable_mmu_cont_pages;
+extern bool ivpu_force_snoop;
#define IVPU_TEST_MODE_FW_TEST BIT(0)
#define IVPU_TEST_MODE_NULL_HW BIT(1)
#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4)
#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5)
+#define IVPU_TEST_MODE_MIP_DISABLE BIT(6)
+#define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8)
+#define IVPU_TEST_MODE_TURBO_ENABLE BIT(9)
+#define IVPU_TEST_MODE_TURBO_DISABLE BIT(10)
+#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11)
+#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(12)
+#define IVPU_TEST_MODE_D0I2_DISABLE BIT(13)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
@@ -173,6 +224,7 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link);
int ivpu_boot(struct ivpu_device *vdev);
int ivpu_shutdown(struct ivpu_device *vdev);
void ivpu_prepare_for_reset(struct ivpu_device *vdev);
+bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability);
static inline u8 ivpu_revision(struct ivpu_device *vdev)
{
@@ -184,16 +236,40 @@ static inline u16 ivpu_device_id(struct ivpu_device *vdev)
return to_pci_dev(vdev->drm.dev)->device;
}
-static inline int ivpu_hw_gen(struct ivpu_device *vdev)
+static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
+{
+ switch (ivpu_device_id(vdev)) {
+ case PCI_DEVICE_ID_MTL:
+ case PCI_DEVICE_ID_ARL:
+ return IVPU_HW_IP_37XX;
+ case PCI_DEVICE_ID_LNL:
+ return IVPU_HW_IP_40XX;
+ case PCI_DEVICE_ID_PTL_P:
+ case PCI_DEVICE_ID_WCL:
+ return IVPU_HW_IP_50XX;
+ case PCI_DEVICE_ID_NVL:
+ return IVPU_HW_IP_60XX;
+ default:
+ dump_stack();
+ ivpu_err(vdev, "Unknown NPU IP generation\n");
+ return 0;
+ }
+}
+
+static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
{
switch (ivpu_device_id(vdev)) {
case PCI_DEVICE_ID_MTL:
case PCI_DEVICE_ID_ARL:
- return IVPU_HW_37XX;
+ return IVPU_HW_BTRS_MTL;
case PCI_DEVICE_ID_LNL:
- return IVPU_HW_40XX;
+ case PCI_DEVICE_ID_PTL_P:
+ case PCI_DEVICE_ID_WCL:
+ case PCI_DEVICE_ID_NVL:
+ return IVPU_HW_BTRS_LNL;
default:
- ivpu_err(vdev, "Unknown NPU device\n");
+ dump_stack();
+ ivpu_err(vdev, "Unknown buttress generation\n");
return 0;
}
}
@@ -228,7 +304,13 @@ static inline bool ivpu_is_simics(struct ivpu_device *vdev)
static inline bool ivpu_is_fpga(struct ivpu_device *vdev)
{
- return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA;
+ return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA ||
+ ivpu_get_platform(vdev) == IVPU_PLATFORM_HSLE;
+}
+
+static inline bool ivpu_is_force_snoop_enabled(struct ivpu_device *vdev)
+{
+ return ivpu_force_snoop;
}
#endif /* __IVPU_DRV_H__ */
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 1457300828bf..48386d2cddbb 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@@ -17,16 +17,10 @@
#include "ivpu_ipc.h"
#include "ivpu_pm.h"
-#define FW_GLOBAL_MEM_START (2ull * SZ_1G)
-#define FW_GLOBAL_MEM_END (3ull * SZ_1G)
-#define FW_SHARED_MEM_SIZE SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */
-#define FW_SHARED_MEM_ALIGNMENT SZ_128K /* VPU MTRR limitation */
-#define FW_RUNTIME_MAX_SIZE SZ_512M
#define FW_SHAVE_NN_MAX_SIZE SZ_2M
-#define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START)
-#define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
-#define FW_VERSION_HEADER_SIZE SZ_4K
#define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
+#define FW_PREEMPT_BUF_MIN_SIZE SZ_4K
+#define FW_PREEMPT_BUF_MAX_SIZE SZ_32M
#define WATCHDOG_MSS_REDIRECT 32
#define WATCHDOG_NCE_REDIRECT 33
@@ -44,20 +38,33 @@
#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
+#define IVPU_FOCUS_PRESENT_TIMER_MS 1000
+
static char *ivpu_firmware;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/..");
+#endif
static struct {
int gen;
const char *name;
} fw_names[] = {
- { IVPU_HW_37XX, "vpu_37xx.bin" },
- { IVPU_HW_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
- { IVPU_HW_40XX, "vpu_40xx.bin" },
- { IVPU_HW_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
+ { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v1.bin" },
+ { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
+ { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v1.bin" },
+ { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
+ { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" },
+ { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
+ { IVPU_HW_IP_60XX, "intel/vpu/vpu_60xx_v1.bin" },
};
+/* Production fw_names from the table above */
+MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_60xx_v1.bin");
+
static int ivpu_fw_request(struct ivpu_device *vdev)
{
int ret = -ENOENT;
@@ -71,7 +78,7 @@ static int ivpu_fw_request(struct ivpu_device *vdev)
}
for (i = 0; i < ARRAY_SIZE(fw_names); i++) {
- if (fw_names[i].gen != ivpu_hw_gen(vdev))
+ if (fw_names[i].gen != ivpu_hw_ip_gen(vdev))
continue;
ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i].name, vdev->drm.dev);
@@ -121,11 +128,87 @@ ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_hea
return false;
}
+bool ivpu_is_within_range(u64 addr, size_t size, struct ivpu_addr_range *range)
+{
+ u64 addr_end;
+
+ if (!range || check_add_overflow(addr, size, &addr_end))
+ return false;
+
+ if (addr < range->start || addr_end > range->end)
+ return false;
+
+ return true;
+}
+
+static u32
+ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+ if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX &&
+ ivpu_sched_mode == VPU_SCHEDULING_MODE_OS) {
+ ivpu_warn(vdev, "OS sched mode is not supported, using HW mode\n");
+ return VPU_SCHEDULING_MODE_HW;
+ }
+
+ if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO)
+ return ivpu_sched_mode;
+
+ if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, JSM, 3, 24))
+ return VPU_SCHEDULING_MODE_OS;
+
+ return VPU_SCHEDULING_MODE_HW;
+}
+
+static void
+ivpu_preemption_config_parse(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+ struct ivpu_fw_info *fw = vdev->fw;
+ u32 primary_preempt_buf_size, secondary_preempt_buf_size;
+
+ if (fw_hdr->preemption_buffer_1_max_size)
+ primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
+ else
+ primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+
+ if (fw_hdr->preemption_buffer_2_max_size)
+ secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
+ else
+ secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+
+ ivpu_dbg(vdev, FW_BOOT, "Preemption buffer size, primary: %u, secondary: %u\n",
+ primary_preempt_buf_size, secondary_preempt_buf_size);
+
+ if (primary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE ||
+ secondary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE) {
+ ivpu_warn(vdev, "Preemption buffers size too small\n");
+ return;
+ }
+
+ if (primary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE ||
+ secondary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE) {
+ ivpu_warn(vdev, "Preemption buffers size too big\n");
+ return;
+ }
+
+ if (fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ return;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE)
+ return;
+
+ vdev->fw->primary_preempt_buf_size = ALIGN(primary_preempt_buf_size, PAGE_SIZE);
+ vdev->fw->secondary_preempt_buf_size = ALIGN(secondary_preempt_buf_size, PAGE_SIZE);
+}
+
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data;
- u64 runtime_addr, image_load_addr, runtime_size, image_size;
+ struct ivpu_addr_range fw_image_range;
+ u64 boot_params_addr, boot_params_size;
+ u64 fw_version_addr, fw_version_size;
+ u64 runtime_addr, runtime_size;
+ u64 image_load_addr, image_size;
if (fw->file->size <= FW_FILE_IMAGE_OFFSET) {
ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size);
@@ -137,18 +220,37 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
return -EINVAL;
}
- runtime_addr = fw_hdr->boot_params_load_address;
- runtime_size = fw_hdr->runtime_size;
- image_load_addr = fw_hdr->image_load_address;
- image_size = fw_hdr->image_size;
+ boot_params_addr = fw_hdr->boot_params_load_address;
+ boot_params_size = SZ_4K;
- if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) {
- ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr);
+ if (!ivpu_is_within_range(boot_params_addr, boot_params_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid boot params address: 0x%llx\n", boot_params_addr);
return -EINVAL;
}
- if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) {
- ivpu_err(vdev, "Invalid firmware runtime size: %llu\n", runtime_size);
+ fw_version_addr = fw_hdr->firmware_version_load_address;
+ fw_version_size = ALIGN(fw_hdr->firmware_version_size, SZ_4K);
+
+ if (fw_version_size != SZ_4K) {
+ ivpu_err(vdev, "Invalid firmware version size: %u\n",
+ fw_hdr->firmware_version_size);
+ return -EINVAL;
+ }
+
+ if (!ivpu_is_within_range(fw_version_addr, fw_version_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid firmware version address: 0x%llx\n", fw_version_addr);
+ return -EINVAL;
+ }
+
+ runtime_addr = fw_hdr->image_load_address;
+ runtime_size = fw_hdr->runtime_size - boot_params_size - fw_version_size;
+
+ image_load_addr = fw_hdr->image_load_address;
+ image_size = fw_hdr->image_size;
+
+ if (!ivpu_is_within_range(runtime_addr, runtime_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx and size %llu\n",
+ runtime_addr, runtime_size);
return -EINVAL;
}
@@ -157,34 +259,42 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
return -EINVAL;
}
- if (image_load_addr < runtime_addr ||
- image_load_addr + image_size > runtime_addr + runtime_size) {
- ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n",
+ if (!ivpu_is_within_range(image_load_addr, image_size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid firmware load address: 0x%llx and size %llu\n",
image_load_addr, image_size);
return -EINVAL;
}
- if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) {
- ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size);
+ if (ivpu_hw_range_init(vdev, &fw_image_range, image_load_addr, image_size))
return -EINVAL;
- }
- if (fw_hdr->entry_point < image_load_addr ||
- fw_hdr->entry_point >= image_load_addr + image_size) {
+ if (!ivpu_is_within_range(fw_hdr->entry_point, SZ_4K, &fw_image_range)) {
ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point);
return -EINVAL;
}
+
+ if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) {
+ ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size);
+ return -EINVAL;
+ }
+
ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n",
fw_hdr->header_version, fw_hdr->image_format);
- ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
- (const char *)fw_hdr + VPU_FW_HEADER_SIZE);
+ if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE))
+ ivpu_warn(vdev, "Missing firmware version\n");
+
+ ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version);
if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
return -EINVAL;
if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
return -EINVAL;
+ fw->boot_params_addr = boot_params_addr;
+ fw->boot_params_size = boot_params_size;
+ fw->fw_version_addr = fw_version_addr;
+ fw->fw_version_size = fw_version_size;
fw->runtime_addr = runtime_addr;
fw->runtime_size = runtime_size;
fw->image_load_offset = image_load_addr - runtime_addr;
@@ -194,16 +304,42 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->cold_boot_entry_point = fw_hdr->entry_point;
fw->entry_point = fw->cold_boot_entry_point;
- fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL);
+ fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL);
fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
fw->trace_hw_component_mask = -1;
fw->dvfs_mode = 0;
- ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
- fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
- ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
- fw->runtime_addr, image_load_addr, fw->entry_point);
+ fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
+ ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
+
+ ivpu_preemption_config_parse(vdev, fw_hdr);
+ ivpu_dbg(vdev, FW_BOOT, "Mid-inference preemption %s supported\n",
+ ivpu_fw_preempt_buf_size(vdev) ? "is" : "is not");
+
+ if (fw_hdr->ro_section_start_address &&
+ !ivpu_is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size,
+ &fw_image_range)) {
+ ivpu_err(vdev, "Invalid read-only section: start address 0x%llx, size %u\n",
+ fw_hdr->ro_section_start_address, fw_hdr->ro_section_size);
+ return -EINVAL;
+ }
+
+ fw->read_only_addr = fw_hdr->ro_section_start_address;
+ fw->read_only_size = fw_hdr->ro_section_size;
+
+ ivpu_dbg(vdev, FW_BOOT, "Boot params: address 0x%llx, size %llu\n",
+ fw->boot_params_addr, fw->boot_params_size);
+ ivpu_dbg(vdev, FW_BOOT, "FW version: address 0x%llx, size %llu\n",
+ fw->fw_version_addr, fw->fw_version_size);
+ ivpu_dbg(vdev, FW_BOOT, "Runtime: address 0x%llx, size %u\n",
+ fw->runtime_addr, fw->runtime_size);
+ ivpu_dbg(vdev, FW_BOOT, "Image load offset: 0x%llx, size %u\n",
+ fw->image_load_offset, fw->image_size);
+ ivpu_dbg(vdev, FW_BOOT, "Read-only section: address 0x%llx, size %u\n",
+ fw->read_only_addr, fw->read_only_size);
+ ivpu_dbg(vdev, FW_BOOT, "FW entry point: 0x%llx\n", fw->entry_point);
+ ivpu_dbg(vdev, FW_BOOT, "SHAVE NN size: %u\n", fw->shave_nn_size);
return 0;
}
@@ -230,39 +366,40 @@ ivpu_fw_init_wa(struct ivpu_device *vdev)
IVPU_PRINT_WA(disable_d0i3_msg);
}
-static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
-{
- struct ivpu_fw_info *fw = vdev->fw;
- u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT);
- u64 size = FW_SHARED_MEM_SIZE;
-
- if (start + size > FW_GLOBAL_MEM_END) {
- ivpu_err(vdev, "No space for shared region, start %lld, size %lld\n", start, size);
- return -EINVAL;
- }
-
- ivpu_hw_init_range(&vdev->hw->ranges.global, start, size);
- return 0;
-}
-
static int ivpu_fw_mem_init(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
- struct ivpu_addr_range fw_range;
int log_verb_size;
int ret;
- ret = ivpu_fw_update_global_range(vdev);
- if (ret)
- return ret;
+ fw->mem_bp = ivpu_bo_create_runtime(vdev, fw->boot_params_addr, fw->boot_params_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
+ if (!fw->mem_bp) {
+ ivpu_err(vdev, "Failed to create firmware boot params memory buffer\n");
+ return -ENOMEM;
+ }
- fw_range.start = fw->runtime_addr;
- fw_range.end = fw->runtime_addr + fw->runtime_size;
- fw->mem = ivpu_bo_create(vdev, &vdev->gctx, &fw_range, fw->runtime_size,
- DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
+ fw->mem_fw_ver = ivpu_bo_create_runtime(vdev, fw->fw_version_addr, fw->fw_version_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
+ if (!fw->mem_fw_ver) {
+ ivpu_err(vdev, "Failed to create firmware version memory buffer\n");
+ ret = -ENOMEM;
+ goto err_free_bp;
+ }
+
+ fw->mem = ivpu_bo_create_runtime(vdev, fw->runtime_addr, fw->runtime_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem) {
ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_free_fw_ver;
+ }
+
+ ret = ivpu_mmu_context_set_pages_ro(vdev, &vdev->gctx, fw->read_only_addr,
+ fw->read_only_size);
+ if (ret) {
+ ivpu_err(vdev, "Failed to set firmware image read-only\n");
+ goto err_free_fw_mem;
}
fw->mem_log_crit = ivpu_bo_create_global(vdev, IVPU_FW_CRITICAL_BUFFER_SIZE,
@@ -273,7 +410,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
goto err_free_fw_mem;
}
- if (ivpu_log_level <= IVPU_FW_LOG_INFO)
+ if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO)
log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE;
else
log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE;
@@ -304,6 +441,10 @@ err_free_log_crit:
ivpu_bo_free(fw->mem_log_crit);
err_free_fw_mem:
ivpu_bo_free(fw->mem);
+err_free_fw_ver:
+ ivpu_bo_free(fw->mem_fw_ver);
+err_free_bp:
+ ivpu_bo_free(fw->mem_bp);
return ret;
}
@@ -319,10 +460,14 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
ivpu_bo_free(fw->mem_log_verb);
ivpu_bo_free(fw->mem_log_crit);
ivpu_bo_free(fw->mem);
+ ivpu_bo_free(fw->mem_fw_ver);
+ ivpu_bo_free(fw->mem_bp);
fw->mem_log_verb = NULL;
fw->mem_log_crit = NULL;
fw->mem = NULL;
+ fw->mem_fw_ver = NULL;
+ fw->mem_bp = NULL;
}
int ivpu_fw_init(struct ivpu_device *vdev)
@@ -415,11 +560,6 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 0x%x\n",
boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n",
- boot_params->global_memory_allocator_base);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n",
- boot_params->global_memory_allocator_size);
-
ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n",
boot_params->shave_nn_fw_base);
@@ -427,10 +567,6 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->watchdog_irq_mss);
ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n",
boot_params->watchdog_irq_nce);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n",
- boot_params->host_to_vpu_irq);
- ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n",
- boot_params->job_done_irq);
ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n",
boot_params->host_version_id);
@@ -464,6 +600,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->punit_telemetry_sram_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
boot_params->vpu_telemetry_enable);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_scheduling_mode = 0x%x\n",
+ boot_params->vpu_scheduling_mode);
ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
boot_params->dvfs_mode);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
@@ -474,6 +612,10 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->d0i3_entry_vpu_ts);
ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
boot_params->system_time_us);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n",
+ boot_params->power_profile);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_uses_ecc_mca_signal = 0x%x\n",
+ boot_params->vpu_uses_ecc_mca_signal);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -500,11 +642,11 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
return;
}
+ memset(boot_params, 0, sizeof(*boot_params));
vdev->pm->is_warmboot = false;
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
- boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches default clock
@@ -527,8 +669,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
- boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
- boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
+ boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
+ }
/* Allow configuration for L2C_PAGE_TABLE with boot param value */
boot_params->autoconfig = 1;
@@ -561,14 +705,21 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr;
boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb);
- boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
- boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
- boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+ boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev);
+ boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev);
+ boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev);
+ boot_params->vpu_scheduling_mode = vdev->fw->sched_mode;
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS;
boot_params->dvfs_mode = vdev->fw->dvfs_mode;
if (!IVPU_WA(disable_d0i3_msg))
boot_params->d0i3_delayed_entry = 1;
boot_params->d0i3_residency_time_us = 0;
boot_params->d0i3_entry_vpu_ts = 0;
+ if (IVPU_WA(disable_d0i2))
+ boot_params->power_profile |= BIT(1);
+ boot_params->vpu_uses_ecc_mca_signal =
+ ivpu_hw_uses_ecc_mca_signal(vdev) ? VPU_BOOT_MCA_ECC_BOTH : 0;
boot_params->system_time_us = ktime_to_us(ktime_get_real());
wmb(); /* Flush WC buffers after writing bootparams */
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 66b60fa161b5..00945892b55e 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -1,11 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_FW_H__
#define __IVPU_FW_H__
+#include "vpu_jsm_api.h"
+
+#define FW_VERSION_HEADER_SIZE SZ_4K
+#define FW_VERSION_STR_SIZE SZ_256
+
struct ivpu_device;
struct ivpu_bo;
struct vpu_boot_params;
@@ -13,10 +18,17 @@ struct vpu_boot_params;
struct ivpu_fw_info {
const struct firmware *file;
const char *name;
+ char version[FW_VERSION_STR_SIZE];
+ struct ivpu_bo *mem_bp;
+ struct ivpu_bo *mem_fw_ver;
struct ivpu_bo *mem;
struct ivpu_bo *mem_shave_nn;
struct ivpu_bo *mem_log_crit;
struct ivpu_bo *mem_log_verb;
+ u64 boot_params_addr;
+ u64 boot_params_size;
+ u64 fw_version_addr;
+ u64 fw_version_size;
u64 runtime_addr;
u32 runtime_size;
u64 image_load_offset;
@@ -28,16 +40,28 @@ struct ivpu_fw_info {
u32 trace_destination_mask;
u64 trace_hw_component_mask;
u32 dvfs_mode;
+ u32 primary_preempt_buf_size;
+ u32 secondary_preempt_buf_size;
+ u64 read_only_addr;
+ u32 read_only_size;
+ u32 sched_mode;
+ u64 last_heartbeat;
};
+bool ivpu_is_within_range(u64 addr, size_t size, struct ivpu_addr_range *range);
int ivpu_fw_init(struct ivpu_device *vdev);
void ivpu_fw_fini(struct ivpu_device *vdev);
void ivpu_fw_load(struct ivpu_device *vdev);
-void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
+void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params);
static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
{
return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point;
}
+static inline u32 ivpu_fw_preempt_buf_size(struct ivpu_device *vdev)
+{
+ return vdev->fw->primary_preempt_buf_size + vdev->fw->secondary_preempt_buf_size;
+}
+
#endif /* __IVPU_FW_H__ */
diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c
index ef0adb5e0fbe..337c906b0210 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.c
+++ b/drivers/accel/ivpu/ivpu_fw_log.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/ctype.h>
@@ -15,19 +15,19 @@
#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
-#define IVPU_FW_LOG_LINE_LENGTH 256
+#define IVPU_FW_LOG_LINE_LENGTH 256
-unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR;
-module_param(ivpu_log_level, uint, 0444);
-MODULE_PARM_DESC(ivpu_log_level,
- "NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
+unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR;
+module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444);
+MODULE_PARM_DESC(fw_log_level,
+ "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
" info=" __stringify(IVPU_FW_LOG_INFO)
" warn=" __stringify(IVPU_FW_LOG_WARN)
" error=" __stringify(IVPU_FW_LOG_ERROR)
" fatal=" __stringify(IVPU_FW_LOG_FATAL));
-static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
- struct vpu_tracing_buffer_header **log_header)
+static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
+ struct vpu_tracing_buffer_header **out_log)
{
struct vpu_tracing_buffer_header *log;
@@ -48,7 +48,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
return -EINVAL;
}
- *log_header = log;
+ *out_log = log;
*offset += log->size;
ivpu_dbg(vdev, FW_BOOT,
@@ -59,7 +59,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
return 0;
}
-static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
+static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p)
{
char line[IVPU_FW_LOG_LINE_LENGTH];
u32 index = 0;
@@ -87,56 +87,89 @@ static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
}
line[index] = 0;
if (index != 0)
- drm_printf(p, "%s\n", line);
+ drm_printf(p, "%s", line);
}
-static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log,
- const char *prefix, bool only_new_msgs, struct drm_printer *p)
+static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix,
+ bool only_new_msgs, struct drm_printer *p)
{
- char *log_buffer = (void *)log + log->header_size;
- u32 log_size = log->size - log->header_size;
- u32 log_start = log->read_index;
- u32 log_end = log->write_index;
-
- if (!(log->write_index || log->wrap_count) ||
- (log->write_index == log->read_index && only_new_msgs)) {
- drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
- return;
+ char *log_data = (void *)log + log->header_size;
+ u32 data_size = log->size - log->header_size;
+ u32 log_start = only_new_msgs ? READ_ONCE(log->read_index) : 0;
+ u32 log_end = READ_ONCE(log->write_index);
+
+ if (log->wrap_count == log->read_wrap_count) {
+ if (log_end <= log_start) {
+ drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
+ return;
+ }
+ } else if (log->wrap_count == log->read_wrap_count + 1) {
+ if (log_end > log_start)
+ log_start = log_end;
+ } else {
+ log_start = log_end;
}
drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name);
- if (log->write_index > log->read_index) {
- buffer_print(log_buffer + log_start, log_end - log_start, p);
+ if (log_end > log_start) {
+ fw_log_print_lines(log_data + log_start, log_end - log_start, p);
} else {
- buffer_print(log_buffer + log_end, log_size - log_end, p);
- buffer_print(log_buffer, log_end, p);
+ fw_log_print_lines(log_data + log_start, data_size - log_start, p);
+ fw_log_print_lines(log_data, log_end, p);
}
- drm_printf(p, "\x1b[0m");
+ drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */
drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name);
}
-void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+static void
+fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name,
+ struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p)
{
- struct vpu_tracing_buffer_header *log_header;
+ struct vpu_tracing_buffer_header *log;
u32 next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p);
+ while (fw_log_from_bo(vdev, bo, &next, &log) == 0)
+ fw_log_print_buffer(log, name, only_new_msgs, p);
+}
+
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+{
+ fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p);
+ fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p);
+}
+
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev)
+{
+ struct vpu_tracing_buffer_header *log;
+ u32 next;
+
+ next = 0;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+ log->read_index = READ_ONCE(log->write_index);
+ log->read_wrap_count = READ_ONCE(log->wrap_count);
+ }
next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p);
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+ log->read_index = READ_ONCE(log->write_index);
+ log->read_wrap_count = READ_ONCE(log->wrap_count);
+ }
}
-void ivpu_fw_log_clear(struct ivpu_device *vdev)
+void ivpu_fw_log_reset(struct ivpu_device *vdev)
{
- struct vpu_tracing_buffer_header *log_header;
- u32 next = 0;
+ struct vpu_tracing_buffer_header *log;
+ u32 next;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- log_header->read_index = log_header->write_index;
+ next = 0;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+ log->read_index = 0;
+ log->read_wrap_count = 0;
+ }
next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- log_header->read_index = log_header->write_index;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+ log->read_index = 0;
+ log->read_wrap_count = 0;
+ }
}
diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h
index 0b2573f6f315..8bb528a73cb7 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.h
+++ b/drivers/accel/ivpu/ivpu_fw_log.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_FW_LOG_H__
@@ -8,8 +8,6 @@
#include <linux/types.h>
-#include <drm/drm_print.h>
-
#include "ivpu_drv.h"
#define IVPU_FW_LOG_DEFAULT 0
@@ -19,20 +17,15 @@
#define IVPU_FW_LOG_ERROR 4
#define IVPU_FW_LOG_FATAL 5
-extern unsigned int ivpu_log_level;
-
#define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M
#define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M
#define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K
-void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p);
-void ivpu_fw_log_clear(struct ivpu_device *vdev);
+extern unsigned int ivpu_fw_log_level;
-static inline void ivpu_fw_log_dump(struct ivpu_device *vdev)
-{
- struct drm_printer p = drm_info_printer(vdev->drm.dev);
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p);
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev);
+void ivpu_fw_log_reset(struct ivpu_device *vdev);
- ivpu_fw_log_print(vdev, false, &p);
-}
#endif /* __IVPU_FW_LOG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 1b409dbd332d..ece68f570b7e 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -15,50 +15,88 @@
#include <drm/drm_utils.h>
#include "ivpu_drv.h"
+#include "ivpu_fw.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
+MODULE_IMPORT_NS("DMA_BUF");
+
static const struct drm_gem_object_funcs ivpu_gem_funcs;
static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
ivpu_dbg(vdev, BO,
- "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
- action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
+ "%6s: bo %8p size %9zu ctx %d vpu_addr %9llx pages %d sgt %d mmu_mapped %d wc %d imported %d\n",
+ action, bo, ivpu_bo_size(bo), bo->ctx_id, bo->vpu_addr,
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
- (bool)bo->base.base.import_attach);
+ (bool)drm_gem_is_imported(&bo->base.base));
+}
+
+static inline int ivpu_bo_lock(struct ivpu_bo *bo)
+{
+ return dma_resv_lock(bo->base.base.resv, NULL);
+}
+
+static inline void ivpu_bo_unlock(struct ivpu_bo *bo)
+{
+ dma_resv_unlock(bo->base.base.resv);
+}
+
+static struct sg_table *ivpu_bo_map_attachment(struct ivpu_device *vdev, struct ivpu_bo *bo)
+{
+ struct sg_table *sgt;
+
+ drm_WARN_ON(&vdev->drm, !bo->base.base.import_attach);
+
+ ivpu_bo_lock(bo);
+
+ sgt = bo->base.sgt;
+ if (!sgt) {
+ sgt = dma_buf_map_attachment(bo->base.base.import_attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %ld\n", PTR_ERR(sgt));
+ else
+ bo->base.sgt = sgt;
+ }
+
+ ivpu_bo_unlock(bo);
+
+ return sgt;
}
/*
- * ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
+ * ivpu_bo_bind() - pin the backing physical pages and map them to VPU.
*
* This function pins physical memory pages, then maps the physical pages
* to IOMMU address space and finally updates the VPU MMU page tables
* to allow the VPU to translate VPU address to IOMMU address.
*/
-int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
+int __must_check ivpu_bo_bind(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ struct sg_table *sgt;
int ret = 0;
- mutex_lock(&bo->lock);
+ ivpu_dbg_bo(vdev, bo, "bind");
- ivpu_dbg_bo(vdev, bo, "pin");
- drm_WARN_ON(&vdev->drm, !bo->ctx);
+ if (bo->base.base.import_attach)
+ sgt = ivpu_bo_map_attachment(vdev, bo);
+ else
+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+ return ret;
+ }
- if (!bo->mmu_mapped) {
- struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
-
- if (IS_ERR(sgt)) {
- ret = PTR_ERR(sgt);
- ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
- goto unlock;
- }
+ ivpu_bo_lock(bo);
+ if (!bo->mmu_mapped) {
+ drm_WARN_ON(&vdev->drm, !bo->ctx);
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
- ivpu_bo_is_snooped(bo));
+ ivpu_bo_is_snooped(bo), ivpu_bo_is_read_only(bo));
if (ret) {
ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
goto unlock;
@@ -67,7 +105,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
}
unlock:
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
return ret;
}
@@ -82,19 +120,17 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
if (!drm_dev_enter(&vdev->drm, &idx))
return -ENODEV;
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
+ bo->ctx_id = ctx->id;
bo->vpu_addr = bo->mm_node.start;
- } else {
- ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
+ ivpu_dbg_bo(vdev, bo, "vaddr");
}
- ivpu_dbg_bo(vdev, bo, "alloc");
-
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
drm_dev_exit(idx);
@@ -105,7 +141,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount));
+ dma_resv_assert_held(bo->base.base.resv);
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->ctx);
@@ -120,17 +156,17 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
bo->ctx = NULL;
}
- if (bo->base.base.import_attach)
- return;
-
- dma_resv_lock(bo->base.base.resv, NULL);
if (bo->base.sgt) {
- dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
- sg_free_table(bo->base.sgt);
- kfree(bo->base.sgt);
+ if (bo->base.base.import_attach) {
+ dma_buf_unmap_attachment(bo->base.base.import_attach,
+ bo->base.sgt, DMA_BIDIRECTIONAL);
+ } else {
+ dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(bo->base.sgt);
+ kfree(bo->base.sgt);
+ }
bo->base.sgt = NULL;
}
- dma_resv_unlock(bo->base.base.resv);
}
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
@@ -142,12 +178,12 @@ void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
if (bo->ctx == ctx) {
ivpu_dbg_bo(vdev, bo, "unbind");
ivpu_bo_unbind_locked(bo);
}
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
}
mutex_unlock(&vdev->bo_list_lock);
}
@@ -167,11 +203,52 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
INIT_LIST_HEAD(&bo->bo_list_node);
- mutex_init(&bo->lock);
return &bo->base.base;
}
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct ivpu_device *vdev = to_ivpu_device(dev);
+ struct device *attach_dev = dev->dev;
+ struct dma_buf_attachment *attach;
+ struct drm_gem_object *obj;
+ struct ivpu_bo *bo;
+ int ret;
+
+ attach = dma_buf_attach(dma_buf, attach_dev);
+ if (IS_ERR(attach))
+ return ERR_CAST(attach);
+
+ get_dma_buf(dma_buf);
+
+ obj = drm_gem_shmem_prime_import_sg_table(dev, attach, NULL);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto fail_detach;
+ }
+
+ obj->import_attach = attach;
+ obj->resv = dma_buf->resv;
+
+ bo = to_ivpu_bo(obj);
+
+ mutex_lock(&vdev->bo_list_lock);
+ list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+ mutex_unlock(&vdev->bo_list_lock);
+
+ ivpu_dbg(vdev, BO, "import: bo %8p size %9zu\n", bo, ivpu_bo_size(bo));
+
+ return obj;
+
+fail_detach:
+ dma_buf_detach(dma_buf, attach);
+ dma_buf_put(dma_buf);
+
+ return ERR_PTR(ret);
+}
+
static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
{
struct drm_gem_shmem_object *shmem;
@@ -197,6 +274,8 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
list_add_tail(&bo->bo_list_node, &vdev->bo_list);
mutex_unlock(&vdev->bo_list_lock);
+ ivpu_dbg(vdev, BO, " alloc: bo %8p size %9llu\n", bo, size);
+
return bo;
}
@@ -208,8 +287,8 @@ static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file)
struct ivpu_addr_range *range;
if (bo->ctx) {
- ivpu_warn(vdev, "Can't add BO to ctx %u: already in ctx %u\n",
- file_priv->ctx.id, bo->ctx->id);
+ ivpu_dbg(vdev, IOCTL, "Can't add BO %pe to ctx %u: already in ctx %u\n",
+ bo, file_priv->ctx.id, bo->ctx->id);
return -EALREADY;
}
@@ -230,19 +309,41 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
ivpu_dbg_bo(vdev, bo, "free");
+ drm_WARN_ON(&vdev->drm, list_empty(&bo->bo_list_node));
+
mutex_lock(&vdev->bo_list_lock);
list_del(&bo->bo_list_node);
- mutex_unlock(&vdev->bo_list_lock);
- drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) &&
+ !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0);
+ drm_WARN_ON(&vdev->drm, bo->base.vaddr);
+ ivpu_bo_lock(bo);
ivpu_bo_unbind_locked(bo);
- mutex_destroy(&bo->lock);
+ ivpu_bo_unlock(bo);
+
+ mutex_unlock(&vdev->bo_list_lock);
- drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+ drm_WARN_ON(&vdev->drm, bo->mmu_mapped);
+ drm_WARN_ON(&vdev->drm, bo->ctx);
+
+ drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
+ drm_WARN_ON(obj->dev, bo->base.base.vma_node.vm_files.rb_node);
drm_gem_shmem_free(&bo->base);
}
+static enum drm_gem_object_status ivpu_gem_status(struct drm_gem_object *obj)
+{
+ struct ivpu_bo *bo = to_ivpu_bo(obj);
+ enum drm_gem_object_status status = 0;
+
+ if (ivpu_bo_is_resident(bo))
+ status |= DRM_GEM_OBJECT_RESIDENT;
+
+ return status;
+}
+
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_gem_bo_free,
.open = ivpu_gem_bo_open,
@@ -253,6 +354,7 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.vmap = drm_gem_shmem_object_vmap,
.vunmap = drm_gem_shmem_object_vunmap,
.mmap = drm_gem_shmem_object_mmap,
+ .status = ivpu_gem_status,
.vm_ops = &drm_gem_shmem_vm_ops,
};
@@ -265,22 +367,33 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
struct ivpu_bo *bo;
int ret;
- if (args->flags & ~DRM_IVPU_BO_FLAGS)
+ if (args->flags & ~DRM_IVPU_BO_FLAGS) {
+ ivpu_dbg(vdev, IOCTL, "Invalid BO flags 0x%x\n", args->flags);
return -EINVAL;
+ }
- if (size == 0)
+ if (size == 0) {
+ ivpu_dbg(vdev, IOCTL, "Invalid BO size %llu\n", args->size);
return -EINVAL;
+ }
bo = ivpu_bo_alloc(vdev, size, args->flags);
if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
+ ivpu_dbg(vdev, IOCTL, "Failed to allocate BO: %pe ctx %u size %llu flags 0x%x\n",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
+ drm_WARN_ON(&vdev->drm, bo->base.base.handle_count != 0);
+
ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
- if (!ret)
+ if (ret) {
+ ivpu_dbg(vdev, IOCTL, "Failed to create handle for BO: %pe ctx %u size %llu flags 0x%x\n",
+ bo, file_priv->ctx.id, args->size, args->flags);
+ } else {
args->vpu_addr = bo->vpu_addr;
+ drm_WARN_ON(&vdev->drm, bo->base.base.handle_count != 1);
+ }
drm_gem_object_put(&bo->base.base);
@@ -304,23 +417,26 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
bo = ivpu_bo_alloc(vdev, size, flags);
if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
+ ivpu_err(vdev, "Failed to allocate BO: %pe vpu_addr 0x%llx size %llu flags 0x%x\n",
bo, range->start, size, flags);
return NULL;
}
ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range);
- if (ret)
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate NPU address for BO: %pe ctx %u size %llu: %d\n",
+ bo, ctx->id, size, ret);
goto err_put;
+ }
- ret = ivpu_bo_pin(bo);
+ ret = ivpu_bo_bind(bo);
if (ret)
goto err_put;
if (flags & DRM_IVPU_BO_MAPPABLE) {
- dma_resv_lock(bo->base.base.resv, NULL);
- ret = drm_gem_shmem_vmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ ivpu_bo_lock(bo);
+ ret = drm_gem_shmem_vmap_locked(&bo->base, &map);
+ ivpu_bo_unlock(bo);
if (ret)
goto err_put;
@@ -333,6 +449,21 @@ err_put:
return NULL;
}
+struct ivpu_bo *ivpu_bo_create_runtime(struct ivpu_device *vdev, u64 addr, u64 size, u32 flags)
+{
+ struct ivpu_addr_range range;
+
+ if (!ivpu_is_within_range(addr, size, &vdev->hw->ranges.runtime)) {
+ ivpu_err(vdev, "Invalid runtime BO address 0x%llx size %llu\n", addr, size);
+ return NULL;
+ }
+
+ if (ivpu_hw_range_init(vdev, &range, addr, size))
+ return NULL;
+
+ return ivpu_bo_create(vdev, &vdev->gctx, &range, size, flags);
+}
+
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags)
{
return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags);
@@ -343,9 +474,9 @@ void ivpu_bo_free(struct ivpu_bo *bo)
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
- dma_resv_lock(bo->base.base.resv, NULL);
- drm_gem_shmem_vunmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ ivpu_bo_lock(bo);
+ drm_gem_shmem_vunmap_locked(&bo->base, &map);
+ ivpu_bo_unlock(bo);
}
drm_gem_object_put(&bo->base.base);
@@ -364,12 +495,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
bo = to_ivpu_bo(obj);
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
drm_gem_object_put(obj);
return ret;
@@ -384,6 +515,9 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+ /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */
+ timeout += 1;
+
obj = drm_gem_object_lookup(file, args->handle);
if (!obj)
return -EINVAL;
@@ -403,10 +537,10 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
- mutex_lock(&bo->lock);
+ ivpu_bo_lock(bo);
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
- bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size,
+ bo, bo->ctx_id, bo->vpu_addr, bo->base.base.size,
bo->flags, kref_read(&bo->base.base.refcount));
if (bo->base.pages)
@@ -415,12 +549,12 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
if (bo->mmu_mapped)
drm_printf(p, " mmu_mapped");
- if (bo->base.base.import_attach)
+ if (drm_gem_is_imported(&bo->base.base))
drm_printf(p, " imported");
drm_printf(p, "\n");
- mutex_unlock(&bo->lock);
+ ivpu_bo_unlock(bo);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index fb7117c13eec..0c3350f22b55 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_GEM_H__
#define __IVPU_GEM_H__
@@ -17,25 +17,29 @@ struct ivpu_bo {
struct list_head bo_list_node;
struct drm_mm_node mm_node;
- struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
u64 vpu_addr;
u32 flags;
u32 job_status; /* Valid only for command buffer */
+ u32 ctx_id;
bool mmu_mapped;
};
-int ivpu_bo_pin(struct ivpu_bo *bo);
+int ivpu_bo_bind(struct ivpu_bo *bo);
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
struct ivpu_addr_range *range, u64 size, u32 flags);
+struct ivpu_bo *ivpu_bo_create_runtime(struct ivpu_device *vdev, u64 addr, u64 size, u32 flags);
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags);
void ivpu_bo_free(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
void ivpu_bo_list_print(struct drm_device *dev);
@@ -60,14 +64,27 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
return bo->flags & DRM_IVPU_BO_CACHE_MASK;
}
+static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
+{
+ return to_ivpu_device(bo->base.base.dev);
+}
+
static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
{
+ if (ivpu_is_force_snoop_enabled(ivpu_bo_to_vdev(bo)))
+ return true;
+
return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}
-static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
+static inline bool ivpu_bo_is_read_only(struct ivpu_bo *bo)
{
- return to_ivpu_device(bo->base.base.dev);
+ return bo->flags & DRM_IVPU_BO_READ_ONLY;
+}
+
+static inline bool ivpu_bo_is_resident(struct ivpu_bo *bo)
+{
+ return !!bo->base.pages;
}
static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
@@ -92,4 +109,9 @@ static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
return bo->vpu_addr + (cpu_addr - ivpu_bo_vaddr(bo));
}
+static inline bool ivpu_bo_is_mappable(struct ivpu_bo *bo)
+{
+ return bo->flags & DRM_IVPU_BO_MAPPABLE;
+}
+
#endif /* __IVPU_GEM_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem_userptr.c b/drivers/accel/ivpu/ivpu_gem_userptr.c
new file mode 100644
index 000000000000..25ba606164c0
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_gem_userptr.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2025 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/capability.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+
+static struct sg_table *
+ivpu_gem_userptr_dmabuf_map(struct dma_buf_attachment *attachment,
+ enum dma_data_direction direction)
+{
+ struct sg_table *sgt = attachment->dmabuf->priv;
+ int ret;
+
+ ret = dma_map_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return sgt;
+}
+
+static void ivpu_gem_userptr_dmabuf_unmap(struct dma_buf_attachment *attachment,
+ struct sg_table *sgt,
+ enum dma_data_direction direction)
+{
+ dma_unmap_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void ivpu_gem_userptr_dmabuf_release(struct dma_buf *dma_buf)
+{
+ struct sg_table *sgt = dma_buf->priv;
+ struct sg_page_iter page_iter;
+ struct page *page;
+
+ for_each_sgtable_page(sgt, &page_iter, 0) {
+ page = sg_page_iter_page(&page_iter);
+ unpin_user_page(page);
+ }
+
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+static const struct dma_buf_ops ivpu_gem_userptr_dmabuf_ops = {
+ .map_dma_buf = ivpu_gem_userptr_dmabuf_map,
+ .unmap_dma_buf = ivpu_gem_userptr_dmabuf_unmap,
+ .release = ivpu_gem_userptr_dmabuf_release,
+};
+
+static struct dma_buf *
+ivpu_create_userptr_dmabuf(struct ivpu_device *vdev, void __user *user_ptr,
+ size_t size, uint32_t flags)
+{
+ struct dma_buf_export_info exp_info = {};
+ struct dma_buf *dma_buf;
+ struct sg_table *sgt;
+ struct page **pages;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ unsigned int gup_flags = FOLL_LONGTERM;
+ int ret, i, pinned;
+
+ /* Add FOLL_WRITE only if the BO is not read-only */
+ if (!(flags & DRM_IVPU_BO_READ_ONLY))
+ gup_flags |= FOLL_WRITE;
+
+ pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+
+ pinned = pin_user_pages_fast((unsigned long)user_ptr, nr_pages, gup_flags, pages);
+ if (pinned < 0) {
+ ret = pinned;
+ ivpu_dbg(vdev, IOCTL, "Failed to pin user pages: %d\n", ret);
+ goto free_pages_array;
+ }
+
+ if (pinned != nr_pages) {
+ ivpu_dbg(vdev, IOCTL, "Pinned %d pages, expected %lu\n", pinned, nr_pages);
+ ret = -EFAULT;
+ goto unpin_pages;
+ }
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ ret = -ENOMEM;
+ goto unpin_pages;
+ }
+
+ ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, 0, size, GFP_KERNEL);
+ if (ret) {
+ ivpu_dbg(vdev, IOCTL, "Failed to create sg table: %d\n", ret);
+ goto free_sgt;
+ }
+
+ exp_info.exp_name = "ivpu_userptr_dmabuf";
+ exp_info.owner = THIS_MODULE;
+ exp_info.ops = &ivpu_gem_userptr_dmabuf_ops;
+ exp_info.size = size;
+ exp_info.flags = O_RDWR | O_CLOEXEC;
+ exp_info.priv = sgt;
+
+ dma_buf = dma_buf_export(&exp_info);
+ if (IS_ERR(dma_buf)) {
+ ret = PTR_ERR(dma_buf);
+ ivpu_dbg(vdev, IOCTL, "Failed to export userptr dma-buf: %d\n", ret);
+ goto free_sg_table;
+ }
+
+ kvfree(pages);
+ return dma_buf;
+
+free_sg_table:
+ sg_free_table(sgt);
+free_sgt:
+ kfree(sgt);
+unpin_pages:
+ for (i = 0; i < pinned; i++)
+ unpin_user_page(pages[i]);
+free_pages_array:
+ kvfree(pages);
+ return ERR_PTR(ret);
+}
+
+static struct ivpu_bo *
+ivpu_bo_create_from_userptr(struct ivpu_device *vdev, void __user *user_ptr,
+ size_t size, uint32_t flags)
+{
+ struct dma_buf *dma_buf;
+ struct drm_gem_object *obj;
+ struct ivpu_bo *bo;
+
+ dma_buf = ivpu_create_userptr_dmabuf(vdev, user_ptr, size, flags);
+ if (IS_ERR(dma_buf))
+ return ERR_CAST(dma_buf);
+
+ obj = ivpu_gem_prime_import(&vdev->drm, dma_buf);
+ if (IS_ERR(obj)) {
+ dma_buf_put(dma_buf);
+ return ERR_CAST(obj);
+ }
+
+ dma_buf_put(dma_buf);
+
+ bo = to_ivpu_bo(obj);
+ bo->flags = flags;
+
+ return bo;
+}
+
+int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ivpu_bo_create_from_userptr *args = data;
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = to_ivpu_device(dev);
+ void __user *user_ptr = u64_to_user_ptr(args->user_ptr);
+ struct ivpu_bo *bo;
+ int ret;
+
+ if (args->flags & ~(DRM_IVPU_BO_HIGH_MEM | DRM_IVPU_BO_DMA_MEM | DRM_IVPU_BO_READ_ONLY)) {
+ ivpu_dbg(vdev, IOCTL, "Invalid BO flags: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ if (!args->user_ptr || !args->size) {
+ ivpu_dbg(vdev, IOCTL, "Userptr or size are zero: ptr %llx size %llu\n",
+ args->user_ptr, args->size);
+ return -EINVAL;
+ }
+
+ if (!PAGE_ALIGNED(args->user_ptr) || !PAGE_ALIGNED(args->size)) {
+ ivpu_dbg(vdev, IOCTL, "Userptr or size not page aligned: ptr %llx size %llu\n",
+ args->user_ptr, args->size);
+ return -EINVAL;
+ }
+
+ if (!access_ok(user_ptr, args->size)) {
+ ivpu_dbg(vdev, IOCTL, "Userptr is not accessible: ptr %llx size %llu\n",
+ args->user_ptr, args->size);
+ return -EFAULT;
+ }
+
+ bo = ivpu_bo_create_from_userptr(vdev, user_ptr, args->size, args->flags);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
+ if (ret) {
+ ivpu_dbg(vdev, IOCTL, "Failed to create handle for BO: %pe ctx %u size %llu flags 0x%x\n",
+ bo, file_priv->ctx.id, args->size, args->flags);
+ } else {
+ ivpu_dbg(vdev, BO, "Created userptr BO: handle=%u vpu_addr=0x%llx size=%llu flags=0x%x\n",
+ args->handle, bo->vpu_addr, args->size, bo->flags);
+ args->vpu_addr = bo->vpu_addr;
+ }
+
+ drm_gem_object_put(&bo->base.base);
+
+ return ret;
+}
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
new file mode 100644
index 000000000000..d69cd0d93569
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - 2024 Intel Corporation
+ */
+
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_hw_btrs.h"
+#include "ivpu_hw_ip.h"
+
+#include <asm/msr-index.h>
+#include <asm/msr.h>
+#include <linux/dmi.h>
+#include <linux/fault-inject.h>
+#include <linux/pm_runtime.h>
+
+#ifdef CONFIG_FAULT_INJECTION
+DECLARE_FAULT_ATTR(ivpu_hw_failure);
+
+static char *ivpu_fail_hw;
+module_param_named_unsafe(fail_hw, ivpu_fail_hw, charp, 0444);
+MODULE_PARM_DESC(fail_hw, "<interval>,<probability>,<space>,<times>");
+#endif
+
+#define FW_SHARED_MEM_ALIGNMENT SZ_512K /* VPU MTRR limitation */
+
+#define ECC_MCA_SIGNAL_ENABLE_MASK 0xff
+
+static char *platform_to_str(u32 platform)
+{
+ switch (platform) {
+ case IVPU_PLATFORM_SILICON:
+ return "SILICON";
+ case IVPU_PLATFORM_SIMICS:
+ return "SIMICS";
+ case IVPU_PLATFORM_FPGA:
+ return "FPGA";
+ case IVPU_PLATFORM_HSLE:
+ return "HSLE";
+ default:
+ return "Invalid platform";
+ }
+}
+
+static void platform_init(struct ivpu_device *vdev)
+{
+ int platform = ivpu_hw_btrs_platform_read(vdev);
+
+ ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", platform_to_str(platform), platform);
+
+ switch (platform) {
+ case IVPU_PLATFORM_SILICON:
+ case IVPU_PLATFORM_SIMICS:
+ case IVPU_PLATFORM_FPGA:
+ case IVPU_PLATFORM_HSLE:
+ vdev->platform = platform;
+ break;
+
+ default:
+ ivpu_err(vdev, "Invalid platform type: %d\n", platform);
+ break;
+ }
+}
+
+static void wa_init(struct ivpu_device *vdev)
+{
+ vdev->wa.punit_disabled = false;
+ vdev->wa.clear_runtime_mem = false;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev);
+
+ if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL &&
+ ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0)
+ vdev->wa.disable_clock_relinquish = true;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE)
+ vdev->wa.disable_clock_relinquish = false;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_DISABLE)
+ vdev->wa.disable_clock_relinquish = true;
+
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ vdev->wa.wp0_during_power_up = true;
+
+ if (ivpu_test_mode & IVPU_TEST_MODE_D0I2_DISABLE)
+ vdev->wa.disable_d0i2 = true;
+
+ IVPU_PRINT_WA(punit_disabled);
+ IVPU_PRINT_WA(clear_runtime_mem);
+ IVPU_PRINT_WA(interrupt_clear_with_0);
+ IVPU_PRINT_WA(disable_clock_relinquish);
+ IVPU_PRINT_WA(wp0_during_power_up);
+ IVPU_PRINT_WA(disable_d0i2);
+}
+
+static void timeouts_init(struct ivpu_device *vdev)
+{
+ if (ivpu_test_mode & IVPU_TEST_MODE_DISABLE_TIMEOUTS) {
+ vdev->timeout.boot = -1;
+ vdev->timeout.jsm = -1;
+ vdev->timeout.tdr = -1;
+ vdev->timeout.inference = -1;
+ vdev->timeout.autosuspend = -1;
+ vdev->timeout.d0i3_entry_msg = -1;
+ } else if (ivpu_is_fpga(vdev)) {
+ vdev->timeout.boot = 50;
+ vdev->timeout.jsm = 15000;
+ vdev->timeout.tdr = 30000;
+ vdev->timeout.inference = 900000;
+ vdev->timeout.autosuspend = -1;
+ vdev->timeout.d0i3_entry_msg = 500;
+ vdev->timeout.state_dump_msg = 10000;
+ } else if (ivpu_is_simics(vdev)) {
+ vdev->timeout.boot = 50;
+ vdev->timeout.jsm = 500;
+ vdev->timeout.tdr = 10000;
+ vdev->timeout.inference = 300000;
+ vdev->timeout.autosuspend = 100;
+ vdev->timeout.d0i3_entry_msg = 100;
+ vdev->timeout.state_dump_msg = 10;
+ } else {
+ vdev->timeout.boot = 1000;
+ vdev->timeout.jsm = 500;
+ vdev->timeout.tdr = 2000;
+ vdev->timeout.inference = 60000;
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ vdev->timeout.autosuspend = 10;
+ else
+ vdev->timeout.autosuspend = 100;
+ vdev->timeout.d0i3_entry_msg = 5;
+ vdev->timeout.state_dump_msg = 100;
+ }
+}
+
+static void priority_bands_init(struct ivpu_device *vdev)
+{
+ /* Idle */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 0;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 160000;
+ /* Normal */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 300000;
+ /* Focus */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 200000;
+ /* Realtime */
+ vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 0;
+ vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 50000;
+ vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 200000;
+}
+
+int ivpu_hw_range_init(struct ivpu_device *vdev, struct ivpu_addr_range *range, u64 start, u64 size)
+{
+ u64 end;
+
+ if (!range || check_add_overflow(start, size, &end)) {
+ ivpu_err(vdev, "Invalid range: start 0x%llx size %llu\n", start, size);
+ return -EINVAL;
+ }
+
+ range->start = start;
+ range->end = end;
+
+ return 0;
+}
+
+static void memory_ranges_init(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.runtime, 0x84800000, SZ_64M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.global, 0x90000000, SZ_256M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.user, 0xa0000000, 511 * SZ_1M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.shave, 0x180000000, SZ_2G);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.dma, 0x200000000, SZ_128G);
+ } else {
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.runtime, 0x80000000, SZ_64M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.global, 0x90000000, SZ_256M);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.shave, 0x80000000, SZ_2G);
+ ivpu_hw_range_init(vdev, &vdev->hw->ranges.user, 0x100000000, SZ_256G);
+ vdev->hw->ranges.dma = vdev->hw->ranges.user;
+ }
+
+ drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vdev->hw->ranges.global.start,
+ FW_SHARED_MEM_ALIGNMENT));
+}
+
+static int wp_enable(struct ivpu_device *vdev)
+{
+ return ivpu_hw_btrs_wp_drive(vdev, true);
+}
+
+static int wp_disable(struct ivpu_device *vdev)
+{
+ return ivpu_hw_btrs_wp_drive(vdev, false);
+}
+
+int ivpu_hw_power_up(struct ivpu_device *vdev)
+{
+ int ret;
+
+ if (IVPU_WA(wp0_during_power_up)) {
+ /* WP requests may fail when powering down, so issue WP 0 here */
+ ret = wp_disable(vdev);
+ if (ret)
+ ivpu_warn(vdev, "Failed to disable workpoint: %d\n", ret);
+ }
+
+ ret = ivpu_hw_btrs_d0i3_disable(vdev);
+ if (ret)
+ ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
+
+ ret = wp_enable(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to enable workpoint: %d\n", ret);
+ return ret;
+ }
+
+ if (ivpu_hw_btrs_gen(vdev) >= IVPU_HW_BTRS_LNL) {
+ if (IVPU_WA(disable_clock_relinquish))
+ ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev);
+ ivpu_hw_btrs_profiling_freq_reg_set_lnl(vdev);
+ ivpu_hw_btrs_ats_print_lnl(vdev);
+ }
+
+ ret = ivpu_hw_ip_host_ss_configure(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to configure host SS: %d\n", ret);
+ return ret;
+ }
+
+ ivpu_hw_ip_idle_gen_disable(vdev);
+
+ ret = ivpu_hw_btrs_wait_for_clock_res_own_ack(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for clock resource own ACK\n");
+ return ret;
+ }
+
+ ret = ivpu_hw_ip_pwr_domain_enable(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to enable power domain: %d\n", ret);
+ return ret;
+ }
+
+ ret = ivpu_hw_ip_host_ss_axi_enable(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to enable AXI: %d\n", ret);
+ return ret;
+ }
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_LNL)
+ ivpu_hw_btrs_set_port_arbitration_weights_lnl(vdev);
+
+ ret = ivpu_hw_ip_top_noc_enable(vdev);
+ if (ret)
+ ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret);
+
+ return ret;
+}
+
+static void save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+ vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+ vdev->hw->d0i3_entry_vpu_ts = ivpu_hw_ip_read_perf_timer_counter(vdev);
+}
+
+int ivpu_hw_reset(struct ivpu_device *vdev)
+{
+ int ret = 0;
+
+ if (ivpu_hw_btrs_ip_reset(vdev)) {
+ ivpu_err(vdev, "Failed to reset NPU IP\n");
+ ret = -EIO;
+ }
+
+ if (wp_disable(vdev)) {
+ ivpu_err(vdev, "Failed to disable workpoint\n");
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+int ivpu_hw_power_down(struct ivpu_device *vdev)
+{
+ int ret = 0;
+
+ save_d0i3_entry_timestamp(vdev);
+
+ if (!ivpu_hw_is_idle(vdev))
+ ivpu_warn(vdev, "NPU not idle during power down\n");
+
+ if (ivpu_hw_reset(vdev)) {
+ ivpu_err(vdev, "Failed to reset NPU\n");
+ ret = -EIO;
+ }
+
+ if (ivpu_hw_btrs_d0i3_enable(vdev)) {
+ ivpu_err(vdev, "Failed to enter D0I3\n");
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+int ivpu_hw_init(struct ivpu_device *vdev)
+{
+ ivpu_hw_btrs_info_init(vdev);
+ ivpu_hw_btrs_freq_ratios_init(vdev);
+ priority_bands_init(vdev);
+ memory_ranges_init(vdev);
+ platform_init(vdev);
+ wa_init(vdev);
+ timeouts_init(vdev);
+ atomic_set(&vdev->hw->firewall_irq_counter, 0);
+
+#ifdef CONFIG_FAULT_INJECTION
+ if (ivpu_fail_hw)
+ setup_fault_attr(&ivpu_hw_failure, ivpu_fail_hw);
+#endif
+
+ return 0;
+}
+
+int ivpu_hw_boot_fw(struct ivpu_device *vdev)
+{
+ int ret;
+
+ ivpu_hw_ip_snoop_disable(vdev);
+ ivpu_hw_ip_tbu_mmu_enable(vdev);
+ ret = ivpu_hw_ip_soc_cpu_boot(vdev);
+ if (ret)
+ ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret);
+
+ return ret;
+}
+
+void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
+ return;
+ }
+
+ if (enable)
+ vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH;
+ else
+ vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
+}
+
+void ivpu_irq_handlers_init(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_37xx;
+ else
+ vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_40xx;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_mtl;
+ else
+ vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_lnl;
+}
+
+void ivpu_hw_irq_enable(struct ivpu_device *vdev)
+{
+ ivpu_hw_ip_irq_enable(vdev);
+ ivpu_hw_btrs_irq_enable(vdev);
+}
+
+void ivpu_hw_irq_disable(struct ivpu_device *vdev)
+{
+ ivpu_hw_btrs_irq_disable(vdev);
+ ivpu_hw_ip_irq_disable(vdev);
+}
+
+irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr)
+{
+ struct ivpu_device *vdev = ptr;
+ bool ip_handled, btrs_handled;
+
+ ivpu_hw_btrs_global_int_disable(vdev);
+
+ btrs_handled = ivpu_hw_btrs_irq_handler(vdev, irq);
+ if (!ivpu_hw_is_idle((vdev)) || !btrs_handled)
+ ip_handled = ivpu_hw_ip_irq_handler(vdev, irq);
+ else
+ ip_handled = false;
+
+ /* Re-enable global interrupts to re-trigger MSI for pending interrupts */
+ ivpu_hw_btrs_global_int_enable(vdev);
+
+ if (!ip_handled && !btrs_handled)
+ return IRQ_NONE;
+
+ pm_runtime_mark_last_busy(vdev->drm.dev);
+ return IRQ_HANDLED;
+}
+
+bool ivpu_hw_uses_ecc_mca_signal(struct ivpu_device *vdev)
+{
+ unsigned long long msr_integrity_caps;
+ int ret;
+
+ if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX)
+ return false;
+
+ ret = rdmsrq_safe(MSR_INTEGRITY_CAPS, &msr_integrity_caps);
+ if (ret) {
+ ivpu_warn(vdev, "Error reading MSR_INTEGRITY_CAPS: %d", ret);
+ return false;
+ }
+
+ ivpu_dbg(vdev, MISC, "MSR_INTEGRITY_CAPS: 0x%llx\n", msr_integrity_caps);
+
+ return msr_integrity_caps & ECC_MCA_SIGNAL_ENABLE_MASK;
+}
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 094c659d2800..b6d0f0d0dccc 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -1,39 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_H__
#define __IVPU_HW_H__
#include "ivpu_drv.h"
-
-struct ivpu_hw_ops {
- int (*info_init)(struct ivpu_device *vdev);
- int (*power_up)(struct ivpu_device *vdev);
- int (*boot_fw)(struct ivpu_device *vdev);
- int (*power_down)(struct ivpu_device *vdev);
- int (*reset)(struct ivpu_device *vdev);
- bool (*is_idle)(struct ivpu_device *vdev);
- int (*wait_for_idle)(struct ivpu_device *vdev);
- void (*wdt_disable)(struct ivpu_device *vdev);
- void (*diagnose_failure)(struct ivpu_device *vdev);
- u32 (*profiling_freq_get)(struct ivpu_device *vdev);
- void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
- u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
- u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
- u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
- u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
- u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
- void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id);
- u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev);
- u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev);
- void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr);
- void (*irq_clear)(struct ivpu_device *vdev);
- void (*irq_enable)(struct ivpu_device *vdev);
- void (*irq_disable)(struct ivpu_device *vdev);
- irqreturn_t (*irq_handler)(int irq, void *ptr);
-};
+#include "ivpu_hw_btrs.h"
+#include "ivpu_hw_ip.h"
struct ivpu_addr_range {
resource_size_t start;
@@ -41,8 +16,12 @@ struct ivpu_addr_range {
};
struct ivpu_hw_info {
- const struct ivpu_hw_ops *ops;
struct {
+ bool (*btrs_irq_handler)(struct ivpu_device *vdev, int irq);
+ bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq);
+ } irq;
+ struct {
+ struct ivpu_addr_range runtime;
struct ivpu_addr_range global;
struct ivpu_addr_range user;
struct ivpu_addr_range shave;
@@ -58,148 +37,118 @@ struct ivpu_hw_info {
u8 pn_ratio;
u32 profiling_freq;
} pll;
+ struct {
+ u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ } hws;
u32 tile_fuse;
u32 sku;
u16 config;
int dma_bits;
ktime_t d0i3_entry_host_ts;
u64 d0i3_entry_vpu_ts;
-};
-
-extern const struct ivpu_hw_ops ivpu_hw_37xx_ops;
-extern const struct ivpu_hw_ops ivpu_hw_40xx_ops;
-
-static inline int ivpu_hw_info_init(struct ivpu_device *vdev)
-{
- return vdev->hw->ops->info_init(vdev);
-};
-
-static inline int ivpu_hw_power_up(struct ivpu_device *vdev)
-{
- ivpu_dbg(vdev, PM, "HW power up\n");
-
- return vdev->hw->ops->power_up(vdev);
-};
-
-static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev)
-{
- return vdev->hw->ops->boot_fw(vdev);
-};
+ atomic_t firewall_irq_counter;
+};
+
+int ivpu_hw_init(struct ivpu_device *vdev);
+int ivpu_hw_range_init(struct ivpu_device *vdev, struct ivpu_addr_range *range, u64 start,
+ u64 size);
+int ivpu_hw_power_up(struct ivpu_device *vdev);
+int ivpu_hw_power_down(struct ivpu_device *vdev);
+int ivpu_hw_reset(struct ivpu_device *vdev);
+int ivpu_hw_boot_fw(struct ivpu_device *vdev);
+void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable);
+void ivpu_irq_handlers_init(struct ivpu_device *vdev);
+void ivpu_hw_irq_enable(struct ivpu_device *vdev);
+void ivpu_hw_irq_disable(struct ivpu_device *vdev);
+irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr);
+bool ivpu_hw_uses_ecc_mca_signal(struct ivpu_device *vdev);
+
+static inline u32 ivpu_hw_btrs_irq_handler(struct ivpu_device *vdev, int irq)
+{
+ return vdev->hw->irq.btrs_irq_handler(vdev, irq);
+}
-static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_ip_irq_handler(struct ivpu_device *vdev, int irq)
{
- return vdev->hw->ops->is_idle(vdev);
-};
+ return vdev->hw->irq.ip_irq_handler(vdev, irq);
+}
-static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
+static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
{
- return vdev->hw->ops->wait_for_idle(vdev);
-};
+ return range->end - range->start;
+}
-static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
{
- ivpu_dbg(vdev, PM, "HW power down\n");
-
- return vdev->hw->ops->power_down(vdev);
-};
+ return ivpu_hw_btrs_dpu_max_freq_get(vdev);
+}
-static inline int ivpu_hw_reset(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev)
{
- ivpu_dbg(vdev, PM, "HW reset\n");
-
- return vdev->hw->ops->reset(vdev);
-};
+ return ivpu_hw_btrs_dpu_freq_get(vdev);
+}
-static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
+static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
{
- vdev->hw->ops->wdt_disable(vdev);
-};
+ ivpu_hw_ip_irq_clear(vdev);
+}
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
{
- return vdev->hw->ops->profiling_freq_get(vdev);
-};
-
-static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
-{
- return vdev->hw->ops->profiling_freq_drive(vdev, enable);
-};
-
-/* Register indirect accesses */
-static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
-{
- return vdev->hw->ops->reg_pll_freq_get(vdev);
-};
-
-static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+ return vdev->hw->pll.profiling_freq;
}
-static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
-{
- return vdev->hw->ops->reg_telemetry_offset_get(vdev);
-};
-
-static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev)
-{
- return vdev->hw->ops->reg_telemetry_size_get(vdev);
-};
-
-static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev)
-{
- return vdev->hw->ops->reg_telemetry_enable_get(vdev);
-};
-
-static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id)
+static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev)
{
- vdev->hw->ops->reg_db_set(vdev, db_id);
-};
+ ivpu_hw_ip_diagnose_failure(vdev);
+ ivpu_hw_btrs_diagnose_failure(vdev);
+}
-static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_telemetry_offset_get(struct ivpu_device *vdev)
{
- return vdev->hw->ops->reg_ipc_rx_addr_get(vdev);
-};
+ return ivpu_hw_btrs_telemetry_offset_get(vdev);
+}
-static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_telemetry_size_get(struct ivpu_device *vdev)
{
- return vdev->hw->ops->reg_ipc_rx_count_get(vdev);
-};
+ return ivpu_hw_btrs_telemetry_size_get(vdev);
+}
-static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
+static inline u32 ivpu_hw_telemetry_enable_get(struct ivpu_device *vdev)
{
- vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr);
-};
+ return ivpu_hw_btrs_telemetry_enable_get(vdev);
+}
-static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
+static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
{
- vdev->hw->ops->irq_clear(vdev);
-};
+ return ivpu_hw_btrs_is_idle(vdev);
+}
-static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev)
+static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
{
- vdev->hw->ops->irq_enable(vdev);
-};
+ return ivpu_hw_btrs_wait_for_idle(vdev);
+}
-static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev)
+static inline void ivpu_hw_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
{
- vdev->hw->ops->irq_disable(vdev);
-};
+ ivpu_hw_ip_ipc_tx_set(vdev, vpu_addr);
+}
-static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size)
+static inline void ivpu_hw_db_set(struct ivpu_device *vdev, u32 db_id)
{
- range->start = start;
- range->end = start + size;
+ ivpu_hw_ip_db_set(vdev, db_id);
}
-static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
+static inline u32 ivpu_hw_ipc_rx_addr_get(struct ivpu_device *vdev)
{
- return range->end - range->start;
+ return ivpu_hw_ip_ipc_rx_addr_get(vdev);
}
-static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_ipc_rx_count_get(struct ivpu_device *vdev)
{
- vdev->hw->ops->diagnose_failure(vdev);
+ return ivpu_hw_ip_ipc_rx_count_get(vdev);
}
#endif /* __IVPU_HW_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
deleted file mode 100644
index bd25e2d9fb0f..000000000000
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ /dev/null
@@ -1,1065 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020-2024 Intel Corporation
- */
-
-#include "ivpu_drv.h"
-#include "ivpu_fw.h"
-#include "ivpu_hw_37xx_reg.h"
-#include "ivpu_hw_reg_io.h"
-#include "ivpu_hw.h"
-#include "ivpu_ipc.h"
-#include "ivpu_mmu.h"
-#include "ivpu_pm.h"
-
-#define TILE_FUSE_ENABLE_BOTH 0x0
-#define TILE_SKU_BOTH 0x3630
-
-/* Work point configuration values */
-#define CONFIG_1_TILE 0x01
-#define CONFIG_2_TILE 0x02
-#define PLL_RATIO_5_3 0x01
-#define PLL_RATIO_4_3 0x02
-#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio))
-#define WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_5_3)
-#define WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_4_3)
-#define WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_5_3)
-#define WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_4_3)
-#define WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0)
-
-#define PLL_REF_CLK_FREQ (50 * 1000000)
-#define PLL_SIMULATION_FREQ (10 * 1000000)
-#define PLL_PROF_CLK_FREQ (38400 * 1000)
-#define PLL_DEFAULT_EPP_VALUE 0x80
-
-#define TIM_SAFE_ENABLE 0xf1d0dead
-#define TIM_WATCHDOG_RESET_VALUE 0xffffffff
-
-#define TIMEOUT_US (150 * USEC_PER_MSEC)
-#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
-#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
-
-#define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT)))
-
-#define ICB_1_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \
- (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT)))
-
-#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK)
-
-#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
- (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR)))
-
-#define BUTTRESS_ALL_IRQ_MASK (BUTTRESS_IRQ_MASK | \
- (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)))
-
-#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK)
-#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1)
-
-#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \
- (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \
- (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \
- (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \
- (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \
- (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
- (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
-
-static void ivpu_hw_wa_init(struct ivpu_device *vdev)
-{
- vdev->wa.punit_disabled = false;
- vdev->wa.clear_runtime_mem = false;
-
- REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
- if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
- /* Writing 1s does not clear the interrupt status register */
- vdev->wa.interrupt_clear_with_0 = true;
- REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0);
- }
-
- IVPU_PRINT_WA(punit_disabled);
- IVPU_PRINT_WA(clear_runtime_mem);
- IVPU_PRINT_WA(interrupt_clear_with_0);
-}
-
-static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
-{
- vdev->timeout.boot = 1000;
- vdev->timeout.jsm = 500;
- vdev->timeout.tdr = 2000;
- vdev->timeout.reschedule_suspend = 10;
- vdev->timeout.autosuspend = 10;
- vdev->timeout.d0i3_entry_msg = 5;
-}
-
-static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
-{
- return REGB_POLL_FLD(VPU_37XX_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US);
-}
-
-/* Send KMD initiated workpoint change */
-static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio,
- u16 target_ratio, u16 config)
-{
- int ret;
- u32 val;
-
- ret = ivpu_pll_wait_for_cmd_send(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret);
- return ret;
- }
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0);
- val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val);
- val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val);
- REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, val);
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1);
- val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val);
- val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val);
- REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, val);
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2);
- val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val);
- REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2, val);
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_CMD);
- val = REG_SET_FLD(VPU_37XX_BUTTRESS_WP_REQ_CMD, SEND, val);
- REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_CMD, val);
-
- ret = ivpu_pll_wait_for_cmd_send(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable)
-{
- u32 exp_val = enable ? 0x1 : 0x0;
-
- if (IVPU_WA(punit_disabled))
- return 0;
-
- return REGB_POLL_FLD(VPU_37XX_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US);
-}
-
-static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev)
-{
- if (IVPU_WA(punit_disabled))
- return 0;
-
- return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US);
-}
-
-static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
-{
- struct ivpu_hw_info *hw = vdev->hw;
- u8 fuse_min_ratio, fuse_max_ratio, fuse_pn_ratio;
- u32 fmin_fuse, fmax_fuse;
-
- fmin_fuse = REGB_RD32(VPU_37XX_BUTTRESS_FMIN_FUSE);
- fuse_min_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse);
- fuse_pn_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse);
-
- fmax_fuse = REGB_RD32(VPU_37XX_BUTTRESS_FMAX_FUSE);
- fuse_max_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse);
-
- hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio);
- hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio);
- hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio);
-}
-
-static int ivpu_hw_37xx_wait_for_vpuip_bar(struct ivpu_device *vdev)
-{
- return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100);
-}
-
-static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
-{
- struct ivpu_hw_info *hw = vdev->hw;
- u16 target_ratio;
- u16 config;
- int ret;
-
- if (IVPU_WA(punit_disabled)) {
- ivpu_dbg(vdev, PM, "Skipping PLL request\n");
- return 0;
- }
-
- if (enable) {
- target_ratio = hw->pll.pn_ratio;
- config = hw->config;
- } else {
- target_ratio = 0;
- config = 0;
- }
-
- ivpu_dbg(vdev, PM, "PLL workpoint request: config 0x%04x pll ratio 0x%x\n",
- config, target_ratio);
-
- ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config);
- if (ret) {
- ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_pll_wait_for_lock(vdev, enable);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for PLL lock\n");
- return ret;
- }
-
- if (enable) {
- ret = ivpu_pll_wait_for_status_ready(vdev);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for PLL ready status\n");
- return ret;
- }
-
- ret = ivpu_hw_37xx_wait_for_vpuip_bar(vdev);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for NPU IP bar\n");
- return ret;
- }
- }
-
- return 0;
-}
-
-static int ivpu_pll_enable(struct ivpu_device *vdev)
-{
- return ivpu_pll_drive(vdev, true);
-}
-
-static int ivpu_pll_disable(struct ivpu_device *vdev)
-{
- return ivpu_pll_drive(vdev, false);
-}
-
-static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev)
-{
- u32 val = 0;
-
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val);
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val);
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val);
-
- REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val);
-}
-
-static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET);
-
- if (enable) {
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val);
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val);
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val);
- } else {
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val);
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val);
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val);
- }
-
- REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val);
-}
-
-static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET);
-
- if (enable) {
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val);
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val);
- val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val);
- } else {
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val);
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val);
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val);
- }
-
- REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val);
-}
-
-static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN);
-
- if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN);
-
- if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY);
-
- if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
-
- if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN);
-
- if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY);
-
- if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev)
-{
- ivpu_boot_host_ss_rst_clr_assert(vdev);
-
- return ivpu_boot_noc_qreqn_check(vdev, 0x0);
-}
-
-static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev)
-{
- REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, 0x0);
-}
-
-static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN);
- if (enable)
- val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
- else
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
- REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val);
-
- ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_noc_qdeny_check(vdev, 0x0);
- if (ret)
- ivpu_err(vdev, "Failed qdeny check: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev)
-{
- return ivpu_boot_host_ss_axi_drive(vdev, true);
-}
-
-static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
- if (enable) {
- val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
- val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
- } else {
- val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
- val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
- }
- REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val);
-
- ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0);
- if (ret)
- ivpu_err(vdev, "Failed qdeny check: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev)
-{
- return ivpu_boot_host_ss_top_noc_drive(vdev, true);
-}
-
-static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
-
- if (enable)
- val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
- else
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
-
- REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
-}
-
-static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0);
-
- if (enable)
- val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
- else
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
-
- REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
-}
-
-static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
-{
- return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU,
- exp_val, PWR_ISLAND_STATUS_TIMEOUT_US);
-}
-
-static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0);
-
- if (enable)
- val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val);
- else
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val);
-
- REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val);
-}
-
-static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE);
-
- if (enable)
- val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val);
- else
- val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val);
-
- REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val);
-}
-
-static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
-{
- int ret;
-
- ivpu_boot_pwr_island_trickle_drive(vdev, true);
- ivpu_boot_pwr_island_drive(vdev, true);
-
- ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for power island status\n");
- return ret;
- }
-
- ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qrenqn check %d\n", ret);
- return ret;
- }
-
- ivpu_boot_host_ss_clk_drive(vdev, true);
- ivpu_boot_pwr_island_isolation_drive(vdev, false);
- ivpu_boot_host_ss_rst_drive(vdev, true);
- ivpu_boot_dpu_active_drive(vdev, true);
-
- return ret;
-}
-
-static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
-
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val);
- val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val);
-
- REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val);
-}
-
-static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev)
-{
- u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV);
-
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
-
- REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val);
-}
-
-static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev)
-{
- u32 val;
-
- val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
- val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
-
- val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
- REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
-
- val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
- REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
-
- val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
- REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
-
- val = vdev->fw->entry_point >> 9;
- REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
-
- val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val);
- REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
-
- ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n",
- vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume");
-}
-
-static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
- if (ret) {
- ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret);
- return ret;
- }
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL);
- if (enable)
- val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, I3, val);
- else
- val = REG_CLR_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, I3, val);
- REGB_WR32(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, val);
-
- ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
- if (ret)
- ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
-{
- struct ivpu_hw_info *hw = vdev->hw;
-
- hw->tile_fuse = TILE_FUSE_ENABLE_BOTH;
- hw->sku = TILE_SKU_BOTH;
- hw->config = WP_CONFIG_2_TILE_4_3_RATIO;
-
- ivpu_pll_init_frequency_ratios(vdev);
-
- ivpu_hw_init_range(&hw->ranges.global, 0x80000000, SZ_512M);
- ivpu_hw_init_range(&hw->ranges.user, 0xc0000000, 255 * SZ_1M);
- ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G);
- ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G);
-
- vdev->platform = IVPU_PLATFORM_SILICON;
- ivpu_hw_wa_init(vdev);
- ivpu_hw_timeouts_init(vdev);
-
- return 0;
-}
-
-static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev)
-{
- int ret;
- u32 val;
-
- if (IVPU_WA(punit_disabled))
- return 0;
-
- ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
- return ret;
- }
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
- val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
- REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
-
- ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
- if (ret)
- ivpu_err(vdev, "Timed out waiting for RESET completion\n");
-
- return ret;
-}
-
-static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
-{
- int ret = 0;
-
- if (ivpu_hw_37xx_ip_reset(vdev)) {
- ivpu_err(vdev, "Failed to reset NPU\n");
- ret = -EIO;
- }
-
- if (ivpu_pll_disable(vdev)) {
- ivpu_err(vdev, "Failed to disable PLL\n");
- ret = -EIO;
- }
-
- return ret;
-}
-
-static int ivpu_hw_37xx_d0i3_enable(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_boot_d0i3_drive(vdev, true);
- if (ret)
- ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret);
-
- udelay(5); /* VPU requires 5 us to complete the transition */
-
- return ret;
-}
-
-static int ivpu_hw_37xx_d0i3_disable(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_boot_d0i3_drive(vdev, false);
- if (ret)
- ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
-{
- int ret;
-
- /* PLL requests may fail when powering down, so issue WP 0 here */
- ret = ivpu_pll_disable(vdev);
- if (ret)
- ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret);
-
- ret = ivpu_hw_37xx_d0i3_disable(vdev);
- if (ret)
- ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
-
- ret = ivpu_pll_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable PLL: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_host_ss_configure(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to configure host SS: %d\n", ret);
- return ret;
- }
-
- /*
- * The control circuitry for vpu_idle indication logic powers up active.
- * To ensure unnecessary low power mode signal from LRT during bring up,
- * KMD disables the circuitry prior to bringing up the Main Power island.
- */
- ivpu_boot_vpu_idle_gen_disable(vdev);
-
- ret = ivpu_boot_pwr_domain_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable power domain: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_host_ss_axi_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable AXI: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_host_ss_top_noc_enable(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_hw_37xx_boot_fw(struct ivpu_device *vdev)
-{
- ivpu_boot_no_snoop_enable(vdev);
- ivpu_boot_tbu_mmu_enable(vdev);
- ivpu_boot_soc_cpu_boot(vdev);
-
- return 0;
-}
-
-static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
-{
- u32 val;
-
- if (IVPU_WA(punit_disabled))
- return true;
-
- val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_STATUS);
- return REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, READY, val) &&
- REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
-}
-
-static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev)
-{
- return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
-}
-
-static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
-{
- vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
- vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
-}
-
-static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
-{
- int ret = 0;
-
- ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
-
- if (!ivpu_hw_37xx_is_idle(vdev))
- ivpu_warn(vdev, "NPU not idle during power down\n");
-
- if (ivpu_hw_37xx_reset(vdev)) {
- ivpu_err(vdev, "Failed to reset NPU\n");
- ret = -EIO;
- }
-
- if (ivpu_hw_37xx_d0i3_enable(vdev)) {
- ivpu_err(vdev, "Failed to enter D0I3\n");
- ret = -EIO;
- }
-
- return ret;
-}
-
-static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
-{
- u32 val;
-
- /* Enable writing and set non-zero WDT value */
- REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
- REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
-
- /* Enable writing and disable watchdog timer */
- REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
- REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0);
-
- /* Now clear the timeout interrupt */
- val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG);
- val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
- REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
-}
-
-static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev)
-{
- return PLL_PROF_CLK_FREQ;
-}
-
-static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
-{
- /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
-}
-
-static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
- u32 cpu_clock;
-
- if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
- cpu_clock = pll_clock * 2 / 4;
- else
- cpu_clock = pll_clock * 2 / 5;
-
- return cpu_clock;
-}
-
-/* Register indirect accesses */
-static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL);
- pll_curr_ratio &= VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK;
-
- if (!ivpu_is_silicon(vdev))
- return PLL_SIMULATION_FREQ;
-
- return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
-}
-
-static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
-{
- return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
-}
-
-static u32 ivpu_hw_37xx_reg_telemetry_size_get(struct ivpu_device *vdev)
-{
- return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE);
-}
-
-static u32 ivpu_hw_37xx_reg_telemetry_enable_get(struct ivpu_device *vdev)
-{
- return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE);
-}
-
-static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id)
-{
- u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0;
- u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET);
-
- REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
-}
-
-static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
-{
- return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM);
-}
-
-static u32 ivpu_hw_37xx_reg_ipc_rx_count_get(struct ivpu_device *vdev)
-{
- u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT);
-
- return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
-}
-
-static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
-{
- REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
-}
-
-static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev)
-{
- REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK);
-}
-
-static void ivpu_hw_37xx_irq_enable(struct ivpu_device *vdev)
-{
- REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK);
- REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK);
- REGB_WR32(VPU_37XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK);
- REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-}
-
-static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev)
-{
- REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
- REGB_WR32(VPU_37XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK);
- REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull);
- REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0);
-}
-
-static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
-{
- ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
-}
-
-static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
-{
- ivpu_hw_wdt_disable(vdev);
- ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
-}
-
-static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
-{
- ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
-}
-
-/* Handler for IRQs from VPU core (irqV) */
-static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
-{
- u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
-
- if (!status)
- return false;
-
- REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
- ivpu_mmu_irq_evtq_handler(vdev);
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
- ivpu_ipc_irq_handler(vdev, wake_thread);
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
- ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status))
- ivpu_mmu_irq_gerr_handler(vdev);
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status))
- ivpu_hw_37xx_irq_wdt_mss_handler(vdev);
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status))
- ivpu_hw_37xx_irq_wdt_nce_handler(vdev);
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
- ivpu_hw_37xx_irq_noc_firewall_handler(vdev);
-
- return true;
-}
-
-/* Handler for IRQs from Buttress core (irqB) */
-static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
-{
- u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
- bool schedule_recovery = false;
-
- if (!status)
- return false;
-
- if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
- REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL));
-
- if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) {
- ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_37XX_BUTTRESS_ATS_ERR_LOG_0));
- REGB_WR32(VPU_37XX_BUTTRESS_ATS_ERR_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) {
- u32 ufi_log = REGB_RD32(VPU_37XX_BUTTRESS_UFI_ERR_LOG);
-
- ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx",
- ufi_log, REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log),
- REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log),
- REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log));
- REGB_WR32(VPU_37XX_BUTTRESS_UFI_ERR_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- /* This must be done after interrupts are cleared at the source. */
- if (IVPU_WA(interrupt_clear_with_0))
- /*
- * Writing 1 triggers an interrupt, so we can't perform read update write.
- * Clear local interrupt status by writing 0 to all bits.
- */
- REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0);
- else
- REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status);
-
- if (schedule_recovery)
- ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
-
- return true;
-}
-
-static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr)
-{
- struct ivpu_device *vdev = ptr;
- bool irqv_handled, irqb_handled, wake_thread = false;
-
- REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
-
- irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread);
- irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq);
-
- /* Re-enable global interrupts to re-trigger MSI for pending interrupts */
- REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-
- if (wake_thread)
- return IRQ_WAKE_THREAD;
- if (irqv_handled || irqb_handled)
- return IRQ_HANDLED;
- return IRQ_NONE;
-}
-
-static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev)
-{
- u32 irqv = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
- u32 irqb = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
-
- if (ivpu_hw_37xx_reg_ipc_rx_count_get(vdev))
- ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ");
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv))
- ivpu_err(vdev, "WDT MSS timeout detected\n");
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv))
- ivpu_err(vdev, "WDT NCE timeout detected\n");
-
- if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv))
- ivpu_err(vdev, "NOC Firewall irq detected\n");
-
- if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb))
- ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_37XX_BUTTRESS_ATS_ERR_LOG_0));
-
- if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) {
- u32 ufi_log = REGB_RD32(VPU_37XX_BUTTRESS_UFI_ERR_LOG);
-
- ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx",
- ufi_log, REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log),
- REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log),
- REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log));
- }
-}
-
-const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
- .info_init = ivpu_hw_37xx_info_init,
- .power_up = ivpu_hw_37xx_power_up,
- .is_idle = ivpu_hw_37xx_is_idle,
- .wait_for_idle = ivpu_hw_37xx_wait_for_idle,
- .power_down = ivpu_hw_37xx_power_down,
- .reset = ivpu_hw_37xx_reset,
- .boot_fw = ivpu_hw_37xx_boot_fw,
- .wdt_disable = ivpu_hw_37xx_wdt_disable,
- .diagnose_failure = ivpu_hw_37xx_diagnose_failure,
- .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
- .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
- .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
- .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
- .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
- .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
- .reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
- .reg_db_set = ivpu_hw_37xx_reg_db_set,
- .reg_ipc_rx_addr_get = ivpu_hw_37xx_reg_ipc_rx_addr_get,
- .reg_ipc_rx_count_get = ivpu_hw_37xx_reg_ipc_rx_count_get,
- .reg_ipc_tx_set = ivpu_hw_37xx_reg_ipc_tx_set,
- .irq_clear = ivpu_hw_37xx_irq_clear,
- .irq_enable = ivpu_hw_37xx_irq_enable,
- .irq_disable = ivpu_hw_37xx_irq_disable,
- .irq_handler = ivpu_hw_37xx_irq_handler,
-};
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
index f6fec1919202..cf5e2f01049c 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@@ -8,78 +8,6 @@
#include <linux/bits.h>
-#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE 0x00000000u
-
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT 0x00000004u
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1)
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2)
-
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0)
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16)
-
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0)
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16)
-
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0)
-
-#define VPU_37XX_BUTTRESS_WP_REQ_CMD 0x00000014u
-#define VPU_37XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0)
-
-#define VPU_37XX_BUTTRESS_WP_DOWNLOAD 0x00000018u
-#define VPU_37XX_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0)
-
-#define VPU_37XX_BUTTRESS_CURRENT_PLL 0x0000001cu
-#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0)
-
-#define VPU_37XX_BUTTRESS_PLL_ENABLE 0x00000020u
-
-#define VPU_37XX_BUTTRESS_FMIN_FUSE 0x00000024u
-#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0)
-#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8)
-
-#define VPU_37XX_BUTTRESS_FMAX_FUSE 0x00000028u
-#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0)
-
-#define VPU_37XX_BUTTRESS_TILE_FUSE 0x0000002cu
-#define VPU_37XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2)
-
-#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK 0x00000030u
-#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK 0x00000034u
-
-#define VPU_37XX_BUTTRESS_PLL_STATUS 0x00000040u
-#define VPU_37XX_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1)
-
-#define VPU_37XX_BUTTRESS_VPU_STATUS 0x00000044u
-#define VPU_37XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1)
-
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2)
-
-#define VPU_37XX_BUTTRESS_VPU_IP_RESET 0x00000050u
-#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0)
-
-#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u
-#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u
-#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u
-
-#define VPU_37XX_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u
-#define VPU_37XX_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u
-#define VPU_37XX_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u
-
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG 0x000000b0u
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0)
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12)
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20)
-
-#define VPU_37XX_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u
-
#define VPU_37XX_HOST_SS_CPR_CLK_SET 0x00000084u
#define VPU_37XX_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1)
#define VPU_37XX_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10)
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
deleted file mode 100644
index b0b88d4c8926..000000000000
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ /dev/null
@@ -1,1250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020-2023 Intel Corporation
- */
-
-#include "ivpu_drv.h"
-#include "ivpu_fw.h"
-#include "ivpu_hw.h"
-#include "ivpu_hw_40xx_reg.h"
-#include "ivpu_hw_reg_io.h"
-#include "ivpu_ipc.h"
-#include "ivpu_mmu.h"
-#include "ivpu_pm.h"
-
-#include <linux/dmi.h>
-
-#define TILE_MAX_NUM 6
-#define TILE_MAX_MASK 0x3f
-
-#define LNL_HW_ID 0x4040
-
-#define SKU_TILE_SHIFT 0u
-#define SKU_TILE_MASK 0x0000ffffu
-#define SKU_HW_ID_SHIFT 16u
-#define SKU_HW_ID_MASK 0xffff0000u
-
-#define PLL_CONFIG_DEFAULT 0x0
-#define PLL_CDYN_DEFAULT 0x80
-#define PLL_EPP_DEFAULT 0x80
-#define PLL_REF_CLK_FREQ (50 * 1000000)
-#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
-
-#define PLL_PROFILING_FREQ_DEFAULT 38400000
-#define PLL_PROFILING_FREQ_HIGH 400000000
-
-#define TIM_SAFE_ENABLE 0xf1d0dead
-#define TIM_WATCHDOG_RESET_VALUE 0xffffffff
-
-#define TIMEOUT_US (150 * USEC_PER_MSEC)
-#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
-#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
-
-#define WEIGHTS_DEFAULT 0xf711f711u
-#define WEIGHTS_ATS_DEFAULT 0x0000f711u
-
-#define ICB_0_IRQ_MASK ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT)))
-
-#define ICB_1_IRQ_MASK ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \
- (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT)))
-
-#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK)
-
-#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
- (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \
- (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \
- (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \
- (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR)) | \
- (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR)))
-
-#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK)
-#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1)
-
-#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \
- (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \
- (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \
- (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \
- (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \
- (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
- (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
-
-static char *ivpu_platform_to_str(u32 platform)
-{
- switch (platform) {
- case IVPU_PLATFORM_SILICON:
- return "SILICON";
- case IVPU_PLATFORM_SIMICS:
- return "SIMICS";
- case IVPU_PLATFORM_FPGA:
- return "FPGA";
- default:
- return "Invalid platform";
- }
-}
-
-static const struct dmi_system_id ivpu_dmi_platform_simulation[] = {
- {
- .ident = "Intel Simics",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"),
- DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
- DMI_MATCH(DMI_BOARD_SERIAL, "123456789"),
- },
- },
- {
- .ident = "Intel Simics",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "Simics"),
- },
- },
- { }
-};
-
-static void ivpu_hw_read_platform(struct ivpu_device *vdev)
-{
- if (dmi_check_system(ivpu_dmi_platform_simulation))
- vdev->platform = IVPU_PLATFORM_SIMICS;
- else
- vdev->platform = IVPU_PLATFORM_SILICON;
-
- ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n",
- ivpu_platform_to_str(vdev->platform), vdev->platform);
-}
-
-static void ivpu_hw_wa_init(struct ivpu_device *vdev)
-{
- vdev->wa.punit_disabled = ivpu_is_fpga(vdev);
- vdev->wa.clear_runtime_mem = false;
-
- if (ivpu_hw_gen(vdev) == IVPU_HW_40XX)
- vdev->wa.disable_clock_relinquish = true;
-
- IVPU_PRINT_WA(punit_disabled);
- IVPU_PRINT_WA(clear_runtime_mem);
- IVPU_PRINT_WA(disable_clock_relinquish);
-}
-
-static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
-{
- if (ivpu_is_fpga(vdev)) {
- vdev->timeout.boot = 100000;
- vdev->timeout.jsm = 50000;
- vdev->timeout.tdr = 2000000;
- vdev->timeout.reschedule_suspend = 1000;
- vdev->timeout.autosuspend = -1;
- vdev->timeout.d0i3_entry_msg = 500;
- } else if (ivpu_is_simics(vdev)) {
- vdev->timeout.boot = 50;
- vdev->timeout.jsm = 500;
- vdev->timeout.tdr = 10000;
- vdev->timeout.reschedule_suspend = 10;
- vdev->timeout.autosuspend = -1;
- vdev->timeout.d0i3_entry_msg = 100;
- } else {
- vdev->timeout.boot = 1000;
- vdev->timeout.jsm = 500;
- vdev->timeout.tdr = 2000;
- vdev->timeout.reschedule_suspend = 10;
- vdev->timeout.autosuspend = 10;
- vdev->timeout.d0i3_entry_msg = 5;
- }
-}
-
-static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
-{
- return REGB_POLL_FLD(VPU_40XX_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US);
-}
-
-static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio,
- u16 target_ratio, u16 epp, u16 config, u16 cdyn)
-{
- int ret;
- u32 val;
-
- ret = ivpu_pll_wait_for_cmd_send(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret);
- return ret;
- }
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0);
- val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val);
- val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val);
- REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, val);
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1);
- val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val);
- val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, EPP, epp, val);
- REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, val);
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2);
- val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val);
- val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, CDYN, cdyn, val);
- REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, val);
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_CMD);
- val = REG_SET_FLD(VPU_40XX_BUTTRESS_WP_REQ_CMD, SEND, val);
- REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_CMD, val);
-
- ret = ivpu_pll_wait_for_cmd_send(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev)
-{
- return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US);
-}
-
-static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev)
-{
- if (ivpu_is_simics(vdev))
- return 0;
-
- return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US);
-}
-
-static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
-{
- struct ivpu_hw_info *hw = vdev->hw;
- u8 fuse_min_ratio, fuse_pn_ratio, fuse_max_ratio;
- u32 fmin_fuse, fmax_fuse;
-
- fmin_fuse = REGB_RD32(VPU_40XX_BUTTRESS_FMIN_FUSE);
- fuse_min_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse);
- fuse_pn_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse);
-
- fmax_fuse = REGB_RD32(VPU_40XX_BUTTRESS_FMAX_FUSE);
- fuse_max_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse);
-
- hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio);
- hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio);
- hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio);
-}
-
-static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
-{
- u16 config = enable ? PLL_CONFIG_DEFAULT : 0;
- u16 cdyn = enable ? PLL_CDYN_DEFAULT : 0;
- u16 epp = enable ? PLL_EPP_DEFAULT : 0;
- struct ivpu_hw_info *hw = vdev->hw;
- u16 target_ratio = hw->pll.pn_ratio;
- int ret;
-
- ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, epp: 0x%x, config: 0x%x, cdyn: 0x%x\n",
- PLL_RATIO_TO_FREQ(target_ratio), epp, config, cdyn);
-
- ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio,
- target_ratio, epp, config, cdyn);
- if (ret) {
- ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret);
- return ret;
- }
-
- if (enable) {
- ret = ivpu_pll_wait_for_status_ready(vdev);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for PLL ready status\n");
- return ret;
- }
- }
-
- return 0;
-}
-
-static int ivpu_pll_enable(struct ivpu_device *vdev)
-{
- return ivpu_pll_drive(vdev, true);
-}
-
-static int ivpu_pll_disable(struct ivpu_device *vdev)
-{
- return ivpu_pll_drive(vdev, false);
-}
-
-static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN);
-
- if (enable) {
- val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val);
- val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val);
- val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val);
- } else {
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val);
- }
-
- REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val);
-}
-
-static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN);
-
- if (enable) {
- val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val);
- val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val);
- val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val);
- } else {
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val);
- }
-
- REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val);
-}
-
-static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static void ivpu_boot_idle_gen_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN);
-
- if (enable)
- val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val);
- else
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val);
-
- REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val);
-}
-
-static int ivpu_boot_host_ss_check(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_boot_noc_qreqn_check(vdev, 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qreqn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_noc_qacceptn_check(vdev, 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_noc_qdeny_check(vdev, 0x0);
- if (ret)
- ivpu_err(vdev, "Failed qdeny check %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN);
- if (enable)
- val = REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
- else
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
- REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val);
-
- ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_noc_qdeny_check(vdev, 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qdeny check: %d\n", ret);
- return ret;
- }
-
- if (enable) {
- REGB_WR32(VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT);
- REGB_WR32(VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT);
- }
-
- return ret;
-}
-
-static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev)
-{
- return ivpu_boot_host_ss_axi_drive(vdev, true);
-}
-
-static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN);
- if (enable) {
- val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val);
- val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
- } else {
- val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val);
- val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
- }
- REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val);
-
- ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0);
- if (ret)
- ivpu_err(vdev, "Failed qdeny check: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev)
-{
- return ivpu_boot_host_ss_top_noc_drive(vdev, true);
-}
-
-static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
-
- if (enable)
- val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
- else
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
-
- REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
-
- if (enable)
- ndelay(500);
-}
-
-static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0);
-
- if (enable)
- val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
- else
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
-
- REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
-
- if (!enable)
- ndelay(500);
-}
-
-static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
-{
- if (ivpu_is_fpga(vdev))
- return 0;
-
- return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU,
- exp_val, PWR_ISLAND_STATUS_TIMEOUT_US);
-}
-
-static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0);
-
- if (enable)
- val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val);
- else
- val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val);
-
- REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val);
-}
-
-static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES);
-
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val);
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val);
-
- REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val);
-}
-
-static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev)
-{
- u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV);
-
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val);
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val);
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
- val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
-
- REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val);
-}
-
-static int ivpu_boot_cpu_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_cpu_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
-{
- u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY);
-
- if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val))
- return -EIO;
-
- return 0;
-}
-
-static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_wait_for_clock_own_resource_ack(vdev);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n");
- return ret;
- }
-
- ivpu_boot_pwr_island_trickle_drive(vdev, true);
- ivpu_boot_pwr_island_drive(vdev, true);
-
- ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1);
- if (ret) {
- ivpu_err(vdev, "Timed out waiting for power island status\n");
- return ret;
- }
-
- ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qrenqn check %d\n", ret);
- return ret;
- }
-
- ivpu_boot_host_ss_clk_drive(vdev, true);
- ivpu_boot_host_ss_rst_drive(vdev, true);
- ivpu_boot_pwr_island_isolation_drive(vdev, false);
-
- return ret;
-}
-
-static int ivpu_boot_soc_cpu_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN);
- if (enable)
- val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val);
- else
- val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val);
- REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val);
-
- ret = ivpu_boot_cpu_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
- if (ret) {
- ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_cpu_noc_qdeny_check(vdev, 0x0);
- if (ret)
- ivpu_err(vdev, "Failed qdeny check: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_boot_soc_cpu_enable(struct ivpu_device *vdev)
-{
- return ivpu_boot_soc_cpu_drive(vdev, true);
-}
-
-static int ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev)
-{
- int ret;
- u32 val;
- u64 val64;
-
- ret = ivpu_boot_soc_cpu_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret);
- return ret;
- }
-
- val64 = vdev->fw->entry_point;
- val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1;
- REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64);
-
- val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO);
- val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val);
- REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val);
-
- ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n",
- ivpu_fw_is_cold_boot(vdev) ? "cold boot" : "resume");
-
- return 0;
-}
-
-static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable)
-{
- int ret;
- u32 val;
-
- ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
- if (ret) {
- ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret);
- return ret;
- }
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_D0I3_CONTROL);
- if (enable)
- val = REG_SET_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, I3, val);
- else
- val = REG_CLR_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, I3, val);
- REGB_WR32(VPU_40XX_BUTTRESS_D0I3_CONTROL, val);
-
- ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
- if (ret) {
- ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret);
- return ret;
- }
-
- return 0;
-}
-
-static bool ivpu_tile_disable_check(u32 config)
-{
- /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */
- if (config == 0)
- return true;
-
- if (config > BIT(TILE_MAX_NUM - 1))
- return false;
-
- if ((config & (config - 1)) == 0)
- return true;
-
- return false;
-}
-
-static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
-{
- struct ivpu_hw_info *hw = vdev->hw;
- u32 tile_disable;
- u32 fuse;
-
- fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE);
- if (!REG_TEST_FLD(VPU_40XX_BUTTRESS_TILE_FUSE, VALID, fuse)) {
- ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse);
- return -EIO;
- }
-
- tile_disable = REG_GET_FLD(VPU_40XX_BUTTRESS_TILE_FUSE, CONFIG, fuse);
- if (!ivpu_tile_disable_check(tile_disable)) {
- ivpu_err(vdev, "Fuse: Invalid tile disable config (0x%x)\n", tile_disable);
- return -EIO;
- }
-
- if (tile_disable)
- ivpu_dbg(vdev, MISC, "Fuse: %d tiles enabled. Tile number %d disabled\n",
- TILE_MAX_NUM - 1, ffs(tile_disable) - 1);
- else
- ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM);
-
- hw->tile_fuse = tile_disable;
- hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
-
- ivpu_pll_init_frequency_ratios(vdev);
-
- ivpu_hw_init_range(&vdev->hw->ranges.global, 0x80000000, SZ_512M);
- ivpu_hw_init_range(&vdev->hw->ranges.user, 0x80000000, SZ_256M);
- ivpu_hw_init_range(&vdev->hw->ranges.shave, 0x80000000 + SZ_256M, SZ_2G - SZ_256M);
- ivpu_hw_init_range(&vdev->hw->ranges.dma, 0x200000000, SZ_8G);
-
- ivpu_hw_read_platform(vdev);
- ivpu_hw_wa_init(vdev);
- ivpu_hw_timeouts_init(vdev);
-
- return 0;
-}
-
-static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev)
-{
- int ret;
- u32 val;
-
- ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, 0, TIMEOUT_US);
- if (ret) {
- ivpu_err(vdev, "Wait for *_TRIGGER timed out\n");
- return ret;
- }
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_IP_RESET);
- val = REG_SET_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, val);
- REGB_WR32(VPU_40XX_BUTTRESS_IP_RESET, val);
-
- ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, 0, TIMEOUT_US);
- if (ret)
- ivpu_err(vdev, "Timed out waiting for RESET completion\n");
-
- return ret;
-}
-
-static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
-{
- int ret = 0;
-
- if (ivpu_hw_40xx_ip_reset(vdev)) {
- ivpu_err(vdev, "Failed to reset NPU IP\n");
- ret = -EIO;
- }
-
- if (ivpu_pll_disable(vdev)) {
- ivpu_err(vdev, "Failed to disable PLL\n");
- ret = -EIO;
- }
-
- return ret;
-}
-
-static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev)
-{
- int ret;
-
- if (IVPU_WA(punit_disabled))
- return 0;
-
- ret = ivpu_boot_d0i3_drive(vdev, true);
- if (ret)
- ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret);
-
- udelay(5); /* VPU requires 5 us to complete the transition */
-
- return ret;
-}
-
-static int ivpu_hw_40xx_d0i3_disable(struct ivpu_device *vdev)
-{
- int ret;
-
- if (IVPU_WA(punit_disabled))
- return 0;
-
- ret = ivpu_boot_d0i3_drive(vdev, false);
- if (ret)
- ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret);
-
- return ret;
-}
-
-static void ivpu_hw_40xx_profiling_freq_reg_set(struct ivpu_device *vdev)
-{
- u32 val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS);
-
- if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT)
- val = REG_CLR_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, PERF_CLK, val);
- else
- val = REG_SET_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, PERF_CLK, val);
-
- REGB_WR32(VPU_40XX_BUTTRESS_VPU_STATUS, val);
-}
-
-static void ivpu_hw_40xx_ats_print(struct ivpu_device *vdev)
-{
- ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n",
- REGB_RD32(VPU_40XX_BUTTRESS_HM_ATS) ? "Enable" : "Disable");
-}
-
-static void ivpu_hw_40xx_clock_relinquish_disable(struct ivpu_device *vdev)
-{
- u32 val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS);
-
- val = REG_SET_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, DISABLE_CLK_RELINQUISH, val);
- REGB_WR32(VPU_40XX_BUTTRESS_VPU_STATUS, val);
-}
-
-static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_hw_40xx_d0i3_disable(vdev);
- if (ret)
- ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
-
- ret = ivpu_pll_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable PLL: %d\n", ret);
- return ret;
- }
-
- if (IVPU_WA(disable_clock_relinquish))
- ivpu_hw_40xx_clock_relinquish_disable(vdev);
- ivpu_hw_40xx_profiling_freq_reg_set(vdev);
- ivpu_hw_40xx_ats_print(vdev);
-
- ret = ivpu_boot_host_ss_check(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to configure host SS: %d\n", ret);
- return ret;
- }
-
- ivpu_boot_idle_gen_drive(vdev, false);
-
- ret = ivpu_boot_pwr_domain_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable power domain: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_host_ss_axi_enable(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to enable AXI: %d\n", ret);
- return ret;
- }
-
- ret = ivpu_boot_host_ss_top_noc_enable(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_hw_40xx_boot_fw(struct ivpu_device *vdev)
-{
- int ret;
-
- ivpu_boot_no_snoop_enable(vdev);
- ivpu_boot_tbu_mmu_enable(vdev);
-
- ret = ivpu_boot_soc_cpu_boot(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret);
-
- return ret;
-}
-
-static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
-{
- u32 val;
-
- if (IVPU_WA(punit_disabled))
- return true;
-
- val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS);
- return REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, val) &&
- REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
-}
-
-static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev)
-{
- return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
-}
-
-static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
-{
- vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
- vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
-}
-
-static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
-{
- int ret = 0;
-
- ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
-
- if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev))
- ivpu_warn(vdev, "Failed to reset the NPU\n");
-
- if (ivpu_pll_disable(vdev)) {
- ivpu_err(vdev, "Failed to disable PLL\n");
- ret = -EIO;
- }
-
- if (ivpu_hw_40xx_d0i3_enable(vdev)) {
- ivpu_err(vdev, "Failed to enter D0I3\n");
- ret = -EIO;
- }
-
- return ret;
-}
-
-static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev)
-{
- u32 val;
-
- REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
- REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
-
- REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
- REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0);
-
- val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG);
- val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
- REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
-}
-
-static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev)
-{
- return vdev->hw->pll.profiling_freq;
-}
-
-static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
-{
- if (enable)
- vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH;
- else
- vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
-}
-
-/* Register indirect accesses */
-static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
-{
- u32 pll_curr_ratio;
-
- pll_curr_ratio = REGB_RD32(VPU_40XX_BUTTRESS_PLL_FREQ);
- pll_curr_ratio &= VPU_40XX_BUTTRESS_PLL_FREQ_RATIO_MASK;
-
- return PLL_RATIO_TO_FREQ(pll_curr_ratio);
-}
-
-static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
- return PLL_RATIO_TO_FREQ(ratio);
-}
-
-static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
-{
- return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
-}
-
-static u32 ivpu_hw_40xx_reg_telemetry_size_get(struct ivpu_device *vdev)
-{
- return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_SIZE);
-}
-
-static u32 ivpu_hw_40xx_reg_telemetry_enable_get(struct ivpu_device *vdev)
-{
- return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_ENABLE);
-}
-
-static void ivpu_hw_40xx_reg_db_set(struct ivpu_device *vdev, u32 db_id)
-{
- u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0;
- u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET);
-
- REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
-}
-
-static u32 ivpu_hw_40xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
-{
- return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM);
-}
-
-static u32 ivpu_hw_40xx_reg_ipc_rx_count_get(struct ivpu_device *vdev)
-{
- u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT);
-
- return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
-}
-
-static void ivpu_hw_40xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
-{
- REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
-}
-
-static void ivpu_hw_40xx_irq_clear(struct ivpu_device *vdev)
-{
- REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK);
-}
-
-static void ivpu_hw_40xx_irq_enable(struct ivpu_device *vdev)
-{
- REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK);
- REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK);
- REGB_WR32(VPU_40XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK);
- REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-}
-
-static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev)
-{
- REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
- REGB_WR32(VPU_40XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK);
- REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull);
- REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul);
-}
-
-static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
-{
- /* TODO: For LNN hang consider engine reset instead of full recovery */
- ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
-}
-
-static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
-{
- ivpu_hw_wdt_disable(vdev);
- ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
-}
-
-static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
-{
- ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
-}
-
-/* Handler for IRQs from VPU core (irqV) */
-static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
-{
- u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
-
- if (!status)
- return false;
-
- REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
- ivpu_mmu_irq_evtq_handler(vdev);
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
- ivpu_ipc_irq_handler(vdev, wake_thread);
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
- ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status))
- ivpu_mmu_irq_gerr_handler(vdev);
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status))
- ivpu_hw_40xx_irq_wdt_mss_handler(vdev);
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status))
- ivpu_hw_40xx_irq_wdt_nce_handler(vdev);
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
- ivpu_hw_40xx_irq_noc_firewall_handler(vdev);
-
- return true;
-}
-
-/* Handler for IRQs from Buttress core (irqB) */
-static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
-{
- bool schedule_recovery = false;
- u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
-
- if (!status)
- return false;
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
- ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) {
- ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
- REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG1),
- REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG2));
- REGB_WR32(VPU_40XX_BUTTRESS_ATS_ERR_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR, status)) {
- ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_40XX_BUTTRESS_CFI0_ERR_LOG));
- REGB_WR32(VPU_40XX_BUTTRESS_CFI0_ERR_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR, status)) {
- ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_40XX_BUTTRESS_CFI1_ERR_LOG));
- REGB_WR32(VPU_40XX_BUTTRESS_CFI1_ERR_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR, status)) {
- ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x",
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW),
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH));
- REGB_WR32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR, status)) {
- ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x",
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW),
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH));
- REGB_WR32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_CLEAR, 0x1);
- schedule_recovery = true;
- }
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR, status)) {
- ivpu_err(vdev, "Survivability error detected\n");
- schedule_recovery = true;
- }
-
- /* This must be done after interrupts are cleared at the source. */
- REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
-
- if (schedule_recovery)
- ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
-
- return true;
-}
-
-static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
-{
- bool irqv_handled, irqb_handled, wake_thread = false;
- struct ivpu_device *vdev = ptr;
-
- REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
-
- irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread);
- irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq);
-
- /* Re-enable global interrupts to re-trigger MSI for pending interrupts */
- REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-
- if (wake_thread)
- return IRQ_WAKE_THREAD;
- if (irqv_handled || irqb_handled)
- return IRQ_HANDLED;
- return IRQ_NONE;
-}
-
-static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev)
-{
- u32 irqv = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
- u32 irqb = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
-
- if (ivpu_hw_40xx_reg_ipc_rx_count_get(vdev))
- ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ");
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv))
- ivpu_err(vdev, "WDT MSS timeout detected\n");
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv))
- ivpu_err(vdev, "WDT NCE timeout detected\n");
-
- if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv))
- ivpu_err(vdev, "NOC Firewall irq detected\n");
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) {
- ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
- REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG1),
- REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG2));
- }
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR, irqb))
- ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_40XX_BUTTRESS_CFI0_ERR_LOG));
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR, irqb))
- ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_40XX_BUTTRESS_CFI1_ERR_LOG));
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR, irqb))
- ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n",
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW),
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH));
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR, irqb))
- ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n",
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW),
- REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH));
-
- if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR, irqb))
- ivpu_err(vdev, "Survivability error detected\n");
-}
-
-const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
- .info_init = ivpu_hw_40xx_info_init,
- .power_up = ivpu_hw_40xx_power_up,
- .is_idle = ivpu_hw_40xx_is_idle,
- .wait_for_idle = ivpu_hw_40xx_wait_for_idle,
- .power_down = ivpu_hw_40xx_power_down,
- .reset = ivpu_hw_40xx_reset,
- .boot_fw = ivpu_hw_40xx_boot_fw,
- .wdt_disable = ivpu_hw_40xx_wdt_disable,
- .diagnose_failure = ivpu_hw_40xx_diagnose_failure,
- .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
- .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
- .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
- .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
- .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
- .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
- .reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
- .reg_db_set = ivpu_hw_40xx_reg_db_set,
- .reg_ipc_rx_addr_get = ivpu_hw_40xx_reg_ipc_rx_addr_get,
- .reg_ipc_rx_count_get = ivpu_hw_40xx_reg_ipc_rx_count_get,
- .reg_ipc_tx_set = ivpu_hw_40xx_reg_ipc_tx_set,
- .irq_clear = ivpu_hw_40xx_irq_clear,
- .irq_enable = ivpu_hw_40xx_irq_enable,
- .irq_disable = ivpu_hw_40xx_irq_disable,
- .irq_handler = ivpu_hw_40xx_irq_handler,
-};
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
index ff4a5d4f5821..fc0ee8d637f9 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
@@ -8,91 +8,6 @@
#include <linux/bits.h>
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT 0x00000000u
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0)
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1)
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2)
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3)
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4)
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5)
-#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6)
-
-#define VPU_40XX_BUTTRESS_LOCAL_INT_MASK 0x00000004u
-#define VPU_40XX_BUTTRESS_GLOBAL_INT_MASK 0x00000008u
-
-#define VPU_40XX_BUTTRESS_HM_ATS 0x0000000cu
-
-#define VPU_40XX_BUTTRESS_ATS_ERR_LOG1 0x00000010u
-#define VPU_40XX_BUTTRESS_ATS_ERR_LOG2 0x00000014u
-#define VPU_40XX_BUTTRESS_ATS_ERR_CLEAR 0x00000018u
-
-#define VPU_40XX_BUTTRESS_CFI0_ERR_LOG 0x0000001cu
-#define VPU_40XX_BUTTRESS_CFI0_ERR_CLEAR 0x00000020u
-
-#define VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u
-
-#define VPU_40XX_BUTTRESS_CFI1_ERR_LOG 0x00000040u
-#define VPU_40XX_BUTTRESS_CFI1_ERR_CLEAR 0x00000044u
-
-#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW 0x00000048u
-#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH 0x0000004cu
-#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_CLEAR 0x00000050u
-
-#define VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS 0x00000054u
-
-#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW 0x00000058u
-#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH 0x0000005cu
-#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_CLEAR 0x00000060u
-
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000130u
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0)
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16)
-
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1 0x00000134u
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0)
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16)
-
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000138u
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0)
-#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16)
-
-#define VPU_40XX_BUTTRESS_WP_REQ_CMD 0x0000013cu
-#define VPU_40XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0)
-
-#define VPU_40XX_BUTTRESS_PLL_FREQ 0x00000148u
-#define VPU_40XX_BUTTRESS_PLL_FREQ_RATIO_MASK GENMASK(15, 0)
-
-#define VPU_40XX_BUTTRESS_TILE_FUSE 0x00000150u
-#define VPU_40XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0)
-#define VPU_40XX_BUTTRESS_TILE_FUSE_CONFIG_MASK GENMASK(6, 1)
-
-#define VPU_40XX_BUTTRESS_VPU_STATUS 0x00000154u
-#define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0)
-#define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1)
-#define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2)
-#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6)
-#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7)
-#define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11)
-#define VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12)
-
-#define VPU_40XX_BUTTRESS_IP_RESET 0x00000160u
-#define VPU_40XX_BUTTRESS_IP_RESET_TRIGGER_MASK BIT_MASK(0)
-
-#define VPU_40XX_BUTTRESS_D0I3_CONTROL 0x00000164u
-#define VPU_40XX_BUTTRESS_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0)
-#define VPU_40XX_BUTTRESS_D0I3_CONTROL_I3_MASK BIT_MASK(2)
-
-#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000168u
-#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x0000016cu
-#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000170u
-
-#define VPU_40XX_BUTTRESS_FMIN_FUSE 0x00000174u
-#define VPU_40XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0)
-#define VPU_40XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8)
-
-#define VPU_40XX_BUTTRESS_FMAX_FUSE 0x00000178u
-#define VPU_40XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0)
-
#define VPU_40XX_HOST_SS_CPR_CLK_EN 0x00000080u
#define VPU_40XX_HOST_SS_CPR_CLK_EN_TOP_NOC_MASK BIT_MASK(1)
#define VPU_40XX_HOST_SS_CPR_CLK_EN_DSS_MAS_MASK BIT_MASK(10)
@@ -198,6 +113,14 @@
#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu
#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0_CSS_CPU_MASK BIT_MASK(3)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 8)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16)
+
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0)
+
#define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u
#define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0)
#define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1)
@@ -205,6 +128,9 @@
#define VPU_40XX_HOST_SS_AON_DPU_ACTIVE 0x00030204u
#define VPU_40XX_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0)
+#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE 0x00030210u
+#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE_REQ_OVERRIDE_MASK BIT_MASK(0)
+
#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO 0x00040040u
#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_DONE_MASK BIT_MASK(0)
#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
new file mode 100644
index 000000000000..06e65c592618
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -0,0 +1,905 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2025 Intel Corporation
+ */
+
+#include <linux/units.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_hw_btrs.h"
+#include "ivpu_hw_btrs_lnl_reg.h"
+#include "ivpu_hw_btrs_mtl_reg.h"
+#include "ivpu_hw_reg_io.h"
+#include "ivpu_pm.h"
+
+#define BTRS_MTL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR)) | \
+ (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR)))
+
+#define BTRS_LNL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR)) | \
+ (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR)) | \
+ (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR)) | \
+ (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR)) | \
+ (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR)) | \
+ (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR)))
+
+#define BTRS_MTL_ALL_IRQ_MASK (BTRS_MTL_IRQ_MASK | (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, \
+ FREQ_CHANGE)))
+
+#define BTRS_IRQ_DISABLE_MASK ((u32)-1)
+
+#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
+
+
+#define PLL_CDYN_DEFAULT 0x80
+#define PLL_EPP_DEFAULT 0x80
+#define PLL_REF_CLK_FREQ 50000000ull
+#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
+
+#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
+#define TIMEOUT_US (150 * USEC_PER_MSEC)
+
+/* Work point configuration values */
+#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio))
+#define MTL_CONFIG_1_TILE 0x01
+#define MTL_CONFIG_2_TILE 0x02
+#define MTL_PLL_RATIO_5_3 0x01
+#define MTL_PLL_RATIO_4_3 0x02
+#define BTRS_MTL_TILE_FUSE_ENABLE_BOTH 0x0
+#define BTRS_MTL_TILE_SKU_BOTH 0x3630
+
+#define BTRS_LNL_TILE_MAX_NUM 6
+#define BTRS_LNL_TILE_MAX_MASK 0x3f
+
+#define WEIGHTS_DEFAULT 0xf711f711u
+#define WEIGHTS_ATS_DEFAULT 0x0000f711u
+
+#define DCT_REQ 0x2
+#define DCT_ENABLE 0x1
+#define DCT_DISABLE 0x0
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
+
+int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
+{
+ REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
+ if (REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) == BTRS_MTL_ALL_IRQ_MASK) {
+ /* Writing 1s does not clear the interrupt status register */
+ REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0);
+ return true;
+ }
+
+ return false;
+}
+
+static void freq_ratios_init_mtl(struct ivpu_device *vdev)
+{
+ struct ivpu_hw_info *hw = vdev->hw;
+ u32 fmin_fuse, fmax_fuse;
+
+ fmin_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMIN_FUSE);
+ hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, MIN_RATIO, fmin_fuse);
+ hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, PN_RATIO, fmin_fuse);
+
+ fmax_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMAX_FUSE);
+ hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMAX_FUSE, MAX_RATIO, fmax_fuse);
+}
+
+static void freq_ratios_init_lnl(struct ivpu_device *vdev)
+{
+ struct ivpu_hw_info *hw = vdev->hw;
+ u32 fmin_fuse, fmax_fuse;
+
+ fmin_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMIN_FUSE);
+ hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, MIN_RATIO, fmin_fuse);
+ hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, PN_RATIO, fmin_fuse);
+
+ fmax_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMAX_FUSE);
+ hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMAX_FUSE, MAX_RATIO, fmax_fuse);
+}
+
+void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev)
+{
+ struct ivpu_hw_info *hw = vdev->hw;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ freq_ratios_init_mtl(vdev);
+ else
+ freq_ratios_init_lnl(vdev);
+
+ hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, hw->pll.min_ratio, hw->pll.max_ratio);
+ hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, hw->pll.max_ratio);
+ hw->pll.pn_ratio = clamp_t(u8, hw->pll.pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio);
+}
+
+static bool tile_disable_check(u32 config)
+{
+ /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */
+ if (config == 0)
+ return true;
+
+ if (config > BIT(BTRS_LNL_TILE_MAX_NUM - 1))
+ return false;
+
+ if ((config & (config - 1)) == 0)
+ return true;
+
+ return false;
+}
+
+static int read_tile_config_fuse(struct ivpu_device *vdev, u32 *tile_fuse_config)
+{
+ u32 fuse;
+ u32 config;
+
+ fuse = REGB_RD32(VPU_HW_BTRS_LNL_TILE_FUSE);
+ if (!REG_TEST_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, VALID, fuse)) {
+ ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse);
+ return -EIO;
+ }
+
+ config = REG_GET_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, CONFIG, fuse);
+ if (!tile_disable_check(config))
+ ivpu_warn(vdev, "More than 1 tile disabled, tile fuse config mask: 0x%x\n", config);
+
+ ivpu_dbg(vdev, MISC, "Tile disable config mask: 0x%x\n", config);
+
+ *tile_fuse_config = config;
+ return 0;
+}
+
+static int info_init_mtl(struct ivpu_device *vdev)
+{
+ struct ivpu_hw_info *hw = vdev->hw;
+
+ hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
+ hw->sku = BTRS_MTL_TILE_SKU_BOTH;
+ hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
+
+ return 0;
+}
+
+static int info_init_lnl(struct ivpu_device *vdev)
+{
+ struct ivpu_hw_info *hw = vdev->hw;
+ u32 tile_fuse_config;
+ int ret;
+
+ ret = read_tile_config_fuse(vdev, &tile_fuse_config);
+ if (ret)
+ return ret;
+
+ hw->tile_fuse = tile_fuse_config;
+ hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
+
+ return 0;
+}
+
+int ivpu_hw_btrs_info_init(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return info_init_mtl(vdev);
+ else
+ return info_init_lnl(vdev);
+}
+
+static int wp_request_sync(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return REGB_POLL_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US);
+ else
+ return REGB_POLL_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US);
+}
+
+static int wait_for_status_ready(struct ivpu_device *vdev, bool enable)
+{
+ u32 exp_val = enable ? 0x1 : 0x0;
+
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US);
+ else
+ return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US);
+}
+
+struct wp_request {
+ u16 min;
+ u16 max;
+ u16 target;
+ u16 cfg;
+ u16 epp;
+ u16 cdyn;
+};
+
+static void wp_request_mtl(struct ivpu_device *vdev, struct wp_request *wp)
+{
+ u32 val;
+
+ val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val);
+ REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, val);
+
+ val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, EPP, PLL_EPP_DEFAULT, val);
+ REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, val);
+
+ val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val);
+ REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, val);
+
+ val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_CMD);
+ val = REG_SET_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, val);
+ REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_CMD, val);
+}
+
+static void wp_request_lnl(struct ivpu_device *vdev, struct wp_request *wp)
+{
+ u32 val;
+
+ val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, val);
+
+ val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, EPP, wp->epp, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, val);
+
+ val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CDYN, wp->cdyn, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, val);
+
+ val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_CMD);
+ val = REG_SET_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_CMD, val);
+}
+
+static void wp_request(struct ivpu_device *vdev, struct wp_request *wp)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ wp_request_mtl(vdev, wp);
+ else
+ wp_request_lnl(vdev, wp);
+}
+
+static int wp_request_send(struct ivpu_device *vdev, struct wp_request *wp)
+{
+ int ret;
+
+ ret = wp_request_sync(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to sync before workpoint request: %d\n", ret);
+ return ret;
+ }
+
+ wp_request(vdev, wp);
+
+ ret = wp_request_sync(vdev);
+ if (ret)
+ ivpu_err(vdev, "Failed to sync after workpoint request: %d\n", ret);
+
+ return ret;
+}
+
+static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp, bool enable)
+{
+ struct ivpu_hw_info *hw = vdev->hw;
+
+ wp->min = hw->pll.min_ratio;
+ wp->max = hw->pll.max_ratio;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) {
+ wp->target = enable ? hw->pll.pn_ratio : 0;
+ wp->cfg = enable ? hw->config : 0;
+ wp->cdyn = 0;
+ wp->epp = 0;
+ } else {
+ wp->target = hw->pll.pn_ratio;
+ wp->cfg = 0;
+ wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0;
+ wp->epp = enable ? PLL_EPP_DEFAULT : 0;
+ }
+}
+
+static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable)
+{
+ u32 exp_val = enable ? 0x1 : 0x0;
+
+ if (ivpu_hw_btrs_gen(vdev) != IVPU_HW_BTRS_MTL)
+ return 0;
+
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ return REGB_POLL_FLD(VPU_HW_BTRS_MTL_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US);
+}
+
+static int wait_for_cdyn_deassert(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return 0;
+
+ return REGB_POLL_FLD(VPU_HW_BTRS_LNL_CDYN, CDYN, 0, PLL_TIMEOUT_US);
+}
+
+int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
+{
+ struct wp_request wp;
+ int ret;
+
+ if (IVPU_WA(punit_disabled)) {
+ ivpu_dbg(vdev, PM, "Skipping workpoint request\n");
+ return 0;
+ }
+
+ prepare_wp_request(vdev, &wp, enable);
+
+ ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
+ pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
+
+ ret = wp_request_send(vdev, &wp);
+ if (ret) {
+ ivpu_err(vdev, "Failed to send workpoint request: %d\n", ret);
+ return ret;
+ }
+
+ ret = wait_for_pll_lock(vdev, enable);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for PLL lock\n");
+ return ret;
+ }
+
+ ret = wait_for_status_ready(vdev, enable);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for NPU ready status\n");
+ return ret;
+ }
+
+ if (!enable) {
+ ret = wait_for_cdyn_deassert(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for CDYN deassert\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int d0i3_drive_mtl(struct ivpu_device *vdev, bool enable)
+{
+ int ret;
+ u32 val;
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
+ if (ret) {
+ ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret);
+ return ret;
+ }
+
+ val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL);
+ if (enable)
+ val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val);
+ else
+ val = REG_CLR_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val);
+ REGB_WR32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, val);
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
+ if (ret)
+ ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret);
+
+ return ret;
+}
+
+static int d0i3_drive_lnl(struct ivpu_device *vdev, bool enable)
+{
+ int ret;
+ u32 val;
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
+ if (ret) {
+ ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret);
+ return ret;
+ }
+
+ val = REGB_RD32(VPU_HW_BTRS_LNL_D0I3_CONTROL);
+ if (enable)
+ val = REG_SET_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val);
+ else
+ val = REG_CLR_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_D0I3_CONTROL, val);
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
+ if (ret) {
+ ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int d0i3_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return d0i3_drive_mtl(vdev, enable);
+ else
+ return d0i3_drive_lnl(vdev, enable);
+}
+
+int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev)
+{
+ int ret;
+
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ ret = d0i3_drive(vdev, true);
+ if (ret)
+ ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret);
+
+ udelay(5); /* VPU requires 5 us to complete the transition */
+
+ return ret;
+}
+
+int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev)
+{
+ int ret;
+
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ ret = d0i3_drive(vdev, false);
+ if (ret)
+ ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return 0;
+
+ return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US);
+}
+
+void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev)
+{
+ REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT);
+ REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT);
+}
+
+static int ip_reset_mtl(struct ivpu_device *vdev)
+{
+ int ret;
+ u32 val;
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
+ return ret;
+ }
+
+ val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_IP_RESET);
+ val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, val);
+ REGB_WR32(VPU_HW_BTRS_MTL_VPU_IP_RESET, val);
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+ if (ret)
+ ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+ return ret;
+}
+
+static int ip_reset_lnl(struct ivpu_device *vdev)
+{
+ int ret;
+ u32 val;
+
+ ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev);
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+ if (ret) {
+ ivpu_err(vdev, "Wait for *_TRIGGER timed out\n");
+ return ret;
+ }
+
+ val = REGB_RD32(VPU_HW_BTRS_LNL_IP_RESET);
+ val = REG_SET_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_IP_RESET, val);
+
+ ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+ if (ret)
+ ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+ return ret;
+}
+
+int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev)
+{
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return ip_reset_mtl(vdev);
+ else
+ return ip_reset_lnl(vdev);
+}
+
+void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev)
+{
+ u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS);
+
+ if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT)
+ val = REG_CLR_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val);
+ else
+ val = REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val);
+
+ REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val);
+}
+
+void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev)
+{
+ ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n",
+ REGB_RD32(VPU_HW_BTRS_LNL_HM_ATS) ? "Enable" : "Disable");
+}
+
+void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev)
+{
+ u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS);
+
+ val = REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, DISABLE_CLK_RELINQUISH, val);
+ REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val);
+}
+
+bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev)
+{
+ u32 val;
+
+ if (IVPU_WA(punit_disabled))
+ return true;
+
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) {
+ val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_STATUS);
+
+ return REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, val) &&
+ REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, val);
+ } else {
+ val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS);
+
+ return REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, val) &&
+ REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, val);
+ }
+}
+
+int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+ else
+ return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
+static u32 pll_config_get_mtl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
+}
+
+static u32 pll_config_get_lnl(struct ivpu_device *vdev)
+{
+ return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
+}
+
+static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
+{
+ return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
+}
+
+static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio) / 2;
+}
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(ratio);
+ else
+ return pll_ratio_to_dpu_freq_lnl(ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
+{
+ return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev));
+ else
+ return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev));
+}
+
+/* Handler for IRQs from Buttress core (irqB) */
+bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
+{
+ u32 status = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK;
+ bool schedule_recovery = false;
+
+ if (!status)
+ return false;
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_mtl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
+ ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
+ REGB_WR32(VPU_HW_BTRS_MTL_ATS_ERR_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, status)) {
+ u32 ufi_log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG);
+
+ ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx",
+ ufi_log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, ufi_log),
+ REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, ufi_log),
+ REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, ufi_log));
+ REGB_WR32(VPU_HW_BTRS_MTL_UFI_ERR_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ /* This must be done after interrupts are cleared at the source. */
+ if (IVPU_WA(interrupt_clear_with_0))
+ /*
+ * Writing 1 triggers an interrupt, so we can't perform read update write.
+ * Clear local interrupt status by writing 0 to all bits.
+ */
+ REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0);
+ else
+ REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, status);
+
+ if (schedule_recovery)
+ ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
+
+ return true;
+}
+
+/* Handler for IRQs from Buttress core (irqB) */
+bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
+{
+ u32 status = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK;
+ bool schedule_recovery = false;
+
+ if (!status)
+ return false;
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) {
+ ivpu_dbg(vdev, IRQ, "Survivability IRQ\n");
+ queue_work(system_percpu_wq, &vdev->irq_dct_work);
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+ u32 pll = pll_config_get_lnl(vdev);
+
+ ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+ pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
+ ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
+ REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1),
+ REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2));
+ REGB_WR32(VPU_HW_BTRS_LNL_ATS_ERR_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, status)) {
+ ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG));
+ REGB_WR32(VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, status)) {
+ ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG));
+ REGB_WR32(VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, status)) {
+ ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x",
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW),
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH));
+ REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, status)) {
+ ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x",
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW),
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH));
+ REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR, 0x1);
+ schedule_recovery = true;
+ }
+
+ /* This must be done after interrupts are cleared at the source. */
+ REGB_WR32(VPU_HW_BTRS_LNL_INTERRUPT_STAT, status);
+
+ if (schedule_recovery)
+ ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
+
+ return true;
+}
+
+int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable)
+{
+ u32 val = REGB_RD32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW);
+ u32 cmd = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, CMD, val);
+ u32 param1 = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, PARAM1, val);
+
+ if (cmd != DCT_REQ) {
+ ivpu_err_ratelimited(vdev, "Unsupported PCODE command: 0x%x\n", cmd);
+ return -EBADR;
+ }
+
+ switch (param1) {
+ case DCT_ENABLE:
+ *enable = true;
+ return 0;
+ case DCT_DISABLE:
+ *enable = false;
+ return 0;
+ default:
+ ivpu_err_ratelimited(vdev, "Invalid PARAM1 value: %u\n", param1);
+ return -EINVAL;
+ }
+}
+
+void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u8 active_percent)
+{
+ u32 val = 0;
+ u32 cmd = enable ? DCT_ENABLE : DCT_DISABLE;
+
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, CMD, DCT_REQ, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM1, cmd, val);
+ val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val);
+
+ REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
+}
+
+u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET);
+ else
+ return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET);
+}
+
+u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE);
+ else
+ return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE);
+}
+
+u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE);
+ else
+ return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE);
+}
+
+void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1);
+ else
+ REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1);
+}
+
+void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0);
+ else
+ REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0);
+}
+
+void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) {
+ REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, (u32)(~BTRS_MTL_IRQ_MASK));
+ REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0);
+ } else {
+ REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, (u32)(~BTRS_LNL_IRQ_MASK));
+ REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0);
+ }
+}
+
+void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) {
+ REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1);
+ REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, BTRS_IRQ_DISABLE_MASK);
+ } else {
+ REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1);
+ REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, BTRS_IRQ_DISABLE_MASK);
+ }
+}
+
+static void diagnose_failure_mtl(struct ivpu_device *vdev)
+{
+ u32 reg = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK;
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, reg))
+ ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, reg)) {
+ u32 log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG);
+
+ ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx",
+ log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, log),
+ REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, log),
+ REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, log));
+ }
+}
+
+static void diagnose_failure_lnl(struct ivpu_device *vdev)
+{
+ u32 reg = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK;
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, reg)) {
+ ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
+ REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1),
+ REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2));
+ }
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, reg))
+ ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG));
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, reg))
+ ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG));
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, reg))
+ ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n",
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW),
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH));
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, reg))
+ ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n",
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW),
+ REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH));
+
+ if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, reg))
+ ivpu_err(vdev, "Survivability IRQ\n");
+}
+
+void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+ return diagnose_failure_mtl(vdev);
+ else
+ return diagnose_failure_lnl(vdev);
+}
+
+int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev)
+{
+ u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS);
+
+ return REG_GET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PLATFORM, reg);
+}
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
new file mode 100644
index 000000000000..c4c10e22f30f
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2025 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_BTRS_H__
+#define __IVPU_HW_BTRS_H__
+
+#include "ivpu_drv.h"
+#include "ivpu_hw_37xx_reg.h"
+#include "ivpu_hw_40xx_reg.h"
+#include "ivpu_hw_reg_io.h"
+
+#define PLL_PROFILING_FREQ_DEFAULT 38400000
+#define PLL_PROFILING_FREQ_HIGH 400000000
+
+#define DCT_DEFAULT_ACTIVE_PERCENT 30u
+#define DCT_PERIOD_US 35300u
+
+int ivpu_hw_btrs_info_init(struct ivpu_device *vdev);
+void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev);
+int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev);
+int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable);
+int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev);
+int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev);
+int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev);
+void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev);
+bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev);
+int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev);
+int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
+void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
+void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
+void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
+bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
+bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
+int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
+void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u8 active_percent);
+u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
+void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev);
+void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev);
+void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev);
+void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev);
+void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev);
+int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev);
+
+#endif /* __IVPU_HW_BTRS_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
new file mode 100644
index 000000000000..a81a9ba540fa
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_BTRS_LNL_REG_H__
+#define __IVPU_HW_BTRS_LNL_REG_H__
+
+#include <linux/bits.h>
+
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT 0x00000000u
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1)
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2)
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3)
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4)
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5)
+#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6)
+
+#define VPU_HW_BTRS_LNL_LOCAL_INT_MASK 0x00000004u
+#define VPU_HW_BTRS_LNL_GLOBAL_INT_MASK 0x00000008u
+
+#define VPU_HW_BTRS_LNL_HM_ATS 0x0000000cu
+
+#define VPU_HW_BTRS_LNL_ATS_ERR_LOG1 0x00000010u
+#define VPU_HW_BTRS_LNL_ATS_ERR_LOG2 0x00000014u
+#define VPU_HW_BTRS_LNL_ATS_ERR_CLEAR 0x00000018u
+
+#define VPU_HW_BTRS_LNL_CFI0_ERR_LOG 0x0000001cu
+#define VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR 0x00000020u
+
+#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u
+
+#define VPU_HW_BTRS_LNL_CFI1_ERR_LOG 0x00000040u
+#define VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR 0x00000044u
+
+#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW 0x00000048u
+#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH 0x0000004cu
+#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR 0x00000050u
+
+#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS 0x00000054u
+
+#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW 0x00000058u
+#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH 0x0000005cu
+#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR 0x00000060u
+
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS 0x00000070u
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_CMD_MASK GENMASK(7, 0)
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM1_MASK GENMASK(15, 8)
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM2_MASK GENMASK(23, 16)
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM3_MASK GENMASK(31, 24)
+
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW 0x00000074u
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_CMD_MASK GENMASK(7, 0)
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM1_MASK GENMASK(15, 8)
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM2_MASK GENMASK(23, 16)
+#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM3_MASK GENMASK(31, 24)
+
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0 0x00000130u
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0)
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16)
+
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1 0x00000134u
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0)
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16)
+
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2 0x00000138u
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0)
+#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16)
+
+#define VPU_HW_BTRS_LNL_WP_REQ_CMD 0x0000013cu
+#define VPU_HW_BTRS_LNL_WP_REQ_CMD_SEND_MASK BIT_MASK(0)
+
+#define VPU_HW_BTRS_LNL_PLL_FREQ 0x00000148u
+#define VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK GENMASK(15, 0)
+
+#define VPU_HW_BTRS_LNL_CDYN 0x0000014cu
+#define VPU_HW_BTRS_LNL_CDYN_CDYN_MASK GENMASK(15, 0)
+
+#define VPU_HW_BTRS_LNL_TILE_FUSE 0x00000150u
+#define VPU_HW_BTRS_LNL_TILE_FUSE_VALID_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_LNL_TILE_FUSE_CONFIG_MASK GENMASK(6, 1)
+
+#define VPU_HW_BTRS_LNL_VPU_STATUS 0x00000154u
+#define VPU_HW_BTRS_LNL_VPU_STATUS_READY_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_IDLE_MASK BIT_MASK(1)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12)
+#define VPU_HW_BTRS_LNL_VPU_STATUS_PLATFORM_MASK GENMASK(31, 29)
+
+#define VPU_HW_BTRS_LNL_IP_RESET 0x00000160u
+#define VPU_HW_BTRS_LNL_IP_RESET_TRIGGER_MASK BIT_MASK(0)
+
+#define VPU_HW_BTRS_LNL_D0I3_CONTROL 0x00000164u
+#define VPU_HW_BTRS_LNL_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_LNL_D0I3_CONTROL_I3_MASK BIT_MASK(2)
+
+#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET 0x00000168u
+#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE 0x0000016cu
+#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE 0x00000170u
+
+#define VPU_HW_BTRS_LNL_FMIN_FUSE 0x00000174u
+#define VPU_HW_BTRS_LNL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0)
+#define VPU_HW_BTRS_LNL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8)
+
+#define VPU_HW_BTRS_LNL_FMAX_FUSE 0x00000178u
+#define VPU_HW_BTRS_LNL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0)
+
+#endif /* __IVPU_HW_BTRS_LNL_REG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h
new file mode 100644
index 000000000000..e93d539e066f
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_BTRS_MTL_REG_H__
+#define __IVPU_HW_BTRS_MTL_REG_H__
+
+#include <linux/bits.h>
+
+#define VPU_HW_BTRS_MTL_INTERRUPT_TYPE 0x00000000u
+
+#define VPU_HW_BTRS_MTL_INTERRUPT_STAT 0x00000004u
+#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1)
+#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2)
+
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0 0x00000008u
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0)
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16)
+
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1 0x0000000cu
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0)
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16)
+
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2 0x00000010u
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0)
+
+#define VPU_HW_BTRS_MTL_WP_REQ_CMD 0x00000014u
+#define VPU_HW_BTRS_MTL_WP_REQ_CMD_SEND_MASK BIT_MASK(0)
+
+#define VPU_HW_BTRS_MTL_WP_DOWNLOAD 0x00000018u
+#define VPU_HW_BTRS_MTL_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0)
+
+#define VPU_HW_BTRS_MTL_CURRENT_PLL 0x0000001cu
+#define VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK GENMASK(15, 0)
+
+#define VPU_HW_BTRS_MTL_PLL_ENABLE 0x00000020u
+
+#define VPU_HW_BTRS_MTL_FMIN_FUSE 0x00000024u
+#define VPU_HW_BTRS_MTL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0)
+#define VPU_HW_BTRS_MTL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8)
+
+#define VPU_HW_BTRS_MTL_FMAX_FUSE 0x00000028u
+#define VPU_HW_BTRS_MTL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0)
+
+#define VPU_HW_BTRS_MTL_TILE_FUSE 0x0000002cu
+#define VPU_HW_BTRS_MTL_TILE_FUSE_VALID_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_MTL_TILE_FUSE_SKU_MASK GENMASK(3, 2)
+
+#define VPU_HW_BTRS_MTL_LOCAL_INT_MASK 0x00000030u
+#define VPU_HW_BTRS_MTL_GLOBAL_INT_MASK 0x00000034u
+
+#define VPU_HW_BTRS_MTL_PLL_STATUS 0x00000040u
+#define VPU_HW_BTRS_MTL_PLL_STATUS_LOCK_MASK BIT_MASK(1)
+
+#define VPU_HW_BTRS_MTL_VPU_STATUS 0x00000044u
+#define VPU_HW_BTRS_MTL_VPU_STATUS_READY_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_MTL_VPU_STATUS_IDLE_MASK BIT_MASK(1)
+
+#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL 0x00000060u
+#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0)
+#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2)
+
+#define VPU_HW_BTRS_MTL_VPU_IP_RESET 0x00000050u
+#define VPU_HW_BTRS_MTL_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0)
+
+#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET 0x00000080u
+#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE 0x00000084u
+#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE 0x00000088u
+
+#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_0 0x000000a0u
+#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_1 0x000000a4u
+#define VPU_HW_BTRS_MTL_ATS_ERR_CLEAR 0x000000a8u
+
+#define VPU_HW_BTRS_MTL_UFI_ERR_LOG 0x000000b0u
+#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0)
+#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12)
+#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20)
+
+#define VPU_HW_BTRS_MTL_UFI_ERR_CLEAR 0x000000b4u
+
+#endif /* __IVPU_HW_BTRS_MTL_REG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
new file mode 100644
index 000000000000..06aa1e7dc50b
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -0,0 +1,1199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#include "ivpu_drv.h"
+#include "ivpu_fw.h"
+#include "ivpu_hw.h"
+#include "ivpu_hw_37xx_reg.h"
+#include "ivpu_hw_40xx_reg.h"
+#include "ivpu_hw_btrs.h"
+#include "ivpu_hw_ip.h"
+#include "ivpu_hw_reg_io.h"
+#include "ivpu_mmu.h"
+#include "ivpu_pm.h"
+
+#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
+
+#define TIM_SAFE_ENABLE 0xf1d0dead
+#define TIM_WATCHDOG_RESET_VALUE 0xffffffff
+
+#define ICB_0_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT)))
+
+#define ICB_1_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT)))
+
+#define ICB_0_1_IRQ_MASK_37XX ((((u64)ICB_1_IRQ_MASK_37XX) << 32) | ICB_0_IRQ_MASK_37XX)
+
+#define ICB_0_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT)))
+
+#define ICB_1_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT)))
+
+#define ICB_0_1_IRQ_MASK_40XX ((((u64)ICB_1_IRQ_MASK_40XX) << 32) | ICB_0_IRQ_MASK_40XX)
+
+#define ITF_FIREWALL_VIOLATION_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
+ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
+
+#define ITF_FIREWALL_VIOLATION_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
+ (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
+
+static int wait_for_ip_bar(struct ivpu_device *vdev)
+{
+ return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100);
+}
+
+static void host_ss_rst_clr(struct ivpu_device *vdev)
+{
+ u32 val = 0;
+
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val);
+
+ REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val);
+}
+
+static int host_ss_noc_qreqn_check_37xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN);
+
+ if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int host_ss_noc_qreqn_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int host_ss_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return host_ss_noc_qreqn_check_37xx(vdev, exp_val);
+ else
+ return host_ss_noc_qreqn_check_40xx(vdev, exp_val);
+}
+
+static int host_ss_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN);
+
+ if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int host_ss_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int host_ss_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return host_ss_noc_qacceptn_check_37xx(vdev, exp_val);
+ else
+ return host_ss_noc_qacceptn_check_40xx(vdev, exp_val);
+}
+
+static int host_ss_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY);
+
+ if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int host_ss_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int host_ss_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return host_ss_noc_qdeny_check_37xx(vdev, exp_val);
+ else
+ return host_ss_noc_qdeny_check_40xx(vdev, exp_val);
+}
+
+static int top_noc_qrenqn_check_37xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
+
+ if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int top_noc_qrenqn_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int top_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return top_noc_qrenqn_check_37xx(vdev, exp_val);
+ else
+ return top_noc_qrenqn_check_40xx(vdev, exp_val);
+}
+
+int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev)
+{
+ int ret;
+
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ ret = wait_for_ip_bar(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for NPU IP bar\n");
+ return ret;
+ }
+ host_ss_rst_clr(vdev);
+ }
+
+ ret = host_ss_noc_qreqn_check(vdev, 0x0);
+ if (ret) {
+ ivpu_err(vdev, "Failed qreqn check: %d\n", ret);
+ return ret;
+ }
+
+ ret = host_ss_noc_qacceptn_check(vdev, 0x0);
+ if (ret) {
+ ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
+ return ret;
+ }
+
+ ret = host_ss_noc_qdeny_check(vdev, 0x0);
+ if (ret)
+ ivpu_err(vdev, "Failed qdeny check %d\n", ret);
+
+ return ret;
+}
+
+static void idle_gen_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val);
+ else
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val);
+
+ REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, val);
+}
+
+static void idle_gen_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val);
+
+ REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val);
+}
+
+void ivpu_hw_ip_idle_gen_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ idle_gen_drive_37xx(vdev, true);
+ else
+ idle_gen_drive_40xx(vdev, true);
+}
+
+void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ idle_gen_drive_37xx(vdev, false);
+ else
+ idle_gen_drive_40xx(vdev, false);
+}
+
+static void
+pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status)
+{
+ u32 val;
+
+ val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val);
+ REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val);
+
+ val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, STATUS_DLY, status, val);
+ REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, val);
+}
+
+static void pwr_island_trickle_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
+ else
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
+
+ REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
+}
+
+static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
+
+ REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
+}
+
+static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
+
+ REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
+}
+
+static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
+ else
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
+
+ REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
+}
+
+static void pwr_island_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ pwr_island_trickle_drive_37xx(vdev, true);
+ ndelay(500);
+ pwr_island_drive_37xx(vdev, true);
+ } else {
+ pwr_island_trickle_drive_40xx(vdev, true);
+ ndelay(500);
+ pwr_island_drive_40xx(vdev, true);
+ }
+}
+
+static int wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, exp_val,
+ PWR_ISLAND_STATUS_TIMEOUT_US);
+ else
+ return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU, exp_val,
+ PWR_ISLAND_STATUS_TIMEOUT_US);
+}
+
+static void pwr_island_isolation_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val);
+ else
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val);
+
+ REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val);
+}
+
+static void pwr_island_isolation_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val);
+
+ REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val);
+}
+
+static void pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ pwr_island_isolation_drive_37xx(vdev, enable);
+ else
+ pwr_island_isolation_drive_40xx(vdev, enable);
+}
+
+static void pwr_island_isolation_disable(struct ivpu_device *vdev)
+{
+ pwr_island_isolation_drive(vdev, false);
+}
+
+static void host_ss_clk_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET);
+
+ if (enable) {
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val);
+ } else {
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val);
+ }
+
+ REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val);
+}
+
+static void host_ss_clk_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN);
+
+ if (enable) {
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val);
+ } else {
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val);
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val);
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val);
+ }
+
+ REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val);
+}
+
+static void host_ss_clk_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ host_ss_clk_drive_37xx(vdev, enable);
+ else
+ host_ss_clk_drive_40xx(vdev, enable);
+}
+
+static void host_ss_clk_enable(struct ivpu_device *vdev)
+{
+ host_ss_clk_drive(vdev, true);
+}
+
+static void host_ss_rst_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET);
+
+ if (enable) {
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val);
+ } else {
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val);
+ }
+
+ REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val);
+}
+
+static void host_ss_rst_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN);
+
+ if (enable) {
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val);
+ } else {
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val);
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val);
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val);
+ }
+
+ REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val);
+}
+
+static void host_ss_rst_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ host_ss_rst_drive_37xx(vdev, enable);
+ else
+ host_ss_rst_drive_40xx(vdev, enable);
+}
+
+static void host_ss_rst_enable(struct ivpu_device *vdev)
+{
+ host_ss_rst_drive(vdev, true);
+}
+
+static void host_ss_noc_qreqn_top_socmmio_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
+ else
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
+ REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val);
+}
+
+static void host_ss_noc_qreqn_top_socmmio_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val);
+ REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val);
+}
+
+static void host_ss_noc_qreqn_top_socmmio_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ host_ss_noc_qreqn_top_socmmio_drive_37xx(vdev, enable);
+ else
+ host_ss_noc_qreqn_top_socmmio_drive_40xx(vdev, enable);
+}
+
+static int host_ss_axi_drive(struct ivpu_device *vdev, bool enable)
+{
+ int ret;
+
+ host_ss_noc_qreqn_top_socmmio_drive(vdev, enable);
+
+ ret = host_ss_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
+ if (ret) {
+ ivpu_err(vdev, "Failed HOST SS NOC QACCEPTN check: %d\n", ret);
+ return ret;
+ }
+
+ ret = host_ss_noc_qdeny_check(vdev, 0x0);
+ if (ret)
+ ivpu_err(vdev, "Failed HOST SS NOC QDENY check: %d\n", ret);
+
+ return ret;
+}
+
+static void top_noc_qreqn_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN);
+
+ if (enable) {
+ val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val);
+ val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+ } else {
+ val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val);
+ val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+ }
+
+ REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val);
+}
+
+static void top_noc_qreqn_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
+
+ if (enable) {
+ val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+ val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+ } else {
+ val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+ val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+ }
+
+ REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val);
+}
+
+static void top_noc_qreqn_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ top_noc_qreqn_drive_37xx(vdev, enable);
+ else
+ top_noc_qreqn_drive_40xx(vdev, enable);
+}
+
+int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev)
+{
+ return host_ss_axi_drive(vdev, true);
+}
+
+static int top_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN);
+
+ if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int top_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return top_noc_qacceptn_check_37xx(vdev, exp_val);
+ else
+ return top_noc_qacceptn_check_40xx(vdev, exp_val);
+}
+
+static int top_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY);
+
+ if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int top_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return top_noc_qdeny_check_37xx(vdev, exp_val);
+ else
+ return top_noc_qdeny_check_40xx(vdev, exp_val);
+}
+
+static int top_noc_drive(struct ivpu_device *vdev, bool enable)
+{
+ int ret;
+
+ top_noc_qreqn_drive(vdev, enable);
+
+ ret = top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
+ if (ret) {
+ ivpu_err(vdev, "Failed TOP NOC QACCEPTN check: %d\n", ret);
+ return ret;
+ }
+
+ ret = top_noc_qdeny_check(vdev, 0x0);
+ if (ret)
+ ivpu_err(vdev, "Failed TOP NOC QDENY check: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev)
+{
+ return top_noc_drive(vdev, true);
+}
+
+static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val);
+ else
+ val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val);
+
+ REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val);
+}
+
+static void pwr_island_delay_set(struct ivpu_device *vdev)
+{
+ bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH;
+ u32 post, post1, post2, status;
+
+ if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX)
+ return;
+
+ switch (ivpu_device_id(vdev)) {
+ case PCI_DEVICE_ID_WCL:
+ case PCI_DEVICE_ID_PTL_P:
+ post = high ? 18 : 0;
+ post1 = 0;
+ post2 = 0;
+ status = high ? 46 : 3;
+ break;
+
+ case PCI_DEVICE_ID_NVL:
+ post = high ? 198 : 17;
+ post1 = 0;
+ post2 = high ? 198 : 17;
+ status = 0;
+ break;
+
+ default:
+ dump_stack();
+ ivpu_err(vdev, "Unknown device ID\n");
+ return;
+ }
+
+ pwr_island_delay_set_50xx(vdev, post, post1, post2, status);
+}
+
+int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev)
+{
+ int ret;
+
+ pwr_island_delay_set(vdev);
+ pwr_island_enable(vdev);
+
+ ret = wait_for_pwr_island_status(vdev, 0x1);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for power island status\n");
+ return ret;
+ }
+
+ ret = top_noc_qreqn_check(vdev, 0x0);
+ if (ret) {
+ ivpu_err(vdev, "Failed TOP NOC QREQN check %d\n", ret);
+ return ret;
+ }
+
+ host_ss_clk_enable(vdev);
+ pwr_island_isolation_disable(vdev);
+ host_ss_rst_enable(vdev);
+
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ dpu_active_drive_37xx(vdev, true);
+
+ return ret;
+}
+
+u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
+ else
+ return REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
+}
+
+static void ivpu_hw_ip_snoop_disable_37xx(struct ivpu_device *vdev)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
+
+ val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
+
+ if (ivpu_is_force_snoop_enabled(vdev))
+ val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val);
+ else
+ val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val);
+
+ REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val);
+}
+
+static void ivpu_hw_ip_snoop_disable_40xx(struct ivpu_device *vdev)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES);
+
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
+
+ if (ivpu_is_force_snoop_enabled(vdev))
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val);
+
+ REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val);
+}
+
+void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return ivpu_hw_ip_snoop_disable_37xx(vdev);
+ else
+ return ivpu_hw_ip_snoop_disable_40xx(vdev);
+}
+
+static void ivpu_hw_ip_tbu_mmu_enable_37xx(struct ivpu_device *vdev)
+{
+ u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV);
+
+ val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
+
+ REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val);
+}
+
+static void ivpu_hw_ip_tbu_mmu_enable_40xx(struct ivpu_device *vdev)
+{
+ u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV);
+
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
+
+ REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val);
+}
+
+void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return ivpu_hw_ip_tbu_mmu_enable_37xx(vdev);
+ else
+ return ivpu_hw_ip_tbu_mmu_enable_40xx(vdev);
+}
+
+static int soc_cpu_boot_37xx(struct ivpu_device *vdev)
+{
+ u32 val;
+
+ val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
+ val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
+
+ val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
+ REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+
+ val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+ REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+
+ val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+ REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+
+ val = vdev->fw->entry_point >> 9;
+ REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
+
+ val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val);
+ REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
+
+ ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n",
+ vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume");
+
+ return 0;
+}
+
+static int cpu_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static int cpu_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val)
+{
+ u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY);
+
+ if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val))
+ return -EIO;
+
+ return 0;
+}
+
+static void cpu_noc_top_mmio_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN);
+
+ if (enable)
+ val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val);
+ else
+ val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val);
+ REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val);
+}
+
+static int soc_cpu_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+ int ret;
+
+ cpu_noc_top_mmio_drive_40xx(vdev, enable);
+
+ ret = cpu_noc_qacceptn_check_40xx(vdev, enable ? 0x1 : 0x0);
+ if (ret) {
+ ivpu_err(vdev, "Failed qacceptn check: %d\n", ret);
+ return ret;
+ }
+
+ ret = cpu_noc_qdeny_check_40xx(vdev, 0x0);
+ if (ret)
+ ivpu_err(vdev, "Failed qdeny check: %d\n", ret);
+
+ return ret;
+}
+
+static int soc_cpu_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX)
+ return 0;
+
+ return soc_cpu_drive_40xx(vdev, true);
+}
+
+static int soc_cpu_boot_40xx(struct ivpu_device *vdev)
+{
+ int ret;
+ u32 val;
+ u64 val64;
+
+ ret = soc_cpu_enable(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret);
+ return ret;
+ }
+
+ val64 = vdev->fw->entry_point;
+ val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1;
+ REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64);
+
+ val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO);
+ val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val);
+ REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val);
+
+ ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n",
+ ivpu_fw_is_cold_boot(vdev) ? "cold boot" : "resume");
+
+ return 0;
+}
+
+int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return soc_cpu_boot_37xx(vdev);
+ else
+ return soc_cpu_boot_40xx(vdev);
+}
+
+static void wdt_disable_37xx(struct ivpu_device *vdev)
+{
+ u32 val;
+
+ /* Enable writing and set non-zero WDT value */
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
+
+ /* Enable writing and disable watchdog timer */
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0);
+
+ /* Now clear the timeout interrupt */
+ val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG);
+ val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
+}
+
+static void wdt_disable_40xx(struct ivpu_device *vdev)
+{
+ u32 val;
+
+ REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+ REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
+
+ REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+ REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0);
+
+ val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG);
+ val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
+ REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
+}
+
+void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return wdt_disable_37xx(vdev);
+ else
+ return wdt_disable_40xx(vdev);
+}
+
+static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev)
+{
+ u32 count = readl(vdev->regv + VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT);
+
+ return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
+}
+
+static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev)
+{
+ u32 count = readl(vdev->regv + VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT);
+
+ return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
+}
+
+u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return ipc_rx_count_get_37xx(vdev);
+ else
+ return ipc_rx_count_get_40xx(vdev);
+}
+
+void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_37XX);
+ REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_37XX);
+ } else {
+ REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_40XX);
+ REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_40XX);
+ }
+}
+
+void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull);
+ REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0);
+ } else {
+ REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull);
+ REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul);
+ }
+}
+
+static void diagnose_failure_37xx(struct ivpu_device *vdev)
+{
+ u32 reg = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX;
+
+ if (ipc_rx_count_get_37xx(vdev))
+ ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ");
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg))
+ ivpu_err(vdev, "WDT MSS timeout detected\n");
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg))
+ ivpu_err(vdev, "WDT NCE timeout detected\n");
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg))
+ ivpu_err(vdev, "NOC Firewall irq detected\n");
+}
+
+static void diagnose_failure_40xx(struct ivpu_device *vdev)
+{
+ u32 reg = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX;
+
+ if (ipc_rx_count_get_40xx(vdev))
+ ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ");
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg))
+ ivpu_err(vdev, "WDT MSS timeout detected\n");
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg))
+ ivpu_err(vdev, "WDT NCE timeout detected\n");
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg))
+ ivpu_err(vdev, "NOC Firewall irq detected\n");
+}
+
+void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ diagnose_failure_37xx(vdev);
+ else
+ diagnose_failure_40xx(vdev);
+}
+
+void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_37XX);
+ else
+ REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_40XX);
+}
+
+static void irq_wdt_nce_handler(struct ivpu_device *vdev)
+{
+ ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
+}
+
+static void irq_wdt_mss_handler(struct ivpu_device *vdev)
+{
+ ivpu_hw_ip_wdt_disable(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
+}
+
+static void irq_noc_firewall_handler(struct ivpu_device *vdev)
+{
+ atomic_inc(&vdev->hw->firewall_irq_counter);
+
+ ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n",
+ atomic_read(&vdev->hw->firewall_irq_counter));
+}
+
+/* Handler for IRQs from NPU core */
+bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq)
+{
+ u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX;
+
+ if (!status)
+ return false;
+
+ REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
+ ivpu_mmu_irq_evtq_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
+ ivpu_ipc_irq_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
+ ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status))
+ ivpu_mmu_irq_gerr_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status))
+ irq_wdt_mss_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status))
+ irq_wdt_nce_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
+ irq_noc_firewall_handler(vdev);
+
+ return true;
+}
+
+/* Handler for IRQs from NPU core */
+bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq)
+{
+ u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX;
+
+ if (!status)
+ return false;
+
+ REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
+ ivpu_mmu_irq_evtq_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
+ ivpu_ipc_irq_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
+ ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status))
+ ivpu_mmu_irq_gerr_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status))
+ irq_wdt_mss_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status))
+ irq_wdt_nce_handler(vdev);
+
+ if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
+ irq_noc_firewall_handler(vdev);
+
+ return true;
+}
+
+static void db_set_37xx(struct ivpu_device *vdev, u32 db_id)
+{
+ u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0;
+ u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET);
+
+ REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+}
+
+static void db_set_40xx(struct ivpu_device *vdev, u32 db_id)
+{
+ u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0;
+ u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET);
+
+ REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+}
+
+void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ db_set_37xx(vdev, db_id);
+ else
+ db_set_40xx(vdev, db_id);
+}
+
+u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM);
+ else
+ return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM);
+}
+
+void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
+{
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+ else
+ REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+}
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.h b/drivers/accel/ivpu/ivpu_hw_ip.h
new file mode 100644
index 000000000000..5b1b391aa577
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_ip.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_IP_H__
+#define __IVPU_HW_IP_H__
+
+#include "ivpu_drv.h"
+
+int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev);
+void ivpu_hw_ip_idle_gen_enable(struct ivpu_device *vdev);
+void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev);
+int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev);
+int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev);
+int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev);
+u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev);
+void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev);
+void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev);
+int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev);
+void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev);
+void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev);
+u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev);
+void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev);
+bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq);
+bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq);
+void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id);
+u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev);
+void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr);
+void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev);
+void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev);
+void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev);
+void ivpu_hw_ip_fabric_req_override_enable_50xx(struct ivpu_device *vdev);
+void ivpu_hw_ip_fabric_req_override_disable_50xx(struct ivpu_device *vdev);
+
+#endif /* __IVPU_HW_IP_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h
index 79b3f441eac4..66259b0ead02 100644
--- a/drivers/accel/ivpu/ivpu_hw_reg_io.h
+++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h
@@ -7,6 +7,7 @@
#define __IVPU_HW_REG_IO_H__
#include <linux/bitfield.h>
+#include <linux/fault-inject.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -16,13 +17,11 @@
#define REG_IO_ERROR 0xffffffff
#define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__)
-#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg))
#define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__)
#define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__)
-#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg))
#define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__)
#define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__)
#define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__)
@@ -47,31 +46,42 @@
#define REG_TEST_FLD_NUM(REG, FLD, num, val) \
((num) == FIELD_GET(REG##_##FLD##_MASK, val))
-#define REGB_POLL_FLD(reg, fld, val, timeout_us) \
-({ \
- u32 var; \
- int r; \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
- __func__, #reg, reg, #fld, val); \
- r = read_poll_timeout(REGB_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
- REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
- __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
- r; \
-})
-
-#define REGV_POLL_FLD(reg, fld, val, timeout_us) \
-({ \
- u32 var; \
- int r; \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
- __func__, #reg, reg, #fld, val); \
- r = read_poll_timeout(REGV_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
- REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
- __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
- r; \
-})
+#define REGB_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \
+ ivpu_hw_reg_poll_fld(vdev, vdev->regb, reg, reg##_##fld##_MASK, \
+ FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \
+ __func__, #reg, #fld)
+
+#define REGV_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \
+ ivpu_hw_reg_poll_fld(vdev, vdev->regv, reg, reg##_##fld##_MASK, \
+ FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \
+ __func__, #reg, #fld)
+
+extern struct fault_attr ivpu_hw_failure;
+
+static inline int __must_check
+ivpu_hw_reg_poll_fld(struct ivpu_device *vdev, void __iomem *base,
+ u32 reg_offset, u32 reg_mask, u32 exp_masked_val, u32 timeout_us,
+ const char *func_name, const char *reg_name, const char *fld_name)
+{
+ u32 reg_val;
+ int ret;
+
+ ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s started (exp_val 0x%x)\n",
+ func_name, reg_name, reg_offset, fld_name, exp_masked_val);
+
+ ret = read_poll_timeout(readl, reg_val, (reg_val & reg_mask) == exp_masked_val,
+ REG_POLL_SLEEP_US, timeout_us, false, base + reg_offset);
+
+#ifdef CONFIG_FAULT_INJECTION
+ if (should_fail(&ivpu_hw_failure, 1))
+ ret = -ETIMEDOUT;
+#endif
+
+ ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s %s (reg_val 0x%08x)\n",
+ func_name, reg_name, reg_offset, fld_name, ret ? "ETIMEDOUT" : "OK", reg_val);
+
+ return ret;
+}
static inline u32
ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg,
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 56ff067f63e2..1f13bf95b2b3 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -15,6 +15,7 @@
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
+#include "ivpu_trace.h"
#define IPC_MAX_RX_MSG 128
@@ -129,7 +130,7 @@ static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr)
static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
{
- ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
+ ivpu_hw_ipc_tx_set(vdev, vpu_addr);
}
static void
@@ -140,7 +141,6 @@ ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_rx_msg *rx_msg;
lockdep_assert_held(&ipc->cons_lock);
- lockdep_assert_irqs_disabled();
rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
if (!rx_msg) {
@@ -210,8 +210,7 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
}
-static int
-ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req)
+int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
int ret;
@@ -228,6 +227,7 @@ ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct v
goto unlock;
ivpu_ipc_tx(vdev, cons->tx_vpu_addr);
+ trace_jsm("[tx]", req);
unlock:
mutex_unlock(&ipc->lock);
@@ -279,12 +279,13 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
- ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
+ ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
ret = -EBADMSG;
}
if (jsm_msg)
memcpy(jsm_msg, rx_msg->jsm_msg, size);
+ trace_jsm("[rx]", rx_msg->jsm_msg);
}
ivpu_ipc_rx_msg_del(vdev, rx_msg);
@@ -292,15 +293,17 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
return ret;
}
-static int
+int
ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp_type,
- struct vpu_jsm_msg *resp, u32 channel,
- unsigned long timeout_ms)
+ struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms)
{
struct ivpu_ipc_consumer cons;
int ret;
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) &&
+ pm_runtime_enabled(vdev->drm.dev));
+
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
ret = ivpu_ipc_send(vdev, &cons, req);
@@ -326,19 +329,21 @@ consumer_del:
return ret;
}
-int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
- u32 channel, unsigned long timeout_ms)
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms)
{
struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg hb_resp;
int ret, hb_ret;
- drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
if (ret != -ETIMEDOUT)
- return ret;
+ goto rpm_put;
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
@@ -346,21 +351,33 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
if (hb_ret == -ETIMEDOUT)
ivpu_pm_trigger_recovery(vdev, "IPC timeout");
+rpm_put:
+ ivpu_rpm_put(vdev);
return ret;
}
-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
- u32 channel, unsigned long timeout_ms)
+int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ u32 channel, unsigned long timeout_ms)
{
+ struct ivpu_ipc_consumer cons;
int ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return ret;
- ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
+ ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
+ ret = ivpu_ipc_send(vdev, &cons, req);
+ if (ret) {
+ ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret);
+ goto consumer_del;
+ }
+
+ msleep(timeout_ms);
+
+consumer_del:
+ ivpu_ipc_consumer_del(vdev, &cons);
ivpu_rpm_put(vdev);
return ret;
}
@@ -378,7 +395,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons
return false;
}
-void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons;
@@ -392,8 +409,8 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
* Driver needs to purge all messages from IPC FIFO to clear IPC interrupt.
* Without purge IPC FIFO to 0 next IPC interrupts won't be generated.
*/
- while (ivpu_hw_reg_ipc_rx_count_get(vdev)) {
- vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
+ while (ivpu_hw_ipc_rx_count_get(vdev)) {
+ vpu_addr = ivpu_hw_ipc_rx_addr_get(vdev);
if (vpu_addr == REG_IO_ERROR) {
ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
return;
@@ -442,12 +459,12 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
}
}
- if (wake_thread)
- *wake_thread = !list_empty(&ipc->cb_msg_list);
+ queue_work(system_percpu_wq, &vdev->irq_ipc_work);
}
-irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_work_fn(struct work_struct *work)
{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_ipc_work);
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg, *r;
struct list_head cb_msg_list;
@@ -462,8 +479,6 @@ irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
ivpu_ipc_rx_msg_del(vdev, rx_msg);
}
-
- return IRQ_HANDLED;
}
int ivpu_ipc_init(struct ivpu_device *vdev)
@@ -520,7 +535,6 @@ void ivpu_ipc_fini(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
- drm_WARN_ON(&vdev->drm, ipc->on);
drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 40ca3cc4e61f..b524a1985b9d 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_IPC_H__
@@ -89,22 +89,25 @@ void ivpu_ipc_enable(struct ivpu_device *vdev);
void ivpu_ipc_disable(struct ivpu_device *vdev);
void ivpu_ipc_reset(struct ivpu_device *vdev);
-void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread);
-irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_work_fn(struct work_struct *work);
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
u32 channel, ivpu_ipc_rx_callback_t callback);
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
+int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+ struct vpu_jsm_msg *req);
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
unsigned long timeout_ms);
-
-int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
- u32 channel, unsigned long timeout_ms);
+int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp_type,
+ struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms);
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
u32 channel, unsigned long timeout_ms);
+int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ u32 channel, unsigned long timeout_ms);
#endif /* __IVPU_IPC_H__ */
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index a49bc9105ed0..4f8564e2878a 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <drm/drm_file.h>
@@ -8,158 +8,381 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>
#include "ivpu_drv.h"
+#include "ivpu_fw.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
+#include "ivpu_mmu.h"
#include "ivpu_pm.h"
+#include "ivpu_trace.h"
+#include "vpu_boot_api.h"
#define CMD_BUF_IDX 0
-#define JOB_ID_JOB_MASK GENMASK(7, 0)
-#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
- ivpu_hw_reg_db_set(vdev, cmdq->db_id);
+ ivpu_hw_db_set(vdev, cmdq->db_id);
}
-static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine)
+static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
+ struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+ if (ivpu_fw_preempt_buf_size(vdev) == 0)
+ return 0;
+
+ cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user,
+ vdev->fw->primary_preempt_buf_size,
+ DRM_IVPU_BO_WC);
+ if (!cmdq->primary_preempt_buf) {
+ ivpu_err(vdev, "Failed to create primary preemption buffer\n");
+ return -ENOMEM;
+ }
+
+ cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma,
+ vdev->fw->secondary_preempt_buf_size,
+ DRM_IVPU_BO_WC);
+ if (!cmdq->secondary_preempt_buf) {
+ ivpu_err(vdev, "Failed to create secondary preemption buffer\n");
+ goto err_free_primary;
+ }
+
+ return 0;
+
+err_free_primary:
+ ivpu_bo_free(cmdq->primary_preempt_buf);
+ cmdq->primary_preempt_buf = NULL;
+ return -ENOMEM;
+}
+
+static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
+ struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+ if (cmdq->primary_preempt_buf)
+ ivpu_bo_free(cmdq->primary_preempt_buf);
+ if (cmdq->secondary_preempt_buf)
+ ivpu_bo_free(cmdq->secondary_preempt_buf);
+}
+
+static int ivpu_preemption_job_init(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv,
+ struct ivpu_cmdq *cmdq, struct ivpu_job *job)
+{
+ int ret;
+
+ /* Use preemption buffer provided by the user space */
+ if (job->primary_preempt_buf)
+ return 0;
+
+ if (!cmdq->primary_preempt_buf) {
+ /* Allocate per command queue preemption buffers */
+ ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
+ if (ret)
+ return ret;
+ }
+
+ /* Use preemption buffers allocated by the kernel */
+ job->primary_preempt_buf = cmdq->primary_preempt_buf;
+ job->secondary_preempt_buf = cmdq->secondary_preempt_buf;
+
+ return 0;
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
{
- struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB};
struct ivpu_device *vdev = file_priv->vdev;
- struct vpu_job_queue_header *jobq_header;
struct ivpu_cmdq *cmdq;
- int ret;
cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
if (!cmdq)
return NULL;
- ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret);
- goto err_free_cmdq;
- }
-
cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!cmdq->mem)
- goto err_erase_xa;
-
- cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) /
- sizeof(struct vpu_job_queue_entry));
-
- cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
- jobq_header = &cmdq->jobq->header;
- jobq_header->engine_idx = engine;
- jobq_header->head = 0;
- jobq_header->tail = 0;
- wmb(); /* Flush WC buffer for jobq->header */
+ goto err_free_cmdq;
return cmdq;
-err_erase_xa:
- xa_erase(&vdev->db_xa, cmdq->db_id);
err_free_cmdq:
kfree(cmdq);
return NULL;
}
-static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+/**
+ * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
+ * @cmdq: Pointer to the command queue structure.
+ *
+ * Returns the number of entries that can fit in the command queue memory.
+ */
+static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
{
- if (!cmdq)
- return;
+ size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
+
+ return size / sizeof(struct vpu_job_queue_entry);
+}
+
+/**
+ * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
+ * @vdev: Pointer to the ivpu device structure.
+ * @flags: Input flags to determine the command queue flags.
+ *
+ * Returns the calculated command queue flags, considering both the input flags
+ * and the current test mode settings.
+ */
+static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
+{
+ u32 cmdq_flags = 0;
+
+ if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
+ cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+ /* Test mode can override the TURBO flag coming from the application */
+ if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
+ cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+ if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
+ cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+ return cmdq_flags;
+}
+
+static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+ ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
ivpu_bo_free(cmdq->mem);
- xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
kfree(cmdq);
}
-static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine)
+static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
{
struct ivpu_device *vdev = file_priv->vdev;
- struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+ struct ivpu_cmdq *cmdq = NULL;
int ret;
lockdep_assert_held(&file_priv->lock);
+ cmdq = ivpu_cmdq_alloc(file_priv);
if (!cmdq) {
- cmdq = ivpu_cmdq_alloc(file_priv, engine);
- if (!cmdq)
- return NULL;
- file_priv->cmdq[engine] = cmdq;
+ ivpu_err(vdev, "Failed to allocate command queue\n");
+ return NULL;
}
+ ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
+ &file_priv->cmdq_id_next, GFP_KERNEL);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to allocate command queue ID: %d\n", ret);
+ goto err_free_cmdq;
+ }
+
+ cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
+ cmdq->priority = priority;
- if (cmdq->db_registered)
- return cmdq;
+ cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
+ cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
+ cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);
+
+ ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
+ cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
+ return cmdq;
+
+err_free_cmdq:
+ ivpu_cmdq_free(file_priv, cmdq);
+ return NULL;
+}
+
+static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine,
+ u8 priority)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ int ret;
- ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
- cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
+ ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id,
+ task_pid_nr(current), engine,
+ cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
if (ret)
- return NULL;
+ return ret;
- cmdq->db_registered = true;
+ ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id,
+ priority);
+ if (ret)
+ return ret;
- return cmdq;
+ return 0;
+}
+
+static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ int ret;
+
+ ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next,
+ GFP_KERNEL);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to allocate doorbell ID: %d\n", ret);
+ return ret;
+ }
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id,
+ cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
+ else
+ ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
+ cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
+
+ if (!ret) {
+ ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n",
+ cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority);
+ } else {
+ xa_erase(&vdev->db_xa, cmdq->db_id);
+ cmdq->db_id = 0;
+ }
+
+ return ret;
+}
+
+static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
+{
+ jobq->header.head = 0;
+ jobq->header.tail = 0;
+
+ wmb(); /* Flush WC buffer for jobq->header */
}
-static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine)
+static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+ struct ivpu_device *vdev = file_priv->vdev;
+ int ret;
lockdep_assert_held(&file_priv->lock);
- if (cmdq) {
- file_priv->cmdq[engine] = NULL;
- if (cmdq->db_registered)
- ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id);
+ if (cmdq->db_id)
+ return 0;
- ivpu_cmdq_free(file_priv, cmdq);
+ ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
+ if (ret)
+ return ret;
}
+
+ ret = ivpu_register_db(file_priv, cmdq);
+ if (ret)
+ return ret;
+
+ return 0;
}
-void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
+static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- int i;
+ struct ivpu_device *vdev = file_priv->vdev;
+ int ret;
lockdep_assert_held(&file_priv->lock);
- for (i = 0; i < IVPU_NUM_ENGINES; i++)
- ivpu_cmdq_release_locked(file_priv, i);
+ if (!cmdq->db_id)
+ return 0;
+
+ ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
+ if (!ret)
+ ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
+ if (!ret)
+ ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n",
+ cmdq->id, file_priv->ctx.id);
+ }
+
+ xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
+ cmdq->db_id = 0;
+
+ return 0;
}
-/*
- * Mark the doorbell as unregistered and reset job queue pointers.
- * This function needs to be called when the VPU hardware is restarted
- * and FW loses job queue state. The next time job queue is used it
- * will be registered again.
- */
-static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
+static inline u8 ivpu_job_to_jsm_priority(u8 priority)
+{
+ if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT)
+ return VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL;
+
+ return priority - 1;
+}
+
+static void ivpu_cmdq_destroy(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+ ivpu_cmdq_unregister(file_priv, cmdq);
+ xa_erase(&file_priv->cmdq_xa, cmdq->id);
+ ivpu_cmdq_free(file_priv, cmdq);
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_priv, u8 priority)
+{
+ struct ivpu_cmdq *cmdq;
+ unsigned long id;
lockdep_assert_held(&file_priv->lock);
- if (cmdq) {
- cmdq->db_registered = false;
- cmdq->jobq->header.head = 0;
- cmdq->jobq->header.tail = 0;
- wmb(); /* Flush WC buffer for jobq header */
+ xa_for_each(&file_priv->cmdq_xa, id, cmdq)
+ if (cmdq->is_legacy && cmdq->priority == priority)
+ break;
+
+ if (!cmdq) {
+ cmdq = ivpu_cmdq_create(file_priv, priority, 0);
+ if (!cmdq)
+ return NULL;
+ cmdq->is_legacy = true;
+ }
+
+ return cmdq;
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32 cmdq_id)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_cmdq *cmdq;
+
+ lockdep_assert_held(&file_priv->lock);
+
+ cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id);
+ if (!cmdq) {
+ ivpu_dbg(vdev, IOCTL, "Failed to find command queue with ID: %u\n", cmdq_id);
+ return NULL;
}
+
+ return cmdq;
+}
+
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
+{
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
+
+ lockdep_assert_held(&file_priv->lock);
+
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ ivpu_cmdq_destroy(file_priv, cmdq);
}
-static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv)
+/*
+ * Mark the doorbell as unregistered
+ * This function needs to be called when the VPU hardware is restarted
+ * and FW loses job queue state. The next time job queue is used it
+ * will be registered again.
+ */
+static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv)
{
- int i;
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
mutex_lock(&file_priv->lock);
- for (i = 0; i < IVPU_NUM_ENGINES; i++)
- ivpu_cmdq_reset_locked(file_priv, i);
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
+ xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
+ cmdq->db_id = 0;
+ }
mutex_unlock(&file_priv->lock);
}
@@ -172,10 +395,29 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
mutex_lock(&vdev->context_list_lock);
xa_for_each(&vdev->context_xa, ctx_id, file_priv)
- ivpu_cmdq_reset_all(file_priv);
+ ivpu_cmdq_reset(file_priv);
mutex_unlock(&vdev->context_list_lock);
+}
+void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
+{
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
+
+ lockdep_assert_held(&file_priv->lock);
+ ivpu_dbg(vdev, JOB, "Context ID: %u abort\n", file_priv->ctx.id);
+
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ ivpu_cmdq_unregister(file_priv, cmdq);
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
+ ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+
+ ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id);
+
+ file_priv->aborted = true;
}
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -188,17 +430,28 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
/* Check if there is space left in job queue */
if (next_entry == header->head) {
- ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n",
- job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail);
+ ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n",
+ job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail);
return -EBUSY;
}
- entry = &cmdq->jobq->job[tail];
+ entry = &cmdq->jobq->slot[tail].job;
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
+
+ if (job->primary_preempt_buf) {
+ entry->primary_preempt_buf_addr = job->primary_preempt_buf->vpu_addr;
+ entry->primary_preempt_buf_size = ivpu_bo_size(job->primary_preempt_buf);
+ }
+
+ if (job->secondary_preempt_buf) {
+ entry->secondary_preempt_buf_addr = job->secondary_preempt_buf->vpu_addr;
+ entry->secondary_preempt_buf_size = ivpu_bo_size(job->secondary_preempt_buf);
+ }
+
wmb(); /* Ensure that tail is updated after filling entry */
header->tail = next_entry;
wmb(); /* Flush WC buffer for jobq header */
@@ -254,8 +507,8 @@ static void ivpu_job_destroy(struct ivpu_job *job)
struct ivpu_device *vdev = job->vdev;
u32 i;
- ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
- job->job_id, job->file_priv->ctx.id, job->engine_idx);
+ ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
@@ -281,12 +534,13 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
job->bo_count = bo_count;
job->done_fence = ivpu_fence_create(vdev);
if (!job->done_fence) {
- ivpu_warn_ratelimited(vdev, "Failed to create a fence\n");
+ ivpu_err(vdev, "Failed to create a fence\n");
goto err_free_job;
}
job->file_priv = ivpu_file_priv_get(file_priv);
+ trace_job("create", job);
ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
return job;
@@ -295,27 +549,94 @@ err_free_job:
return NULL;
}
-static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id)
{
struct ivpu_job *job;
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
job = xa_erase(&vdev->submitted_jobs_xa, job_id);
+ if (xa_empty(&vdev->submitted_jobs_xa) && job) {
+ vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
+ vdev->busy_time);
+ }
+
+ return job;
+}
+
+bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+{
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
+ switch (job_status) {
+ case VPU_JSM_STATUS_PROCESSING_ERR:
+ case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX:
+ {
+ struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id);
+
+ if (!job)
+ return false;
+
+ /* Trigger an engine reset */
+ guard(mutex)(&job->file_priv->lock);
+
+ job->job_status = job_status;
+
+ if (job->file_priv->has_mmu_faults)
+ return false;
+
+ /*
+ * Mark context as faulty and defer destruction of the job to jobs abort thread
+ * handler to synchronize between both faults and jobs returning context violation
+ * status and ensure both are handled in the same way
+ */
+ job->file_priv->has_mmu_faults = true;
+ queue_work(system_percpu_wq, &vdev->context_abort_work);
+ return true;
+ }
+ default:
+ /* Complete job with error status, engine reset not required */
+ break;
+ }
+
+ return false;
+}
+
+static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+{
+ struct ivpu_job *job;
+
+ lockdep_assert_held(&vdev->submitted_jobs_lock);
+
+ job = xa_load(&vdev->submitted_jobs_xa, job_id);
if (!job)
return -ENOENT;
- if (job->file_priv->has_mmu_faults)
- job_status = DRM_IVPU_JOB_STATUS_ABORTED;
+ ivpu_job_remove_from_submitted_jobs(vdev, job_id);
- job->bos[CMD_BUF_IDX]->job_status = job_status;
+ if (job->job_status == VPU_JSM_STATUS_SUCCESS) {
+ if (job->file_priv->has_mmu_faults)
+ job->job_status = DRM_IVPU_JOB_STATUS_ABORTED;
+ else
+ job->job_status = job_status;
+ }
+
+ job->bos[CMD_BUF_IDX]->job_status = job->job_status;
dma_fence_signal(job->done_fence);
- ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
- job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ trace_job("done", job);
+ ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
+ job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx,
+ job->job_status);
ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
ivpu_rpm_put(vdev);
+
+ if (!xa_empty(&vdev->submitted_jobs_xa))
+ ivpu_start_job_timeout_detection(vdev);
+
return 0;
}
@@ -324,42 +645,75 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
struct ivpu_job *job;
unsigned long id;
+ mutex_lock(&vdev->submitted_jobs_lock);
+
xa_for_each(&vdev->submitted_jobs_xa, id, job)
ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
}
-static int ivpu_job_submit(struct ivpu_job *job)
+void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
+{
+ struct ivpu_job *job;
+ unsigned long id;
+
+ mutex_lock(&vdev->submitted_jobs_lock);
+
+ xa_for_each(&vdev->submitted_jobs_xa, id, job)
+ if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id)
+ ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
+}
+
+static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = job->vdev;
- struct xa_limit job_id_range;
struct ivpu_cmdq *cmdq;
+ bool is_first_job;
int ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return ret;
+ mutex_lock(&vdev->submitted_jobs_lock);
mutex_lock(&file_priv->lock);
- cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
+ if (cmdq_id == 0)
+ cmdq = ivpu_cmdq_acquire_legacy(file_priv, priority);
+ else
+ cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id);
if (!cmdq) {
- ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n",
- file_priv->ctx.id, job->engine_idx);
ret = -EINVAL;
- goto err_unlock_file_priv;
+ goto err_unlock;
}
- job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
- job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
+ ret = ivpu_cmdq_register(file_priv, cmdq);
+ if (ret) {
+ ivpu_err(vdev, "Failed to register command queue: %d\n", ret);
+ goto err_unlock;
+ }
- xa_lock(&vdev->submitted_jobs_xa);
- ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
+ ret = ivpu_preemption_job_init(vdev, file_priv, cmdq, job);
if (ret) {
+ ivpu_err(vdev, "Failed to initialize preemption buffers for job %d: %d\n",
+ job->job_id, ret);
+ goto err_unlock;
+ }
+
+ job->cmdq_id = cmdq->id;
+
+ is_first_job = xa_empty(&vdev->submitted_jobs_xa);
+ ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
+ &file_priv->job_id_next, GFP_KERNEL);
+ if (ret < 0) {
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
file_priv->ctx.id);
ret = -EBUSY;
- goto err_unlock_submitted_jobs_xa;
+ goto err_unlock;
}
ret = ivpu_cmdq_push_job(cmdq, job);
@@ -373,36 +727,39 @@ static int ivpu_job_submit(struct ivpu_job *job)
wmb(); /* Flush WC buffer for jobq header */
} else {
ivpu_cmdq_ring_db(vdev, cmdq);
+ if (is_first_job)
+ vdev->busy_start_ts = ktime_get();
}
- ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n",
- job->job_id, file_priv->ctx.id, job->engine_idx,
+ trace_job("submit", job);
+ ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
+ job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
- xa_unlock(&vdev->submitted_jobs_xa);
-
mutex_unlock(&file_priv->lock);
- if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+ }
+
+ mutex_unlock(&vdev->submitted_jobs_lock);
return 0;
err_erase_xa:
- __xa_erase(&vdev->submitted_jobs_xa, job->job_id);
-err_unlock_submitted_jobs_xa:
- xa_unlock(&vdev->submitted_jobs_xa);
-err_unlock_file_priv:
+ xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_unlock:
mutex_unlock(&file_priv->lock);
+ mutex_unlock(&vdev->submitted_jobs_lock);
ivpu_rpm_put(vdev);
return ret;
}
static int
ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
- u32 buf_count, u32 commands_offset)
+ u32 buf_count, u32 commands_offset, u32 preempt_buffer_index)
{
- struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct ww_acquire_ctx acquire_ctx;
enum dma_resv_usage usage;
@@ -413,40 +770,58 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
for (i = 0; i < buf_count; i++) {
struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);
- if (!obj)
+ if (!obj) {
+ ivpu_dbg(vdev, IOCTL, "Failed to lookup GEM object with handle %u\n",
+ buf_handles[i]);
return -ENOENT;
+ }
job->bos[i] = to_ivpu_bo(obj);
- ret = ivpu_bo_pin(job->bos[i]);
+ ret = ivpu_bo_bind(job->bos[i]);
if (ret)
return ret;
}
bo = job->bos[CMD_BUF_IDX];
if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
- ivpu_warn(vdev, "Buffer is already in use\n");
+ ivpu_dbg(vdev, IOCTL, "Buffer is already in use by another job\n");
return -EBUSY;
}
if (commands_offset >= ivpu_bo_size(bo)) {
- ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset);
+ ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u for buffer size %zu\n",
+ commands_offset, ivpu_bo_size(bo));
return -EINVAL;
}
job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
+ if (preempt_buffer_index) {
+ struct ivpu_bo *preempt_bo = job->bos[preempt_buffer_index];
+
+ if (ivpu_bo_size(preempt_bo) < ivpu_fw_preempt_buf_size(vdev)) {
+ ivpu_dbg(vdev, IOCTL, "Preemption buffer is too small\n");
+ return -EINVAL;
+ }
+ if (ivpu_bo_is_mappable(preempt_bo)) {
+ ivpu_dbg(vdev, IOCTL, "Preemption buffer cannot be mappable\n");
+ return -EINVAL;
+ }
+ job->primary_preempt_buf = preempt_bo;
+ }
+
ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
&acquire_ctx);
if (ret) {
- ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
+ ivpu_warn_ratelimited(vdev, "Failed to lock reservations: %d\n", ret);
return ret;
}
for (i = 0; i < buf_count; i++) {
ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
if (ret) {
- ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+ ivpu_warn_ratelimited(vdev, "Failed to reserve fences: %d\n", ret);
goto unlock_reservations;
}
}
@@ -464,40 +839,20 @@ unlock_reservations:
return ret;
}
-int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id,
+ u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset,
+ u32 preempt_buffer_index, u8 priority)
{
- struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
- struct drm_ivpu_submit *params = data;
struct ivpu_job *job;
u32 *buf_handles;
int idx, ret;
- if (params->engine > DRM_IVPU_ENGINE_COPY)
- return -EINVAL;
-
- if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
- return -EINVAL;
-
- if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
- return -EINVAL;
-
- if (!IS_ALIGNED(params->commands_offset, 8))
- return -EINVAL;
-
- if (!file_priv->ctx.id)
- return -EINVAL;
-
- if (file_priv->has_mmu_faults)
- return -EBADFD;
-
- buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL);
+ buf_handles = kcalloc(buffer_count, sizeof(u32), GFP_KERNEL);
if (!buf_handles)
return -ENOMEM;
- ret = copy_from_user(buf_handles,
- (void __user *)params->buffers_ptr,
- params->buffer_count * sizeof(u32));
+ ret = copy_from_user(buf_handles, buffers_ptr, buffer_count * sizeof(u32));
if (ret) {
ret = -EFAULT;
goto err_free_handles;
@@ -508,25 +863,22 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_free_handles;
}
- ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
- file_priv->ctx.id, params->buffer_count);
+ ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
+ file_priv->ctx.id, cmdq_id, buffer_count);
- job = ivpu_job_create(file_priv, params->engine, params->buffer_count);
+ job = ivpu_job_create(file_priv, engine, buffer_count);
if (!job) {
- ivpu_err(vdev, "Failed to create job\n");
ret = -ENOMEM;
goto err_exit_dev;
}
- ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
- params->commands_offset);
- if (ret) {
- ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
+ ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset,
+ preempt_buffer_index);
+ if (ret)
goto err_destroy_job;
- }
down_read(&vdev->pm->reset_lock);
- ret = ivpu_job_submit(job);
+ ret = ivpu_job_submit(job, priority, cmdq_id);
up_read(&vdev->pm->reset_lock);
if (ret)
goto err_signal_fence;
@@ -546,12 +898,176 @@ err_free_handles:
return ret;
}
+int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_submit *args = data;
+ u8 priority;
+
+ if (args->engine != DRM_IVPU_ENGINE_COMPUTE) {
+ ivpu_dbg(vdev, IOCTL, "Invalid engine %d\n", args->engine);
+ return -EINVAL;
+ }
+
+ if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
+ ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
+ return -EINVAL;
+ }
+
+ if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
+ ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
+ return -EINVAL;
+ }
+
+ if (!IS_ALIGNED(args->commands_offset, 8)) {
+ ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
+ return -EINVAL;
+ }
+
+ if (!file_priv->ctx.id) {
+ ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
+ return -EINVAL;
+ }
+
+ if (file_priv->has_mmu_faults) {
+ ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
+ return -EBADFD;
+ }
+
+ priority = ivpu_job_to_jsm_priority(args->priority);
+
+ return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine,
+ (void __user *)args->buffers_ptr, args->commands_offset, 0, priority);
+}
+
+int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_cmdq_submit *args = data;
+
+ if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
+ ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
+ return -ENODEV;
+ }
+
+ if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID) {
+ ivpu_dbg(vdev, IOCTL, "Invalid command queue ID %u\n", args->cmdq_id);
+ return -EINVAL;
+ }
+
+ if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
+ ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
+ return -EINVAL;
+ }
+
+ if (args->preempt_buffer_index >= args->buffer_count) {
+ ivpu_dbg(vdev, IOCTL, "Invalid preemption buffer index %u\n",
+ args->preempt_buffer_index);
+ return -EINVAL;
+ }
+
+ if (!IS_ALIGNED(args->commands_offset, 8)) {
+ ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
+ return -EINVAL;
+ }
+
+ if (!file_priv->ctx.id) {
+ ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
+ return -EINVAL;
+ }
+
+ if (file_priv->has_mmu_faults) {
+ ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
+ return -EBADFD;
+ }
+
+ return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE,
+ (void __user *)args->buffers_ptr, args->commands_offset,
+ args->preempt_buffer_index, 0);
+}
+
+int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_cmdq_create *args = data;
+ struct ivpu_cmdq *cmdq;
+ int ret;
+
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
+ ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
+ return -ENODEV;
+ }
+
+ if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
+ ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
+ return -EINVAL;
+ }
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->lock);
+
+ cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
+ if (cmdq)
+ args->cmdq_id = cmdq->id;
+
+ mutex_unlock(&file_priv->lock);
+
+ ivpu_rpm_put(vdev);
+
+ return cmdq ? 0 : -ENOMEM;
+}
+
+int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_cmdq_destroy *args = data;
+ struct ivpu_cmdq *cmdq;
+ u32 cmdq_id = 0;
+ int ret;
+
+ if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
+ ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
+ return -ENODEV;
+ }
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->lock);
+
+ cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
+ if (!cmdq || cmdq->is_legacy) {
+ ret = -ENOENT;
+ } else {
+ cmdq_id = cmdq->id;
+ ivpu_cmdq_destroy(file_priv, cmdq);
+ ret = 0;
+ }
+
+ mutex_unlock(&file_priv->lock);
+
+ /* Abort any pending jobs only if cmdq was destroyed */
+ if (!ret)
+ ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
+
+ ivpu_rpm_put(vdev);
+
+ return ret;
+}
+
static void
ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
struct vpu_jsm_msg *jsm_msg)
{
struct vpu_ipc_msg_payload_job_done *payload;
- int ret;
if (!jsm_msg) {
ivpu_err(vdev, "IPC message has no JSM payload\n");
@@ -564,9 +1080,12 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
}
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
- ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
- if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
- ivpu_start_job_timeout_detection(vdev);
+
+ mutex_lock(&vdev->submitted_jobs_lock);
+ if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status))
+ /* No engine error, complete the job normally */
+ ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+ mutex_unlock(&vdev->submitted_jobs_lock);
}
void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
@@ -579,3 +1098,56 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}
+
+void ivpu_context_abort_work_fn(struct work_struct *work)
+{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
+ struct ivpu_file_priv *file_priv;
+ struct ivpu_job *job;
+ unsigned long ctx_id;
+ unsigned long id;
+
+ if (drm_WARN_ON(&vdev->drm, pm_runtime_get_if_active(vdev->drm.dev) <= 0))
+ return;
+
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ if (ivpu_jsm_reset_engine(vdev, 0))
+ goto runtime_put;
+
+ mutex_lock(&vdev->context_list_lock);
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+ if (!file_priv->has_mmu_faults || file_priv->aborted)
+ continue;
+
+ mutex_lock(&file_priv->lock);
+ ivpu_context_abort_locked(file_priv);
+ mutex_unlock(&file_priv->lock);
+ }
+ mutex_unlock(&vdev->context_list_lock);
+
+ /*
+ * We will not receive new MMU event interrupts until existing events are discarded
+ * however, we want to discard these events only after aborting the faulty context
+ * to avoid generating new faults from that context
+ */
+ ivpu_mmu_discard_events(vdev);
+
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ goto runtime_put;
+
+ if (ivpu_jsm_hws_resume_engine(vdev, 0))
+ goto runtime_put;
+ /*
+ * In hardware scheduling mode NPU already has stopped processing jobs
+ * and won't send us any further notifications, thus we have to free job related resources
+ * and notify userspace
+ */
+ mutex_lock(&vdev->submitted_jobs_lock);
+ xa_for_each(&vdev->submitted_jobs_xa, id, job)
+ if (job->file_priv->aborted)
+ ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
+ mutex_unlock(&vdev->submitted_jobs_lock);
+
+runtime_put:
+ pm_runtime_put_autosuspend(vdev->drm.dev);
+}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index ca4984071cc7..3ab61e6a5616 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_JOB_H__
@@ -15,51 +15,78 @@ struct ivpu_device;
struct ivpu_file_priv;
/**
- * struct ivpu_cmdq - Object representing device queue used to send jobs.
- * @jobq: Pointer to job queue memory shared with the device
- * @mem: Memory allocated for the job queue, shared with device
- * @entry_count Number of job entries in the queue
- * @db_id: Doorbell assigned to this job queue
- * @db_registered: True if doorbell is registered in device
+ * struct ivpu_cmdq - Represents a command queue for submitting jobs to the VPU.
+ * Tracks queue memory, preemption buffers, and metadata for job management.
+ * @jobq: Pointer to job queue memory shared with the device
+ * @primary_preempt_buf: Primary preemption buffer for this queue (optional)
+ * @secondary_preempt_buf: Secondary preemption buffer for this queue (optional)
+ * @mem: Memory allocated for the job queue, shared with device
+ * @entry_count: Number of job entries in the queue
+ * @id: Unique command queue ID
+ * @db_id: Doorbell ID assigned to this job queue
+ * @priority: Priority level of the command queue
+ * @is_legacy: True if this is a legacy command queue
*/
struct ivpu_cmdq {
struct vpu_job_queue *jobq;
+ struct ivpu_bo *primary_preempt_buf;
+ struct ivpu_bo *secondary_preempt_buf;
struct ivpu_bo *mem;
u32 entry_count;
+ u32 id;
u32 db_id;
- bool db_registered;
+ u8 priority;
+ bool is_legacy;
};
/**
- * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer.
- * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW.
- * This is a unit of execution, and be tracked by the job_id for
- * any status reporting from VPU FW through IPC JOB RET/DONE message.
- * @file_priv: The client that submitted this job
- * @job_id: Job ID for KMD tracking and job status reporting from VPU FW
- * @status: Status of the Job from IPC JOB RET/DONE message
- * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job
- * @submit_status_offset: Offset within batch buffer where job completion handler
- will update the job status
+ * struct ivpu_job - Representing a batch or DMA buffer submitted to the VPU.
+ * Each job is a unit of execution, tracked by job_id for status reporting from VPU FW.
+ * The structure holds all resources and metadata needed for job submission, execution,
+ * and completion handling.
+ * @vdev: Pointer to the VPU device
+ * @file_priv: The client context that submitted this job
+ * @done_fence: Fence signaled when job completes
+ * @cmd_buf_vpu_addr: VPU address of the command buffer for this job
+ * @cmdq_id: Command queue ID used for submission
+ * @job_id: Unique job ID for tracking and status reporting
+ * @engine_idx: Engine index for job execution
+ * @job_status: Status reported by firmware for this job
+ * @primary_preempt_buf: Primary preemption buffer for job
+ * @secondary_preempt_buf: Secondary preemption buffer for job (optional)
+ * @bo_count: Number of buffer objects associated with this job
+ * @bos: Array of buffer objects used by the job (batch buffer is at index 0)
*/
struct ivpu_job {
struct ivpu_device *vdev;
struct ivpu_file_priv *file_priv;
struct dma_fence *done_fence;
u64 cmd_buf_vpu_addr;
+ u32 cmdq_id;
u32 job_id;
u32 engine_idx;
+ u32 job_status;
+ struct ivpu_bo *primary_preempt_buf;
+ struct ivpu_bo *secondary_preempt_buf;
size_t bo_count;
struct ivpu_bo *bos[] __counted_by(bo_count);
};
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
+void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id);
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
+bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status);
+void ivpu_context_abort_work_fn(struct work_struct *work);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 8cea0dd731b9..0256b2dfefc1 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
+#include "ivpu_pm.h"
+#include "vpu_jsm_api.h"
const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
{
@@ -48,9 +50,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
- IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED);
IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE);
@@ -103,14 +106,10 @@ int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret) {
- ivpu_err_ratelimited(vdev, "Failed to register doorbell %d: %d\n", db_id, ret);
- return ret;
- }
-
- ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id);
+ if (ret)
+ ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret);
- return 0;
+ return ret;
}
int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id)
@@ -123,14 +122,10 @@ int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id)
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret) {
- ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret);
- return ret;
- }
-
- ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %u: %d\n", db_id, ret);
- return 0;
+ return ret;
}
int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
@@ -139,7 +134,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.query_engine_hb.engine_idx = engine;
@@ -162,15 +157,17 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.engine_reset.engine_idx = engine;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
- if (ret)
+ if (ret) {
ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret);
+ ivpu_pm_trigger_recovery(vdev, "Engine reset failed");
+ }
return ret;
}
@@ -181,7 +178,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.engine_preempt.engine_idx = engine;
@@ -204,7 +201,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size
strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
- VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm);
if (ret)
ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n",
command, ret);
@@ -255,11 +252,16 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE };
struct vpu_jsm_msg resp;
+ int ret;
req.payload.ssid_release.host_ssid = host_ssid;
- return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
- VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to release context: %d\n", ret);
+
+ return ret;
}
int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
@@ -273,11 +275,288 @@ int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
req.payload.pwr_d0i3_enter.send_response = 1;
- ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
- &resp, VPU_IPC_CHAN_GEN_CMD,
- vdev->timeout.d0i3_entry_msg);
+ ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, &resp,
+ VPU_IPC_CHAN_GEN_CMD, vdev->timeout.d0i3_entry_msg);
if (ret)
return ret;
return ivpu_hw_wait_for_idle(vdev);
}
+
+int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id,
+ u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_CREATE_CMD_QUEUE };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.hws_create_cmdq.host_ssid = ctx_id;
+ req.payload.hws_create_cmdq.process_id = pid;
+ req.payload.hws_create_cmdq.engine_idx = engine;
+ req.payload.hws_create_cmdq.cmdq_group = cmdq_group;
+ req.payload.hws_create_cmdq.cmdq_id = cmdq_id;
+ req.payload.hws_create_cmdq.cmdq_base = cmdq_base;
+ req.payload.hws_create_cmdq.cmdq_size = cmdq_size;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to create command queue: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DESTROY_CMD_QUEUE };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.hws_destroy_cmdq.host_ssid = ctx_id;
+ req.payload.hws_destroy_cmdq.cmdq_id = cmdq_id;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to destroy command queue: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id,
+ u64 cmdq_base, u32 cmdq_size)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_REGISTER_DB };
+ struct vpu_jsm_msg resp;
+ int ret = 0;
+
+ req.payload.hws_register_db.db_id = db_id;
+ req.payload.hws_register_db.host_ssid = ctx_id;
+ req.payload.hws_register_db.cmdq_id = cmdq_id;
+ req.payload.hws_register_db.cmdq_base = cmdq_base;
+ req.payload.hws_register_db.cmdq_size = cmdq_size;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret);
+
+ return ret;
+}
+
+int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_ENGINE_RESUME };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ if (engine != VPU_ENGINE_COMPUTE)
+ return -EINVAL;
+
+ req.payload.hws_resume_engine.engine_idx = engine;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret) {
+ ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret);
+ ivpu_pm_trigger_recovery(vdev, "Engine resume failed");
+ }
+
+ return ret;
+}
+
+int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id,
+ u32 priority)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.hws_set_context_sched_properties.host_ssid = ctx_id;
+ req.payload.hws_set_context_sched_properties.cmdq_id = cmdq_id;
+ req.payload.hws_set_context_sched_properties.priority_band = priority;
+ req.payload.hws_set_context_sched_properties.realtime_priority_level = 0;
+ req.payload.hws_set_context_sched_properties.in_process_priority = 0;
+ req.payload.hws_set_context_sched_properties.context_quantum = 20000;
+ req.payload.hws_set_context_sched_properties.grace_period_same_priority = 10000;
+ req.payload.hws_set_context_sched_properties.grace_period_lower_priority = 0;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to set context sched properties: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid,
+ u64 vpu_log_buffer_va)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.hws_set_scheduling_log.engine_idx = engine_idx;
+ req.payload.hws_set_scheduling_log.host_ssid = host_ssid;
+ req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va;
+ req.payload.hws_set_scheduling_log.notify_index = 0;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to set scheduling log: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP };
+ struct vpu_jsm_msg resp;
+ struct ivpu_hw_info *hw = vdev->hw;
+ struct vpu_ipc_msg_payload_hws_priority_band_setup *setup =
+ &req.payload.hws_priority_band_setup;
+ int ret;
+
+ for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
+ band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
+ setup->grace_period[band] = hw->hws.grace_period[band];
+ setup->process_grace_period[band] = hw->hws.process_grace_period[band];
+ setup->process_quantum[band] = hw->hws.process_quantum[band];
+ }
+ setup->normal_band_percentage = 10;
+
+ ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP,
+ &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to set priority bands: %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask,
+ u64 sampling_rate, u64 buffer_addr, u64 buffer_size)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_START };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.metric_streamer_start.metric_group_mask = metric_group_mask;
+ req.payload.metric_streamer_start.sampling_rate = sampling_rate;
+ req.payload.metric_streamer_start.buffer_addr = buffer_addr;
+ req.payload.metric_streamer_start.buffer_size = buffer_size;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_START_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret) {
+ ivpu_warn_ratelimited(vdev, "Failed to start metric streamer: ret %d\n", ret);
+ return ret;
+ }
+
+ return ret;
+}
+
+int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_STOP };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.metric_streamer_stop.metric_group_mask = metric_group_mask;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret)
+ ivpu_warn_ratelimited(vdev, "Failed to stop metric streamer: ret %d\n", ret);
+
+ return ret;
+}
+
+int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask,
+ u64 buffer_addr, u64 buffer_size, u64 *bytes_written)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_UPDATE };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.metric_streamer_update.metric_group_mask = metric_group_mask;
+ req.payload.metric_streamer_update.buffer_addr = buffer_addr;
+ req.payload.metric_streamer_update.buffer_size = buffer_size;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret) {
+ ivpu_warn_ratelimited(vdev, "Failed to update metric streamer: ret %d\n", ret);
+ return ret;
+ }
+
+ if (buffer_size && resp.payload.metric_streamer_done.bytes_written > buffer_size) {
+ ivpu_warn_ratelimited(vdev, "MS buffer overflow: bytes_written %#llx > buffer_size %#llx\n",
+ resp.payload.metric_streamer_done.bytes_written, buffer_size);
+ return -EOVERFLOW;
+ }
+
+ *bytes_written = resp.payload.metric_streamer_done.bytes_written;
+
+ return ret;
+}
+
+int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr,
+ u64 buffer_size, u32 *sample_size, u64 *info_size)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_INFO };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ req.payload.metric_streamer_start.metric_group_mask = metric_group_mask;
+ req.payload.metric_streamer_start.buffer_addr = buffer_addr;
+ req.payload.metric_streamer_start.buffer_size = buffer_size;
+
+ ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ if (ret) {
+ ivpu_warn_ratelimited(vdev, "Failed to get metric streamer info: ret %d\n", ret);
+ return ret;
+ }
+
+ if (!resp.payload.metric_streamer_done.sample_size) {
+ ivpu_warn_ratelimited(vdev, "Invalid sample size\n");
+ return -EBADMSG;
+ }
+
+ if (sample_size)
+ *sample_size = resp.payload.metric_streamer_done.sample_size;
+ if (info_size)
+ *info_size = resp.payload.metric_streamer_done.bytes_written;
+
+ return ret;
+}
+
+int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_ENABLE };
+ struct vpu_jsm_msg resp;
+
+ req.payload.pwr_dct_control.dct_active_us = active_us;
+ req.payload.pwr_dct_control.dct_inactive_us = inactive_us;
+
+ return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+}
+
+int ivpu_jsm_dct_disable(struct ivpu_device *vdev)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_DISABLE };
+ struct vpu_jsm_msg resp;
+
+ return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+}
+
+int ivpu_jsm_state_dump(struct ivpu_device *vdev)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP };
+
+ return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD,
+ vdev->timeout.state_dump_msg);
+}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index ae75e5dbcc41..9e84d3526a14 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_JSM_MSG_H__
@@ -23,4 +23,26 @@ int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 tra
u64 trace_hw_component_mask);
int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev);
+int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id,
+ u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size);
+int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id);
+int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id,
+ u64 cmdq_base, u32 cmdq_size);
+int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine);
+int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id,
+ u32 priority);
+int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid,
+ u64 vpu_log_buffer_va);
+int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev);
+int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask,
+ u64 sampling_rate, u64 buffer_addr, u64 buffer_size);
+int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask);
+int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask,
+ u64 buffer_addr, u64 buffer_size, u64 *bytes_written);
+int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr,
+ u64 buffer_size, u32 *sample_size, u64 *info_size);
+int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us);
+int ivpu_jsm_dct_disable(struct ivpu_device *vdev);
+int ivpu_jsm_state_dump(struct ivpu_device *vdev);
+
#endif
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 2e46b322c450..e1baf6b64935 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/circ_buf.h>
@@ -20,6 +20,12 @@
#define IVPU_MMU_REG_CR0 0x00200020u
#define IVPU_MMU_REG_CR0ACK 0x00200024u
#define IVPU_MMU_REG_CR0ACK_VAL_MASK GENMASK(31, 0)
+#define IVPU_MMU_REG_CR0_ATSCHK_MASK BIT(4)
+#define IVPU_MMU_REG_CR0_CMDQEN_MASK BIT(3)
+#define IVPU_MMU_REG_CR0_EVTQEN_MASK BIT(2)
+#define IVPU_MMU_REG_CR0_PRIQEN_MASK BIT(1)
+#define IVPU_MMU_REG_CR0_SMMUEN_MASK BIT(0)
+
#define IVPU_MMU_REG_CR1 0x00200028u
#define IVPU_MMU_REG_CR2 0x0020002cu
#define IVPU_MMU_REG_IRQ_CTRL 0x00200050u
@@ -141,12 +147,6 @@
#define IVPU_MMU_IRQ_EVTQ_EN BIT(2)
#define IVPU_MMU_IRQ_GERROR_EN BIT(0)
-#define IVPU_MMU_CR0_ATSCHK BIT(4)
-#define IVPU_MMU_CR0_CMDQEN BIT(3)
-#define IVPU_MMU_CR0_EVTQEN BIT(2)
-#define IVPU_MMU_CR0_PRIQEN BIT(1)
-#define IVPU_MMU_CR0_SMMUEN BIT(0)
-
#define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10)
#define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8)
#define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6)
@@ -519,7 +519,8 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
if (ret)
return ret;
- clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE);
+ if (!ivpu_is_force_snoop_enabled(vdev))
+ clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE);
REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);
ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
@@ -567,7 +568,8 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
int ret;
memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE);
- clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE);
+ if (!ivpu_is_force_snoop_enabled(vdev))
+ clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE);
mmu->cmdq.prod = 0;
mmu->cmdq.cons = 0;
@@ -594,7 +596,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0);
REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0);
- val = IVPU_MMU_CR0_CMDQEN;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, CMDQEN, 0);
ret = ivpu_mmu_reg_write_cr0(vdev, val);
if (ret)
return ret;
@@ -615,12 +617,12 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0);
REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0);
- val |= IVPU_MMU_CR0_EVTQEN;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val);
ret = ivpu_mmu_reg_write_cr0(vdev, val);
if (ret)
return ret;
- val |= IVPU_MMU_CR0_ATSCHK;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, ATSCHK, val);
ret = ivpu_mmu_reg_write_cr0(vdev, val);
if (ret)
return ret;
@@ -629,7 +631,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
if (ret)
return ret;
- val |= IVPU_MMU_CR0_SMMUEN;
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, SMMUEN, val);
return ivpu_mmu_reg_write_cr0(vdev, val);
}
@@ -661,7 +663,8 @@ static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid)
WRITE_ONCE(entry[1], str[1]);
WRITE_ONCE(entry[0], str[0]);
- clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE);
+ if (!ivpu_is_force_snoop_enabled(vdev))
+ clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE);
ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]);
}
@@ -693,7 +696,7 @@ unlock:
return ret;
}
-static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
+static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
@@ -705,40 +708,40 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
return -EINVAL;
entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
-
- if (cd_dma != 0) {
- cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) |
- FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
- IVPU_MMU_CD_0_TCR_EPD1 |
- IVPU_MMU_CD_0_AA64 |
- IVPU_MMU_CD_0_R |
- IVPU_MMU_CD_0_ASET |
- IVPU_MMU_CD_0_V;
- cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
- cd[2] = 0;
- cd[3] = 0x0000000000007444;
-
- /* For global context generate memory fault on VPU */
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
- cd[0] |= IVPU_MMU_CD_0_A;
- } else {
- memset(cd, 0, sizeof(cd));
- }
+ drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid);
+
+ cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) |
+ FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
+ IVPU_MMU_CD_0_TCR_EPD1 |
+ IVPU_MMU_CD_0_AA64 |
+ IVPU_MMU_CD_0_R |
+ IVPU_MMU_CD_0_ASET;
+ cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
+ cd[2] = 0;
+ cd[3] = 0x0000000000007444;
+
+ /* For global and reserved contexts generate memory fault on VPU */
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID || ssid == IVPU_RESERVED_CONTEXT_MMU_SSID)
+ cd[0] |= IVPU_MMU_CD_0_A;
+
+ if (valid)
+ cd[0] |= IVPU_MMU_CD_0_V;
WRITE_ONCE(entry[1], cd[1]);
WRITE_ONCE(entry[2], cd[2]);
WRITE_ONCE(entry[3], cd[3]);
WRITE_ONCE(entry[0], cd[0]);
- clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
+ if (!ivpu_is_force_snoop_enabled(vdev))
+ clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
- ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
- cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
+ ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
+ valid ? "valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
mutex_lock(&mmu->lock);
if (!mmu->on)
@@ -746,38 +749,18 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
if (ret)
- goto unlock;
+ goto err_invalidate;
ret = ivpu_mmu_cmdq_sync(vdev);
+ if (ret)
+ goto err_invalidate;
unlock:
mutex_unlock(&mmu->lock);
- return ret;
-}
-
-static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma);
- if (ret)
- ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma)
-{
- int ret;
-
- if (ssid == 0) {
- ivpu_err(vdev, "Invalid SSID: %u\n", ssid);
- return -EINVAL;
- }
-
- ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma);
- if (ret)
- ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret);
+ return 0;
+err_invalidate:
+ WRITE_ONCE(entry[0], 0);
+ mutex_unlock(&mmu->lock);
return ret;
}
@@ -804,12 +787,6 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
return ret;
}
- ret = ivpu_mmu_cd_add_gbl(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret);
- return ret;
- }
-
ret = ivpu_mmu_enable(vdev);
if (ret) {
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
@@ -874,8 +851,9 @@ static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event)
u64 in_addr = ((u64)event[5]) << 32 | event[4];
u32 sid = event[1];
- ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n",
- op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr);
+ ivpu_err_ratelimited(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n",
+ op, ivpu_mmu_event_to_str(op), ssid, sid,
+ event[2], event[3], in_addr, fetch_addr);
}
static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
@@ -892,25 +870,107 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
return evt;
}
+static int ivpu_mmu_evtq_set(struct ivpu_device *vdev, bool enable)
+{
+ u32 val = REGV_RD32(IVPU_MMU_REG_CR0);
+
+ if (enable)
+ val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val);
+ else
+ val = REG_CLR_FLD(IVPU_MMU_REG_CR0, EVTQEN, val);
+ REGV_WR32(IVPU_MMU_REG_CR0, val);
+
+ return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US);
+}
+
+static int ivpu_mmu_evtq_enable(struct ivpu_device *vdev)
+{
+ return ivpu_mmu_evtq_set(vdev, true);
+}
+
+static int ivpu_mmu_evtq_disable(struct ivpu_device *vdev)
+{
+ return ivpu_mmu_evtq_set(vdev, false);
+}
+
+void ivpu_mmu_discard_events(struct ivpu_device *vdev)
+{
+ struct ivpu_mmu_info *mmu = vdev->mmu;
+
+ mutex_lock(&mmu->lock);
+ /*
+ * Disable event queue (stop MMU from updating the producer)
+ * to allow synchronization of consumer and producer indexes
+ */
+ ivpu_mmu_evtq_disable(vdev);
+
+ vdev->mmu->evtq.cons = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
+ REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
+ vdev->mmu->evtq.prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
+
+ ivpu_mmu_evtq_enable(vdev);
+
+ drm_WARN_ON_ONCE(&vdev->drm, vdev->mmu->evtq.cons != vdev->mmu->evtq.prod);
+
+ mutex_unlock(&mmu->lock);
+}
+
+int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid)
+{
+ struct ivpu_mmu_info *mmu = vdev->mmu;
+ struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
+ u64 *entry;
+ u64 val;
+
+ if (ssid > IVPU_MMU_CDTAB_ENT_COUNT)
+ return -EINVAL;
+
+ mutex_lock(&mmu->lock);
+
+ entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
+
+ val = READ_ONCE(entry[0]);
+ val &= ~IVPU_MMU_CD_0_R;
+ WRITE_ONCE(entry[0], val);
+
+ if (!ivpu_is_force_snoop_enabled(vdev))
+ clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
+
+ ivpu_mmu_cmdq_write_cfgi_all(vdev);
+ ivpu_mmu_cmdq_sync(vdev);
+
+ mutex_unlock(&mmu->lock);
+
+ return 0;
+}
+
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
{
+ struct ivpu_file_priv *file_priv;
u32 *event;
u32 ssid;
ivpu_dbg(vdev, IRQ, "MMU event queue\n");
- while ((event = ivpu_mmu_get_event(vdev)) != NULL) {
- ivpu_mmu_dump_event(vdev, event);
-
- ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+ while ((event = ivpu_mmu_get_event(vdev))) {
+ ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, *event);
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID ||
+ ssid == IVPU_RESERVED_CONTEXT_MMU_SSID) {
+ ivpu_mmu_dump_event(vdev, event);
ivpu_pm_trigger_recovery(vdev, "MMU event");
return;
}
- ivpu_mmu_user_context_mark_invalid(vdev, ssid);
- REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
+ file_priv = xa_load(&vdev->context_xa, ssid);
+ if (file_priv) {
+ if (!READ_ONCE(file_priv->has_mmu_faults)) {
+ ivpu_mmu_dump_event(vdev, event);
+ WRITE_ONCE(file_priv->has_mmu_faults, true);
+ }
+ }
}
+
+ queue_work(system_percpu_wq, &vdev->context_abort_work);
}
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
@@ -958,12 +1018,12 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val);
}
-int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
+int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
{
- return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma);
+ return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true);
}
-void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid)
+void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid)
{
- ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */
+ ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false);
}
diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h
index 6fa35c240710..1ce7529746ad 100644
--- a/drivers/accel/ivpu/ivpu_mmu.h
+++ b/drivers/accel/ivpu/ivpu_mmu.h
@@ -40,12 +40,14 @@ struct ivpu_mmu_info {
int ivpu_mmu_init(struct ivpu_device *vdev);
void ivpu_mmu_disable(struct ivpu_device *vdev);
int ivpu_mmu_enable(struct ivpu_device *vdev);
-int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
-void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid);
+int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
+void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid);
int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev);
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev);
+void ivpu_mmu_discard_events(struct ivpu_device *vdev);
+int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid);
#endif /* __IVPU_MMU_H__ */
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 128aef8e5a19..87ad593ef47d 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -24,6 +24,7 @@
#define IVPU_MMU_ENTRY_FLAG_CONT BIT(52)
#define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
#define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
+#define IVPU_MMU_ENTRY_FLAG_RO BIT(7)
#define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
@@ -89,19 +90,6 @@ static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_
}
}
-static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
-{
- dma_addr_t pgd_dma;
-
- pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
- if (!pgtable->pgd_dma_ptr)
- return -ENOMEM;
-
- pgtable->pgd_dma = pgd_dma;
-
- return 0;
-}
-
static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
int pgd_idx, pud_idx, pmd_idx;
@@ -139,6 +127,27 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
}
ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+ pgtable->pgd_dma_ptr = NULL;
+ pgtable->pgd_dma = 0;
+}
+
+static u64*
+ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+ u64 *pgd_dma_ptr = pgtable->pgd_dma_ptr;
+ dma_addr_t pgd_dma;
+
+ if (pgd_dma_ptr)
+ return pgd_dma_ptr;
+
+ pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
+ if (!pgd_dma_ptr)
+ return NULL;
+
+ pgtable->pgd_dma_ptr = pgd_dma_ptr;
+ pgtable->pgd_dma = pgd_dma;
+
+ return pgd_dma_ptr;
}
static u64*
@@ -236,6 +245,12 @@ ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx
int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+ drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID);
+
+ /* Allocate PGD - first level page table if needed */
+ if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable))
+ return -ENOMEM;
+
/* Allocate PUD - second level page table if needed */
if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
return -ENOMEM;
@@ -319,6 +334,91 @@ ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
return 0;
}
+static void ivpu_mmu_context_set_page_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+ u64 vpu_addr)
+{
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+ int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+ ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] |= IVPU_MMU_ENTRY_FLAG_RO;
+}
+
+static void ivpu_mmu_context_split_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+ u64 vpu_addr)
+{
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+ int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+ ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] &= ~IVPU_MMU_ENTRY_FLAG_CONT;
+}
+
+static void ivpu_mmu_context_split_64k_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+ u64 vpu_addr)
+{
+ u64 start = ALIGN_DOWN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE);
+ u64 end = ALIGN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE);
+ u64 offset = 0;
+
+ ivpu_dbg(vdev, MMU_MAP, "Split 64K page ctx: %u vpu_addr: 0x%llx\n", ctx->id, vpu_addr);
+
+ while (start + offset < end) {
+ ivpu_mmu_context_split_page(vdev, ctx, start + offset);
+ offset += IVPU_MMU_PAGE_SIZE;
+ }
+}
+
+int
+ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr,
+ size_t size)
+{
+ u64 end = vpu_addr + size;
+ size_t size_left = size;
+ int ret;
+
+ if (size == 0)
+ return 0;
+
+ if (drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr | size, IVPU_MMU_PAGE_SIZE)))
+ return -EINVAL;
+
+ mutex_lock(&ctx->lock);
+
+ ivpu_dbg(vdev, MMU_MAP, "Set read-only pages ctx: %u vpu_addr: 0x%llx size: %lu\n",
+ ctx->id, vpu_addr, size);
+
+ if (!ivpu_disable_mmu_cont_pages) {
+ /* Split 64K contiguous page at the beginning if needed */
+ if (!IS_ALIGNED(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE))
+ ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr);
+
+ /* Split 64K contiguous page at the end if needed */
+ if (!IS_ALIGNED(vpu_addr + size, IVPU_MMU_CONT_PAGES_SIZE))
+ ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr + size);
+ }
+
+ while (size_left) {
+ if (vpu_addr < end)
+ ivpu_mmu_context_set_page_ro(vdev, ctx, vpu_addr);
+
+ vpu_addr += IVPU_MMU_PAGE_SIZE;
+ size_left -= IVPU_MMU_PAGE_SIZE;
+ }
+
+ /* Ensure page table modifications are flushed from wc buffers to memory */
+ wmb();
+
+ mutex_unlock(&ctx->lock);
+ ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
+ if (ret)
+ ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+
+ return 0;
+}
+
static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
while (size) {
@@ -330,8 +430,9 @@ static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_a
int
ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
+ u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only)
{
+ size_t start_vpu_addr = vpu_addr;
struct scatterlist *sg;
int ret;
u64 prot;
@@ -349,6 +450,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
prot = IVPU_MMU_ENTRY_MAPPED;
if (llc_coherent)
prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
+ if (read_only)
+ prot |= IVPU_MMU_ENTRY_FLAG_RO;
mutex_lock(&ctx->lock);
@@ -362,20 +465,36 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
if (ret) {
ivpu_err(vdev, "Failed to map context pages\n");
- mutex_unlock(&ctx->lock);
- return ret;
+ goto err_unmap_pages;
}
vpu_addr += size;
}
+ if (!ctx->is_cd_valid) {
+ ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable);
+ if (ret) {
+ ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n",
+ ctx->id, ret);
+ goto err_unmap_pages;
+ }
+ ctx->is_cd_valid = true;
+ }
+
/* Ensure page table modifications are flushed from wc buffers to memory */
wmb();
- mutex_unlock(&ctx->lock);
-
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
- if (ret)
+ if (ret) {
ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+ goto err_unmap_pages;
+ }
+
+ mutex_unlock(&ctx->lock);
+ return 0;
+
+err_unmap_pages:
+ ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr);
+ mutex_unlock(&ctx->lock);
return ret;
}
@@ -410,7 +529,8 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
if (ret)
- ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+ ivpu_warn_ratelimited(vdev, "Failed to invalidate TLB for ctx %u: %d\n",
+ ctx->id, ret);
}
int
@@ -444,109 +564,77 @@ ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *n
mutex_unlock(&ctx->lock);
}
-static int
-ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
+void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
{
u64 start, end;
- int ret;
mutex_init(&ctx->lock);
- ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
- if (ret) {
- ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret);
- return ret;
- }
-
if (!context_id) {
- start = vdev->hw->ranges.global.start;
+ start = vdev->hw->ranges.runtime.start;
end = vdev->hw->ranges.shave.end;
} else {
- start = vdev->hw->ranges.user.start;
- end = vdev->hw->ranges.dma.end;
+ start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start);
+ end = max_t(u64, vdev->hw->ranges.user.end, vdev->hw->ranges.dma.end);
}
drm_mm_init(&ctx->mm, start, end - start);
ctx->id = context_id;
-
- return 0;
}
-static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
- if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
- return;
+ if (ctx->is_cd_valid) {
+ ivpu_mmu_cd_clear(vdev, ctx->id);
+ ctx->is_cd_valid = false;
+ }
mutex_destroy(&ctx->lock);
ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
drm_mm_takedown(&ctx->mm);
-
- ctx->pgtable.pgd_dma_ptr = NULL;
- ctx->pgtable.pgd_dma = 0;
}
-int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
+void ivpu_mmu_global_context_init(struct ivpu_device *vdev)
{
- return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
+ ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
}
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
{
- return ivpu_mmu_context_fini(vdev, &vdev->gctx);
+ ivpu_mmu_context_fini(vdev, &vdev->gctx);
}
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
{
- return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
-}
-
-void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
-{
- return ivpu_mmu_user_context_fini(vdev, &vdev->rctx);
-}
-
-void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
-{
- struct ivpu_file_priv *file_priv;
-
- xa_lock(&vdev->context_xa);
-
- file_priv = xa_load(&vdev->context_xa, ssid);
- if (file_priv)
- file_priv->has_mmu_faults = true;
-
- xa_unlock(&vdev->context_xa);
-}
-
-int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
-{
int ret;
- drm_WARN_ON(&vdev->drm, !ctx_id);
+ ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
- ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
- if (ret) {
- ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret);
- return ret;
+ mutex_lock(&vdev->rctx.lock);
+
+ if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) {
+ ivpu_err(vdev, "Failed to allocate root page table for reserved context\n");
+ ret = -ENOMEM;
+ goto err_ctx_fini;
}
- ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
+ ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable);
if (ret) {
- ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret);
- goto err_context_fini;
+ ivpu_err(vdev, "Failed to set context descriptor for reserved context\n");
+ goto err_ctx_fini;
}
- return 0;
+ mutex_unlock(&vdev->rctx.lock);
+ return ret;
-err_context_fini:
- ivpu_mmu_context_fini(vdev, ctx);
+err_ctx_fini:
+ mutex_unlock(&vdev->rctx.lock);
+ ivpu_mmu_context_fini(vdev, &vdev->rctx);
return ret;
}
-void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
{
- drm_WARN_ON(&vdev->drm, !ctx->id);
-
- ivpu_mmu_clear_pgtable(vdev, ctx->id);
- ivpu_mmu_context_fini(vdev, ctx);
+ ivpu_mmu_cd_clear(vdev, vdev->rctx.id);
+ ivpu_mmu_context_fini(vdev, &vdev->rctx);
}
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index 535db3a1fc74..663a11a9db11 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -23,28 +23,29 @@ struct ivpu_mmu_pgtable {
};
struct ivpu_mmu_context {
- struct mutex lock; /* Protects: mm, pgtable */
+ struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */
struct drm_mm mm;
struct ivpu_mmu_pgtable pgtable;
+ bool is_cd_valid;
u32 id;
};
-int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
+void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id);
+void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
+void ivpu_mmu_global_context_init(struct ivpu_device *vdev);
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);
-int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
-void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
-void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
-
int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
u64 size, struct drm_mm_node *node);
void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
+ u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only);
void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt);
+int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+ u64 vpu_addr, size_t size);
#endif /* __IVPU_MMU_CONTEXT_H__ */
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
new file mode 100644
index 000000000000..1d9c1cb17924
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#include <drm/drm_file.h>
+#include <linux/pm_runtime.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_jsm_msg.h"
+#include "ivpu_ms.h"
+#include "ivpu_pm.h"
+
+#define MS_INFO_BUFFER_SIZE SZ_64K
+#define MS_NUM_BUFFERS 2
+#define MS_READ_PERIOD_MULTIPLIER 2
+#define MS_MIN_SAMPLE_PERIOD_NS 1000000
+
+static struct ivpu_ms_instance *
+get_instance_by_mask(struct ivpu_file_priv *file_priv, u64 metric_mask)
+{
+ struct ivpu_ms_instance *ms;
+
+ lockdep_assert_held(&file_priv->ms_lock);
+
+ list_for_each_entry(ms, &file_priv->ms_instance_list, ms_instance_node)
+ if (ms->mask == metric_mask)
+ return ms;
+
+ return NULL;
+}
+
+int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct drm_ivpu_metric_streamer_start *args = data;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_ms_instance *ms;
+ u32 sample_size;
+ u64 buf_size;
+ int ret;
+
+ if (!args->metric_group_mask || !args->read_period_samples ||
+ args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
+ return -EINVAL;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->ms_lock);
+
+ if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
+ ivpu_dbg(vdev, IOCTL, "Instance already exists (mask %#llx)\n",
+ args->metric_group_mask);
+ ret = -EALREADY;
+ goto unlock;
+ }
+
+ ms = kzalloc(sizeof(*ms), GFP_KERNEL);
+ if (!ms) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ms->mask = args->metric_group_mask;
+
+ ret = ivpu_jsm_metric_streamer_info(vdev, ms->mask, 0, 0, &sample_size, NULL);
+ if (ret)
+ goto err_free_ms;
+
+ buf_size = PAGE_ALIGN((u64)args->read_period_samples * sample_size *
+ MS_READ_PERIOD_MULTIPLIER * MS_NUM_BUFFERS);
+ if (buf_size > ivpu_hw_range_size(&vdev->hw->ranges.global)) {
+ ivpu_dbg(vdev, IOCTL, "Requested MS buffer size %llu exceeds range size %llu\n",
+ buf_size, ivpu_hw_range_size(&vdev->hw->ranges.global));
+ ret = -EINVAL;
+ goto err_free_ms;
+ }
+
+ ms->bo = ivpu_bo_create_global(vdev, buf_size, DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
+ if (!ms->bo) {
+ ivpu_dbg(vdev, IOCTL, "Failed to allocate MS buffer (size %llu)\n", buf_size);
+ ret = -ENOMEM;
+ goto err_free_ms;
+ }
+
+ ms->buff_size = ivpu_bo_size(ms->bo) / MS_NUM_BUFFERS;
+ ms->active_buff_vpu_addr = ms->bo->vpu_addr;
+ ms->inactive_buff_vpu_addr = ms->bo->vpu_addr + ms->buff_size;
+ ms->active_buff_ptr = ivpu_bo_vaddr(ms->bo);
+ ms->inactive_buff_ptr = ivpu_bo_vaddr(ms->bo) + ms->buff_size;
+
+ ret = ivpu_jsm_metric_streamer_start(vdev, ms->mask, args->sampling_period_ns,
+ ms->active_buff_vpu_addr, ms->buff_size);
+ if (ret)
+ goto err_free_bo;
+
+ args->sample_size = sample_size;
+ args->max_data_size = ivpu_bo_size(ms->bo);
+ list_add_tail(&ms->ms_instance_node, &file_priv->ms_instance_list);
+ goto unlock;
+
+err_free_bo:
+ ivpu_bo_free(ms->bo);
+err_free_ms:
+ kfree(ms);
+unlock:
+ mutex_unlock(&file_priv->ms_lock);
+
+ ivpu_rpm_put(vdev);
+ return ret;
+}
+
+static int
+copy_leftover_bytes(struct ivpu_ms_instance *ms,
+ void __user *user_ptr, u64 user_size, u64 *user_bytes_copied)
+{
+ u64 copy_bytes;
+
+ if (ms->leftover_bytes) {
+ copy_bytes = min(user_size - *user_bytes_copied, ms->leftover_bytes);
+ if (copy_to_user(user_ptr + *user_bytes_copied, ms->leftover_addr, copy_bytes))
+ return -EFAULT;
+
+ ms->leftover_bytes -= copy_bytes;
+ ms->leftover_addr += copy_bytes;
+ *user_bytes_copied += copy_bytes;
+ }
+
+ return 0;
+}
+
+static int
+copy_samples_to_user(struct ivpu_device *vdev, struct ivpu_ms_instance *ms,
+ void __user *user_ptr, u64 user_size, u64 *user_bytes_copied)
+{
+ u64 bytes_written;
+ int ret;
+
+ *user_bytes_copied = 0;
+
+ ret = copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied);
+ if (ret)
+ return ret;
+
+ if (*user_bytes_copied == user_size)
+ return 0;
+
+ ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, ms->inactive_buff_vpu_addr,
+ ms->buff_size, &bytes_written);
+ if (ret)
+ return ret;
+
+ swap(ms->active_buff_vpu_addr, ms->inactive_buff_vpu_addr);
+ swap(ms->active_buff_ptr, ms->inactive_buff_ptr);
+
+ ms->leftover_bytes = bytes_written;
+ ms->leftover_addr = ms->inactive_buff_ptr;
+
+ return copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied);
+}
+
+int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ivpu_metric_streamer_get_data *args = data;
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_ms_instance *ms;
+ u64 bytes_written;
+ int ret;
+
+ if (!args->metric_group_mask)
+ return -EINVAL;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->ms_lock);
+
+ ms = get_instance_by_mask(file_priv, args->metric_group_mask);
+ if (!ms) {
+ ivpu_dbg(vdev, IOCTL, "Instance doesn't exist for mask: %#llx\n",
+ args->metric_group_mask);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (!args->buffer_size) {
+ ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, 0, 0, &bytes_written);
+ if (ret)
+ goto unlock;
+ args->data_size = bytes_written + ms->leftover_bytes;
+ goto unlock;
+ }
+
+ if (!args->buffer_ptr) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ ret = copy_samples_to_user(vdev, ms, u64_to_user_ptr(args->buffer_ptr),
+ args->buffer_size, &args->data_size);
+unlock:
+ mutex_unlock(&file_priv->ms_lock);
+
+ ivpu_rpm_put(vdev);
+ return ret;
+}
+
+static void free_instance(struct ivpu_file_priv *file_priv, struct ivpu_ms_instance *ms)
+{
+ lockdep_assert_held(&file_priv->ms_lock);
+
+ list_del(&ms->ms_instance_node);
+ ivpu_jsm_metric_streamer_stop(file_priv->vdev, ms->mask);
+ ivpu_bo_free(ms->bo);
+ kfree(ms);
+}
+
+int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct drm_ivpu_metric_streamer_stop *args = data;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_ms_instance *ms;
+ int ret;
+
+ if (!args->metric_group_mask)
+ return -EINVAL;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&file_priv->ms_lock);
+
+ ms = get_instance_by_mask(file_priv, args->metric_group_mask);
+ if (ms)
+ free_instance(file_priv, ms);
+
+ mutex_unlock(&file_priv->ms_lock);
+
+ ivpu_rpm_put(vdev);
+ return ms ? 0 : -EINVAL;
+}
+
+static inline struct ivpu_bo *get_ms_info_bo(struct ivpu_file_priv *file_priv)
+{
+ lockdep_assert_held(&file_priv->ms_lock);
+
+ if (file_priv->ms_info_bo)
+ return file_priv->ms_info_bo;
+
+ file_priv->ms_info_bo = ivpu_bo_create_global(file_priv->vdev, MS_INFO_BUFFER_SIZE,
+ DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
+ return file_priv->ms_info_bo;
+}
+
+int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ivpu_metric_streamer_get_data *args = data;
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct ivpu_bo *bo;
+ u64 info_size;
+ int ret;
+
+ if (!args->metric_group_mask)
+ return -EINVAL;
+
+ if (!args->buffer_size)
+ return ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask,
+ 0, 0, NULL, &args->data_size);
+ if (!args->buffer_ptr)
+ return -EINVAL;
+
+ mutex_lock(&file_priv->ms_lock);
+
+ bo = get_ms_info_bo(file_priv);
+ if (!bo) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, bo->vpu_addr,
+ ivpu_bo_size(bo), NULL, &info_size);
+ if (ret)
+ goto unlock;
+
+ if (args->buffer_size < info_size) {
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer_ptr), ivpu_bo_vaddr(bo), info_size))
+ ret = -EFAULT;
+
+ args->data_size = info_size;
+unlock:
+ mutex_unlock(&file_priv->ms_lock);
+
+ return ret;
+}
+
+void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
+{
+ struct ivpu_ms_instance *ms, *tmp;
+ struct ivpu_device *vdev = file_priv->vdev;
+
+ pm_runtime_get_sync(vdev->drm.dev);
+
+ mutex_lock(&file_priv->ms_lock);
+
+ if (file_priv->ms_info_bo) {
+ ivpu_bo_free(file_priv->ms_info_bo);
+ file_priv->ms_info_bo = NULL;
+ }
+
+ list_for_each_entry_safe(ms, tmp, &file_priv->ms_instance_list, ms_instance_node)
+ free_instance(file_priv, ms);
+
+ mutex_unlock(&file_priv->ms_lock);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
+}
+
+void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
+{
+ struct ivpu_file_priv *file_priv;
+ unsigned long ctx_id;
+
+ mutex_lock(&vdev->context_list_lock);
+
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv)
+ ivpu_ms_cleanup(file_priv);
+
+ mutex_unlock(&vdev->context_list_lock);
+}
diff --git a/drivers/accel/ivpu/ivpu_ms.h b/drivers/accel/ivpu/ivpu_ms.h
new file mode 100644
index 000000000000..fbd5ebebc3d9
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_ms.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+#ifndef __IVPU_MS_H__
+#define __IVPU_MS_H__
+
+#include <linux/list.h>
+
+struct drm_device;
+struct drm_file;
+struct ivpu_bo;
+struct ivpu_device;
+struct ivpu_file_priv;
+
+struct ivpu_ms_instance {
+ struct ivpu_bo *bo;
+ struct list_head ms_instance_node;
+ u64 mask;
+ u64 buff_size;
+ u64 active_buff_vpu_addr;
+ u64 inactive_buff_vpu_addr;
+ void *active_buff_ptr;
+ void *inactive_buff_ptr;
+ u64 leftover_bytes;
+ void *leftover_addr;
+};
+
+int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv);
+void ivpu_ms_cleanup_all(struct ivpu_device *vdev);
+
+#endif /* __IVPU_MS_H__ */
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 4f5ea466731f..480c075d87f6 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -9,25 +9,34 @@
#include <linux/pm_runtime.h>
#include <linux/reboot.h>
-#include "vpu_boot_api.h"
+#include "ivpu_coredump.h"
#include "ivpu_drv.h"
-#include "ivpu_hw.h"
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
+#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
+#include "ivpu_ms.h"
#include "ivpu_pm.h"
+#include "ivpu_trace.h"
+#include "vpu_boot_api.h"
static bool ivpu_disable_recovery;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");
+#endif
static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+static unsigned long ivpu_inference_timeout_ms;
+module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default");
+
#define PM_RESCHEDULE_LIMIT 5
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
@@ -36,14 +45,16 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
ivpu_cmdq_reset_all_contexts(vdev);
ivpu_ipc_reset(vdev);
+ ivpu_fw_log_reset(vdev);
ivpu_fw_load(vdev);
fw->entry_point = fw->cold_boot_entry_point;
+ fw->last_heartbeat = 0;
}
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
- struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);
+ struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem_bp);
if (!bp->save_restore_ret_address) {
ivpu_pm_prepare_cold_boot(vdev);
@@ -72,8 +83,8 @@ static int ivpu_resume(struct ivpu_device *vdev)
int ret;
retry:
- pci_restore_state(to_pci_dev(vdev->drm.dev));
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+ pci_restore_state(to_pci_dev(vdev->drm.dev));
ret = ivpu_hw_power_up(vdev);
if (ret) {
@@ -109,39 +120,57 @@ err_power_down:
return ret;
}
-static void ivpu_pm_recovery_work(struct work_struct *work)
+static void ivpu_pm_reset_begin(struct ivpu_device *vdev)
{
- struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
- struct ivpu_device *vdev = pm->vdev;
- char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
- int ret;
-
- ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
-
- ret = pm_runtime_resume_and_get(vdev->drm.dev);
- if (ret)
- ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
-
- ivpu_fw_log_dump(vdev);
+ pm_runtime_disable(vdev->drm.dev);
atomic_inc(&vdev->pm->reset_counter);
atomic_set(&vdev->pm->reset_pending, 1);
down_write(&vdev->pm->reset_lock);
+}
+
+static void ivpu_pm_reset_complete(struct ivpu_device *vdev)
+{
+ int ret;
- ivpu_suspend(vdev);
ivpu_pm_prepare_cold_boot(vdev);
ivpu_jobs_abort_all(vdev);
+ ivpu_ms_cleanup_all(vdev);
ret = ivpu_resume(vdev);
- if (ret)
+ if (ret) {
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
+ pm_runtime_set_suspended(vdev->drm.dev);
+ } else {
+ pm_runtime_set_active(vdev->drm.dev);
+ }
up_write(&vdev->pm->reset_lock);
atomic_set(&vdev->pm->reset_pending, 0);
- kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
pm_runtime_mark_last_busy(vdev->drm.dev);
- pm_runtime_put_autosuspend(vdev->drm.dev);
+ pm_runtime_enable(vdev->drm.dev);
+}
+
+static void ivpu_pm_recovery_work(struct work_struct *work)
+{
+ struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
+ struct ivpu_device *vdev = pm->vdev;
+ char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
+
+ ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
+
+ ivpu_pm_reset_begin(vdev);
+
+ if (!pm_runtime_status_suspended(vdev->drm.dev)) {
+ ivpu_jsm_state_dump(vdev);
+ ivpu_dev_coredump(vdev);
+ ivpu_suspend(vdev);
+ }
+
+ ivpu_pm_reset_complete(vdev);
+
+ kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
}
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
@@ -153,16 +182,11 @@ void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
return;
}
- if (ivpu_is_fpga(vdev)) {
- ivpu_err(vdev, "Recovery not available on FPGA\n");
- return;
- }
-
/* Trigger recovery if it's not in progress */
if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
ivpu_hw_diagnose_failure(vdev);
ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
- queue_work(system_long_wq, &vdev->pm->recovery_work);
+ queue_work(system_dfl_wq, &vdev->pm->recovery_work);
}
}
@@ -170,7 +194,30 @@ static void ivpu_job_timeout_work(struct work_struct *work)
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+ unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? ivpu_inference_timeout_ms :
+ vdev->timeout.inference;
+ u64 inference_max_retries;
+ u64 heartbeat;
+
+ if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n");
+ goto recovery;
+ }
+ inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms);
+ if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n",
+ inference_max_retries);
+ goto recovery;
+ }
+
+ vdev->fw->last_heartbeat = heartbeat;
+ ivpu_start_job_timeout_detection(vdev);
+ return;
+
+recovery:
+ atomic_set(&vdev->job_timeout_counter, 0);
ivpu_pm_trigger_recovery(vdev, "TDR");
}
@@ -179,12 +226,14 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
/* No-op if already queued */
- queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+ queue_delayed_work(system_percpu_wq, &vdev->pm->job_timeout_work,
+ msecs_to_jiffies(timeout_ms));
}
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+ atomic_set(&vdev->job_timeout_counter, 0);
}
int ivpu_pm_suspend_cb(struct device *dev)
@@ -193,6 +242,7 @@ int ivpu_pm_suspend_cb(struct device *dev)
struct ivpu_device *vdev = to_ivpu_device(drm);
unsigned long timeout;
+ trace_pm("suspend");
ivpu_dbg(vdev, PM, "Suspend..\n");
timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
@@ -210,6 +260,7 @@ int ivpu_pm_suspend_cb(struct device *dev)
ivpu_pm_prepare_warm_boot(vdev);
ivpu_dbg(vdev, PM, "Suspend done.\n");
+ trace_pm("suspend done");
return 0;
}
@@ -220,6 +271,7 @@ int ivpu_pm_resume_cb(struct device *dev)
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
+ trace_pm("resume");
ivpu_dbg(vdev, PM, "Resume..\n");
ret = ivpu_resume(vdev);
@@ -227,6 +279,7 @@ int ivpu_pm_resume_cb(struct device *dev)
ivpu_err(vdev, "Failed to resume: %d\n", ret);
ivpu_dbg(vdev, PM, "Resume done.\n");
+ trace_pm("resume done");
return ret;
}
@@ -235,42 +288,40 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
- bool hw_is_idle = true;
- int ret;
+ int ret, ret_d0i3;
+ bool is_idle;
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));
+ trace_pm("runtime suspend");
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
- if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
- ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
- vdev->pm->suspend_reschedule_counter);
- pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
- vdev->pm->suspend_reschedule_counter--;
- return -EAGAIN;
- }
+ ivpu_mmu_disable(vdev);
- if (!vdev->pm->suspend_reschedule_counter)
- hw_is_idle = false;
- else if (ivpu_jsm_pwr_d0i3_enter(vdev))
- hw_is_idle = false;
+ is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent;
+ if (!is_idle)
+ ivpu_err(vdev, "NPU is not idle before autosuspend\n");
+
+ ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev);
+ if (ret_d0i3)
+ ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3);
ret = ivpu_suspend(vdev);
if (ret)
ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret);
- if (!hw_is_idle) {
- ivpu_err(vdev, "NPU failed to enter idle, force suspended.\n");
- ivpu_fw_log_dump(vdev);
+ if (!is_idle || ret_d0i3) {
+ ivpu_err(vdev, "Forcing cold boot due to previous errors\n");
+ atomic_inc(&vdev->pm->reset_counter);
+ ivpu_dev_coredump(vdev);
ivpu_pm_prepare_cold_boot(vdev);
} else {
ivpu_pm_prepare_warm_boot(vdev);
}
- vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
-
ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
+ trace_pm("runtime suspend done");
return 0;
}
@@ -281,6 +332,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev)
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
+ trace_pm("runtime resume");
ivpu_dbg(vdev, PM, "Runtime resume..\n");
ret = ivpu_resume(vdev);
@@ -288,6 +340,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev)
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
ivpu_dbg(vdev, PM, "Runtime resume done.\n");
+ trace_pm("runtime resume done");
return ret;
}
@@ -297,25 +350,16 @@ int ivpu_rpm_get(struct ivpu_device *vdev)
int ret;
ret = pm_runtime_resume_and_get(vdev->drm.dev);
- if (!drm_WARN_ON(&vdev->drm, ret < 0))
- vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
-
- return ret;
-}
-
-int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = pm_runtime_get_if_in_use(vdev->drm.dev);
- drm_WARN_ON(&vdev->drm, ret < 0);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
+ pm_runtime_set_suspended(vdev->drm.dev);
+ }
return ret;
}
void ivpu_rpm_put(struct ivpu_device *vdev)
{
- pm_runtime_mark_last_busy(vdev->drm.dev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
@@ -324,33 +368,26 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
struct ivpu_device *vdev = pci_get_drvdata(pdev);
ivpu_dbg(vdev, PM, "Pre-reset..\n");
- atomic_inc(&vdev->pm->reset_counter);
- atomic_set(&vdev->pm->reset_pending, 1);
- pm_runtime_get_sync(vdev->drm.dev);
- down_write(&vdev->pm->reset_lock);
- ivpu_prepare_for_reset(vdev);
- ivpu_hw_reset(vdev);
- ivpu_pm_prepare_cold_boot(vdev);
- ivpu_jobs_abort_all(vdev);
+ ivpu_pm_reset_begin(vdev);
+
+ if (!pm_runtime_status_suspended(vdev->drm.dev)) {
+ ivpu_prepare_for_reset(vdev);
+ ivpu_hw_reset(vdev);
+ }
+
ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}
void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
- int ret;
ivpu_dbg(vdev, PM, "Post-reset..\n");
- ret = ivpu_resume(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
- up_write(&vdev->pm->reset_lock);
- atomic_set(&vdev->pm->reset_pending, 0);
- ivpu_dbg(vdev, PM, "Post-reset done.\n");
- pm_runtime_mark_last_busy(vdev->drm.dev);
- pm_runtime_put_autosuspend(vdev->drm.dev);
+ ivpu_pm_reset_complete(vdev);
+
+ ivpu_dbg(vdev, PM, "Post-reset done.\n");
}
void ivpu_pm_init(struct ivpu_device *vdev)
@@ -360,7 +397,6 @@ void ivpu_pm_init(struct ivpu_device *vdev)
int delay;
pm->vdev = vdev;
- pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
init_rwsem(&pm->reset_lock);
atomic_set(&pm->reset_pending, 0);
@@ -376,23 +412,22 @@ void ivpu_pm_init(struct ivpu_device *vdev)
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, delay);
+ pm_runtime_set_active(dev);
ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}
-void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
+void ivpu_pm_disable_recovery(struct ivpu_device *vdev)
{
drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
- cancel_work_sync(&vdev->pm->recovery_work);
+ disable_work_sync(&vdev->pm->recovery_work);
}
void ivpu_pm_enable(struct ivpu_device *vdev)
{
struct device *dev = vdev->drm.dev;
- pm_runtime_set_active(dev);
pm_runtime_allow(dev);
- pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
}
@@ -401,3 +436,76 @@ void ivpu_pm_disable(struct ivpu_device *vdev)
pm_runtime_get_noresume(vdev->drm.dev);
pm_runtime_forbid(vdev->drm.dev);
}
+
+int ivpu_pm_dct_init(struct ivpu_device *vdev)
+{
+ if (vdev->pm->dct_active_percent)
+ return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent);
+
+ return 0;
+}
+
+int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
+{
+ u32 active_us, inactive_us;
+ int ret;
+
+ if (active_percent == 0 || active_percent > 100)
+ return -EINVAL;
+
+ active_us = (DCT_PERIOD_US * active_percent) / 100;
+ inactive_us = DCT_PERIOD_US - active_us;
+
+ vdev->pm->dct_active_percent = active_percent;
+
+ ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n",
+ active_percent, active_us, inactive_us);
+
+ ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
+ if (ret) {
+ ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int ivpu_pm_dct_disable(struct ivpu_device *vdev)
+{
+ int ret;
+
+ vdev->pm->dct_active_percent = 0;
+
+ ivpu_dbg(vdev, PM, "DCT requested to be disabled\n");
+
+ ret = ivpu_jsm_dct_disable(vdev);
+ if (ret) {
+ ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+void ivpu_pm_irq_dct_work_fn(struct work_struct *work)
+{
+ struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_dct_work);
+ bool enable;
+ int ret;
+
+ if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
+ return;
+
+ if (enable)
+ ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
+ else
+ ret = ivpu_pm_dct_disable(vdev);
+
+ if (!ret) {
+ /* Convert percent to U1.7 format */
+ u8 val = DIV_ROUND_CLOSEST(vdev->pm->dct_active_percent * 128, 100);
+
+ ivpu_hw_btrs_dct_set_status(vdev, enable, val);
+ }
+
+}
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index ec60fbeefefc..a2aa7a27f32e 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_PM_H__
@@ -19,13 +19,13 @@ struct ivpu_pm_info {
atomic_t reset_counter;
atomic_t reset_pending;
bool is_warmboot;
- u32 suspend_reschedule_counter;
+ u8 dct_active_percent;
};
void ivpu_pm_init(struct ivpu_device *vdev);
void ivpu_pm_enable(struct ivpu_device *vdev);
void ivpu_pm_disable(struct ivpu_device *vdev);
-void ivpu_pm_cancel_recovery(struct ivpu_device *vdev);
+void ivpu_pm_disable_recovery(struct ivpu_device *vdev);
int ivpu_pm_suspend_cb(struct device *dev);
int ivpu_pm_resume_cb(struct device *dev);
@@ -36,11 +36,15 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev);
void ivpu_pm_reset_done_cb(struct pci_dev *pdev);
int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
-int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason);
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
+int ivpu_pm_dct_init(struct ivpu_device *vdev);
+int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent);
+int ivpu_pm_dct_disable(struct ivpu_device *vdev);
+void ivpu_pm_irq_dct_work_fn(struct work_struct *work);
+
#endif /* __IVPU_PM_H__ */
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
new file mode 100644
index 000000000000..d250a10caca9
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024-2025 Intel Corporation
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/units.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+#include "ivpu_fw.h"
+#include "ivpu_hw.h"
+#include "ivpu_sysfs.h"
+
+/**
+ * DOC: npu_busy_time_us
+ *
+ * npu_busy_time_us is the time that the device spent executing jobs.
+ * The time is counted when and only when there are jobs submitted to firmware.
+ *
+ * This time can be used to measure the utilization of NPU, either by calculating
+ * npu_busy_time_us difference between two timepoints (i.e. measuring the time
+ * that the NPU was active during some workload) or monitoring utilization percentage
+ * by reading npu_busy_time_us periodically.
+ *
+ * When reading the value periodically, it shouldn't be read too often as it may have
+ * an impact on job submission performance. Recommended period is 1 second.
+ */
+static ssize_t
+npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ ktime_t total, now = 0;
+
+ mutex_lock(&vdev->submitted_jobs_lock);
+
+ total = vdev->busy_time;
+ if (!xa_empty(&vdev->submitted_jobs_xa))
+ now = ktime_sub(ktime_get(), vdev->busy_start_ts);
+ mutex_unlock(&vdev->submitted_jobs_lock);
+
+ return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now)));
+}
+
+static DEVICE_ATTR_RO(npu_busy_time_us);
+
+/**
+ * DOC: npu_memory_utilization
+ *
+ * The npu_memory_utilization is used to report in bytes a current NPU memory utilization.
+ *
+ */
+static ssize_t
+npu_memory_utilization_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ struct ivpu_bo *bo;
+ u64 total_npu_memory = 0;
+
+ mutex_lock(&vdev->bo_list_lock);
+ list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
+ if (ivpu_bo_is_resident(bo))
+ total_npu_memory += ivpu_bo_size(bo);
+ mutex_unlock(&vdev->bo_list_lock);
+
+ return sysfs_emit(buf, "%lld\n", total_npu_memory);
+}
+
+static DEVICE_ATTR_RO(npu_memory_utilization);
+
+/**
+ * DOC: sched_mode
+ *
+ * The sched_mode is used to report current NPU scheduling mode.
+ *
+ * It returns following strings:
+ * - "HW" - Hardware Scheduler mode
+ * - "OS" - Operating System Scheduler mode
+ *
+ */
+static ssize_t
+sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+
+ return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS");
+}
+
+static DEVICE_ATTR_RO(sched_mode);
+
+/**
+ * DOC: npu_max_frequency
+ *
+ * The npu_max_frequency shows maximum frequency in MHz of the NPU's data
+ * processing unit
+ */
+static ssize_t
+npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ u32 freq = ivpu_hw_dpu_max_freq_get(vdev);
+
+ return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_max_frequency_mhz);
+
+/**
+ * DOC: npu_current_frequency_mhz
+ *
+ * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's
+ * data processing unit
+ */
+static ssize_t
+npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+ u32 freq = 0;
+
+ /* Read frequency only if device is active, otherwise frequency is 0 */
+ if (pm_runtime_get_if_active(vdev->drm.dev) > 0) {
+ freq = ivpu_hw_dpu_freq_get(vdev);
+
+ pm_runtime_put_autosuspend(vdev->drm.dev);
+ }
+
+ return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_current_frequency_mhz);
+
+static struct attribute *ivpu_dev_attrs[] = {
+ &dev_attr_npu_busy_time_us.attr,
+ &dev_attr_npu_memory_utilization.attr,
+ &dev_attr_sched_mode.attr,
+ &dev_attr_npu_max_frequency_mhz.attr,
+ &dev_attr_npu_current_frequency_mhz.attr,
+ NULL,
+};
+
+static struct attribute_group ivpu_dev_attr_group = {
+ .attrs = ivpu_dev_attrs,
+};
+
+void ivpu_sysfs_init(struct ivpu_device *vdev)
+{
+ int ret;
+
+ ret = devm_device_add_group(vdev->drm.dev, &ivpu_dev_attr_group);
+ if (ret)
+ ivpu_warn(vdev, "Failed to add group to device, ret %d", ret);
+}
diff --git a/drivers/accel/ivpu/ivpu_sysfs.h b/drivers/accel/ivpu/ivpu_sysfs.h
new file mode 100644
index 000000000000..9836f09b35a3
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Intel Corporation
+ */
+
+#ifndef __IVPU_SYSFS_H__
+#define __IVPU_SYSFS_H__
+
+#include "ivpu_drv.h"
+
+void ivpu_sysfs_init(struct ivpu_device *vdev);
+
+#endif /* __IVPU_SYSFS_H__ */
diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h
new file mode 100644
index 000000000000..eb792038e701
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_trace.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __IVPU_TRACE_H__
+
+#include <linux/tracepoint.h>
+#include "ivpu_drv.h"
+#include "ivpu_job.h"
+#include "vpu_jsm_api.h"
+#include "ivpu_jsm_msg.h"
+#include "ivpu_ipc.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vpu
+#define TRACE_INCLUDE_FILE ivpu_trace
+
+TRACE_EVENT(pm,
+ TP_PROTO(const char *event),
+ TP_ARGS(event),
+ TP_STRUCT__entry(__field(const char *, event)),
+ TP_fast_assign(__entry->event = event;),
+ TP_printk("%s", __entry->event)
+);
+
+TRACE_EVENT(job,
+ TP_PROTO(const char *event, struct ivpu_job *job),
+ TP_ARGS(event, job),
+ TP_STRUCT__entry(__field(const char *, event)
+ __field(u32, ctx_id)
+ __field(u32, engine_id)
+ __field(u32, job_id)
+ ),
+ TP_fast_assign(__entry->event = event;
+ __entry->ctx_id = job->file_priv->ctx.id;
+ __entry->engine_id = job->engine_idx;
+ __entry->job_id = job->job_id;),
+ TP_printk("%s context:%d engine:%d job:%d",
+ __entry->event,
+ __entry->ctx_id,
+ __entry->engine_id,
+ __entry->job_id)
+);
+
+TRACE_EVENT(jsm,
+ TP_PROTO(const char *event, struct vpu_jsm_msg *msg),
+ TP_ARGS(event, msg),
+ TP_STRUCT__entry(__field(const char *, event)
+ __field(const char *, type)
+ __field(enum vpu_ipc_msg_status, status)
+ __field(u32, request_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(__entry->event = event;
+ __entry->type = ivpu_jsm_msg_type_to_str(msg->type);
+ __entry->status = msg->status;
+ __entry->request_id = msg->request_id;
+ __entry->result = msg->result;),
+ TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x",
+ __entry->event,
+ __entry->type,
+ __entry->status,
+ __entry->request_id,
+ __entry->result)
+);
+
+#endif /* __IVPU_TRACE_H__ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c
new file mode 100644
index 000000000000..f8fb99de0de3
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_trace_points.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "ivpu_trace.h"
+#endif
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 87cac7bc730a..218468bbbcad 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -1,14 +1,13 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (c) 2020-2023, Intel Corporation.
+ * Copyright (c) 2020-2024, Intel Corporation.
*/
#ifndef VPU_BOOT_API_H
#define VPU_BOOT_API_H
/*
- * =========== FW API version information beginning ================
- * The bellow values will be used to construct the version info this way:
+ * The below values will be used to construct the version info this way:
* fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
* VPU_BOOT_API_VER_MINOR;
* VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes
@@ -27,19 +26,18 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 22
+#define VPU_BOOT_API_VER_MINOR 28
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_BOOT_API_VER_PATCH 0
+#define VPU_BOOT_API_VER_PATCH 3
/*
* Index in the API version table
* Must be unique for each API
*/
#define VPU_BOOT_API_VER_INDEX 0
-/* ------------ FW API version information end ---------------------*/
#pragma pack(push, 4)
@@ -78,8 +76,20 @@ struct vpu_firmware_header {
* submission queue size and device capabilities.
*/
u32 preemption_buffer_2_size;
+ /*
+ * Maximum preemption buffer size that the FW can use: no need for the host
+ * driver to allocate more space than that specified by these fields.
+ * A value of 0 means no declared limit.
+ */
+ u32 preemption_buffer_1_max_size;
+ u32 preemption_buffer_2_max_size;
/* Space reserved for future preemption-related fields. */
- u32 preemption_reserved[6];
+ u32 preemption_reserved[4];
+ /* FW image read only section start address, 4KB aligned */
+ u64 ro_section_start_address;
+ /* FW image read only section size, 4KB aligned */
+ u32 ro_section_size;
+ u32 reserved;
};
/*
@@ -131,7 +141,7 @@ enum vpu_trace_destination {
/*
* Processor bit shifts (for loggable HW components).
*/
-#define VPU_TRACE_PROC_BIT_ARM 0
+#define VPU_TRACE_PROC_BIT_RESERVED 0
#define VPU_TRACE_PROC_BIT_LRT 1
#define VPU_TRACE_PROC_BIT_LNN 2
#define VPU_TRACE_PROC_BIT_SHV_0 3
@@ -159,8 +169,6 @@ enum vpu_trace_destination {
/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST
-#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST
struct vpu_boot_l2_cache_config {
u8 use;
@@ -194,6 +202,17 @@ struct vpu_warm_boot_section {
*/
#define POWER_PROFILE_SURVIVABILITY 0x1
+/**
+ * Enum for dvfs_mode boot param.
+ */
+enum vpu_governor {
+ VPU_GOV_DEFAULT = 0, /* Default Governor for the system */
+ VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */
+ VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */
+ VPU_GOV_POWER_SAVE = 3, /* Power save governor */
+ VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */
+};
+
struct vpu_boot_params {
u32 magic;
u32 vpu_id;
@@ -296,7 +315,14 @@ struct vpu_boot_params {
u32 temp_sensor_period_ms;
/** PLL ratio for efficient clock frequency */
u32 pn_freq_pll_ratio;
- /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+ /**
+ * DVFS Mode:
+ * 0 - Default, DVFS mode selected by the firmware
+ * 1 - Max Performance
+ * 2 - On Demand
+ * 3 - Power Save
+ * 4 - On Demand Priority Aware
+ */
u32 dvfs_mode;
/**
* Depending on DVFS Mode:
@@ -327,13 +353,20 @@ struct vpu_boot_params {
u64 d0i3_entry_vpu_ts;
/*
* The system time of the host operating system in microseconds.
- * E.g the number of microseconds since 1st of January 1970, or whatever date the
- * host operating system uses to maintain system time.
+ * E.g the number of microseconds since 1st of January 1970, or whatever
+ * date the host operating system uses to maintain system time.
* This value will be used to track system time on the VPU.
* The KMD is required to update this value on every VPU reset.
*/
u64 system_time_us;
- u32 pad4[18];
+ u32 pad4[2];
+ /*
+ * The delta between device monotonic time and the current value of the
+ * HW timestamp register, in ticks. Written by the firmware during boot.
+ * Can be used by the KMD to calculate device time.
+ */
+ u64 device_time_delta_ticks;
+ u32 pad7[14];
/* Warm boot information: 0x400 - 0x43F */
u32 warm_boot_sections_count;
u32 warm_boot_start_address_reference;
@@ -370,10 +403,7 @@ struct vpu_boot_params {
u32 pad6[734];
};
-/*
- * Magic numbers set between host and vpu to detect corruptio of tracing init
- */
-
+/* Magic numbers set between host and vpu to detect corruption of tracing init */
#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE)
/* Tracing buffer message format definitions */
@@ -393,7 +423,9 @@ struct vpu_tracing_buffer_header {
u32 host_canary_start;
/* offset from start of buffer for trace entries */
u32 read_index;
- u32 pad_to_cache_line_size_0[14];
+ /* keeps track of wrapping on the reader side */
+ u32 read_wrap_count;
+ u32 pad_to_cache_line_size_0[13];
/* End of first cache line */
/**
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index e46f3531211a..bca6a44dc041 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -1,15 +1,16 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (c) 2020-2023, Intel Corporation.
+ * Copyright (c) 2020-2025, Intel Corporation.
+ */
+
+/**
+ * @addtogroup Jsm
+ * @{
*/
/**
* @file
* @brief JSM shared definitions
- *
- * @ingroup Jsm
- * @brief JSM shared definitions
- * @{
*/
#ifndef VPU_JSM_API_H
#define VPU_JSM_API_H
@@ -22,12 +23,12 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 15
+#define VPU_JSM_API_VER_MINOR 33
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_JSM_API_VER_PATCH 6
+#define VPU_JSM_API_VER_PATCH 0
/*
* Index in the API version table
@@ -36,7 +37,7 @@
/*
* Number of Priority Bands for Hardware Scheduling
- * Bands: RealTime, Focus, Normal, Idle
+ * Bands: Idle(0), Normal(1), Focus(2), RealTime(3)
*/
#define VPU_HWS_NUM_PRIORITY_BANDS 4
@@ -53,8 +54,7 @@
* Engine indexes.
*/
#define VPU_ENGINE_COMPUTE 0
-#define VPU_ENGINE_COPY 1
-#define VPU_ENGINE_NB 2
+#define VPU_ENGINE_NB 1
/*
* VPU status values.
@@ -72,8 +72,15 @@
#define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES 0xAU
#define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED 0xBU
#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU
-/* Job status returned when the job was preempted mid-inference */
+/* @deprecated (use VPU_JSM_STATUS_PREEMPTED_MID_COMMAND instead) */
#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU
+/* Job status returned when the job was preempted mid-command */
+#define VPU_JSM_STATUS_PREEMPTED_MID_COMMAND 0xDU
+/* Range of status codes that require engine reset */
+#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN 0xEU
+#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
+#define VPU_JSM_STATUS_MVNCI_PREEMPTION_TIMED_OUT 0xFU
+#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX 0x1FU
/*
* Host <-> VPU IPC channels.
@@ -86,18 +93,70 @@
/*
* Job flags bit masks.
*/
-#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
-#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000
-
-/*
- * Sizes of the reserved areas in jobs, in bytes.
- */
-#define VPU_JOB_RESERVED_BYTES 8
+enum {
+ /*
+ * Null submission mask.
+ * When set, batch buffer's commands are not processed but returned as
+ * successful immediately, except fences and timestamps.
+ * When cleared, batch buffer's commands are processed normally.
+ * Used for testing and profiling purposes.
+ */
+ VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U),
+ /*
+ * Inline command mask.
+ * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below).
+ * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below).
+ */
+ VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U),
+ /*
+ * VPU private data mask.
+ * Reserved for the VPU to store private data about the job (or inline command)
+ * while being processed.
+ */
+ VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U
+};
/*
- * Sizes of the reserved areas in job queues, in bytes.
+ * Job queue flags bit masks.
*/
-#define VPU_JOB_QUEUE_RESERVED_BYTES 52
+enum {
+ /*
+ * No job done notification mask.
+ * When set, indicates that no job done notification should be sent for any
+ * job from this queue. When cleared, indicates that job done notification
+ * should be sent for every job completed from this queue.
+ */
+ VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U),
+ /*
+ * Native fence usage mask.
+ * When set, indicates that job queue uses native fences (as inline commands
+ * in job queue). Such queues may also use legacy fences (as commands in batch buffers).
+ * When cleared, indicates the job queue only uses legacy fences.
+ * NOTES:
+ * 1. For queues using native fences, VPU expects that all jobs in the queue
+ * are immediately followed by an inline command object. This object is expected
+ * to be a fence signal command in most cases, but can also be a NOP in case the host
+ * does not need per-job fence signalling. Other inline commands objects can be
+ * inserted between "job and inline command" pairs.
+ * 2. Native fence queues are only supported on VPU 40xx onwards.
+ */
+ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
+ /*
+ * Enable turbo mode for testing NPU performance; not recommended for regular usage.
+ */
+ VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U),
+ /*
+ * Queue error detection mode flag
+ * For 'interactive' queues (this bit not set), the FW will identify queues that have not
+ * completed a job inside the TDR timeout as in error as part of engine reset sequence.
+ * For 'non-interactive' queues (this bit set), the FW will identify queues that have not
+ * progressed the heartbeat inside the non-interactive no-progress timeout as in error as
+ * part of engine reset sequence. Additionally, there is an upper limit applied to these
+ * queues: even if they progress the heartbeat, if they run longer than non-interactive
+ * timeout, then the FW will also identify them as in error.
+ */
+ VPU_JOB_QUEUE_FLAGS_NON_INTERACTIVE = (1 << 3U)
+};
/*
* Max length (including trailing NULL char) of trace entity name (e.g., the
@@ -141,33 +200,140 @@
#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL
/*
+ * Inline commands types.
+ */
+/*
+ * NOP.
+ * VPU does nothing other than consuming the inline command object.
+ */
+#define VPU_INLINE_CMD_TYPE_NOP 0x0
+/*
+ * Fence wait.
+ * VPU waits for the fence current value to reach monitored value.
+ * Fence wait operations are executed upon job dispatching. While waiting for
+ * the fence to be satisfied, VPU blocks fetching of the next objects in the queue.
+ * Jobs present in the queue prior to the fence wait object may be processed
+ * concurrently.
+ */
+#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1
+/*
+ * Fence signal.
+ * VPU sets the fence current value to the provided value. If new current value
+ * is equal to or higher than monitored value, VPU sends fence signalled notification
+ * to the host. Fence signal operations are executed upon completion of all the jobs
+ * present in the queue prior to them, and in-order relative to each other in the queue.
+ * But jobs in-between them may be processed concurrently and may complete out-of-order.
+ */
+#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2
+
+/**
+ * Job scheduling priority bands for both hardware scheduling and OS scheduling.
+ */
+enum vpu_job_scheduling_priority_band {
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4,
+};
+
+/**
* Job format.
+ * Jobs defines the actual workloads to be executed by a given engine.
*/
struct vpu_job_queue_entry {
- u64 batch_buf_addr; /**< Address of VPU commands batch buffer */
- u32 job_id; /**< Job ID */
- u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
- u64 root_page_table_addr; /**< Address of root page table to use for this job */
- u64 root_page_table_update_counter; /**< Page tables update events counter */
+ /** Address of VPU commands batch buffer */
+ u64 batch_buf_addr;
+ /** Job ID */
+ u32 job_id;
+ /** Flags bit field, see VPU_JOB_FLAGS_* above */
+ u32 flags;
+ /**
+ * Doorbell ring timestamp taken by KMD from SoC's global system clock, in
+ * microseconds. NPU can convert this value to its own fixed clock's timebase,
+ * to match other profiling timestamps.
+ */
+ u64 doorbell_timestamp;
+ /** Extra id for job tracking, used only in the firmware perf traces */
+ u64 host_tracking_id;
+ /** Address of the primary preemption buffer to use for this job */
u64 primary_preempt_buf_addr;
- /**< Address of the primary preemption buffer to use for this job */
+ /** Size of the primary preemption buffer to use for this job */
u32 primary_preempt_buf_size;
- /**< Size of the primary preemption buffer to use for this job */
+ /** Size of secondary preemption buffer to use for this job */
u32 secondary_preempt_buf_size;
- /**< Size of secondary preemption buffer to use for this job */
+ /** Address of secondary preemption buffer to use for this job */
u64 secondary_preempt_buf_addr;
- /**< Address of secondary preemption buffer to use for this job */
- u8 reserved_0[VPU_JOB_RESERVED_BYTES];
+ u64 reserved_0;
};
-/*
+/**
+ * Inline command format.
+ * Inline commands are the commands executed at scheduler level (typically,
+ * synchronization directives). Inline command and job objects must be of
+ * the same size and have flags field at same offset.
+ */
+struct vpu_inline_cmd {
+ u64 reserved_0;
+ /** Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */
+ u32 type;
+ /** Flags bit field, see VPU_JOB_FLAGS_* above. */
+ u32 flags;
+ /** Inline command payload. Depends on inline command type. */
+ union payload {
+ /** Fence (wait and signal) commands' payload. */
+ struct fence {
+ /** Fence object handle. */
+ u64 fence_handle;
+ /** User VA of the current fence value. */
+ u64 current_value_va;
+ /** User VA of the monitored fence value (read-only). */
+ u64 monitored_value_va;
+ /** Value to wait for or write in fence location. */
+ u64 value;
+ /** User VA of the log buffer in which to add log entry on completion. */
+ u64 log_buffer_va;
+ /** NPU private data. */
+ u64 npu_private_data;
+ } fence;
+ /**
+ * Other commands do not have a payload:
+ * Payload definition for future inline commands can be inserted here.
+ */
+ u64 reserved_1[6];
+ } payload;
+};
+
+/**
+ * Job queue slots can be populated either with job objects or inline command objects.
+ */
+union vpu_jobq_slot {
+ struct vpu_job_queue_entry job;
+ struct vpu_inline_cmd inline_cmd;
+};
+
+/**
* Job queue control registers.
*/
struct vpu_job_queue_header {
u32 engine_idx;
u32 head;
u32 tail;
- u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES];
+ u32 flags;
+ /** Set to 1 to indicate priority_band field is valid */
+ u32 priority_band_valid;
+ /**
+ * Priority for the work of this job queue, valid only if the HWS is NOT used
+ * and the @ref priority_band_valid is set to 1. It is applied only during
+ * the @ref VPU_JSM_MSG_REGISTER_DB message processing.
+ * The device firmware might use the priority_band to optimize the power
+ * management logic, but it will not affect the order of jobs.
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band
+ */
+ u32 priority_band;
+ /** Inside realtime band assigns a further priority, limited to 0..31 range */
+ u32 realtime_priority_level;
+ u32 reserved_0[9];
};
/*
@@ -175,7 +341,7 @@ struct vpu_job_queue_header {
*/
struct vpu_job_queue {
struct vpu_job_queue_header header;
- struct vpu_job_queue_entry job[];
+ union vpu_jobq_slot slot[];
};
/**
@@ -190,18 +356,16 @@ enum vpu_trace_entity_type {
VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2,
};
-/*
+/**
* HWS specific log buffer header details.
* Total size is 32 bytes.
*/
struct vpu_hws_log_buffer_header {
- /* Written by VPU after adding a log entry. Initialised by host to 0. */
+ /** Written by VPU after adding a log entry. Initialised by host to 0. */
u32 first_free_entry_index;
- /* Incremented by VPU every time the VPU overwrites the 0th entry;
- * initialised by host to 0.
- */
+ /** Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */
u32 wraparound_count;
- /*
+ /**
* This is the number of buffers that can be stored in the log buffer provided by the host.
* It is written by host before passing buffer to VPU. VPU should consider it read-only.
*/
@@ -209,14 +373,14 @@ struct vpu_hws_log_buffer_header {
u64 reserved[2];
};
-/*
+/**
* HWS specific log buffer entry details.
* Total size is 32 bytes.
*/
struct vpu_hws_log_buffer_entry {
- /* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
+ /** VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
u64 vpu_timestamp;
- /*
+ /**
* Operation type:
* 0 - context state change
* 1 - queue new work
@@ -226,21 +390,111 @@ struct vpu_hws_log_buffer_entry {
*/
u32 operation_type;
u32 reserved;
- /* Operation data depends on operation type */
+ /** Operation data depends on operation type */
u64 operation_data[2];
};
+/* Native fence log buffer types. */
+enum vpu_hws_native_fence_log_type {
+ VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1,
+ VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2
+};
+
+/** HWS native fence log buffer header. */
+struct vpu_hws_native_fence_log_header {
+ union {
+ struct {
+ /** Index of the first free entry in buffer. */
+ u32 first_free_entry_idx;
+ /**
+ * Incremented whenever the NPU wraps around the buffer and writes
+ * to the first entry again.
+ */
+ u32 wraparound_count;
+ };
+ /** Field allowing atomic update of both fields above. */
+ u64 atomic_wraparound_and_entry_idx;
+ };
+ /** Log buffer type, see enum vpu_hws_native_fence_log_type. */
+ u64 type;
+ /** Allocated number of entries in the log buffer. */
+ u64 entry_nb;
+ u64 reserved[2];
+};
+
+/** Native fence log operation types. */
+enum vpu_hws_native_fence_log_op {
+ VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0,
+ VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1
+};
+
+/** HWS native fence log entry. */
+struct vpu_hws_native_fence_log_entry {
+ /** Newly signaled/unblocked fence value. */
+ u64 fence_value;
+ /** Native fence object handle to which this operation belongs. */
+ u64 fence_handle;
+ /** Operation type, see enum vpu_hws_native_fence_log_op. */
+ u64 op_type;
+ u64 reserved_0;
+ /**
+ * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence
+ * wait was started (in NPU SysTime).
+ */
+ u64 fence_wait_start_ts;
+ u64 reserved_1;
+ /** Timestamp at which fence operation was completed (in NPU SysTime). */
+ u64 fence_end_ts;
+};
+
+/** Native fence log buffer. */
+struct vpu_hws_native_fence_log_buffer {
+ struct vpu_hws_native_fence_log_header header;
+ struct vpu_hws_native_fence_log_entry entry[];
+};
+
/*
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
+ /** Unsupported command */
VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
- /* IPC Host -> Device, Async commands */
+
+ /** IPC Host -> Device, base id for async commands */
VPU_JSM_MSG_ASYNC_CMD = 0x1100,
+ /**
+ * Reset engine. The NPU cancels all the jobs currently executing on the target
+ * engine making the engine become idle and then does a HW reset, before returning
+ * to the host.
+ * @see struct vpu_ipc_msg_payload_engine_reset
+ */
VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
+ /**
+ * Preempt engine. The NPU stops (preempts) all the jobs currently
+ * executing on the target engine making the engine become idle and ready to
+ * execute new jobs.
+ * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of
+ * the target engine, but it stops processing them (until the queue doorbell
+ * is rung again); the host is responsible to reset the job queue, either
+ * after preemption or when resubmitting jobs to the queue.
+ * @see vpu_ipc_msg_payload_engine_preempt
+ */
VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
+ /**
+ * OS scheduling doorbell register command
+ * @see vpu_ipc_msg_payload_register_db
+ */
VPU_JSM_MSG_REGISTER_DB = 0x1102,
+ /**
+ * OS scheduling doorbell unregister command
+ * @see vpu_ipc_msg_payload_unregister_db
+ */
VPU_JSM_MSG_UNREGISTER_DB = 0x1103,
+ /**
+ * Query engine heartbeat. Heartbeat is expected to increase monotonically
+ * and increase while work is being progressed by NPU.
+ * @see vpu_ipc_msg_payload_query_engine_hb
+ */
VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104,
VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105,
VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106,
@@ -266,6 +520,7 @@ enum vpu_ipc_msg_type {
* aborted and removed from internal scheduling queues. All doorbells assigned
* to the host_ssid are unregistered and any internal FW resources belonging to
* the host_ssid are released.
+ * @see vpu_ipc_msg_payload_ssid_release
*/
VPU_JSM_MSG_SSID_RELEASE = 0x110e,
/**
@@ -293,56 +548,114 @@ enum vpu_ipc_msg_type {
* @see vpu_jsm_metric_streamer_start
*/
VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112,
- /** Control command: Priority band setup */
+ /**
+ * Control command: Priority band setup
+ * @see vpu_ipc_msg_payload_hws_priority_band_setup
+ */
VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113,
- /** Control command: Create command queue */
+ /**
+ * Control command: Create command queue
+ * @see vpu_ipc_msg_payload_hws_create_cmdq
+ */
VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114,
- /** Control command: Destroy command queue */
+ /**
+ * Control command: Destroy command queue
+ * @see vpu_ipc_msg_payload_hws_destroy_cmdq
+ */
VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115,
- /** Control command: Set context scheduling properties */
+ /**
+ * Control command: Set context scheduling properties
+ * @see vpu_ipc_msg_payload_hws_set_context_sched_properties
+ */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116,
- /*
+ /**
* Register a doorbell to notify VPU of new work. The doorbell may later be
* deallocated or reassigned to another context.
+ * @see vpu_jsm_hws_register_db
*/
VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
- /** Control command: Log buffer setting */
+ /**
+ * Control command: Log buffer setting
+ * @see vpu_ipc_msg_payload_hws_set_scheduling_log
+ */
VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118,
- /* Control command: Suspend command queue. */
+ /**
+ * Control command: Suspend command queue.
+ * @see vpu_ipc_msg_payload_hws_suspend_cmdq
+ */
VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119,
- /* Control command: Resume command queue */
+ /**
+ * Control command: Resume command queue
+ * @see vpu_ipc_msg_payload_hws_resume_cmdq
+ */
VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a,
- /* Control command: Resume engine after reset */
+ /**
+ * Control command: Resume engine after reset
+ * @see vpu_ipc_msg_payload_hws_resume_engine
+ */
VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
- /* Control command: Enable survivability/DCT mode */
+ /**
+ * Control command: Enable survivability/DCT mode
+ * @see vpu_ipc_msg_payload_pwr_dct_control
+ */
VPU_JSM_MSG_DCT_ENABLE = 0x111c,
- /* Control command: Disable survivability/DCT mode */
+ /**
+ * Control command: Disable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_DISABLE = 0x111d,
/**
* Dump VPU state. To be used for debug purposes only.
- * NOTE: Please introduce new ASYNC commands before this one. *
+ * This command has no payload.
+ * NOTE: Please introduce new ASYNC commands before this one.
*/
VPU_JSM_MSG_STATE_DUMP = 0x11FF,
- /* IPC Host -> Device, General commands */
+
+ /** IPC Host -> Device, base id for general commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
- VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
+ /** Unsupported command */
+ VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD,
/**
* Control dyndbg behavior by executing a dyndbg command; equivalent to
- * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
+ * Linux command:
+ * @verbatim echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control @endverbatim
+ * @see vpu_ipc_msg_payload_dyndbg_control
*/
VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
/**
* Perform the save procedure for the D0i3 entry
*/
VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
- /* IPC Device -> Host, Job completion */
+
+ /**
+ * IPC Device -> Host, Job completion
+ * @see struct vpu_ipc_msg_payload_job_done
+ */
VPU_JSM_MSG_JOB_DONE = 0x2100,
+ /**
+ * IPC Device -> Host, Fence signalled
+ * @see vpu_ipc_msg_payload_native_fence_signalled
+ */
+ VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101,
+
/* IPC Device -> Host, Async command completion */
VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200,
+ /**
+ * IPC Device -> Host, engine reset complete
+ * @see vpu_ipc_msg_payload_engine_reset_done
+ */
VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
+ /**
+ * Preempt complete message
+ * @see vpu_ipc_msg_payload_engine_preempt_done
+ */
VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201,
VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202,
VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203,
+ /**
+ * Response to query engine heartbeat.
+ * @see vpu_ipc_msg_payload_query_engine_hb_done
+ */
VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204,
VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205,
VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206,
@@ -359,7 +672,10 @@ enum vpu_ipc_msg_type {
VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c,
/** Response to VPU_JSM_MSG_TRACE_GET_NAME. */
VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d,
- /** Response to VPU_JSM_MSG_SSID_RELEASE. */
+ /**
+ * Response to VPU_JSM_MSG_SSID_RELEASE.
+ * @see vpu_ipc_msg_payload_ssid_release
+ */
VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e,
/**
* Response to VPU_JSM_MSG_METRIC_STREAMER_START.
@@ -389,39 +705,74 @@ enum vpu_ipc_msg_type {
/**
* Asynchronous event sent from the VPU to the host either when the current
* metric buffer is full or when the VPU has collected a multiple of
- * @notify_sample_count samples as indicated through the start command
- * (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected
- * metric data.
+ * @ref vpu_jsm_metric_streamer_start::notify_sample_count samples as indicated
+ * through the start command (VPU_JSM_MSG_METRIC_STREAMER_START). Returns
+ * information about collected metric data.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213,
- /** Response to control command: Priority band setup */
+ /**
+ * Response to control command: Priority band setup
+ * @see vpu_ipc_msg_payload_hws_priority_band_setup
+ */
VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214,
- /** Response to control command: Create command queue */
+ /**
+ * Response to control command: Create command queue
+ * @see vpu_ipc_msg_payload_hws_create_cmdq_rsp
+ */
VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215,
- /** Response to control command: Destroy command queue */
+ /**
+ * Response to control command: Destroy command queue
+ * @see vpu_ipc_msg_payload_hws_destroy_cmdq
+ */
VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
- /** Response to control command: Set context scheduling properties */
+ /**
+ * Response to control command: Set context scheduling properties
+ * @see vpu_ipc_msg_payload_hws_set_context_sched_properties
+ */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
- /** Response to control command: Log buffer setting */
+ /**
+ * Response to control command: Log buffer setting
+ * @see vpu_ipc_msg_payload_hws_set_scheduling_log
+ */
VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218,
- /* IPC Device -> Host, HWS notify index entry of log buffer written */
+ /**
+ * IPC Device -> Host, HWS notify index entry of log buffer written
+ * @see vpu_ipc_msg_payload_hws_scheduling_log_notification
+ */
VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219,
- /* IPC Device -> Host, HWS completion of a context suspend request */
+ /**
+ * IPC Device -> Host, HWS completion of a context suspend request
+ * @see vpu_ipc_msg_payload_hws_suspend_cmdq
+ */
VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a,
- /* Response to control command: Resume command queue */
+ /**
+ * Response to control command: Resume command queue
+ * @see vpu_ipc_msg_payload_hws_resume_cmdq
+ */
VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b,
- /* Response to control command: Resume engine command response */
+ /**
+ * Response to control command: Resume engine command response
+ * @see vpu_ipc_msg_payload_hws_resume_engine
+ */
VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
- /* Response to control command: Enable survivability/DCT mode */
+ /**
+ * Response to control command: Enable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
- /* Response to control command: Disable survivability/DCT mode */
+ /**
+ * Response to control command: Disable survivability/DCT mode
+ * This command has no payload
+ */
VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
/**
* Response to state dump control command.
- * NOTE: Please introduce new ASYNC responses before this one. *
+ * This command has no payload.
+ * NOTE: Please introduce new ASYNC responses before this one.
*/
VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
+
/* IPC Device -> Host, General command completion */
VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
@@ -436,57 +787,66 @@ enum vpu_ipc_msg_type {
enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
-/*
- * Host <-> LRT IPC message payload definitions
+/**
+ * Engine reset request payload
+ * @see VPU_JSM_MSG_ENGINE_RESET
*/
struct vpu_ipc_msg_payload_engine_reset {
- /* Engine to be reset. */
+ /** Engine to be reset. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
+/**
+ * Engine preemption request struct
+ * @see VPU_JSM_MSG_ENGINE_PREEMPT
+ */
struct vpu_ipc_msg_payload_engine_preempt {
- /* Engine to be preempted. */
+ /** Engine to be preempted. */
u32 engine_idx;
- /* ID of the preemption request. */
+ /** ID of the preemption request. */
u32 preempt_id;
};
-/*
- * @brief Register doorbell command structure.
+/**
+ * Register doorbell command structure.
* This structure supports doorbell registration for only OS scheduling.
* @see VPU_JSM_MSG_REGISTER_DB
*/
struct vpu_ipc_msg_payload_register_db {
- /* Index of the doorbell to register. */
+ /** Index of the doorbell to register. */
u32 db_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
- /* Virtual address in Global GTT pointing to the start of job queue. */
+ /** Virtual address in Global GTT pointing to the start of job queue. */
u64 jobq_base;
- /* Size of the job queue in bytes. */
+ /** Size of the job queue in bytes. */
u32 jobq_size;
- /* Host sub-stream ID for the context assigned to the doorbell. */
+ /** Host sub-stream ID for the context assigned to the doorbell. */
u32 host_ssid;
};
/**
- * @brief Unregister doorbell command structure.
+ * Unregister doorbell command structure.
* Request structure to unregister a doorbell for both HW and OS scheduling.
* @see VPU_JSM_MSG_UNREGISTER_DB
*/
struct vpu_ipc_msg_payload_unregister_db {
- /* Index of the doorbell to unregister. */
+ /** Index of the doorbell to unregister. */
u32 db_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
+/**
+ * Heartbeat request structure
+ * @see VPU_JSM_MSG_QUERY_ENGINE_HB
+ */
struct vpu_ipc_msg_payload_query_engine_hb {
- /* Engine to return heartbeat value. */
+ /** Engine to return heartbeat value. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
@@ -506,10 +866,14 @@ struct vpu_ipc_msg_payload_power_level {
u32 reserved_0;
};
+/**
+ * Structure for requesting ssid release
+ * @see VPU_JSM_MSG_SSID_RELEASE
+ */
struct vpu_ipc_msg_payload_ssid_release {
- /* Host sub-stream ID for the context to be released. */
+ /** Host sub-stream ID for the context to be released. */
u32 host_ssid;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
@@ -535,7 +899,7 @@ struct vpu_jsm_metric_streamer_start {
u64 sampling_rate;
/**
* If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message
- * after every @notify_sample_count samples is collected or dropped by the VPU.
+ * after every @ref notify_sample_count samples is collected or dropped by the VPU.
* If set to UINT_MAX the VPU will only generate a notification when the metric
* buffer is full. If set to 0 the VPU will never generate a notification.
*/
@@ -545,9 +909,9 @@ struct vpu_jsm_metric_streamer_start {
* Address and size of the buffer where the VPU will write metric data. The
* VPU writes all counters from enabled metric groups one after another. If
* there is no space left to write data at the next sample period the VPU
- * will switch to the next buffer (@see next_buffer_addr) and will optionally
- * send a notification to the host driver if @notify_sample_count is non-zero.
- * If @next_buffer_addr is NULL the VPU will stop collecting metric data.
+ * will switch to the next buffer (@ref next_buffer_addr) and will optionally
+ * send a notification to the host driver if @ref notify_sample_count is non-zero.
+ * If @ref next_buffer_addr is NULL the VPU will stop collecting metric data.
*/
u64 buffer_addr;
u64 buffer_size;
@@ -577,12 +941,22 @@ struct vpu_jsm_metric_streamer_update {
/** Metric group mask that identifies metric streamer instance. */
u64 metric_group_mask;
/**
- * Address and size of the buffer where the VPU will write metric data. If
- * the buffer address is 0 or same as the currently used buffer the VPU will
- * continue writing metric data to the current buffer. In this case the
- * buffer size is ignored and the size of the current buffer is unchanged.
- * If the address is non-zero and differs from the current buffer address the
- * VPU will immediately switch data collection to the new buffer.
+ * Address and size of the buffer where the VPU will write metric data.
+ * This member dictates how the update operation should perform:
+ * 1. client needs information about the number of collected samples and the
+ * amount of data written to the current buffer
+ * 2. client wants to switch to a new buffer
+ *
+ * Case 1. is identified by the buffer address being 0 or the same as the
+ * currently used buffer address. In this case the buffer size is ignored and
+ * the size of the current buffer is unchanged. The VPU will return an update
+ * in the vpu_jsm_metric_streamer_done structure. The internal writing position
+ * into the buffer is not changed.
+ *
+ * Case 2. is identified by the address being non-zero and differs from the
+ * current buffer address. The VPU will immediately switch data collection to
+ * the new buffer. Then the VPU will return an update in the
+ * vpu_jsm_metric_streamer_done structure.
*/
u64 buffer_addr;
u64 buffer_size;
@@ -600,53 +974,80 @@ struct vpu_jsm_metric_streamer_update {
u64 next_buffer_size;
};
-struct vpu_ipc_msg_payload_blob_deinit {
- /* 64-bit unique ID for the blob to be de-initialized. */
- u64 blob_id;
-};
-
+/**
+ * Device -> host job completion message.
+ * @see VPU_JSM_MSG_JOB_DONE
+ */
struct vpu_ipc_msg_payload_job_done {
- /* Engine to which the job was submitted. */
+ /** Engine to which the job was submitted. */
u32 engine_idx;
- /* Index of the doorbell to which the job was submitted */
+ /** Index of the doorbell to which the job was submitted */
u32 db_idx;
- /* ID of the completed job */
+ /** ID of the completed job */
u32 job_id;
- /* Status of the completed job */
+ /** Status of the completed job */
u32 job_status;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
+ u64 cmdq_id;
+};
+
+/**
+ * Notification message upon native fence signalling.
+ * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED
+ */
+struct vpu_ipc_msg_payload_native_fence_signalled {
+ /** Engine ID. */
+ u32 engine_idx;
+ /** Host SSID. */
+ u32 host_ssid;
+ /** CMDQ ID */
u64 cmdq_id;
+ /** Fence object handle. */
+ u64 fence_handle;
};
+/**
+ * vpu_ipc_msg_payload_engine_reset_done will contain an array of this structure
+ * which contains which queues caused reset if FW was able to detect any error.
+ * @see vpu_ipc_msg_payload_engine_reset_done
+ */
struct vpu_jsm_engine_reset_context {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
- /* See VPU_ENGINE_RESET_CONTEXT_* defines */
+ /** See VPU_ENGINE_RESET_CONTEXT_* defines */
u64 flags;
};
+/**
+ * Engine reset response.
+ * @see VPU_JSM_MSG_ENGINE_RESET_DONE
+ */
struct vpu_ipc_msg_payload_engine_reset_done {
- /* Engine ordinal */
+ /** Engine ordinal */
u32 engine_idx;
- /* Number of impacted contexts */
+ /** Number of impacted contexts */
u32 num_impacted_contexts;
- /* Array of impacted command queue ids and their flags */
+ /** Array of impacted command queue ids and their flags */
struct vpu_jsm_engine_reset_context
impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS];
};
+/**
+ * Preemption response struct
+ * @see VPU_JSM_MSG_ENGINE_PREEMPT_DONE
+ */
struct vpu_ipc_msg_payload_engine_preempt_done {
- /* Engine preempted. */
+ /** Engine preempted. */
u32 engine_idx;
- /* ID of the preemption request. */
+ /** ID of the preemption request. */
u32 preempt_id;
};
@@ -675,12 +1076,16 @@ struct vpu_ipc_msg_payload_unregister_db_done {
u32 reserved_0;
};
+/**
+ * Structure for heartbeat response
+ * @see VPU_JSM_MSG_QUERY_ENGINE_HB_DONE
+ */
struct vpu_ipc_msg_payload_query_engine_hb_done {
- /* Engine returning heartbeat value. */
+ /** Engine returning heartbeat value. */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
- /* Heartbeat value. */
+ /** Heartbeat value. */
u64 heartbeat;
};
@@ -700,12 +1105,10 @@ struct vpu_ipc_msg_payload_get_power_level_count_done {
u8 power_limit[16];
};
-struct vpu_ipc_msg_payload_blob_deinit_done {
- /* 64-bit unique ID for the blob de-initialized. */
- u64 blob_id;
-};
-
-/* HWS priority band setup request / response */
+/**
+ * HWS priority band setup request / response
+ * @see VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP
+ */
struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Grace period in 100ns units when preempting another priority band for
@@ -715,6 +1118,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Default quantum in 100ns units for scheduling across processes
* within a priority band
+ * Minimum value supported by NPU is 1ms (10000 in 100ns units).
*/
u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
@@ -727,17 +1131,27 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* in situations when it's starved by the focus band.
*/
u32 normal_band_percentage;
- /* Reserved */
- u32 reserved_0;
+ /*
+ * TDR timeout value in milliseconds. Default value of 0 meaning no timeout.
+ */
+ u32 tdr_timeout;
+ /* Non-interactive queue timeout for no progress of heartbeat in milliseconds.
+ * Default value of 0 meaning no timeout.
+ */
+ u32 non_interactive_no_progress_timeout;
+ /*
+ * Non-interactive queue upper limit timeout value in milliseconds. Default
+ * value of 0 meaning no timeout.
+ */
+ u32 non_interactive_timeout;
};
-/*
+/**
* @brief HWS create command queue request.
* Host will create a command queue via this command.
* Note: Cmdq group is a handle of an object which
* may contain one or more command queues.
* @see VPU_JSM_MSG_CREATE_CMD_QUEUE
- * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
*/
struct vpu_ipc_msg_payload_hws_create_cmdq {
/* Process id */
@@ -758,60 +1172,73 @@ struct vpu_ipc_msg_payload_hws_create_cmdq {
u32 reserved_0;
};
-/*
- * @brief HWS create command queue response.
- * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+/**
+ * HWS create command queue response.
* @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
*/
struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
- /* Process id */
+ /** Process id */
u64 process_id;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Engine for which queue is being created */
+ /** Engine for which queue is being created */
u32 engine_idx;
- /* Command queue group */
+ /** Command queue group */
u64 cmdq_group;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/* HWS destroy command queue request / response */
+/**
+ * HWS destroy command queue request / response
+ * @see VPU_JSM_MSG_DESTROY_CMD_QUEUE
+ * @see VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP
+ */
struct vpu_ipc_msg_payload_hws_destroy_cmdq {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/* HWS set context scheduling properties request / response */
+/**
+ * HWS set context scheduling properties request / response
+ * @see VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES
+ * @see VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP
+ */
struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
- /* Priority band to assign to work of this context */
+ /**
+ * Priority band to assign to work of this context.
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band
+ */
u32 priority_band;
- /* Inside realtime band assigns a further priority */
+ /** Inside realtime band assigns a further priority */
u32 realtime_priority_level;
- /* Priority relative to other contexts in the same process */
+ /** Priority relative to other contexts in the same process */
s32 in_process_priority;
- /* Zero padding / Reserved */
+ /** Zero padding / Reserved */
u32 reserved_1;
- /* Context quantum relative to other contexts of same priority in the same process */
+ /**
+ * Context quantum relative to other contexts of same priority in the same process
+ * Minimum value supported by NPU is 1ms (10000 in 100ns units).
+ */
u64 context_quantum;
- /* Grace period when preempting context of the same priority within the same process */
+ /** Grace period when preempting context of the same priority within the same process */
u64 grace_period_same_priority;
- /* Grace period when preempting context of a lower priority within the same process */
+ /** Grace period when preempting context of a lower priority within the same process */
u64 grace_period_lower_priority;
};
-/*
- * @brief Register doorbell command structure.
+/**
+ * Register doorbell command structure.
* This structure supports doorbell registration for both HW and OS scheduling.
* Note: Queue base and size are added here so that the same structure can be used for
* OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored
@@ -820,27 +1247,27 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
* @see VPU_JSM_MSG_HWS_REGISTER_DB
*/
struct vpu_jsm_hws_register_db {
- /* Index of the doorbell to register. */
+ /** Index of the doorbell to register. */
u32 db_id;
- /* Host sub-stream ID for the context assigned to the doorbell. */
+ /** Host sub-stream ID for the context assigned to the doorbell. */
u32 host_ssid;
- /* ID of the command queue associated with the doorbell. */
+ /** ID of the command queue associated with the doorbell. */
u64 cmdq_id;
- /* Virtual address pointing to the start of command queue. */
+ /** Virtual address pointing to the start of command queue. */
u64 cmdq_base;
- /* Size of the command queue in bytes. */
+ /** Size of the command queue in bytes. */
u64 cmdq_size;
};
-/*
- * @brief Structure to set another buffer to be used for scheduling-related logging.
+/**
+ * Structure to set another buffer to be used for scheduling-related logging.
* The size of the logging buffer and the number of entries is defined as part of the
* buffer itself as described next.
* The log buffer received from the host is made up of;
- * - header: 32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'.
+ * - header: 32 bytes in size, as shown in @ref vpu_hws_log_buffer_header.
* The header contains the number of log entries in the buffer.
* - log entry: 0 to n-1, each log entry is 32 bytes in size, as shown in
- * 'struct vpu_hws_log_buffer_entry'.
+ * @ref vpu_hws_log_buffer_entry.
* The entry contains the VPU timestamp, operation type and data.
* The host should provide the notify index value of log buffer to VPU. This is a
* value defined within the log buffer and when written to will generate the
@@ -854,24 +1281,30 @@ struct vpu_jsm_hws_register_db {
* @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
*/
struct vpu_ipc_msg_payload_hws_set_scheduling_log {
- /* Engine ordinal */
+ /** Engine ordinal */
u32 engine_idx;
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /*
+ /**
* VPU log buffer virtual address.
* Set to 0 to disable logging for this engine.
*/
u64 vpu_log_buffer_va;
- /*
+ /**
* Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
* is generated when an event log is written to this index.
*/
u64 notify_index;
+ /**
+ * Field is now deprecated, will be removed when KMD is updated to support removal
+ */
+ u32 enable_extra_events;
+ /** Zero Padding */
+ u32 reserved_0;
};
-/*
- * @brief The scheduling log notification is generated by VPU when it writes
+/**
+ * The scheduling log notification is generated by VPU when it writes
* an event into the log buffer at the notify_index. VPU notifies host with
* VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous
* message from VPU to host.
@@ -879,14 +1312,14 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log {
* @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
*/
struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
- /* Engine ordinal */
+ /** Engine ordinal */
u32 engine_idx;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
};
-/*
- * @brief HWS suspend command queue request and done structure.
+/**
+ * HWS suspend command queue request and done structure.
* Host will request the suspend of contexts and VPU will;
* - Suspend all work on this context
* - Preempt any running work
@@ -905,21 +1338,21 @@ struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
* @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
*/
struct vpu_ipc_msg_payload_hws_suspend_cmdq {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
- /*
+ /**
* Suspend fence value - reported by the VPU suspend context
* completed once suspend is complete.
*/
u64 suspend_fence_value;
};
-/*
- * @brief HWS Resume command queue request / response structure.
+/**
+ * HWS Resume command queue request / response structure.
* Host will request the resume of a context;
* - VPU will resume all work on this context
* - Scheduler will allow this context to be scheduled
@@ -927,25 +1360,25 @@ struct vpu_ipc_msg_payload_hws_suspend_cmdq {
* @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP
*/
struct vpu_ipc_msg_payload_hws_resume_cmdq {
- /* Host SSID */
+ /** Host SSID */
u32 host_ssid;
- /* Zero Padding */
+ /** Zero Padding */
u32 reserved_0;
- /* Command queue id */
+ /** Command queue id */
u64 cmdq_id;
};
-/*
- * @brief HWS Resume engine request / response structure.
- * After a HWS engine reset, all scheduling is stopped on VPU until a engine resume.
+/**
+ * HWS Resume engine request / response structure.
+ * After a HWS engine reset, all scheduling is stopped on VPU until an engine resume.
* Host shall send this command to resume scheduling of any valid queue.
- * @see VPU_JSM_MSG_HWS_RESUME_ENGINE
+ * @see VPU_JSM_MSG_HWS_ENGINE_RESUME
* @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE
*/
struct vpu_ipc_msg_payload_hws_resume_engine {
- /* Engine to be resumed */
+ /** Engine to be resumed */
u32 engine_idx;
- /* Reserved */
+ /** Reserved */
u32 reserved_0;
};
@@ -1079,7 +1512,7 @@ struct vpu_jsm_metric_streamer_done {
/**
* Metric group description placed in the metric buffer after successful completion
* of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more
- * @vpu_jsm_metric_counter_descriptor records.
+ * @ref vpu_jsm_metric_counter_descriptor records.
* @see VPU_JSM_MSG_METRIC_STREAMER_INFO
*/
struct vpu_jsm_metric_group_descriptor {
@@ -1166,29 +1599,24 @@ struct vpu_jsm_metric_counter_descriptor {
};
/**
- * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests.
+ * Payload for @ref VPU_JSM_MSG_DYNDBG_CONTROL requests.
*
- * VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug
- * feature, which allows developers to selectively enable / disable MVLOG_DEBUG
- * messages. This is equivalent to the Dynamic Debug functionality provided by
- * Linux
- * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html)
- * The host can control Dynamic Debug behavior by sending dyndbg commands, which
- * have the same syntax as Linux
- * dyndbg commands.
+ * VPU_JSM_MSG_DYNDBG_CONTROL requests are used to control the VPU FW dynamic debug
+ * feature, which allows developers to selectively enable/disable code to obtain
+ * additional FW information. This is equivalent to the dynamic debug functionality
+ * provided by Linux. The host can control dynamic debug behavior by sending dyndbg
+ * commands, using the same syntax as for Linux dynamic debug commands.
*
- * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host
- * still has to set the logging level to MVLOG_DEBUG, using the
- * VPU_JSM_MSG_TRACE_SET_CONFIG command.
+ * @see https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html.
*
- * The host can see the current dynamic debug configuration by executing a
- * special 'show' command. The dyndbg configuration will be printed to the
- * configured logging destination using MVLOG_INFO logging level.
+ * NOTE:
+ * As the dynamic debug feature uses MVLOG messages to provide information, the host
+ * must first set the logging level to MVLOG_DEBUG, using the @ref VPU_JSM_MSG_TRACE_SET_CONFIG
+ * command.
*/
struct vpu_ipc_msg_payload_dyndbg_control {
/**
- * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated
- * string.
+ * Dyndbg command to be executed.
*/
char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
};
@@ -1209,7 +1637,7 @@ struct vpu_ipc_msg_payload_pwr_d0i3_enter {
};
/**
- * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ * Payload for @ref VPU_JSM_MSG_DCT_ENABLE message.
*
* Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
* corresponding to a 85% duty cycle. This payload allows the host to tune these
@@ -1235,10 +1663,10 @@ union vpu_ipc_msg_payload {
struct vpu_jsm_metric_streamer_start metric_streamer_start;
struct vpu_jsm_metric_streamer_stop metric_streamer_stop;
struct vpu_jsm_metric_streamer_update metric_streamer_update;
- struct vpu_ipc_msg_payload_blob_deinit blob_deinit;
struct vpu_ipc_msg_payload_ssid_release ssid_release;
struct vpu_jsm_hws_register_db hws_register_db;
struct vpu_ipc_msg_payload_job_done job_done;
+ struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled;
struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done;
struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done;
struct vpu_ipc_msg_payload_register_db_done register_db_done;
@@ -1246,7 +1674,6 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done;
struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done;
struct vpu_jsm_metric_streamer_done metric_streamer_done;
- struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done;
struct vpu_ipc_msg_payload_trace_config trace_config;
struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability;
struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
@@ -1267,28 +1694,28 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
};
-/*
- * Host <-> LRT IPC message base structure.
+/**
+ * Host <-> NPU IPC message base structure.
*
* NOTE: All instances of this object must be aligned on a 64B boundary
* to allow proper handling of VPU cache operations.
*/
struct vpu_jsm_msg {
- /* Reserved */
+ /** Reserved */
u64 reserved_0;
- /* Message type, see vpu_ipc_msg_type enum. */
+ /** Message type, see @ref vpu_ipc_msg_type. */
u32 type;
- /* Buffer status, see vpu_ipc_msg_status enum. */
+ /** Buffer status, see @ref vpu_ipc_msg_status. */
u32 status;
- /*
+ /**
* Request ID, provided by the host in a request message and passed
* back by VPU in the response message.
*/
u32 request_id;
- /* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
+ /** Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
u32 result;
u64 reserved_1;
- /* Message payload depending on message type, see vpu_ipc_msg_payload union. */
+ /** Message payload depending on message type, see vpu_ipc_msg_payload union. */
union vpu_ipc_msg_payload payload;
};
diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig
index a9f866230058..116e42d152ca 100644
--- a/drivers/accel/qaic/Kconfig
+++ b/drivers/accel/qaic/Kconfig
@@ -8,8 +8,8 @@ config DRM_ACCEL_QAIC
depends on DRM_ACCEL
depends on PCI && HAS_IOMEM
depends on MHI_BUS
- depends on MMU
select CRC32
+ select WANT_DEV_COREDUMP
help
Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are
designed to accelerate Deep Learning inference workloads.
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 35e883515629..71f727b74da3 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -10,6 +10,9 @@ qaic-y := \
qaic_control.o \
qaic_data.o \
qaic_drv.o \
+ qaic_ras.o \
+ qaic_ssr.o \
+ qaic_sysfs.o \
qaic_timesync.o \
sahara.o
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index ada9b1eb0787..13a14c6c6168 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -20,6 +20,11 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */
module_param(mhi_timeout_ms, uint, 0600);
MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value");
+static const char *fw_image_paths[FAMILY_MAX] = {
+ [FAMILY_AIC100] = "qcom/aic100/sbl.bin",
+ [FAMILY_AIC200] = "qcom/aic200/sbl.bin",
+};
+
static const struct mhi_channel_config aic100_channels[] = {
{
.name = "QAIC_LOOPBACK",
@@ -405,6 +410,329 @@ static const struct mhi_channel_config aic100_channels[] = {
.auto_queue = false,
.wake_capable = false,
},
+ {
+ .name = "IPCR",
+ .num = 24,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "IPCR",
+ .num = 25,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = true,
+ .wake_capable = false,
+ },
+};
+
+static const struct mhi_channel_config aic200_channels[] = {
+ {
+ .name = "QAIC_LOOPBACK",
+ .num = 0,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOOPBACK",
+ .num = 1,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SAHARA",
+ .num = 2,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SAHARA",
+ .num = 3,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SSR",
+ .num = 6,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SSR",
+ .num = 7,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_CONTROL",
+ .num = 10,
+ .num_elements = 128,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_CONTROL",
+ .num = 11,
+ .num_elements = 128,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOGGING",
+ .num = 12,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOGGING",
+ .num = 13,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_STATUS",
+ .num = 14,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_STATUS",
+ .num = 15,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TELEMETRY",
+ .num = 16,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TELEMETRY",
+ .num = 17,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TIMESYNC_PERIODIC",
+ .num = 22,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TIMESYNC_PERIODIC",
+ .num = 23,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "IPCR",
+ .num = 24,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "IPCR",
+ .num = 25,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = true,
+ .wake_capable = false,
+ },
};
static struct mhi_event_config aic100_events[] = {
@@ -422,16 +750,44 @@ static struct mhi_event_config aic100_events[] = {
},
};
-static struct mhi_controller_config aic100_config = {
- .max_channels = 128,
- .timeout_ms = 0, /* controlled by mhi_timeout */
- .buf_len = 0,
- .num_channels = ARRAY_SIZE(aic100_channels),
- .ch_cfg = aic100_channels,
- .num_events = ARRAY_SIZE(aic100_events),
- .event_cfg = aic100_events,
- .use_bounce_buf = false,
- .m2_no_db = false,
+static struct mhi_event_config aic200_events[] = {
+ {
+ .num_elements = 32,
+ .irq_moderation_ms = 0,
+ .irq = 0,
+ .channel = U32_MAX,
+ .priority = 1,
+ .mode = MHI_DB_BRST_DISABLE,
+ .data_type = MHI_ER_CTRL,
+ .hardware_event = false,
+ .client_managed = false,
+ .offload_channel = false,
+ },
+};
+
+static struct mhi_controller_config mhi_cntrl_configs[] = {
+ [FAMILY_AIC100] = {
+ .max_channels = 128,
+ .timeout_ms = 0, /* controlled by mhi_timeout */
+ .buf_len = 0,
+ .num_channels = ARRAY_SIZE(aic100_channels),
+ .ch_cfg = aic100_channels,
+ .num_events = ARRAY_SIZE(aic100_events),
+ .event_cfg = aic100_events,
+ .use_bounce_buf = false,
+ .m2_no_db = false,
+ },
+ [FAMILY_AIC200] = {
+ .max_channels = 128,
+ .timeout_ms = 0, /* controlled by mhi_timeout */
+ .buf_len = 0,
+ .num_channels = ARRAY_SIZE(aic200_channels),
+ .ch_cfg = aic200_channels,
+ .num_events = ARRAY_SIZE(aic200_events),
+ .event_cfg = aic200_events,
+ .use_bounce_buf = false,
+ .m2_no_db = false,
+ },
};
static int mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 *out)
@@ -513,8 +869,9 @@ static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
}
struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
- int mhi_irq, bool shared_msi)
+ int mhi_irq, bool shared_msi, int family)
{
+ struct mhi_controller_config mhi_config = mhi_cntrl_configs[family];
struct mhi_controller *mhi_cntrl;
int ret;
@@ -549,11 +906,18 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, voi
if (shared_msi) /* MSI shared with data path, no IRQF_NO_SUSPEND */
mhi_cntrl->irq_flags = IRQF_SHARED;
- mhi_cntrl->fw_image = "qcom/aic100/sbl.bin";
+ mhi_cntrl->fw_image = fw_image_paths[family];
+
+ if (family == FAMILY_AIC200) {
+ mhi_cntrl->name = "AIC200";
+ mhi_cntrl->seg_len = SZ_512K;
+ } else {
+ mhi_cntrl->name = "AIC100";
+ }
/* use latest configured timeout */
- aic100_config.timeout_ms = mhi_timeout_ms;
- ret = mhi_register_controller(mhi_cntrl, &aic100_config);
+ mhi_config.timeout_ms = mhi_timeout_ms;
+ ret = mhi_register_controller(mhi_cntrl, &mhi_config);
if (ret) {
pci_err(pci_dev, "mhi_register_controller failed %d\n", ret);
return ERR_PTR(ret);
diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h
index 500e7f4af2af..8939f6ae185e 100644
--- a/drivers/accel/qaic/mhi_controller.h
+++ b/drivers/accel/qaic/mhi_controller.h
@@ -8,7 +8,7 @@
#define MHICONTROLLERQAIC_H_
struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
- int mhi_irq, bool shared_msi);
+ int mhi_irq, bool shared_msi, int family);
void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up);
void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl);
void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl);
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 02561b6cecc6..fa7a8155658c 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -21,6 +21,7 @@
#define QAIC_DBC_BASE SZ_128K
#define QAIC_DBC_SIZE SZ_4K
+#define QAIC_SSR_DBC_SENTINEL U32_MAX /* No ongoing SSR sentinel */
#define QAIC_NO_PARTITION -1
@@ -32,6 +33,12 @@
#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
#define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev)
+enum aic_families {
+ FAMILY_AIC100,
+ FAMILY_AIC200,
+ FAMILY_MAX,
+};
+
enum __packed dev_states {
/* Device is offline or will be very soon */
QAIC_OFFLINE,
@@ -41,6 +48,22 @@ enum __packed dev_states {
QAIC_ONLINE,
};
+enum dbc_states {
+ /* DBC is free and can be activated */
+ DBC_STATE_IDLE,
+ /* DBC is activated and a workload is running on device */
+ DBC_STATE_ASSIGNED,
+ /* Sub-system associated with this workload has crashed and it will shutdown soon */
+ DBC_STATE_BEFORE_SHUTDOWN,
+ /* Sub-system associated with this workload has crashed and it has shutdown */
+ DBC_STATE_AFTER_SHUTDOWN,
+ /* Sub-system associated with this workload is shutdown and it will be powered up soon */
+ DBC_STATE_BEFORE_POWER_UP,
+ /* Sub-system associated with this workload is now powered up */
+ DBC_STATE_AFTER_POWER_UP,
+ DBC_STATE_MAX,
+};
+
extern bool datapath_polling;
struct qaic_user {
@@ -91,6 +114,8 @@ struct dma_bridge_chan {
* response queue's head and tail pointer of this DBC.
*/
void __iomem *dbc_base;
+ /* Synchronizes access to Request queue's head and tail pointer */
+ struct mutex req_lock;
/* Head of list where each node is a memory handle queued in request queue */
struct list_head xfer_list;
/* Synchronizes DBC readers during cleanup */
@@ -106,6 +131,8 @@ struct dma_bridge_chan {
unsigned int irq;
/* Polling work item to simulate interrupts */
struct work_struct poll_work;
+ /* Represents various states of this DBC from enum dbc_states */
+ unsigned int state;
};
struct qaic_device {
@@ -113,10 +140,10 @@ struct qaic_device {
struct pci_dev *pdev;
/* Req. ID of request that will be queued next in MHI control device */
u32 next_seq_num;
- /* Base address of bar 0 */
- void __iomem *bar_0;
- /* Base address of bar 2 */
- void __iomem *bar_2;
+ /* Base address of the MHI bar */
+ void __iomem *bar_mhi;
+ /* Base address of the DBCs bar */
+ void __iomem *bar_dbc;
/* Controller structure for MHI devices */
struct mhi_controller *mhi_cntrl;
/* MHI control channel device */
@@ -153,6 +180,8 @@ struct qaic_device {
struct mhi_device *qts_ch;
/* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */
struct workqueue_struct *qts_wq;
+ /* MHI "QAIC_TIMESYNC_PERIODIC" channel device */
+ struct mhi_device *mqts_ch;
/* Head of list of page allocated by MHI bootlog device */
struct list_head bootlog;
/* MHI bootlog channel device */
@@ -161,6 +190,22 @@ struct qaic_device {
struct workqueue_struct *bootlog_wq;
/* Synchronizes access of pages in MHI bootlog device */
struct mutex bootlog_mutex;
+ /* MHI RAS channel device */
+ struct mhi_device *ras_ch;
+ /* Correctable error count */
+ unsigned int ce_count;
+ /* Un-correctable error count */
+ unsigned int ue_count;
+ /* Un-correctable non-fatal error count */
+ unsigned int ue_nf_count;
+ /* MHI SSR channel device */
+ struct mhi_device *ssr_ch;
+ /* Work queue for tasks related to MHI SSR device */
+ struct workqueue_struct *ssr_wq;
+ /* Buffer to collect SSR crashdump via SSR MHI channel */
+ void *ssr_mhi_buf;
+ /* DBC which is under SSR. Sentinel U32_MAX would mean that no SSR in progress */
+ u32 ssr_dbc;
};
struct qaic_drm_device {
@@ -179,6 +224,8 @@ struct qaic_drm_device {
struct list_head users;
/* Synchronizes access to users list */
struct mutex users_mutex;
+ /* Pointer to array of DBC sysfs attributes */
+ void *sysfs_attrs;
};
struct qaic_bo {
@@ -207,8 +254,6 @@ struct qaic_bo {
bool sliced;
/* Request ID of this BO if it is queued for execution */
u16 req_id;
- /* Handle assigned to this BO */
- u32 handle;
/* Wait on this for completion of DMA transfer of this BO */
struct completion xfer_done;
/*
@@ -303,6 +348,13 @@ int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm
int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
-void irq_polling_work(struct work_struct *work);
+void qaic_irq_polling_work(struct work_struct *work);
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id);
+void qaic_dbc_exit_ssr(struct qaic_device *qdev);
+
+/* qaic_sysfs.c */
+int qaic_sysfs_init(struct qaic_drm_device *qddev);
+void qaic_sysfs_remove(struct qaic_drm_device *qddev);
+void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state);
#endif /* _QAIC_H_ */
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index 9e8a8cbadf6b..428d8f65bff3 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -17,6 +17,7 @@
#include <linux/overflow.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
+#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
@@ -30,7 +31,7 @@
#define MANAGE_MAGIC_NUMBER ((__force __le32)0x43494151) /* "QAIC" in little endian */
#define QAIC_DBC_Q_GAP SZ_256
#define QAIC_DBC_Q_BUF_ALIGN SZ_4K
-#define QAIC_MANAGE_EXT_MSG_LENGTH SZ_64K /* Max DMA message length */
+#define QAIC_MANAGE_WIRE_MSG_LENGTH SZ_64K /* Max DMA message length */
#define QAIC_WRAPPER_MAX_SIZE SZ_4K
#define QAIC_MHI_RETRY_WAIT_MS 100
#define QAIC_MHI_RETRY_MAX 20
@@ -309,6 +310,7 @@ static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resou
enable_dbc(qdev, dbc_id, usr);
qdev->dbc[dbc_id].in_use = true;
resources->buf = NULL;
+ set_dbc_state(qdev, dbc_id, DBC_STATE_ASSIGNED);
}
}
@@ -367,7 +369,7 @@ static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrap
if (in_trans->hdr.len % 8 != 0)
return -EINVAL;
- if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_EXT_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers,
@@ -407,7 +409,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
return -EINVAL;
remaining = in_trans->size - resources->xferred_dma_size;
if (remaining == 0)
- return 0;
+ return -EINVAL;
if (check_add_overflow(xfer_start_addr, remaining, &end))
return -EINVAL;
@@ -495,8 +497,8 @@ static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wr
nents = sgt->nents;
nents_dma = nents;
- *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
- for_each_sgtable_sg(sgt, sg, i) {
+ *size = QAIC_MANAGE_WIRE_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
+ for_each_sgtable_dma_sg(sgt, sg, i) {
*size -= sizeof(*asp);
/* Save 1K for possible follow-up transactions. */
if (*size < SZ_1K) {
@@ -576,7 +578,7 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list
/* There should be enough space to hold at least one ASP entry. */
if (size_add(msg_hdr_len, sizeof(*out_trans) + sizeof(struct wire_addr_size_pair)) >
- QAIC_MANAGE_EXT_MSG_LENGTH)
+ QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOMEM;
xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
@@ -645,7 +647,7 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
if (!in_trans->queue_size)
@@ -655,8 +657,9 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
return -EINVAL;
nelem = in_trans->queue_size;
- size = (get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) * nelem;
- if (size / nelem != get_dbc_req_elem_size() + get_dbc_rsp_elem_size())
+ if (check_mul_overflow((u32)(get_dbc_req_elem_size() + get_dbc_rsp_elem_size()),
+ nelem,
+ &size))
return -EINVAL;
if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size)
@@ -729,7 +732,7 @@ static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_l
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper));
@@ -810,7 +813,7 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
}
if (ret)
- break;
+ goto out;
}
if (user_len != user_msg->len)
@@ -921,6 +924,7 @@ static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len
}
release_dbc(qdev, dbc_id);
+ set_dbc_state(qdev, dbc_id, DBC_STATE_IDLE);
*msg_len += sizeof(*in_trans);
return 0;
@@ -1052,7 +1056,7 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
init_completion(&elem.xfer_done);
if (likely(!qdev->cntl_lost_buf)) {
/*
- * The max size of request to device is QAIC_MANAGE_EXT_MSG_LENGTH.
+ * The max size of request to device is QAIC_MANAGE_WIRE_MSG_LENGTH.
* The max size of response from device is QAIC_MANAGE_MAX_MSG_LENGTH.
*/
out_buf = kmalloc(QAIC_MANAGE_MAX_MSG_LENGTH, GFP_KERNEL);
@@ -1079,7 +1083,6 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
list_for_each_entry(w, &wrappers->list, list) {
kref_get(&w->ref_count);
- retry_count = 0;
ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len,
list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN);
if (ret) {
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index e86e71c1cdd8..60cb4d65d48e 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
@@ -165,16 +166,17 @@ static void free_slice(struct kref *kref)
drm_gem_object_put(&slice->bo->base);
sg_free_table(slice->sgt);
kfree(slice->sgt);
- kfree(slice->reqs);
+ kvfree(slice->reqs);
kfree(slice);
}
static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
struct sg_table *sgt_in, u64 size, u64 offset)
{
- int total_len, len, nents, offf = 0, offl = 0;
struct scatterlist *sg, *sgn, *sgf, *sgl;
+ unsigned int len, nents, offf, offl;
struct sg_table *sgt;
+ size_t total_len;
int ret, j;
/* find out number of relevant nents needed for this mem */
@@ -182,9 +184,11 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl
sgf = NULL;
sgl = NULL;
nents = 0;
+ offf = 0;
+ offl = 0;
size = size ? size : PAGE_SIZE;
- for (sg = sgt_in->sgl; sg; sg = sg_next(sg)) {
+ for_each_sgtable_dma_sg(sgt_in, sg, j) {
len = sg_dma_len(sg);
if (!len)
@@ -221,7 +225,7 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl
/* copy relevant sg node and fix page and length */
sgn = sgf;
- for_each_sgtable_sg(sgt, sg, j) {
+ for_each_sgtable_dma_sg(sgt, sg, j) {
memcpy(sg, sgn, sizeof(*sg));
if (sgn == sgf) {
sg_dma_address(sg) += offf;
@@ -301,7 +305,7 @@ static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice,
* fence.
*/
dev_addr = req->dev_addr;
- for_each_sgtable_sg(slice->sgt, sg, i) {
+ for_each_sgtable_dma_sg(slice->sgt, sg, i) {
slice->reqs[i].cmd = cmd;
slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
sg_dma_address(sg) : dev_addr);
@@ -401,7 +405,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
goto free_sgt;
}
- slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
+ slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
if (!slice->reqs) {
ret = -ENOMEM;
goto free_slice;
@@ -427,7 +431,7 @@ static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
return 0;
free_req:
- kfree(slice->reqs);
+ kvfree(slice->reqs);
free_slice:
kfree(slice);
free_sgt:
@@ -554,6 +558,7 @@ static bool invalid_sem(struct qaic_sem *sem)
static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
u32 count, u64 total_size)
{
+ u64 total;
int i;
for (i = 0; i < count; i++) {
@@ -563,7 +568,8 @@ static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_
invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
return -EINVAL;
- if (slice_ent[i].offset + slice_ent[i].size > total_size)
+ if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) ||
+ total > total_size)
return -EINVAL;
}
@@ -604,7 +610,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc
struct scatterlist *sg;
int ret = 0;
- if (obj->import_attach)
+ if (drm_gem_is_imported(obj))
return -EINVAL;
for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
@@ -625,7 +631,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
{
struct qaic_bo *bo = to_qaic_bo(obj);
- if (obj->import_attach) {
+ if (drm_gem_is_imported(obj)) {
/* DMABUF/PRIME Path */
drm_prime_gem_destroy(obj, NULL);
} else {
@@ -638,8 +644,36 @@ static void qaic_free_object(struct drm_gem_object *obj)
kfree(bo);
}
+static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+ struct scatterlist *sg, *sg_in;
+ struct sg_table *sgt, *sgt_in;
+ int i;
+
+ sgt_in = bo->sgt;
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt)
+ return ERR_PTR(-ENOMEM);
+
+ if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) {
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg = sgt->sgl;
+ for_each_sgtable_sg(sgt_in, sg_in, i) {
+ memcpy(sg, sg_in, sizeof(*sg));
+ sg = sg_next(sg);
+ }
+
+ return sgt;
+}
+
static const struct drm_gem_object_funcs qaic_gem_funcs = {
.free = qaic_free_object,
+ .get_sg_table = qaic_get_sg_table,
.print_info = qaic_gem_print_info,
.mmap = qaic_gem_object_mmap,
.vm_ops = &drm_vm_ops,
@@ -726,7 +760,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (ret)
goto free_bo;
- bo->handle = args->handle;
drm_gem_object_put(obj);
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
@@ -865,7 +898,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
{
int ret;
- if (bo->base.import_attach)
+ if (drm_gem_is_imported(&bo->base))
ret = qaic_prepare_import_bo(bo, hdr);
else
ret = qaic_prepare_export_bo(qdev, bo, hdr);
@@ -889,7 +922,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b
static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
{
- if (bo->base.import_attach)
+ if (drm_gem_is_imported(&bo->base))
qaic_unprepare_import_bo(bo);
else
qaic_unprepare_export_bo(qdev, bo);
@@ -949,8 +982,9 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
if (args->hdr.count == 0)
return -EINVAL;
- arg_size = args->hdr.count * sizeof(*slice_ent);
- if (arg_size / args->hdr.count != sizeof(*slice_ent))
+ if (check_mul_overflow((unsigned long)args->hdr.count,
+ (unsigned long)sizeof(*slice_ent),
+ &arg_size))
return -EINVAL;
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
@@ -980,18 +1014,12 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
user_data = u64_to_user_ptr(args->data);
- slice_ent = kzalloc(arg_size, GFP_KERNEL);
- if (!slice_ent) {
- ret = -EINVAL;
+ slice_ent = memdup_user(user_data, arg_size);
+ if (IS_ERR(slice_ent)) {
+ ret = PTR_ERR(slice_ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(slice_ent, user_data, arg_size);
- if (ret) {
- ret = -EFAULT;
- goto free_slice_ent;
- }
-
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
if (!obj) {
ret = -ENOENT;
@@ -1019,6 +1047,11 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
goto unlock_ch_srcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto unlock_ch_srcu;
+ }
+
ret = qaic_prepare_bo(qdev, bo, &args->hdr);
if (ret)
goto unlock_ch_srcu;
@@ -1296,8 +1329,6 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
int usr_rcu_id, qdev_rcu_id;
struct qaic_device *qdev;
struct qaic_user *usr;
- u8 __user *user_data;
- unsigned long n;
u64 received_ts;
u32 queue_level;
u64 submit_ts;
@@ -1310,20 +1341,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
received_ts = ktime_get_ns();
size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
- n = (unsigned long)size * args->hdr.count;
- if (args->hdr.count == 0 || n / args->hdr.count != size)
+ if (args->hdr.count == 0)
return -EINVAL;
- user_data = u64_to_user_ptr(args->data);
-
- exec = kcalloc(args->hdr.count, size, GFP_KERNEL);
- if (!exec)
- return -ENOMEM;
-
- if (copy_from_user(exec, user_data, n)) {
- ret = -EFAULT;
- goto free_exec;
- }
+ exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size);
+ if (IS_ERR(exec))
+ return PTR_ERR(exec);
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1352,13 +1375,22 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
goto release_ch_rcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto release_ch_rcu;
+ }
+
+ ret = mutex_lock_interruptible(&dbc->req_lock);
+ if (ret)
+ goto release_ch_rcu;
+
head = readl(dbc->dbc_base + REQHP_OFF);
tail = readl(dbc->dbc_base + REQTP_OFF);
if (head == U32_MAX || tail == U32_MAX) {
/* PCI link error */
ret = -ENODEV;
- goto release_ch_rcu;
+ goto unlock_req_lock;
}
queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail);
@@ -1366,11 +1398,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc,
head, &tail);
if (ret)
- goto release_ch_rcu;
+ goto unlock_req_lock;
/* Finalize commit to hardware */
submit_ts = ktime_get_ns();
writel(tail, dbc->dbc_base + REQTP_OFF);
+ mutex_unlock(&dbc->req_lock);
update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts,
submit_ts, queue_level);
@@ -1378,13 +1411,15 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
if (datapath_polling)
schedule_work(&dbc->poll_work);
+unlock_req_lock:
+ if (ret)
+ mutex_unlock(&dbc->req_lock);
release_ch_rcu:
srcu_read_unlock(&dbc->ch_lock, rcu_id);
unlock_dev_srcu:
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
-free_exec:
kfree(exec);
return ret;
}
@@ -1479,7 +1514,7 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
return IRQ_WAKE_THREAD;
}
-void irq_polling_work(struct work_struct *work)
+void qaic_irq_polling_work(struct work_struct *work)
{
struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
unsigned long flags;
@@ -1697,6 +1732,11 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
goto unlock_ch_srcu;
}
+ if (dbc->id == qdev->ssr_dbc) {
+ ret = -EPIPE;
+ goto unlock_ch_srcu;
+ }
+
obj = drm_gem_object_lookup(file_priv, args->handle);
if (!obj) {
ret = -ENOENT;
@@ -1717,6 +1757,9 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
if (!dbc->usr)
ret = -EPERM;
+ if (dbc->id == qdev->ssr_dbc)
+ ret = -EPIPE;
+
put_obj:
drm_gem_object_put(obj);
unlock_ch_srcu:
@@ -1737,7 +1780,8 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
struct qaic_device *qdev;
struct qaic_user *usr;
struct qaic_bo *bo;
- int ret, i;
+ int ret = 0;
+ int i;
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1758,18 +1802,12 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto unlock_dev_srcu;
}
- ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL);
- if (!ent) {
- ret = -EINVAL;
+ ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
goto unlock_dev_srcu;
}
- ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent));
- if (ret) {
- ret = -EFAULT;
- goto free_ent;
- }
-
for (i = 0; i < args->hdr.count; i++) {
obj = drm_gem_object_lookup(file_priv, ent[i].handle);
if (!obj) {
@@ -1777,6 +1815,16 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
goto free_ent;
}
bo = to_qaic_bo(obj);
+ if (!bo->sliced) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+ if (bo->dbc->id != args->hdr.dbc_id) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
/*
* perf stats ioctl is called before wait ioctl is complete then
* the latency information is invalid.
@@ -1915,6 +1963,17 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
}
+static void sync_empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
+{
+ empty_xfer_list(qdev, dbc);
+ synchronize_srcu(&dbc->ch_lock);
+ /*
+ * Threads holding channel lock, may add more elements in the xfer_list.
+ * Flush out these elements from xfer_list.
+ */
+ empty_xfer_list(qdev, dbc);
+}
+
int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
{
if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
@@ -1929,7 +1988,7 @@ int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
* enable_dbc - Enable the DBC. DBCs are disabled by removing the context of
* user. Add user context back to DBC to enable it. This function trusts the
* DBC ID passed and expects the DBC to be disabled.
- * @qdev: Qranium device handle
+ * @qdev: qaic device handle
* @dbc_id: ID of the DBC
* @usr: User context
*/
@@ -1943,13 +2002,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
dbc->usr = NULL;
- empty_xfer_list(qdev, dbc);
- synchronize_srcu(&dbc->ch_lock);
- /*
- * Threads holding channel lock, may add more elements in the xfer_list.
- * Flush out these elements from xfer_list.
- */
- empty_xfer_list(qdev, dbc);
+ sync_empty_xfer_list(qdev, dbc);
}
void release_dbc(struct qaic_device *qdev, u32 dbc_id)
@@ -1990,3 +2043,30 @@ void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
*head = readl(dbc->dbc_base + REQHP_OFF);
*tail = readl(dbc->dbc_base + REQTP_OFF);
}
+
+/*
+ * qaic_dbc_enter_ssr - Prepare to enter in sub system reset(SSR) for given DBC ID.
+ * @qdev: qaic device handle
+ * @dbc_id: ID of the DBC which will enter SSR
+ *
+ * The device will automatically deactivate the workload as not
+ * all errors can be silently recovered. The user will be
+ * notified and will need to decide the required recovery
+ * action to take.
+ */
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id)
+{
+ qdev->ssr_dbc = dbc_id;
+ release_dbc(qdev, dbc_id);
+}
+
+/*
+ * qaic_dbc_exit_ssr - Prepare to exit from sub system reset(SSR) for given DBC ID.
+ * @qdev: qaic device handle
+ *
+ * The DBC returns to an operational state and begins accepting work after exiting SSR.
+ */
+void qaic_dbc_exit_ssr(struct qaic_device *qdev)
+{
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+}
diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c
index 20b653d99e52..8dc4fe5bb560 100644
--- a/drivers/accel/qaic/qaic_debugfs.c
+++ b/drivers/accel/qaic/qaic_debugfs.c
@@ -64,20 +64,9 @@ static int bootlog_show(struct seq_file *s, void *unused)
return 0;
}
-static int bootlog_fops_open(struct inode *inode, struct file *file)
-{
- return single_open(file, bootlog_show, inode->i_private);
-}
+DEFINE_SHOW_ATTRIBUTE(bootlog);
-static const struct file_operations bootlog_fops = {
- .owner = THIS_MODULE,
- .open = bootlog_fops_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int read_dbc_fifo_size(struct seq_file *s, void *unused)
+static int fifo_size_show(struct seq_file *s, void *unused)
{
struct dma_bridge_chan *dbc = s->private;
@@ -85,20 +74,9 @@ static int read_dbc_fifo_size(struct seq_file *s, void *unused)
return 0;
}
-static int fifo_size_open(struct inode *inode, struct file *file)
-{
- return single_open(file, read_dbc_fifo_size, inode->i_private);
-}
-
-static const struct file_operations fifo_size_fops = {
- .owner = THIS_MODULE,
- .open = fifo_size_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(fifo_size);
-static int read_dbc_queued(struct seq_file *s, void *unused)
+static int queued_show(struct seq_file *s, void *unused)
{
struct dma_bridge_chan *dbc = s->private;
u32 tail = 0, head = 0;
@@ -115,18 +93,7 @@ static int read_dbc_queued(struct seq_file *s, void *unused)
return 0;
}
-static int queued_open(struct inode *inode, struct file *file)
-{
- return single_open(file, read_dbc_queued, inode->i_private);
-}
-
-static const struct file_operations queued_fops = {
- .owner = THIS_MODULE,
- .open = queued_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(queued);
void qaic_debugfs_init(struct qaic_drm_device *qddev)
{
@@ -251,6 +218,9 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d
if (ret)
goto destroy_workqueue;
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->bootlog_ch = mhi_dev;
+
for (i = 0; i < BOOTLOG_POOL_SIZE; i++) {
msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL);
if (!msg) {
@@ -266,14 +236,11 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d
goto mhi_unprepare;
}
- dev_set_drvdata(&mhi_dev->dev, qdev);
- qdev->bootlog_ch = mhi_dev;
return 0;
mhi_unprepare:
mhi_unprepare_from_transfer(mhi_dev);
destroy_workqueue:
- flush_workqueue(qdev->bootlog_wq);
destroy_workqueue(qdev->bootlog_wq);
out:
return ret;
@@ -286,7 +253,6 @@ static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev)
qdev = dev_get_drvdata(&mhi_dev->dev);
mhi_unprepare_from_transfer(qdev->bootlog_ch);
- flush_workqueue(qdev->bootlog_wq);
destroy_workqueue(qdev->bootlog_wq);
qdev->bootlog_ch = NULL;
}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 580b29ed1902..4c70bd949d53 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -29,17 +29,53 @@
#include "mhi_controller.h"
#include "qaic.h"
#include "qaic_debugfs.h"
+#include "qaic_ras.h"
+#include "qaic_ssr.h"
#include "qaic_timesync.h"
#include "sahara.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
-#define PCI_DEV_AIC100 0xa100
+#define PCI_DEVICE_ID_QCOM_AIC080 0xa080
+#define PCI_DEVICE_ID_QCOM_AIC100 0xa100
+#define PCI_DEVICE_ID_QCOM_AIC200 0xa110
#define QAIC_NAME "qaic"
#define QAIC_DESC "Qualcomm Cloud AI Accelerators"
#define CNTL_MAJOR 5
#define CNTL_MINOR 0
+struct qaic_device_config {
+ /* Indicates the AIC family the device belongs to */
+ int family;
+ /* A bitmask representing the available BARs */
+ int bar_mask;
+ /* An index value used to identify the MHI controller BAR */
+ unsigned int mhi_bar_idx;
+ /* An index value used to identify the DBCs BAR */
+ unsigned int dbc_bar_idx;
+};
+
+static const struct qaic_device_config aic080_config = {
+ .family = FAMILY_AIC100,
+ .bar_mask = BIT(0) | BIT(2) | BIT(4),
+ .mhi_bar_idx = 0,
+ .dbc_bar_idx = 2,
+};
+
+static const struct qaic_device_config aic100_config = {
+ .family = FAMILY_AIC100,
+ .bar_mask = BIT(0) | BIT(2) | BIT(4),
+ .mhi_bar_idx = 0,
+ .dbc_bar_idx = 2,
+};
+
+static const struct qaic_device_config aic200_config = {
+ .family = FAMILY_AIC200,
+ .bar_mask = BIT(0) | BIT(1) | BIT(2) | BIT(4),
+ .mhi_bar_idx = 1,
+ .dbc_bar_idx = 2,
+};
+
bool datapath_polling;
module_param(datapath_polling, bool, 0400);
MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
@@ -53,12 +89,12 @@ static void qaicm_wq_release(struct drm_device *dev, void *res)
destroy_workqueue(wq);
}
-static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *fmt)
+static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *name)
{
struct workqueue_struct *wq;
int ret;
- wq = alloc_workqueue(fmt, WQ_UNBOUND, 0);
+ wq = alloc_workqueue("%s", WQ_UNBOUND, 0, name);
if (!wq)
return ERR_PTR(-ENOMEM);
ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq);
@@ -207,7 +243,6 @@ static const struct drm_driver qaic_accel_driver = {
.name = QAIC_NAME,
.desc = QAIC_DESC,
- .date = "20190618",
.fops = &qaic_accel_fops,
.open = qaic_open,
@@ -236,6 +271,13 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
return ret;
}
+ ret = qaic_sysfs_init(qddev);
+ if (ret) {
+ drm_dev_unregister(drm);
+ pci_dbg(qdev->pdev, "qaic_sysfs_init failed %d\n", ret);
+ return ret;
+ }
+
qaic_debugfs_init(qddev);
return ret;
@@ -247,6 +289,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
struct drm_device *drm = to_drm(qddev);
struct qaic_user *usr;
+ qaic_sysfs_remove(qddev);
drm_dev_unregister(drm);
qddev->partition_id = 0;
/*
@@ -348,11 +391,13 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
qaic_notify_reset(qdev);
/* start tearing things down */
+ qaic_clean_up_ssr(qdev);
for (i = 0; i < qdev->num_dbc; ++i)
release_dbc(qdev, i);
}
-static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
+static struct qaic_device *create_qdev(struct pci_dev *pdev,
+ const struct qaic_device_config *config)
{
struct device *dev = &pdev->dev;
struct qaic_drm_device *qddev;
@@ -365,12 +410,10 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
return NULL;
qdev->dev_state = QAIC_OFFLINE;
- if (id->device == PCI_DEV_AIC100) {
- qdev->num_dbc = 16;
- qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
- if (!qdev->dbc)
- return NULL;
- }
+ qdev->num_dbc = 16;
+ qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
+ if (!qdev->dbc)
+ return NULL;
qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
if (IS_ERR(qddev))
@@ -398,11 +441,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts");
if (IS_ERR(qdev->qts_wq))
return NULL;
+ qdev->ssr_wq = qaicm_wq_init(drm, "qaic_ssr");
+ if (IS_ERR(qdev->ssr_wq))
+ return NULL;
ret = qaicm_srcu_init(drm, &qdev->dev_lock);
if (ret)
return NULL;
+ ret = qaic_ssr_init(qdev, drm);
+ if (ret)
+ pci_info(pdev, "QAIC SSR crashdump collection not supported.\n");
+
qdev->qddev = qddev;
qdev->pdev = pdev;
qddev->qdev = qdev;
@@ -421,22 +471,26 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
return NULL;
init_waitqueue_head(&qdev->dbc[i].dbc_release);
INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
+ ret = drmm_mutex_init(drm, &qdev->dbc[i].req_lock);
+ if (ret)
+ return NULL;
}
return qdev;
}
-static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
+static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev,
+ const struct qaic_device_config *config)
{
int bars;
int ret;
- bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ bars = pci_select_bars(pdev, IORESOURCE_MEM) & 0x3f;
/* make sure the device has the expected BARs */
- if (bars != (BIT(0) | BIT(2) | BIT(4))) {
- pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in device. Found 0x%x\n",
- __func__, bars);
+ if (bars != config->bar_mask) {
+ pci_dbg(pdev, "%s: expected BARs %#x not found in device. Found %#x\n",
+ __func__, config->bar_mask, bars);
return -EINVAL;
}
@@ -447,17 +501,15 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (ret)
return ret;
- ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX);
- if (ret)
- return ret;
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
- qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]);
- if (IS_ERR(qdev->bar_0))
- return PTR_ERR(qdev->bar_0);
+ qdev->bar_mhi = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->mhi_bar_idx]);
+ if (IS_ERR(qdev->bar_mhi))
+ return PTR_ERR(qdev->bar_mhi);
- qdev->bar_2 = devm_ioremap_resource(&pdev->dev, &pdev->resource[2]);
- if (IS_ERR(qdev->bar_2))
- return PTR_ERR(qdev->bar_2);
+ qdev->bar_dbc = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->dbc_bar_idx]);
+ if (IS_ERR(qdev->bar_dbc))
+ return PTR_ERR(qdev->bar_dbc);
/* Managed release since we use pcim_enable_device above */
pci_set_master(pdev);
@@ -467,14 +519,15 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
{
+ int irq_count = qdev->num_dbc + 1;
int mhi_irq;
int ret;
int i;
/* Managed release since we use pcim_enable_device */
- ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
+ ret = pci_alloc_irq_vectors(pdev, irq_count, irq_count, PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (ret == -ENOSPC) {
- ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (ret < 0)
return ret;
@@ -487,7 +540,8 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
* interrupted, it shouldn't race with itself.
*/
qdev->single_msi = true;
- pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n");
+ pci_info(pdev, "Allocating %d MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n",
+ irq_count);
} else if (ret < 0) {
return ret;
}
@@ -508,7 +562,7 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
if (!qdev->single_msi)
disable_irq_nosync(qdev->dbc[i].irq);
- INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
+ INIT_WORK(&qdev->dbc[i].poll_work, qaic_irq_polling_work);
}
}
@@ -517,21 +571,22 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct qaic_device_config *config = (struct qaic_device_config *)id->driver_data;
struct qaic_device *qdev;
int mhi_irq;
int ret;
int i;
- qdev = create_qdev(pdev, id);
+ qdev = create_qdev(pdev, config);
if (!qdev)
return -ENOMEM;
- ret = init_pci(qdev, pdev);
+ ret = init_pci(qdev, pdev, config);
if (ret)
return ret;
for (i = 0; i < qdev->num_dbc; ++i)
- qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
+ qdev->dbc[i].dbc_base = qdev->bar_dbc + QAIC_DBC_OFF(i);
mhi_irq = init_msi(qdev, pdev);
if (mhi_irq < 0)
@@ -541,8 +596,8 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
return ret;
- qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
- qdev->single_msi);
+ qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_mhi, mhi_irq,
+ qdev->single_msi, config->family);
if (IS_ERR(qdev->mhi_cntrl)) {
ret = PTR_ERR(qdev->mhi_cntrl);
qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
@@ -609,7 +664,9 @@ static struct mhi_driver qaic_mhi_driver = {
};
static const struct pci_device_id qaic_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), },
+ { PCI_DEVICE_DATA(QCOM, AIC080, (kernel_ulong_t)&aic080_config), },
+ { PCI_DEVICE_DATA(QCOM, AIC100, (kernel_ulong_t)&aic100_config), },
+ { PCI_DEVICE_DATA(QCOM, AIC200, (kernel_ulong_t)&aic200_config), },
{ }
};
MODULE_DEVICE_TABLE(pci, qaic_ids);
@@ -620,6 +677,92 @@ static const struct pci_error_handlers qaic_pci_err_handler = {
.reset_done = qaic_pci_reset_done,
};
+static bool qaic_is_under_reset(struct qaic_device *qdev)
+{
+ int rcu_id;
+ bool ret;
+
+ rcu_id = srcu_read_lock(&qdev->dev_lock);
+ ret = qdev->dev_state != QAIC_ONLINE;
+ srcu_read_unlock(&qdev->dev_lock, rcu_id);
+ return ret;
+}
+
+static bool qaic_data_path_busy(struct qaic_device *qdev)
+{
+ bool ret = false;
+ int dev_rcu_id;
+ int i;
+
+ dev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->dev_state != QAIC_ONLINE) {
+ srcu_read_unlock(&qdev->dev_lock, dev_rcu_id);
+ return false;
+ }
+ for (i = 0; i < qdev->num_dbc; i++) {
+ struct dma_bridge_chan *dbc = &qdev->dbc[i];
+ unsigned long flags;
+ int ch_rcu_id;
+
+ ch_rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (!dbc->usr || !dbc->in_use) {
+ srcu_read_unlock(&dbc->ch_lock, ch_rcu_id);
+ continue;
+ }
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ ret = !list_empty(&dbc->xfer_list);
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ srcu_read_unlock(&dbc->ch_lock, ch_rcu_id);
+ if (ret)
+ break;
+ }
+ srcu_read_unlock(&qdev->dev_lock, dev_rcu_id);
+ return ret;
+}
+
+static int qaic_pm_suspend(struct device *dev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ dev_dbg(dev, "Suspending..\n");
+ if (qaic_data_path_busy(qdev)) {
+ dev_dbg(dev, "Device's datapath is busy. Aborting suspend..\n");
+ return -EBUSY;
+ }
+ if (qaic_is_under_reset(qdev)) {
+ dev_dbg(dev, "Device is under reset. Aborting suspend..\n");
+ return -EBUSY;
+ }
+ qaic_mqts_ch_stop_timer(qdev->mqts_ch);
+ qaic_pci_reset_prepare(qdev->pdev);
+ pci_save_state(qdev->pdev);
+ pci_disable_device(qdev->pdev);
+ pci_set_power_state(qdev->pdev, PCI_D3hot);
+ return 0;
+}
+
+static int qaic_pm_resume(struct device *dev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+ int ret;
+
+ dev_dbg(dev, "Resuming..\n");
+ pci_set_power_state(qdev->pdev, PCI_D0);
+ pci_restore_state(qdev->pdev);
+ ret = pci_enable_device(qdev->pdev);
+ if (ret) {
+ dev_err(dev, "pci_enable_device failed on resume %d\n", ret);
+ return ret;
+ }
+ pci_set_master(qdev->pdev);
+ qaic_pci_reset_done(qdev->pdev);
+ return 0;
+}
+
+static const struct dev_pm_ops qaic_pm_ops = {
+ SYSTEM_SLEEP_PM_OPS(qaic_pm_suspend, qaic_pm_resume)
+};
+
static struct pci_driver qaic_pci_driver = {
.name = QAIC_NAME,
.id_table = qaic_ids,
@@ -627,6 +770,9 @@ static struct pci_driver qaic_pci_driver = {
.remove = qaic_pci_remove,
.shutdown = qaic_pci_shutdown,
.err_handler = &qaic_pci_err_handler,
+ .driver = {
+ .pm = pm_sleep_ptr(&qaic_pm_ops),
+ },
};
static int __init qaic_init(void)
@@ -659,8 +805,19 @@ static int __init qaic_init(void)
if (ret)
pr_debug("qaic: qaic_bootlog_register failed %d\n", ret);
+ ret = qaic_ras_register();
+ if (ret)
+ pr_debug("qaic: qaic_ras_register failed %d\n", ret);
+ ret = qaic_ssr_register();
+ if (ret) {
+ pr_debug("qaic: qaic_ssr_register failed %d\n", ret);
+ goto free_bootlog;
+ }
+
return 0;
+free_bootlog:
+ qaic_bootlog_unregister();
free_mhi:
mhi_driver_unregister(&qaic_mhi_driver);
free_pci:
@@ -686,6 +843,8 @@ static void __exit qaic_exit(void)
* reinitializing the link_up state after the cleanup is done.
*/
link_up = true;
+ qaic_ssr_unregister();
+ qaic_ras_unregister();
qaic_bootlog_unregister();
qaic_timesync_deinit();
sahara_unregister();
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c
new file mode 100644
index 000000000000..f1d52a710136
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ras.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
+
+#include <asm/byteorder.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+
+#include "qaic.h"
+#include "qaic_ras.h"
+
+#define MAGIC 0x55AA
+#define VERSION 0x2
+#define HDR_SZ 12
+#define NUM_TEMP_LVL 3
+#define POWER_BREAK BIT(0)
+
+enum msg_type {
+ MSG_PUSH, /* async push from device */
+ MSG_REQ, /* sync request to device */
+ MSG_RESP, /* sync response from device */
+};
+
+enum err_type {
+ CE, /* correctable error */
+ UE, /* uncorrectable error */
+ UE_NF, /* uncorrectable error that is non-fatal, expect a disruption */
+ ERR_TYPE_MAX,
+};
+
+static const char * const err_type_str[] = {
+ [CE] = "Correctable",
+ [UE] = "Uncorrectable",
+ [UE_NF] = "Uncorrectable Non-Fatal",
+};
+
+static const char * const err_class_str[] = {
+ [CE] = "Warning",
+ [UE] = "Fatal",
+ [UE_NF] = "Warning",
+};
+
+enum err_source {
+ SOC_MEM,
+ PCIE,
+ DDR,
+ SYS_BUS1,
+ SYS_BUS2,
+ NSP_MEM,
+ TSENS,
+};
+
+static const char * const err_src_str[TSENS + 1] = {
+ [SOC_MEM] = "SoC Memory",
+ [PCIE] = "PCIE",
+ [DDR] = "DDR",
+ [SYS_BUS1] = "System Bus source 1",
+ [SYS_BUS2] = "System Bus source 2",
+ [NSP_MEM] = "NSP Memory",
+ [TSENS] = "Temperature Sensors",
+};
+
+struct ras_data {
+ /* header start */
+ /* Magic number to validate the message */
+ u16 magic;
+ /* RAS version number */
+ u16 ver;
+ u32 seq_num;
+ /* RAS message type */
+ u8 type;
+ u8 id;
+ /* Size of RAS message without the header in byte */
+ u16 len;
+ /* header end */
+ s32 result;
+ /*
+ * Error source
+ * 0 : SoC Memory
+ * 1 : PCIE
+ * 2 : DDR
+ * 3 : System Bus source 1
+ * 4 : System Bus source 2
+ * 5 : NSP Memory
+ * 6 : Temperature Sensors
+ */
+ u32 source;
+ /*
+ * Stores the error type, there are three types of error in RAS
+ * 0 : correctable error (CE)
+ * 1 : uncorrectable error (UE)
+ * 2 : uncorrectable error that is non-fatal (UE_NF)
+ */
+ u32 err_type;
+ u32 err_threshold;
+ u32 ce_count;
+ u32 ue_count;
+ u32 intr_num;
+ /* Data specific to error source */
+ u8 syndrome[64];
+} __packed;
+
+struct soc_mem_syndrome {
+ u64 error_address[8];
+} __packed;
+
+struct nsp_mem_syndrome {
+ u32 error_address[8];
+ u8 nsp_id;
+} __packed;
+
+struct ddr_syndrome {
+ u32 count;
+ u32 irq_status;
+ u32 data_31_0[2];
+ u32 data_63_32[2];
+ u32 data_95_64[2];
+ u32 data_127_96[2];
+ u32 addr_lsb;
+ u16 addr_msb;
+ u16 parity_bits;
+ u16 instance;
+ u16 err_type;
+} __packed;
+
+struct tsens_syndrome {
+ u32 threshold_type;
+ s32 temp;
+} __packed;
+
+struct sysbus1_syndrome {
+ u32 slave;
+ u32 err_type;
+ u16 addr[8];
+ u8 instance;
+} __packed;
+
+struct sysbus2_syndrome {
+ u32 lsb3;
+ u32 msb3;
+ u32 lsb2;
+ u32 msb2;
+ u32 ext_id;
+ u16 path;
+ u16 op_type;
+ u16 len;
+ u16 redirect;
+ u8 valid;
+ u8 word_error;
+ u8 non_secure;
+ u8 opc;
+ u8 error_code;
+ u8 trans_type;
+ u8 addr_space;
+ u8 instance;
+} __packed;
+
+struct pcie_syndrome {
+ /* CE info */
+ u32 bad_tlp;
+ u32 bad_dllp;
+ u32 replay_rollover;
+ u32 replay_timeout;
+ u32 rx_err;
+ u32 internal_ce_count;
+ /* UE_NF info */
+ u32 fc_timeout;
+ u32 poison_tlp;
+ u32 ecrc_err;
+ u32 unsupported_req;
+ u32 completer_abort;
+ u32 completion_timeout;
+ /* UE info */
+ u32 addr;
+ u8 index;
+ /*
+ * Flag to indicate specific event of PCIe
+ * BIT(0): Power break (low power)
+ * BIT(1) to BIT(7): Reserved
+ */
+ u8 flag;
+} __packed;
+
+static const char * const threshold_type_str[NUM_TEMP_LVL] = {
+ [0] = "lower",
+ [1] = "upper",
+ [2] = "critical",
+};
+
+static void ras_msg_to_cpu(struct ras_data *msg)
+{
+ struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
+ struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
+ struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
+ struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
+ struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
+ struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
+ struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
+ int i;
+
+ le16_to_cpus(&msg->magic);
+ le16_to_cpus(&msg->ver);
+ le32_to_cpus(&msg->seq_num);
+ le16_to_cpus(&msg->len);
+ le32_to_cpus(&msg->result);
+ le32_to_cpus(&msg->source);
+ le32_to_cpus(&msg->err_type);
+ le32_to_cpus(&msg->err_threshold);
+ le32_to_cpus(&msg->ce_count);
+ le32_to_cpus(&msg->ue_count);
+ le32_to_cpus(&msg->intr_num);
+
+ switch (msg->source) {
+ case SOC_MEM:
+ for (i = 0; i < 8; i++)
+ le64_to_cpus(&soc_syndrome->error_address[i]);
+ break;
+ case PCIE:
+ le32_to_cpus(&pcie_syndrome->bad_tlp);
+ le32_to_cpus(&pcie_syndrome->bad_dllp);
+ le32_to_cpus(&pcie_syndrome->replay_rollover);
+ le32_to_cpus(&pcie_syndrome->replay_timeout);
+ le32_to_cpus(&pcie_syndrome->rx_err);
+ le32_to_cpus(&pcie_syndrome->internal_ce_count);
+ le32_to_cpus(&pcie_syndrome->fc_timeout);
+ le32_to_cpus(&pcie_syndrome->poison_tlp);
+ le32_to_cpus(&pcie_syndrome->ecrc_err);
+ le32_to_cpus(&pcie_syndrome->unsupported_req);
+ le32_to_cpus(&pcie_syndrome->completer_abort);
+ le32_to_cpus(&pcie_syndrome->completion_timeout);
+ le32_to_cpus(&pcie_syndrome->addr);
+ break;
+ case DDR:
+ le16_to_cpus(&ddr_syndrome->instance);
+ le16_to_cpus(&ddr_syndrome->err_type);
+ le32_to_cpus(&ddr_syndrome->count);
+ le32_to_cpus(&ddr_syndrome->irq_status);
+ le32_to_cpus(&ddr_syndrome->data_31_0[0]);
+ le32_to_cpus(&ddr_syndrome->data_31_0[1]);
+ le32_to_cpus(&ddr_syndrome->data_63_32[0]);
+ le32_to_cpus(&ddr_syndrome->data_63_32[1]);
+ le32_to_cpus(&ddr_syndrome->data_95_64[0]);
+ le32_to_cpus(&ddr_syndrome->data_95_64[1]);
+ le32_to_cpus(&ddr_syndrome->data_127_96[0]);
+ le32_to_cpus(&ddr_syndrome->data_127_96[1]);
+ le16_to_cpus(&ddr_syndrome->parity_bits);
+ le16_to_cpus(&ddr_syndrome->addr_msb);
+ le32_to_cpus(&ddr_syndrome->addr_lsb);
+ break;
+ case SYS_BUS1:
+ le32_to_cpus(&sysbus1_syndrome->slave);
+ le32_to_cpus(&sysbus1_syndrome->err_type);
+ for (i = 0; i < 8; i++)
+ le16_to_cpus(&sysbus1_syndrome->addr[i]);
+ break;
+ case SYS_BUS2:
+ le16_to_cpus(&sysbus2_syndrome->op_type);
+ le16_to_cpus(&sysbus2_syndrome->len);
+ le16_to_cpus(&sysbus2_syndrome->redirect);
+ le16_to_cpus(&sysbus2_syndrome->path);
+ le32_to_cpus(&sysbus2_syndrome->ext_id);
+ le32_to_cpus(&sysbus2_syndrome->lsb2);
+ le32_to_cpus(&sysbus2_syndrome->msb2);
+ le32_to_cpus(&sysbus2_syndrome->lsb3);
+ le32_to_cpus(&sysbus2_syndrome->msb3);
+ break;
+ case NSP_MEM:
+ for (i = 0; i < 8; i++)
+ le32_to_cpus(&nsp_syndrome->error_address[i]);
+ break;
+ case TSENS:
+ le32_to_cpus(&tsens_syndrome->threshold_type);
+ le32_to_cpus(&tsens_syndrome->temp);
+ break;
+ }
+}
+
+static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg)
+{
+ struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
+ struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
+ struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
+ struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
+ struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
+ struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
+ struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
+ char *class;
+ char *level;
+
+ if (msg->magic != MAGIC) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic);
+ return;
+ }
+
+ if (!msg->ver || msg->ver > VERSION) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver);
+ return;
+ }
+
+ if (msg->type != MSG_PUSH) {
+ pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n");
+ return;
+ }
+
+ if (msg->len != sizeof(*msg) - HDR_SZ) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len);
+ return;
+ }
+
+ if (msg->err_type >= ERR_TYPE_MAX) {
+ pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type);
+ return;
+ }
+
+ if (msg->err_type == UE)
+ level = KERN_ERR;
+ else
+ level = KERN_WARNING;
+
+ switch (msg->source) {
+ case SOC_MEM:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ soc_syndrome->error_address[0],
+ soc_syndrome->error_address[1],
+ soc_syndrome->error_address[2],
+ soc_syndrome->error_address[3],
+ soc_syndrome->error_address[4],
+ soc_syndrome->error_address[5],
+ soc_syndrome->error_address[6],
+ soc_syndrome->error_address[7]);
+ break;
+ case PCIE:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold);
+
+ switch (msg->err_type) {
+ case CE:
+ /*
+ * Modeled after AER prints. This continues the dev_printk() from a few
+ * lines up. We reduce duplication of code, but also avoid re-printing the
+ * PCI device info so that the end result looks uniform to the log user.
+ */
+ printk(KERN_WARNING pr_fmt("Syndrome:\n Bad TLP count %d\n Bad DLLP count %d\n Replay Rollover count %d\n Replay Timeout count %d\n Recv Error count %d\n Internal CE count %d\n"),
+ pcie_syndrome->bad_tlp,
+ pcie_syndrome->bad_dllp,
+ pcie_syndrome->replay_rollover,
+ pcie_syndrome->replay_timeout,
+ pcie_syndrome->rx_err,
+ pcie_syndrome->internal_ce_count);
+ if (msg->ver > 0x1)
+ pr_warn(" Power break %s\n",
+ pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF");
+ break;
+ case UE:
+ printk(KERN_ERR pr_fmt("Syndrome:\n Index %d\n Address 0x%x\n"),
+ pcie_syndrome->index, pcie_syndrome->addr);
+ break;
+ case UE_NF:
+ printk(KERN_WARNING pr_fmt("Syndrome:\n FC timeout count %d\n Poisoned TLP count %d\n ECRC error count %d\n Unsupported request count %d\n Completer abort count %d\n Completion timeout count %d\n"),
+ pcie_syndrome->fc_timeout,
+ pcie_syndrome->poison_tlp,
+ pcie_syndrome->ecrc_err,
+ pcie_syndrome->unsupported_req,
+ pcie_syndrome->completer_abort,
+ pcie_syndrome->completion_timeout);
+ break;
+ default:
+ break;
+ }
+ break;
+ case DDR:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n Instance %d\n Count %d\n Data 31_0 0x%x 0x%x\n Data 63_32 0x%x 0x%x\n Data 95_64 0x%x 0x%x\n Data 127_96 0x%x 0x%x\n Parity bits 0x%x\n Address msb 0x%x\n Address lsb 0x%x\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ ddr_syndrome->instance,
+ ddr_syndrome->count,
+ ddr_syndrome->data_31_0[1],
+ ddr_syndrome->data_31_0[0],
+ ddr_syndrome->data_63_32[1],
+ ddr_syndrome->data_63_32[0],
+ ddr_syndrome->data_95_64[1],
+ ddr_syndrome->data_95_64[0],
+ ddr_syndrome->data_127_96[1],
+ ddr_syndrome->data_127_96[0],
+ ddr_syndrome->parity_bits,
+ ddr_syndrome->addr_msb,
+ ddr_syndrome->addr_lsb);
+ break;
+ case SYS_BUS1:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n %s\n err_type %d\n address0 0x%x\n address1 0x%x\n address2 0x%x\n address3 0x%x\n address4 0x%x\n address5 0x%x\n address6 0x%x\n address7 0x%x\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ sysbus1_syndrome->instance,
+ sysbus1_syndrome->slave ? "Slave" : "Master",
+ sysbus1_syndrome->err_type,
+ sysbus1_syndrome->addr[0],
+ sysbus1_syndrome->addr[1],
+ sysbus1_syndrome->addr[2],
+ sysbus1_syndrome->addr[3],
+ sysbus1_syndrome->addr[4],
+ sysbus1_syndrome->addr[5],
+ sysbus1_syndrome->addr[6],
+ sysbus1_syndrome->addr[7]);
+ break;
+ case SYS_BUS2:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n valid %d\n word error %d\n non-secure %d\n opc %d\n error code %d\n transaction type %d\n address space %d\n operation type %d\n len %d\n redirect %d\n path %d\n ext_id %d\n lsb2 %d\n msb2 %d\n lsb3 %d\n msb3 %d\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ sysbus2_syndrome->instance,
+ sysbus2_syndrome->valid,
+ sysbus2_syndrome->word_error,
+ sysbus2_syndrome->non_secure,
+ sysbus2_syndrome->opc,
+ sysbus2_syndrome->error_code,
+ sysbus2_syndrome->trans_type,
+ sysbus2_syndrome->addr_space,
+ sysbus2_syndrome->op_type,
+ sysbus2_syndrome->len,
+ sysbus2_syndrome->redirect,
+ sysbus2_syndrome->path,
+ sysbus2_syndrome->ext_id,
+ sysbus2_syndrome->lsb2,
+ sysbus2_syndrome->msb2,
+ sysbus2_syndrome->lsb3,
+ sysbus2_syndrome->msb3);
+ break;
+ case NSP_MEM:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n NSP ID %d\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ nsp_syndrome->nsp_id,
+ nsp_syndrome->error_address[0],
+ nsp_syndrome->error_address[1],
+ nsp_syndrome->error_address[2],
+ nsp_syndrome->error_address[3],
+ nsp_syndrome->error_address[4],
+ nsp_syndrome->error_address[5],
+ nsp_syndrome->error_address[6],
+ nsp_syndrome->error_address[7]);
+ break;
+ case TSENS:
+ if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n",
+ tsens_syndrome->threshold_type);
+ break;
+ }
+
+ if (msg->err_type)
+ class = "Fatal";
+ else if (tsens_syndrome->threshold_type)
+ class = "Critical";
+ else
+ class = "Warning";
+
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n %s threshold\n %d deg C\n",
+ class,
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ threshold_type_str[tsens_syndrome->threshold_type],
+ tsens_syndrome->temp);
+ break;
+ }
+
+ /* Uncorrectable errors are fatal */
+ if (msg->err_type == UE)
+ mhi_soc_reset(qdev->mhi_cntrl);
+
+ switch (msg->err_type) {
+ case CE:
+ if (qdev->ce_count != UINT_MAX)
+ qdev->ce_count++;
+ break;
+ case UE:
+ if (qdev->ce_count != UINT_MAX)
+ qdev->ue_count++;
+ break;
+ case UE_NF:
+ if (qdev->ce_count != UINT_MAX)
+ qdev->ue_nf_count++;
+ break;
+ default:
+ /* not possible */
+ break;
+ }
+}
+
+static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ return sysfs_emit(buf, "%d\n", qdev->ce_count);
+}
+
+static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ return sysfs_emit(buf, "%d\n", qdev->ue_count);
+}
+
+static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ return sysfs_emit(buf, "%d\n", qdev->ue_nf_count);
+}
+
+static DEVICE_ATTR_RO(ce_count);
+static DEVICE_ATTR_RO(ue_count);
+static DEVICE_ATTR_RO(ue_nonfatal_count);
+
+static struct attribute *ras_attrs[] = {
+ &dev_attr_ce_count.attr,
+ &dev_attr_ue_count.attr,
+ &dev_attr_ue_nonfatal_count.attr,
+ NULL,
+};
+
+static struct attribute_group ras_group = {
+ .attrs = ras_attrs,
+};
+
+static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+ struct ras_data *resp;
+ int ret;
+
+ ret = mhi_prepare_for_transfer(mhi_dev);
+ if (ret)
+ return ret;
+
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ if (!resp) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ return -ENOMEM;
+ }
+
+ ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT);
+ if (ret) {
+ kfree(resp);
+ mhi_unprepare_from_transfer(mhi_dev);
+ return ret;
+ }
+
+ ret = device_add_group(&qdev->pdev->dev, &ras_group);
+ if (ret) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret);
+ return ret;
+ }
+
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->ras_ch = mhi_dev;
+
+ return ret;
+}
+
+static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev)
+{
+ struct qaic_device *qdev;
+
+ qdev = dev_get_drvdata(&mhi_dev->dev);
+ qdev->ras_ch = NULL;
+ device_remove_group(&qdev->pdev->dev, &ras_group);
+ mhi_unprepare_from_transfer(mhi_dev);
+}
+
+static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {}
+
+static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ras_data *msg = mhi_result->buf_addr;
+ int ret;
+
+ if (mhi_result->transaction_status) {
+ kfree(msg);
+ return;
+ }
+
+ ras_msg_to_cpu(msg);
+ decode_ras_msg(qdev, msg);
+
+ ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT);
+ if (ret) {
+ dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret);
+ kfree(msg);
+ }
+}
+
+static const struct mhi_device_id qaic_ras_mhi_match_table[] = {
+ { .chan = "QAIC_STATUS", },
+ {},
+};
+
+static struct mhi_driver qaic_ras_mhi_driver = {
+ .id_table = qaic_ras_mhi_match_table,
+ .remove = qaic_ras_mhi_remove,
+ .probe = qaic_ras_mhi_probe,
+ .ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb,
+ .dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb,
+ .driver = {
+ .name = "qaic_ras",
+ },
+};
+
+int qaic_ras_register(void)
+{
+ return mhi_driver_register(&qaic_ras_mhi_driver);
+}
+
+void qaic_ras_unregister(void)
+{
+ mhi_driver_unregister(&qaic_ras_mhi_driver);
+}
diff --git a/drivers/accel/qaic/qaic_ras.h b/drivers/accel/qaic/qaic_ras.h
new file mode 100644
index 000000000000..d44a4eeeb060
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ras.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */
+
+#ifndef __QAIC_RAS_H__
+#define __QAIC_RAS_H__
+
+int qaic_ras_register(void);
+void qaic_ras_unregister(void);
+
+#endif /* __QAIC_RAS_H__ */
diff --git a/drivers/accel/qaic/qaic_ssr.c b/drivers/accel/qaic/qaic_ssr.c
new file mode 100644
index 000000000000..9b662d690371
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ssr.c
@@ -0,0 +1,815 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <asm/byteorder.h>
+#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
+#include <linux/devcoredump.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+#include <linux/workqueue.h>
+
+#include "qaic.h"
+#include "qaic_ssr.h"
+
+#define SSR_RESP_MSG_SZ 32
+#define SSR_MHI_BUF_SIZE SZ_64K
+#define SSR_MEM_READ_DATA_SIZE ((u64)SSR_MHI_BUF_SIZE - sizeof(struct ssr_crashdump))
+#define SSR_MEM_READ_CHUNK_SIZE ((u64)SSR_MEM_READ_DATA_SIZE - sizeof(struct ssr_memory_read_rsp))
+
+#define DEBUG_TRANSFER_INFO BIT(0)
+#define DEBUG_TRANSFER_INFO_RSP BIT(1)
+#define MEMORY_READ BIT(2)
+#define MEMORY_READ_RSP BIT(3)
+#define DEBUG_TRANSFER_DONE BIT(4)
+#define DEBUG_TRANSFER_DONE_RSP BIT(5)
+#define SSR_EVENT BIT(8)
+#define SSR_EVENT_RSP BIT(9)
+
+#define SSR_EVENT_NACK BIT(0)
+#define BEFORE_SHUTDOWN BIT(1)
+#define AFTER_SHUTDOWN BIT(2)
+#define BEFORE_POWER_UP BIT(3)
+#define AFTER_POWER_UP BIT(4)
+
+struct debug_info_table {
+ /* Save preferences. Default is mandatory */
+ u64 save_perf;
+ /* Base address of the debug region */
+ u64 mem_base;
+ /* Size of debug region in bytes */
+ u64 len;
+ /* Description */
+ char desc[20];
+ /* Filename of debug region */
+ char filename[20];
+};
+
+struct _ssr_hdr {
+ __le32 cmd;
+ __le32 len;
+ __le32 dbc_id;
+};
+
+struct ssr_hdr {
+ u32 cmd;
+ u32 len;
+ u32 dbc_id;
+};
+
+struct ssr_debug_transfer_info {
+ struct ssr_hdr hdr;
+ u32 resv;
+ u64 tbl_addr;
+ u64 tbl_len;
+} __packed;
+
+struct ssr_debug_transfer_info_rsp {
+ struct _ssr_hdr hdr;
+ __le32 ret;
+} __packed;
+
+struct ssr_memory_read {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+ __le64 addr;
+ __le64 len;
+} __packed;
+
+struct ssr_memory_read_rsp {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+ u8 data[];
+} __packed;
+
+struct ssr_debug_transfer_done {
+ struct _ssr_hdr hdr;
+ __le32 resv;
+} __packed;
+
+struct ssr_debug_transfer_done_rsp {
+ struct _ssr_hdr hdr;
+ __le32 ret;
+} __packed;
+
+struct ssr_event {
+ struct ssr_hdr hdr;
+ u32 event;
+} __packed;
+
+struct ssr_event_rsp {
+ struct _ssr_hdr hdr;
+ __le32 event;
+} __packed;
+
+struct ssr_resp {
+ /* Work struct to schedule work coming on QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[] __aligned(8);
+};
+
+/* SSR crashdump book keeping structure */
+struct ssr_dump_info {
+ /* DBC associated with this SSR crashdump */
+ struct dma_bridge_chan *dbc;
+ /*
+ * It will be used when we complete the crashdump download and switch
+ * to waiting on SSR events
+ */
+ struct ssr_resp *resp;
+ /* MEMORY READ request MHI buffer.*/
+ struct ssr_memory_read *read_buf_req;
+ /* TRUE: ->read_buf_req is queued for MHI transaction. FALSE: Otherwise */
+ bool read_buf_req_queued;
+ /* Address of table in host */
+ void *tbl_addr;
+ /* Total size of table */
+ u64 tbl_len;
+ /* Offset of table(->tbl_addr) where the new chunk will be dumped */
+ u64 tbl_off;
+ /* Address of table in device/target */
+ u64 tbl_addr_dev;
+ /* Ptr to the entire dump */
+ void *dump_addr;
+ /* Entire crashdump size */
+ u64 dump_sz;
+ /* Offset of crashdump(->dump_addr) where the new chunk will be dumped */
+ u64 dump_off;
+ /* Points to the table entry we are currently downloading */
+ struct debug_info_table *tbl_ent;
+ /* Offset in the current table entry(->tbl_ent) for next chuck */
+ u64 tbl_ent_off;
+};
+
+struct ssr_crashdump {
+ /*
+ * Points to a book keeping struct maintained by MHI SSR device while
+ * downloading a SSR crashdump. It is NULL when crashdump downloading
+ * not in progress.
+ */
+ struct ssr_dump_info *dump_info;
+ /* Work struct to schedule work coming on QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[];
+};
+
+#define QAIC_SSR_DUMP_V1_MAGIC 0x1234567890abcdef
+#define QAIC_SSR_DUMP_V1_VER 1
+struct dump_file_meta {
+ u64 magic;
+ u64 version;
+ u64 size; /* Total size of the entire dump */
+ u64 tbl_len; /* Length of the table in byte */
+};
+
+/*
+ * Layout of crashdump
+ * +------------------------------------------+
+ * | Crashdump Meta structure |
+ * | type: struct dump_file_meta |
+ * +------------------------------------------+
+ * | Crashdump Table |
+ * | type: array of struct debug_info_table |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ * | Crashdump |
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ */
+
+static void free_ssr_dump_info(struct ssr_crashdump *ssr_crash)
+{
+ struct ssr_dump_info *dump_info = ssr_crash->dump_info;
+
+ ssr_crash->dump_info = NULL;
+ if (!dump_info)
+ return;
+ if (!dump_info->read_buf_req_queued)
+ kfree(dump_info->read_buf_req);
+ vfree(dump_info->tbl_addr);
+ vfree(dump_info->dump_addr);
+ kfree(dump_info);
+}
+
+void qaic_clean_up_ssr(struct qaic_device *qdev)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+
+ if (!ssr_crash)
+ return;
+
+ qaic_dbc_exit_ssr(qdev);
+ free_ssr_dump_info(ssr_crash);
+}
+
+static int alloc_dump(struct ssr_dump_info *dump_info)
+{
+ struct debug_info_table *tbl_ent = dump_info->tbl_addr;
+ struct dump_file_meta *dump_meta;
+ u64 tbl_sz_lp = 0;
+ u64 dump_size = 0;
+
+ while (tbl_sz_lp < dump_info->tbl_len) {
+ le64_to_cpus(&tbl_ent->save_perf);
+ le64_to_cpus(&tbl_ent->mem_base);
+ le64_to_cpus(&tbl_ent->len);
+
+ if (tbl_ent->len == 0)
+ return -EINVAL;
+
+ dump_size += tbl_ent->len;
+ tbl_ent++;
+ tbl_sz_lp += sizeof(*tbl_ent);
+ }
+
+ dump_info->dump_sz = dump_size + dump_info->tbl_len + sizeof(*dump_meta);
+ dump_info->dump_addr = vzalloc(dump_info->dump_sz);
+ if (!dump_info->dump_addr)
+ return -ENOMEM;
+
+ /* Copy crashdump meta and table */
+ dump_meta = dump_info->dump_addr;
+ dump_meta->magic = QAIC_SSR_DUMP_V1_MAGIC;
+ dump_meta->version = QAIC_SSR_DUMP_V1_VER;
+ dump_meta->size = dump_info->dump_sz;
+ dump_meta->tbl_len = dump_info->tbl_len;
+ memcpy(dump_info->dump_addr + sizeof(*dump_meta), dump_info->tbl_addr, dump_info->tbl_len);
+ /* Offset by crashdump meta and table (copied above) */
+ dump_info->dump_off = dump_info->tbl_len + sizeof(*dump_meta);
+
+ return 0;
+}
+
+static int send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id)
+{
+ struct ssr_debug_transfer_done *xfer_done;
+ int ret;
+
+ xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL);
+ if (!xfer_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ xfer_done->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_DONE);
+ xfer_done->hdr.len = cpu_to_le32(sizeof(*xfer_done));
+ xfer_done->hdr.dbc_id = cpu_to_le32(dbc_id);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, xfer_done, sizeof(*xfer_done), MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ return 0;
+
+free_xfer_done:
+ kfree(xfer_done);
+out:
+ return ret;
+}
+
+static int mem_read_req(struct qaic_device *qdev, u64 dest_addr, u64 dest_len)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct ssr_memory_read *read_buf_req;
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ dump_info = ssr_crash->dump_info;
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, ssr_crash->data, SSR_MEM_READ_DATA_SIZE,
+ MHI_EOT);
+ if (ret)
+ goto out;
+
+ read_buf_req = dump_info->read_buf_req;
+ read_buf_req->hdr.cmd = cpu_to_le32(MEMORY_READ);
+ read_buf_req->hdr.len = cpu_to_le32(sizeof(*read_buf_req));
+ read_buf_req->hdr.dbc_id = cpu_to_le32(qdev->ssr_dbc);
+ read_buf_req->addr = cpu_to_le64(dest_addr);
+ read_buf_req->len = cpu_to_le64(dest_len);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, read_buf_req, sizeof(*read_buf_req),
+ MHI_EOT);
+ if (!ret)
+ dump_info->read_buf_req_queued = true;
+
+out:
+ return ret;
+}
+
+static int ssr_copy_table(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ if (len > dump_info->tbl_len - dump_info->tbl_off)
+ return -EINVAL;
+
+ memcpy(dump_info->tbl_addr + dump_info->tbl_off, data, len);
+ dump_info->tbl_off += len;
+
+ /* Entire table has been downloaded, alloc dump memory */
+ if (dump_info->tbl_off == dump_info->tbl_len) {
+ dump_info->tbl_ent = dump_info->tbl_addr;
+ return alloc_dump(dump_info);
+ }
+
+ return 0;
+}
+
+static int ssr_copy_dump(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ struct debug_info_table *tbl_ent;
+
+ tbl_ent = dump_info->tbl_ent;
+
+ if (len > tbl_ent->len - dump_info->tbl_ent_off)
+ return -EINVAL;
+
+ memcpy(dump_info->dump_addr + dump_info->dump_off, data, len);
+ dump_info->dump_off += len;
+ dump_info->tbl_ent_off += len;
+
+ /*
+ * Current segment (a entry in table) of the crashdump is complete,
+ * move to next one
+ */
+ if (tbl_ent->len == dump_info->tbl_ent_off) {
+ dump_info->tbl_ent++;
+ dump_info->tbl_ent_off = 0;
+ }
+
+ return 0;
+}
+
+static void ssr_dump_worker(struct work_struct *work)
+{
+ struct ssr_crashdump *ssr_crash = container_of(work, struct ssr_crashdump, work);
+ struct qaic_device *qdev = ssr_crash->qdev;
+ struct ssr_memory_read_rsp *mem_rd_resp;
+ struct debug_info_table *tbl_ent;
+ struct ssr_dump_info *dump_info;
+ u64 dest_addr, dest_len;
+ struct _ssr_hdr *_hdr;
+ struct ssr_hdr hdr;
+ u64 data_len;
+ int ret;
+
+ mem_rd_resp = (struct ssr_memory_read_rsp *)ssr_crash->data;
+ _hdr = &mem_rd_resp->hdr;
+ hdr.cmd = le32_to_cpu(_hdr->cmd);
+ hdr.len = le32_to_cpu(_hdr->len);
+ hdr.dbc_id = le32_to_cpu(_hdr->dbc_id);
+
+ if (hdr.dbc_id != qdev->ssr_dbc)
+ goto reset_device;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ goto reset_device;
+
+ if (hdr.cmd != MEMORY_READ_RSP)
+ goto free_dump_info;
+
+ if (hdr.len > SSR_MEM_READ_DATA_SIZE)
+ goto free_dump_info;
+
+ data_len = hdr.len - sizeof(*mem_rd_resp);
+
+ if (dump_info->tbl_off < dump_info->tbl_len) /* Chunk belongs to table */
+ ret = ssr_copy_table(dump_info, mem_rd_resp->data, data_len);
+ else /* Chunk belongs to crashdump */
+ ret = ssr_copy_dump(dump_info, mem_rd_resp->data, data_len);
+
+ if (ret)
+ goto free_dump_info;
+
+ if (dump_info->tbl_off < dump_info->tbl_len) {
+ /* Continue downloading table */
+ dest_addr = dump_info->tbl_addr_dev + dump_info->tbl_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, dump_info->tbl_len - dump_info->tbl_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else if (dump_info->dump_off < dump_info->dump_sz) {
+ /* Continue downloading crashdump */
+ tbl_ent = dump_info->tbl_ent;
+ dest_addr = tbl_ent->mem_base + dump_info->tbl_ent_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, tbl_ent->len - dump_info->tbl_ent_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else {
+ /* Crashdump download complete */
+ ret = send_xfer_done(qdev, dump_info->resp->data, hdr.dbc_id);
+ }
+
+ /* Most likely a MHI xfer has failed */
+ if (ret)
+ goto free_dump_info;
+
+ return;
+
+free_dump_info:
+ /* Free the allocated memory */
+ free_ssr_dump_info(ssr_crash);
+reset_device:
+ /*
+ * After subsystem crashes in device crashdump collection begins but
+ * something went wrong while collecting crashdump, now instead of
+ * handling this error we just reset the device as the best effort has
+ * been made
+ */
+ mhi_soc_reset(qdev->mhi_cntrl);
+}
+
+static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ le64_to_cpus(&debug_info->tbl_len);
+ le64_to_cpus(&debug_info->tbl_addr);
+
+ if (debug_info->tbl_len == 0 ||
+ debug_info->tbl_len % sizeof(struct debug_info_table) != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Allocate SSR crashdump book keeping structure */
+ dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL);
+ if (!dump_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Buffer used to send MEMORY READ request to device via MHI */
+ dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL);
+ if (!dump_info->read_buf_req) {
+ ret = -ENOMEM;
+ goto free_dump_info;
+ }
+
+ /* Crashdump meta table buffer */
+ dump_info->tbl_addr = vzalloc(debug_info->tbl_len);
+ if (!dump_info->tbl_addr) {
+ ret = -ENOMEM;
+ goto free_read_buf_req;
+ }
+
+ dump_info->tbl_addr_dev = debug_info->tbl_addr;
+ dump_info->tbl_len = debug_info->tbl_len;
+
+ return dump_info;
+
+free_read_buf_req:
+ kfree(dump_info->read_buf_req);
+free_dump_info:
+ kfree(dump_info);
+out:
+ return ERR_PTR(ret);
+}
+
+static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_debug_transfer_info_rsp *debug_rsp;
+ struct ssr_crashdump *ssr_crash = NULL;
+ int ret = 0, ret2;
+
+ debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL);
+ if (!debug_rsp)
+ return -ENOMEM;
+
+ if (!qdev->ssr_mhi_buf) {
+ ret = -ENOMEM;
+ goto send_rsp;
+ }
+
+ if (dbc->state != DBC_STATE_BEFORE_POWER_UP) {
+ ret = -EINVAL;
+ goto send_rsp;
+ }
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ ssr_crash->dump_info = alloc_dump_info(qdev, debug_info);
+ if (IS_ERR(ssr_crash->dump_info)) {
+ ret = PTR_ERR(ssr_crash->dump_info);
+ ssr_crash->dump_info = NULL;
+ }
+
+send_rsp:
+ debug_rsp->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_INFO_RSP);
+ debug_rsp->hdr.len = cpu_to_le32(sizeof(*debug_rsp));
+ debug_rsp->hdr.dbc_id = cpu_to_le32(dbc->id);
+ /*
+ * 0 = Return an ACK confirming the host is ready to download crashdump
+ * 1 = Return an NACK confirming the host is not ready to download crashdump
+ */
+ debug_rsp->ret = cpu_to_le32(ret ? 1 : 0);
+
+ ret2 = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, debug_rsp, sizeof(*debug_rsp), MHI_EOT);
+ if (ret2) {
+ free_ssr_dump_info(ssr_crash);
+ kfree(debug_rsp);
+ return ret2;
+ }
+
+ return ret;
+}
+
+static void dbg_xfer_done_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_done_rsp *xfer_rsp)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ u32 status = le32_to_cpu(xfer_rsp->ret);
+ struct device *dev = &qdev->pdev->dev;
+ struct ssr_dump_info *dump_info;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ return;
+
+ if (status) {
+ free_ssr_dump_info(ssr_crash);
+ return;
+ }
+
+ dev_coredumpv(dev, dump_info->dump_addr, dump_info->dump_sz, GFP_KERNEL);
+ /* dev_coredumpv will free dump_info->dump_addr */
+ dump_info->dump_addr = NULL;
+ free_ssr_dump_info(ssr_crash);
+}
+
+static void ssr_worker(struct work_struct *work)
+{
+ struct ssr_resp *resp = container_of(work, struct ssr_resp, work);
+ struct ssr_hdr *hdr = (struct ssr_hdr *)resp->data;
+ struct ssr_dump_info *dump_info = NULL;
+ struct qaic_device *qdev = resp->qdev;
+ struct ssr_crashdump *ssr_crash;
+ struct ssr_event_rsp *event_rsp;
+ struct dma_bridge_chan *dbc;
+ struct ssr_event *event;
+ u32 ssr_event_ack;
+ int ret;
+
+ le32_to_cpus(&hdr->cmd);
+ le32_to_cpus(&hdr->len);
+ le32_to_cpus(&hdr->dbc_id);
+
+ if (hdr->len > SSR_RESP_MSG_SZ)
+ goto out;
+
+ if (hdr->dbc_id >= qdev->num_dbc)
+ goto out;
+
+ dbc = &qdev->dbc[hdr->dbc_id];
+
+ switch (hdr->cmd) {
+ case DEBUG_TRANSFER_INFO:
+ ret = dbg_xfer_info_rsp(qdev, dbc, (struct ssr_debug_transfer_info *)resp->data);
+ if (ret)
+ break;
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ dump_info = ssr_crash->dump_info;
+ dump_info->dbc = dbc;
+ dump_info->resp = resp;
+
+ /* Start by downloading debug table */
+ ret = mem_read_req(qdev, dump_info->tbl_addr_dev,
+ min(dump_info->tbl_len, SSR_MEM_READ_CHUNK_SIZE));
+ if (ret) {
+ free_ssr_dump_info(ssr_crash);
+ break;
+ }
+
+ /*
+ * Till now everything went fine, which means that we will be
+ * collecting crashdump chunk by chunk. Do not queue a response
+ * buffer for SSR cmds till the crashdump is complete.
+ */
+ return;
+ case SSR_EVENT:
+ event = (struct ssr_event *)hdr;
+ le32_to_cpus(&event->event);
+ ssr_event_ack = event->event;
+ ssr_crash = qdev->ssr_mhi_buf;
+
+ switch (event->event) {
+ case BEFORE_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_SHUTDOWN);
+ qaic_dbc_enter_ssr(qdev, hdr->dbc_id);
+ break;
+ case AFTER_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_SHUTDOWN);
+ break;
+ case BEFORE_POWER_UP:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_POWER_UP);
+ break;
+ case AFTER_POWER_UP:
+ /*
+ * If dump info is a non NULL value it means that we
+ * have received this SSR event while downloading a
+ * crashdump for this DBC is still in progress. NACK
+ * the SSR event
+ */
+ if (ssr_crash && ssr_crash->dump_info) {
+ free_ssr_dump_info(ssr_crash);
+ ssr_event_ack = SSR_EVENT_NACK;
+ break;
+ }
+
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_POWER_UP);
+ break;
+ default:
+ break;
+ }
+
+ event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL);
+ if (!event_rsp)
+ break;
+
+ event_rsp->hdr.cmd = cpu_to_le32(SSR_EVENT_RSP);
+ event_rsp->hdr.len = cpu_to_le32(sizeof(*event_rsp));
+ event_rsp->hdr.dbc_id = cpu_to_le32(hdr->dbc_id);
+ event_rsp->event = cpu_to_le32(ssr_event_ack);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, event_rsp, sizeof(*event_rsp),
+ MHI_EOT);
+ if (ret)
+ kfree(event_rsp);
+
+ if (event->event == AFTER_POWER_UP && ssr_event_ack != SSR_EVENT_NACK) {
+ qaic_dbc_exit_ssr(qdev);
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_IDLE);
+ }
+
+ break;
+ case DEBUG_TRANSFER_DONE_RSP:
+ dbg_xfer_done_rsp(qdev, dbc, (struct ssr_debug_transfer_done_rsp *)hdr);
+ break;
+ default:
+ break;
+ }
+
+out:
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ kfree(resp);
+}
+
+static int qaic_ssr_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+ struct ssr_resp *resp;
+ int ret;
+
+ ret = mhi_prepare_for_transfer(mhi_dev);
+ if (ret)
+ return ret;
+
+ resp = kzalloc(sizeof(*resp) + SSR_RESP_MSG_SZ, GFP_KERNEL);
+ if (!resp) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ return -ENOMEM;
+ }
+
+ resp->qdev = qdev;
+ INIT_WORK(&resp->work, ssr_worker);
+
+ ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret) {
+ kfree(resp);
+ mhi_unprepare_from_transfer(mhi_dev);
+ return ret;
+ }
+
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->ssr_ch = mhi_dev;
+
+ return 0;
+}
+
+static void qaic_ssr_mhi_remove(struct mhi_device *mhi_dev)
+{
+ struct qaic_device *qdev;
+
+ qdev = dev_get_drvdata(&mhi_dev->dev);
+ mhi_unprepare_from_transfer(qdev->ssr_ch);
+ qdev->ssr_ch = NULL;
+}
+
+static void qaic_ssr_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct _ssr_hdr *hdr = mhi_result->buf_addr;
+ struct ssr_dump_info *dump_info;
+
+ if (mhi_result->transaction_status) {
+ kfree(mhi_result->buf_addr);
+ return;
+ }
+
+ /*
+ * MEMORY READ is used to download crashdump. And crashdump is
+ * downloaded chunk by chunk in a series of MEMORY READ SSR commands.
+ * Hence to avoid too many kmalloc() and kfree() of the same MEMORY READ
+ * request buffer, we allocate only one such buffer and free it only
+ * once.
+ */
+ if (le32_to_cpu(hdr->cmd) == MEMORY_READ) {
+ dump_info = ssr_crash->dump_info;
+ if (dump_info) {
+ dump_info->read_buf_req_queued = false;
+ return;
+ }
+ }
+
+ kfree(mhi_result->buf_addr);
+}
+
+static void qaic_ssr_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct ssr_resp *resp = container_of(mhi_result->buf_addr, struct ssr_resp, data);
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ bool memory_read_rsp = false;
+
+ if (ssr_crash && ssr_crash->data == mhi_result->buf_addr)
+ memory_read_rsp = true;
+
+ if (mhi_result->transaction_status) {
+ /* Do not free SSR crashdump buffer as it allocated via managed APIs */
+ if (!memory_read_rsp)
+ kfree(resp);
+ return;
+ }
+
+ if (memory_read_rsp)
+ queue_work(qdev->ssr_wq, &ssr_crash->work);
+ else
+ queue_work(qdev->ssr_wq, &resp->work);
+}
+
+static const struct mhi_device_id qaic_ssr_mhi_match_table[] = {
+ { .chan = "QAIC_SSR", },
+ {},
+};
+
+static struct mhi_driver qaic_ssr_mhi_driver = {
+ .id_table = qaic_ssr_mhi_match_table,
+ .remove = qaic_ssr_mhi_remove,
+ .probe = qaic_ssr_mhi_probe,
+ .ul_xfer_cb = qaic_ssr_mhi_ul_xfer_cb,
+ .dl_xfer_cb = qaic_ssr_mhi_dl_xfer_cb,
+ .driver = {
+ .name = "qaic_ssr",
+ },
+};
+
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm)
+{
+ struct ssr_crashdump *ssr_crash;
+
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+
+ /*
+ * Device requests only one SSR at a time. So allocating only one
+ * buffer to download crashdump is good enough.
+ */
+ ssr_crash = drmm_kzalloc(drm, SSR_MHI_BUF_SIZE, GFP_KERNEL);
+ if (!ssr_crash)
+ return -ENOMEM;
+
+ ssr_crash->qdev = qdev;
+ INIT_WORK(&ssr_crash->work, ssr_dump_worker);
+ qdev->ssr_mhi_buf = ssr_crash;
+
+ return 0;
+}
+
+int qaic_ssr_register(void)
+{
+ return mhi_driver_register(&qaic_ssr_mhi_driver);
+}
+
+void qaic_ssr_unregister(void)
+{
+ mhi_driver_unregister(&qaic_ssr_mhi_driver);
+}
diff --git a/drivers/accel/qaic/qaic_ssr.h b/drivers/accel/qaic/qaic_ssr.h
new file mode 100644
index 000000000000..97ccff305750
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ssr.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __QAIC_SSR_H__
+#define __QAIC_SSR_H__
+
+struct drm_device;
+struct qaic_device;
+
+int qaic_ssr_register(void);
+void qaic_ssr_unregister(void);
+void qaic_clean_up_ssr(struct qaic_device *qdev);
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm);
+#endif /* __QAIC_SSR_H__ */
diff --git a/drivers/accel/qaic/qaic_sysfs.c b/drivers/accel/qaic/qaic_sysfs.c
new file mode 100644
index 000000000000..e0afb0ffb589
--- /dev/null
+++ b/drivers/accel/qaic/qaic_sysfs.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2025, The Linux Foundation. All rights reserved. */
+
+#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+
+#include "qaic.h"
+
+#define NAME_LEN 14
+
+struct dbc_attribute {
+ struct device_attribute dev_attr;
+ u32 dbc_id;
+ char name[NAME_LEN];
+};
+
+static ssize_t dbc_state_show(struct device *dev, struct device_attribute *a, char *buf)
+{
+ struct dbc_attribute *dbc_attr = container_of(a, struct dbc_attribute, dev_attr);
+ struct drm_minor *minor = dev_get_drvdata(dev);
+ struct qaic_device *qdev;
+
+ qdev = to_qaic_device(minor->dev);
+ return sysfs_emit(buf, "%d\n", qdev->dbc[dbc_attr->dbc_id].state);
+}
+
+void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state)
+{
+ struct device *kdev = to_accel_kdev(qdev->qddev);
+ char *envp[3] = {};
+ char state_str[16];
+ char id_str[12];
+
+ envp[0] = id_str;
+ envp[1] = state_str;
+
+ if (state >= DBC_STATE_MAX)
+ return;
+ if (dbc_id >= qdev->num_dbc)
+ return;
+ if (state == qdev->dbc[dbc_id].state)
+ return;
+
+ scnprintf(id_str, ARRAY_SIZE(id_str), "DBC_ID=%d", dbc_id);
+ scnprintf(state_str, ARRAY_SIZE(state_str), "DBC_STATE=%d", state);
+
+ qdev->dbc[dbc_id].state = state;
+ kobject_uevent_env(&kdev->kobj, KOBJ_CHANGE, envp);
+}
+
+int qaic_sysfs_init(struct qaic_drm_device *qddev)
+{
+ struct device *kdev = to_accel_kdev(qddev);
+ struct drm_device *drm = to_drm(qddev);
+ u32 num_dbc = qddev->qdev->num_dbc;
+ struct dbc_attribute *dbc_attrs;
+ int i, ret;
+
+ dbc_attrs = drmm_kcalloc(drm, num_dbc, sizeof(*dbc_attrs), GFP_KERNEL);
+ if (!dbc_attrs)
+ return -ENOMEM;
+
+ for (i = 0; i < num_dbc; ++i) {
+ struct dbc_attribute *dbc_attr = &dbc_attrs[i];
+
+ sysfs_attr_init(&dbc_attr->dev_attr.attr);
+ dbc_attr->dbc_id = i;
+ scnprintf(dbc_attr->name, NAME_LEN, "dbc%d_state", i);
+ dbc_attr->dev_attr.attr.name = dbc_attr->name;
+ dbc_attr->dev_attr.attr.mode = 0444;
+ dbc_attr->dev_attr.show = dbc_state_show;
+ ret = sysfs_create_file(&kdev->kobj, &dbc_attr->dev_attr.attr);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; ++j) {
+ dbc_attr = &dbc_attrs[j];
+ sysfs_remove_file(&kdev->kobj, &dbc_attr->dev_attr.attr);
+ }
+ drmm_kfree(drm, dbc_attrs);
+ return ret;
+ }
+ }
+
+ qddev->sysfs_attrs = dbc_attrs;
+ return 0;
+}
+
+void qaic_sysfs_remove(struct qaic_drm_device *qddev)
+{
+ struct dbc_attribute *dbc_attrs = qddev->sysfs_attrs;
+ struct device *kdev = to_accel_kdev(qddev);
+ u32 num_dbc = qddev->qdev->num_dbc;
+ int i;
+
+ if (!dbc_attrs)
+ return;
+
+ qddev->sysfs_attrs = NULL;
+ for (i = 0; i < num_dbc; ++i)
+ sysfs_remove_file(&kdev->kobj, &dbc_attrs[i].dev_attr.attr);
+ drmm_kfree(to_drm(qddev), dbc_attrs);
+}
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
index 301f4462d51b..8af2475f4f36 100644
--- a/drivers/accel/qaic/qaic_timesync.c
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -129,7 +129,7 @@ static void qaic_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_resu
static void qaic_timesync_timer(struct timer_list *t)
{
- struct mqts_dev *mqtsdev = from_timer(mqtsdev, t, timer);
+ struct mqts_dev *mqtsdev = timer_container_of(mqtsdev, t, timer);
struct qts_host_time_sync_msg_data *sync_msg;
u64 device_qtimer_us;
u64 device_qtimer;
@@ -171,6 +171,13 @@ mod_timer:
dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret);
}
+void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev)
+{
+ struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+ timer_delete_sync(&mqtsdev->timer);
+}
+
static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
@@ -201,11 +208,12 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi
goto free_sync_msg;
/* Qtimer register pointer */
- mqtsdev->qtimer_addr = qdev->bar_0 + QTIMER_REG_OFFSET;
+ mqtsdev->qtimer_addr = qdev->bar_mhi + QTIMER_REG_OFFSET;
timer_setup(timer, qaic_timesync_timer, 0);
timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms);
add_timer(timer);
dev_set_drvdata(&mhi_dev->dev, mqtsdev);
+ qdev->mqts_ch = mhi_dev;
return 0;
@@ -221,7 +229,8 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev)
{
struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
- del_timer_sync(&mqtsdev->timer);
+ mqtsdev->qdev->mqts_ch = NULL;
+ timer_delete_sync(&mqtsdev->timer);
mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
kfree(mqtsdev->sync_msg);
kfree(mqtsdev);
diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h
index 851b7acd43bb..77b9c2b55057 100644
--- a/drivers/accel/qaic/qaic_timesync.h
+++ b/drivers/accel/qaic/qaic_timesync.h
@@ -6,6 +6,9 @@
#ifndef __QAIC_TIMESYNC_H__
#define __QAIC_TIMESYNC_H__
+#include <linux/mhi.h>
+
int qaic_timesync_init(void);
void qaic_timesync_deinit(void);
+void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev);
#endif /* __QAIC_TIMESYNC_H__ */
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index bf94bbab6be5..fd3c3b2d1fd3 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+#include <linux/devcoredump.h>
#include <linux/firmware.h>
#include <linux/limits.h>
#include <linux/mhi.h>
@@ -9,6 +10,7 @@
#include <linux/mod_devicetable.h>
#include <linux/overflow.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include "sahara.h"
@@ -36,12 +38,14 @@
#define SAHARA_PACKET_MAX_SIZE 0xffffU /* MHI_MAX_MTU */
#define SAHARA_TRANSFER_MAX_SIZE 0x80000
+#define SAHARA_READ_MAX_SIZE 0xfff0U /* Avoid unaligned requests */
#define SAHARA_NUM_TX_BUF DIV_ROUND_UP(SAHARA_TRANSFER_MAX_SIZE,\
SAHARA_PACKET_MAX_SIZE)
#define SAHARA_IMAGE_ID_NONE U32_MAX
#define SAHARA_VERSION 2
#define SAHARA_SUCCESS 0
+#define SAHARA_TABLE_ENTRY_STR_LEN 20
#define SAHARA_MODE_IMAGE_TX_PENDING 0x0
#define SAHARA_MODE_IMAGE_TX_COMPLETE 0x1
@@ -53,6 +57,8 @@
#define SAHARA_END_OF_IMAGE_LENGTH 0x10
#define SAHARA_DONE_LENGTH 0x8
#define SAHARA_RESET_LENGTH 0x8
+#define SAHARA_MEM_DEBUG64_LENGTH 0x18
+#define SAHARA_MEM_READ64_LENGTH 0x18
struct sahara_packet {
__le32 cmd;
@@ -80,21 +86,102 @@ struct sahara_packet {
__le32 image;
__le32 status;
} end_of_image;
+ struct {
+ __le64 table_address;
+ __le64 table_length;
+ } memory_debug64;
+ struct {
+ __le64 memory_address;
+ __le64 memory_length;
+ } memory_read64;
};
};
+struct sahara_debug_table_entry64 {
+ __le64 type;
+ __le64 address;
+ __le64 length;
+ char description[SAHARA_TABLE_ENTRY_STR_LEN];
+ char filename[SAHARA_TABLE_ENTRY_STR_LEN];
+};
+
+struct sahara_dump_table_entry {
+ u64 type;
+ u64 address;
+ u64 length;
+ char description[SAHARA_TABLE_ENTRY_STR_LEN];
+ char filename[SAHARA_TABLE_ENTRY_STR_LEN];
+};
+
+#define SAHARA_DUMP_V1_MAGIC 0x1234567890abcdef
+#define SAHARA_DUMP_V1_VER 1
+struct sahara_memory_dump_meta_v1 {
+ u64 magic;
+ u64 version;
+ u64 dump_size;
+ u64 table_size;
+};
+
+/*
+ * Layout of crashdump provided to user via devcoredump
+ * +------------------------------------------+
+ * | Crashdump Meta structure |
+ * | type: struct sahara_memory_dump_meta_v1 |
+ * +------------------------------------------+
+ * | Crashdump Table |
+ * | type: array of struct |
+ * | sahara_dump_table_entry |
+ * | |
+ * | |
+ * +------------------------------------------+
+ * | Crashdump |
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ *
+ * First is the metadata header. Userspace can use the magic number to verify
+ * the content type, and then check the version for the rest of the format.
+ * New versions should keep the magic number location/value, and version
+ * location, but increment the version value.
+ *
+ * For v1, the metadata lists the size of the entire dump (header + table +
+ * dump) and the size of the table. Then the dump image table, which describes
+ * the contents of the dump. Finally all the images are listed in order, with
+ * no deadspace in between. Userspace can use the sizes listed in the image
+ * table to reconstruct the individual images.
+ */
+
struct sahara_context {
struct sahara_packet *tx[SAHARA_NUM_TX_BUF];
struct sahara_packet *rx;
- struct work_struct work;
+ struct work_struct fw_work;
+ struct work_struct dump_work;
+ struct work_struct read_data_work;
struct mhi_device *mhi_dev;
- const char **image_table;
+ const char * const *image_table;
u32 table_size;
u32 active_image_id;
const struct firmware *firmware;
+ u64 dump_table_address;
+ u64 dump_table_length;
+ size_t rx_size;
+ size_t rx_size_requested;
+ void *mem_dump;
+ size_t mem_dump_sz;
+ struct sahara_dump_table_entry *dump_image;
+ u64 dump_image_offset;
+ void *mem_dump_freespace;
+ u64 dump_images_left;
+ u32 read_data_offset;
+ u32 read_data_length;
+ bool is_mem_dump_mode;
+ bool non_streaming;
};
-static const char *aic100_image_table[] = {
+static const char * const aic100_image_table[] = {
[1] = "qcom/aic100/fw1.bin",
[2] = "qcom/aic100/fw2.bin",
[4] = "qcom/aic100/fw4.bin",
@@ -105,6 +192,42 @@ static const char *aic100_image_table[] = {
[10] = "qcom/aic100/fw10.bin",
};
+static const char * const aic200_image_table[] = {
+ [5] = "qcom/aic200/uefi.elf",
+ [12] = "qcom/aic200/aic200-nsp.bin",
+ [23] = "qcom/aic200/aop.mbn",
+ [32] = "qcom/aic200/tz.mbn",
+ [33] = "qcom/aic200/hypvm.mbn",
+ [38] = "qcom/aic200/xbl_config.elf",
+ [39] = "qcom/aic200/aic200_abl.elf",
+ [40] = "qcom/aic200/apdp.mbn",
+ [41] = "qcom/aic200/devcfg.mbn",
+ [42] = "qcom/aic200/sec.elf",
+ [43] = "qcom/aic200/aic200-hlos.elf",
+ [49] = "qcom/aic200/shrm.elf",
+ [50] = "qcom/aic200/cpucp.elf",
+ [51] = "qcom/aic200/aop_devcfg.mbn",
+ [54] = "qcom/aic200/qupv3fw.elf",
+ [57] = "qcom/aic200/cpucp_dtbs.elf",
+ [62] = "qcom/aic200/uefi_dtbs.elf",
+ [63] = "qcom/aic200/xbl_ac_config.mbn",
+ [64] = "qcom/aic200/tz_ac_config.mbn",
+ [65] = "qcom/aic200/hyp_ac_config.mbn",
+ [66] = "qcom/aic200/pdp.elf",
+ [67] = "qcom/aic200/pdp_cdb.elf",
+ [68] = "qcom/aic200/sdi.mbn",
+ [69] = "qcom/aic200/dcd.mbn",
+ [73] = "qcom/aic200/gearvm.mbn",
+ [74] = "qcom/aic200/sti.bin",
+ [76] = "qcom/aic200/tz_qti_config.mbn",
+ [78] = "qcom/aic200/pvs.bin",
+};
+
+static bool is_streaming(struct sahara_context *context)
+{
+ return !context->non_streaming;
+}
+
static int sahara_find_image(struct sahara_context *context, u32 image_id)
{
int ret;
@@ -153,6 +276,10 @@ static void sahara_send_reset(struct sahara_context *context)
{
int ret;
+ context->is_mem_dump_mode = false;
+ context->read_data_offset = 0;
+ context->read_data_length = 0;
+
context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -186,7 +313,8 @@ static void sahara_hello(struct sahara_context *context)
}
if (le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_PENDING &&
- le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE) {
+ le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE &&
+ le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG) {
dev_err(&context->mhi_dev->dev, "Unsupported hello packet - mode %d\n",
le32_to_cpu(context->rx->hello.mode));
return;
@@ -205,9 +333,39 @@ static void sahara_hello(struct sahara_context *context)
dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret);
}
+static int read_data_helper(struct sahara_context *context, int buf_index)
+{
+ enum mhi_flags mhi_flag;
+ u32 pkt_data_len;
+ int ret;
+
+ pkt_data_len = min(context->read_data_length, SAHARA_PACKET_MAX_SIZE);
+
+ memcpy(context->tx[buf_index],
+ &context->firmware->data[context->read_data_offset],
+ pkt_data_len);
+
+ context->read_data_offset += pkt_data_len;
+ context->read_data_length -= pkt_data_len;
+
+ if (is_streaming(context) || !context->read_data_length)
+ mhi_flag = MHI_EOT;
+ else
+ mhi_flag = MHI_CHAIN;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
+ context->tx[buf_index], pkt_data_len, mhi_flag);
+ if (ret) {
+ dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static void sahara_read_data(struct sahara_context *context)
{
- u32 image_id, data_offset, data_len, pkt_data_len;
+ u32 image_id, data_offset, data_len;
int ret;
int i;
@@ -243,7 +401,7 @@ static void sahara_read_data(struct sahara_context *context)
* and is not needed here on error.
*/
- if (data_len > SAHARA_TRANSFER_MAX_SIZE) {
+ if (context->non_streaming && data_len > SAHARA_TRANSFER_MAX_SIZE) {
dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n",
data_len, SAHARA_TRANSFER_MAX_SIZE);
sahara_send_reset(context);
@@ -264,22 +422,18 @@ static void sahara_read_data(struct sahara_context *context)
return;
}
- for (i = 0; i < SAHARA_NUM_TX_BUF && data_len; ++i) {
- pkt_data_len = min(data_len, SAHARA_PACKET_MAX_SIZE);
-
- memcpy(context->tx[i], &context->firmware->data[data_offset], pkt_data_len);
+ context->read_data_offset = data_offset;
+ context->read_data_length = data_len;
- data_offset += pkt_data_len;
- data_len -= pkt_data_len;
+ if (is_streaming(context)) {
+ schedule_work(&context->read_data_work);
+ return;
+ }
- ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
- context->tx[i], pkt_data_len,
- !data_len ? MHI_EOT : MHI_CHAIN);
- if (ret) {
- dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n",
- ret);
- return;
- }
+ for (i = 0; i < SAHARA_NUM_TX_BUF && context->read_data_length; ++i) {
+ ret = read_data_helper(context, i);
+ if (ret)
+ break;
}
}
@@ -320,9 +474,70 @@ static void sahara_end_of_image(struct sahara_context *context)
dev_dbg(&context->mhi_dev->dev, "Unable to send done response %d\n", ret);
}
+static void sahara_memory_debug64(struct sahara_context *context)
+{
+ int ret;
+
+ dev_dbg(&context->mhi_dev->dev,
+ "MEMORY DEBUG64 cmd received. length:%d table_address:%#llx table_length:%#llx\n",
+ le32_to_cpu(context->rx->length),
+ le64_to_cpu(context->rx->memory_debug64.table_address),
+ le64_to_cpu(context->rx->memory_debug64.table_length));
+
+ if (le32_to_cpu(context->rx->length) != SAHARA_MEM_DEBUG64_LENGTH) {
+ dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - length %d\n",
+ le32_to_cpu(context->rx->length));
+ return;
+ }
+
+ context->dump_table_address = le64_to_cpu(context->rx->memory_debug64.table_address);
+ context->dump_table_length = le64_to_cpu(context->rx->memory_debug64.table_length);
+
+ if (context->dump_table_length % sizeof(struct sahara_debug_table_entry64) != 0 ||
+ !context->dump_table_length) {
+ dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - table length %lld\n",
+ context->dump_table_length);
+ return;
+ }
+
+ /*
+ * From this point, the protocol flips. We make memory_read requests to
+ * the device, and the device responds with the raw data. If the device
+ * has an error, it will send an End of Image command. First we need to
+ * request the memory dump table so that we know where all the pieces
+ * of the dump are that we can consume.
+ */
+
+ context->is_mem_dump_mode = true;
+
+ /*
+ * Assume that the table is smaller than our MTU so that we can read it
+ * in one shot. The spec does not put an upper limit on the table, but
+ * no known device will exceed this.
+ */
+ if (context->dump_table_length > SAHARA_PACKET_MAX_SIZE) {
+ dev_err(&context->mhi_dev->dev, "Memory dump table length %lld exceeds supported size. Discarding dump\n",
+ context->dump_table_length);
+ sahara_send_reset(context);
+ return;
+ }
+
+ context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD);
+ context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH);
+ context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_table_address);
+ context->tx[0]->memory_read64.memory_length = cpu_to_le64(context->dump_table_length);
+
+ context->rx_size_requested = context->dump_table_length;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+ SAHARA_MEM_READ64_LENGTH, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev, "Unable to send read for dump table %d\n", ret);
+}
+
static void sahara_processing(struct work_struct *work)
{
- struct sahara_context *context = container_of(work, struct sahara_context, work);
+ struct sahara_context *context = container_of(work, struct sahara_context, fw_work);
int ret;
switch (le32_to_cpu(context->rx->cmd)) {
@@ -338,6 +553,12 @@ static void sahara_processing(struct work_struct *work)
case SAHARA_DONE_RESP_CMD:
/* Intentional do nothing as we don't need to exit an app */
break;
+ case SAHARA_RESET_RESP_CMD:
+ /* Intentional do nothing as we don't need to exit an app */
+ break;
+ case SAHARA_MEM_DEBUG64_CMD:
+ sahara_memory_debug64(context);
+ break;
default:
dev_err(&context->mhi_dev->dev, "Unknown command %d\n",
le32_to_cpu(context->rx->cmd));
@@ -350,6 +571,229 @@ static void sahara_processing(struct work_struct *work)
dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret);
}
+static void sahara_parse_dump_table(struct sahara_context *context)
+{
+ struct sahara_dump_table_entry *image_out_table;
+ struct sahara_debug_table_entry64 *dev_table;
+ struct sahara_memory_dump_meta_v1 *dump_meta;
+ u64 table_nents;
+ u64 dump_length;
+ u64 mul_bytes;
+ int ret;
+ u64 i;
+
+ table_nents = context->dump_table_length / sizeof(*dev_table);
+ context->dump_images_left = table_nents;
+ dump_length = 0;
+
+ dev_table = (struct sahara_debug_table_entry64 *)(context->rx);
+ for (i = 0; i < table_nents; ++i) {
+ /* Do not trust the device, ensure the strings are terminated */
+ dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
+ dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
+
+ if (check_add_overflow(dump_length,
+ le64_to_cpu(dev_table[i].length),
+ &dump_length)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+
+ dev_dbg(&context->mhi_dev->dev,
+ "Memory dump table entry %lld type: %lld address: %#llx length: %#llx description: \"%s\" filename \"%s\"\n",
+ i,
+ le64_to_cpu(dev_table[i].type),
+ le64_to_cpu(dev_table[i].address),
+ le64_to_cpu(dev_table[i].length),
+ dev_table[i].description,
+ dev_table[i].filename);
+ }
+
+ if (check_add_overflow(dump_length, (u64)sizeof(*dump_meta), &dump_length)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+ if (check_mul_overflow((u64)sizeof(*image_out_table), table_nents, &mul_bytes)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+ if (check_add_overflow(dump_length, mul_bytes, &dump_length)) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+
+ context->mem_dump_sz = dump_length;
+ context->mem_dump = vzalloc(dump_length);
+ if (!context->mem_dump) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+
+ /* Populate the dump metadata and table for userspace */
+ dump_meta = context->mem_dump;
+ dump_meta->magic = SAHARA_DUMP_V1_MAGIC;
+ dump_meta->version = SAHARA_DUMP_V1_VER;
+ dump_meta->dump_size = dump_length;
+ dump_meta->table_size = context->dump_table_length;
+
+ image_out_table = context->mem_dump + sizeof(*dump_meta);
+ for (i = 0; i < table_nents; ++i) {
+ image_out_table[i].type = le64_to_cpu(dev_table[i].type);
+ image_out_table[i].address = le64_to_cpu(dev_table[i].address);
+ image_out_table[i].length = le64_to_cpu(dev_table[i].length);
+ strscpy(image_out_table[i].description, dev_table[i].description,
+ SAHARA_TABLE_ENTRY_STR_LEN);
+ strscpy(image_out_table[i].filename,
+ dev_table[i].filename,
+ SAHARA_TABLE_ENTRY_STR_LEN);
+ }
+
+ context->mem_dump_freespace = &image_out_table[i];
+
+ /* Done parsing the table, switch to image dump mode */
+ context->dump_table_length = 0;
+
+ /* Request the first chunk of the first image */
+ context->dump_image = &image_out_table[0];
+ dump_length = min_t(u64, context->dump_image->length, SAHARA_READ_MAX_SIZE);
+ /* Avoid requesting EOI sized data so that we can identify errors */
+ if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
+ dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
+
+ context->dump_image_offset = dump_length;
+
+ context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD);
+ context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH);
+ context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_image->address);
+ context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length);
+
+ context->rx_size_requested = dump_length;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+ SAHARA_MEM_READ64_LENGTH, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev, "Unable to send read for dump content %d\n", ret);
+}
+
+static void sahara_parse_dump_image(struct sahara_context *context)
+{
+ u64 dump_length;
+ int ret;
+
+ memcpy(context->mem_dump_freespace, context->rx, context->rx_size);
+ context->mem_dump_freespace += context->rx_size;
+
+ if (context->dump_image_offset >= context->dump_image->length) {
+ /* Need to move to next image */
+ context->dump_image++;
+ context->dump_images_left--;
+ context->dump_image_offset = 0;
+
+ if (!context->dump_images_left) {
+ /* Dump done */
+ dev_coredumpv(context->mhi_dev->mhi_cntrl->cntrl_dev,
+ context->mem_dump,
+ context->mem_dump_sz,
+ GFP_KERNEL);
+ context->mem_dump = NULL;
+ sahara_send_reset(context);
+ return;
+ }
+ }
+
+ /* Get next image chunk */
+ dump_length = context->dump_image->length - context->dump_image_offset;
+ dump_length = min_t(u64, dump_length, SAHARA_READ_MAX_SIZE);
+ /* Avoid requesting EOI sized data so that we can identify errors */
+ if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
+ dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
+
+ context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD);
+ context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH);
+ context->tx[0]->memory_read64.memory_address =
+ cpu_to_le64(context->dump_image->address + context->dump_image_offset);
+ context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length);
+
+ context->dump_image_offset += dump_length;
+ context->rx_size_requested = dump_length;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+ SAHARA_MEM_READ64_LENGTH, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev,
+ "Unable to send read for dump content %d\n", ret);
+}
+
+static void sahara_dump_processing(struct work_struct *work)
+{
+ struct sahara_context *context = container_of(work, struct sahara_context, dump_work);
+ int ret;
+
+ /*
+ * We should get the expected raw data, but if the device has an error
+ * it is supposed to send EOI with an error code.
+ */
+ if (context->rx_size != context->rx_size_requested &&
+ context->rx_size != SAHARA_END_OF_IMAGE_LENGTH) {
+ dev_err(&context->mhi_dev->dev,
+ "Unexpected response to read_data. Expected size: %#zx got: %#zx\n",
+ context->rx_size_requested,
+ context->rx_size);
+ goto error;
+ }
+
+ if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH &&
+ le32_to_cpu(context->rx->cmd) == SAHARA_END_OF_IMAGE_CMD) {
+ dev_err(&context->mhi_dev->dev,
+ "Unexpected EOI response to read_data. Status: %d\n",
+ le32_to_cpu(context->rx->end_of_image.status));
+ goto error;
+ }
+
+ if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH &&
+ le32_to_cpu(context->rx->cmd) != SAHARA_END_OF_IMAGE_CMD) {
+ dev_err(&context->mhi_dev->dev,
+ "Invalid EOI response to read_data. CMD: %d\n",
+ le32_to_cpu(context->rx->cmd));
+ goto error;
+ }
+
+ /*
+ * Need to know if we received the dump table, or part of a dump image.
+ * Since we get raw data, we cannot tell from the data itself. Instead,
+ * we use the stored dump_table_length, which we zero after we read and
+ * process the entire table.
+ */
+ if (context->dump_table_length)
+ sahara_parse_dump_table(context);
+ else
+ sahara_parse_dump_image(context);
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx,
+ SAHARA_PACKET_MAX_SIZE, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret);
+
+ return;
+
+error:
+ vfree(context->mem_dump);
+ context->mem_dump = NULL;
+ sahara_send_reset(context);
+}
+
+static void sahara_read_data_processing(struct work_struct *work)
+{
+ struct sahara_context *context = container_of(work, struct sahara_context, read_data_work);
+
+ read_data_helper(context, 0);
+}
+
static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct sahara_context *context;
@@ -364,27 +808,57 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
if (!context->rx)
return -ENOMEM;
+ if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
+ context->image_table = aic200_image_table;
+ context->table_size = ARRAY_SIZE(aic200_image_table);
+ } else {
+ context->image_table = aic100_image_table;
+ context->table_size = ARRAY_SIZE(aic100_image_table);
+ context->non_streaming = true;
+ }
+
/*
- * AIC100 defines SAHARA_TRANSFER_MAX_SIZE as the largest value it
- * will request for READ_DATA. This is larger than
- * SAHARA_PACKET_MAX_SIZE, and we need 9x SAHARA_PACKET_MAX_SIZE to
- * cover SAHARA_TRANSFER_MAX_SIZE. When the remote side issues a
- * READ_DATA, it requires a transfer of the exact size requested. We
- * can use MHI_CHAIN to link multiple buffers into a single transfer
- * but the remote side will not consume the buffers until it sees an
- * EOT, thus we need to allocate enough buffers to put in the tx fifo
- * to cover an entire READ_DATA request of the max size.
+ * There are two firmware implementations for READ_DATA handling.
+ * The older "SBL" implementation defines a Sahara transfer size, and
+ * expects that the response is a single transport transfer. If the
+ * FW wants to transfer a file that is larger than the transfer size,
+ * the FW will issue multiple READ_DATA commands. For this
+ * implementation, we need to allocate enough buffers to contain the
+ * entire Sahara transfer size.
+ *
+ * The newer "XBL" implementation does not define a maximum transfer
+ * size and instead expects the data to be streamed over using the
+ * transport level MTU. The FW will issue a single READ_DATA command
+ * of whatever size, and consume multiple transport level transfers
+ * until the expected amount of data is consumed. For this
+ * implementation we only need a single buffer of the transport MTU
+ * but we'll need to be able to use it multiple times for a single
+ * READ_DATA request.
+ *
+ * AIC100 is the SBL implementation and defines SAHARA_TRANSFER_MAX_SIZE
+ * and we need 9x SAHARA_PACKET_MAX_SIZE to cover that. We can use
+ * MHI_CHAIN to link multiple buffers into a single transfer but the
+ * remote side will not consume the buffers until it sees an EOT, thus
+ * we need to allocate enough buffers to put in the tx fifo to cover an
+ * entire READ_DATA request of the max size.
+ *
+ * AIC200 is the XBL implementation, and so a single buffer will work.
*/
for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) {
- context->tx[i] = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL);
+ context->tx[i] = devm_kzalloc(&mhi_dev->dev,
+ SAHARA_PACKET_MAX_SIZE,
+ GFP_KERNEL);
if (!context->tx[i])
return -ENOMEM;
+ if (is_streaming(context))
+ break;
}
context->mhi_dev = mhi_dev;
- INIT_WORK(&context->work, sahara_processing);
- context->image_table = aic100_image_table;
- context->table_size = ARRAY_SIZE(aic100_image_table);
+ INIT_WORK(&context->fw_work, sahara_processing);
+ INIT_WORK(&context->dump_work, sahara_dump_processing);
+ INIT_WORK(&context->read_data_work, sahara_read_data_processing);
+
context->active_image_id = SAHARA_IMAGE_ID_NONE;
dev_set_drvdata(&mhi_dev->dev, context);
@@ -405,21 +879,33 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
{
struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
- cancel_work_sync(&context->work);
+ cancel_work_sync(&context->fw_work);
+ cancel_work_sync(&context->dump_work);
+ vfree(context->mem_dump);
sahara_release_image(context);
mhi_unprepare_from_transfer(mhi_dev);
}
static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
+ struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
+
+ if (!mhi_result->transaction_status && context->read_data_length && is_streaming(context))
+ schedule_work(&context->read_data_work);
}
static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
- if (!mhi_result->transaction_status)
- schedule_work(&context->work);
+ if (!mhi_result->transaction_status) {
+ context->rx_size = mhi_result->bytes_xferd;
+ if (context->is_mem_dump_mode)
+ schedule_work(&context->dump_work);
+ else
+ schedule_work(&context->fw_work);
+ }
+
}
static const struct mhi_device_id sahara_mhi_match_table[] = {
diff --git a/drivers/accel/rocket/Kconfig b/drivers/accel/rocket/Kconfig
new file mode 100644
index 000000000000..16465abe0660
--- /dev/null
+++ b/drivers/accel/rocket/Kconfig
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_ROCKET
+ tristate "Rocket (support for Rockchip NPUs)"
+ depends on DRM_ACCEL
+ depends on (ARCH_ROCKCHIP && ARM64) || COMPILE_TEST
+ depends on ROCKCHIP_IOMMU || COMPILE_TEST
+ depends on MMU
+ select DRM_SCHED
+ select DRM_GEM_SHMEM_HELPER
+ help
+ Choose this option if you have a Rockchip SoC that contains a
+ compatible Neural Processing Unit (NPU), such as the RK3588. Called by
+ Rockchip either RKNN or RKNPU, it accelerates inference of neural
+ networks.
+
+ The interface exposed to userspace is described in
+ include/uapi/drm/rocket_accel.h and is used by the Rocket userspace
+ driver in Mesa3D.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rocket.
diff --git a/drivers/accel/rocket/Makefile b/drivers/accel/rocket/Makefile
new file mode 100644
index 000000000000..3713dfe223d6
--- /dev/null
+++ b/drivers/accel/rocket/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_DRM_ACCEL_ROCKET) := rocket.o
+
+rocket-y := \
+ rocket_core.o \
+ rocket_device.o \
+ rocket_drv.o \
+ rocket_gem.o \
+ rocket_job.o
diff --git a/drivers/accel/rocket/rocket_core.c b/drivers/accel/rocket/rocket_core.c
new file mode 100644
index 000000000000..abe7719c1db4
--- /dev/null
+++ b/drivers/accel/rocket/rocket_core.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include "rocket_core.h"
+#include "rocket_job.h"
+
+int rocket_core_init(struct rocket_core *core)
+{
+ struct device *dev = core->dev;
+ struct platform_device *pdev = to_platform_device(dev);
+ u32 version;
+ int err = 0;
+
+ core->resets[0].id = "srst_a";
+ core->resets[1].id = "srst_h";
+ err = devm_reset_control_bulk_get_exclusive(&pdev->dev, ARRAY_SIZE(core->resets),
+ core->resets);
+ if (err)
+ return dev_err_probe(dev, err, "failed to get resets for core %d\n", core->index);
+
+ err = devm_clk_bulk_get(dev, ARRAY_SIZE(core->clks), core->clks);
+ if (err)
+ return dev_err_probe(dev, err, "failed to get clocks for core %d\n", core->index);
+
+ core->pc_iomem = devm_platform_ioremap_resource_byname(pdev, "pc");
+ if (IS_ERR(core->pc_iomem)) {
+ dev_err(dev, "couldn't find PC registers %ld\n", PTR_ERR(core->pc_iomem));
+ return PTR_ERR(core->pc_iomem);
+ }
+
+ core->cna_iomem = devm_platform_ioremap_resource_byname(pdev, "cna");
+ if (IS_ERR(core->cna_iomem)) {
+ dev_err(dev, "couldn't find CNA registers %ld\n", PTR_ERR(core->cna_iomem));
+ return PTR_ERR(core->cna_iomem);
+ }
+
+ core->core_iomem = devm_platform_ioremap_resource_byname(pdev, "core");
+ if (IS_ERR(core->core_iomem)) {
+ dev_err(dev, "couldn't find CORE registers %ld\n", PTR_ERR(core->core_iomem));
+ return PTR_ERR(core->core_iomem);
+ }
+
+ dma_set_max_seg_size(dev, UINT_MAX);
+
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+ if (err)
+ return err;
+
+ core->iommu_group = iommu_group_get(dev);
+
+ err = rocket_job_init(core);
+ if (err)
+ return err;
+
+ pm_runtime_use_autosuspend(dev);
+
+ /*
+ * As this NPU will be most often used as part of a media pipeline that
+ * ends presenting in a display, choose 50 ms (~3 frames at 60Hz) as an
+ * autosuspend delay as that will keep the device powered up while the
+ * pipeline is running.
+ */
+ pm_runtime_set_autosuspend_delay(dev, 50);
+
+ pm_runtime_enable(dev);
+
+ err = pm_runtime_resume_and_get(dev);
+ if (err) {
+ rocket_job_fini(core);
+ return err;
+ }
+
+ version = rocket_pc_readl(core, VERSION);
+ version += rocket_pc_readl(core, VERSION_NUM) & 0xffff;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+
+ dev_info(dev, "Rockchip NPU core %d version: %d\n", core->index, version);
+
+ return 0;
+}
+
+void rocket_core_fini(struct rocket_core *core)
+{
+ pm_runtime_dont_use_autosuspend(core->dev);
+ pm_runtime_disable(core->dev);
+ iommu_group_put(core->iommu_group);
+ core->iommu_group = NULL;
+ rocket_job_fini(core);
+}
+
+void rocket_core_reset(struct rocket_core *core)
+{
+ reset_control_bulk_assert(ARRAY_SIZE(core->resets), core->resets);
+
+ udelay(10);
+
+ reset_control_bulk_deassert(ARRAY_SIZE(core->resets), core->resets);
+}
diff --git a/drivers/accel/rocket/rocket_core.h b/drivers/accel/rocket/rocket_core.h
new file mode 100644
index 000000000000..f6d7382854ca
--- /dev/null
+++ b/drivers/accel/rocket/rocket_core.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#ifndef __ROCKET_CORE_H__
+#define __ROCKET_CORE_H__
+
+#include <drm/gpu_scheduler.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/mutex_types.h>
+#include <linux/reset.h>
+
+#include "rocket_registers.h"
+
+#define rocket_pc_readl(core, reg) \
+ readl((core)->pc_iomem + (REG_PC_##reg))
+#define rocket_pc_writel(core, reg, value) \
+ writel(value, (core)->pc_iomem + (REG_PC_##reg))
+
+#define rocket_cna_readl(core, reg) \
+ readl((core)->cna_iomem + (REG_CNA_##reg) - REG_CNA_S_STATUS)
+#define rocket_cna_writel(core, reg, value) \
+ writel(value, (core)->cna_iomem + (REG_CNA_##reg) - REG_CNA_S_STATUS)
+
+#define rocket_core_readl(core, reg) \
+ readl((core)->core_iomem + (REG_CORE_##reg) - REG_CORE_S_STATUS)
+#define rocket_core_writel(core, reg, value) \
+ writel(value, (core)->core_iomem + (REG_CORE_##reg) - REG_CORE_S_STATUS)
+
+struct rocket_core {
+ struct device *dev;
+ struct rocket_device *rdev;
+ unsigned int index;
+
+ int irq;
+ void __iomem *pc_iomem;
+ void __iomem *cna_iomem;
+ void __iomem *core_iomem;
+ struct clk_bulk_data clks[4];
+ struct reset_control_bulk_data resets[2];
+
+ struct iommu_group *iommu_group;
+
+ struct mutex job_lock;
+ struct rocket_job *in_flight_job;
+
+ spinlock_t fence_lock;
+
+ struct {
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ atomic_t pending;
+ } reset;
+
+ struct drm_gpu_scheduler sched;
+ u64 fence_context;
+ u64 emit_seqno;
+};
+
+int rocket_core_init(struct rocket_core *core);
+void rocket_core_fini(struct rocket_core *core);
+void rocket_core_reset(struct rocket_core *core);
+
+#endif
diff --git a/drivers/accel/rocket/rocket_device.c b/drivers/accel/rocket/rocket_device.c
new file mode 100644
index 000000000000..46e6ee1e72c5
--- /dev/null
+++ b/drivers/accel/rocket/rocket_device.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#include <drm/drm_drv.h>
+#include <linux/array_size.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+
+#include "rocket_device.h"
+
+struct rocket_device *rocket_device_init(struct platform_device *pdev,
+ const struct drm_driver *rocket_drm_driver)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *core_node;
+ struct rocket_device *rdev;
+ struct drm_device *ddev;
+ unsigned int num_cores = 0;
+ int err;
+
+ rdev = devm_drm_dev_alloc(dev, rocket_drm_driver, struct rocket_device, ddev);
+ if (IS_ERR(rdev))
+ return rdev;
+
+ ddev = &rdev->ddev;
+ dev_set_drvdata(dev, rdev);
+
+ for_each_compatible_node(core_node, NULL, "rockchip,rk3588-rknn-core")
+ if (of_device_is_available(core_node))
+ num_cores++;
+
+ rdev->cores = devm_kcalloc(dev, num_cores, sizeof(*rdev->cores), GFP_KERNEL);
+ if (!rdev->cores)
+ return ERR_PTR(-ENOMEM);
+
+ dma_set_max_seg_size(dev, UINT_MAX);
+
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+ if (err)
+ return ERR_PTR(err);
+
+ err = devm_mutex_init(dev, &rdev->sched_lock);
+ if (err)
+ return ERR_PTR(-ENOMEM);
+
+ err = drm_dev_register(ddev, 0);
+ if (err)
+ return ERR_PTR(err);
+
+ return rdev;
+}
+
+void rocket_device_fini(struct rocket_device *rdev)
+{
+ WARN_ON(rdev->num_cores > 0);
+
+ drm_dev_unregister(&rdev->ddev);
+}
diff --git a/drivers/accel/rocket/rocket_device.h b/drivers/accel/rocket/rocket_device.h
new file mode 100644
index 000000000000..ce662abc01d3
--- /dev/null
+++ b/drivers/accel/rocket/rocket_device.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#ifndef __ROCKET_DEVICE_H__
+#define __ROCKET_DEVICE_H__
+
+#include <drm/drm_device.h>
+#include <linux/clk.h>
+#include <linux/container_of.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+
+#include "rocket_core.h"
+
+struct rocket_device {
+ struct drm_device ddev;
+
+ struct mutex sched_lock;
+
+ struct rocket_core *cores;
+ unsigned int num_cores;
+};
+
+struct rocket_device *rocket_device_init(struct platform_device *pdev,
+ const struct drm_driver *rocket_drm_driver);
+void rocket_device_fini(struct rocket_device *rdev);
+#define to_rocket_device(drm_dev) \
+ ((struct rocket_device *)(container_of((drm_dev), struct rocket_device, ddev)))
+
+#endif /* __ROCKET_DEVICE_H__ */
diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c
new file mode 100644
index 000000000000..5c0b63f0a8f0
--- /dev/null
+++ b/drivers/accel/rocket/rocket_drv.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+#include <drm/rocket_accel.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/iommu.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "rocket_drv.h"
+#include "rocket_gem.h"
+#include "rocket_job.h"
+
+/*
+ * Facade device, used to expose a single DRM device to userspace, that
+ * schedules jobs to any RKNN cores in the system.
+ */
+static struct platform_device *drm_dev;
+static struct rocket_device *rdev;
+
+static void
+rocket_iommu_domain_destroy(struct kref *kref)
+{
+ struct rocket_iommu_domain *domain = container_of(kref, struct rocket_iommu_domain, kref);
+
+ iommu_domain_free(domain->domain);
+ domain->domain = NULL;
+ kfree(domain);
+}
+
+static struct rocket_iommu_domain*
+rocket_iommu_domain_create(struct device *dev)
+{
+ struct rocket_iommu_domain *domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+ void *err;
+
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ domain->domain = iommu_paging_domain_alloc(dev);
+ if (IS_ERR(domain->domain)) {
+ err = ERR_CAST(domain->domain);
+ kfree(domain);
+ return err;
+ }
+ kref_init(&domain->kref);
+
+ return domain;
+}
+
+struct rocket_iommu_domain *
+rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv)
+{
+ kref_get(&rocket_priv->domain->kref);
+ return rocket_priv->domain;
+}
+
+void
+rocket_iommu_domain_put(struct rocket_iommu_domain *domain)
+{
+ kref_put(&domain->kref, rocket_iommu_domain_destroy);
+}
+
+static int
+rocket_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct rocket_device *rdev = to_rocket_device(dev);
+ struct rocket_file_priv *rocket_priv;
+ u64 start, end;
+ int ret;
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ rocket_priv = kzalloc(sizeof(*rocket_priv), GFP_KERNEL);
+ if (!rocket_priv) {
+ ret = -ENOMEM;
+ goto err_put_mod;
+ }
+
+ rocket_priv->rdev = rdev;
+ rocket_priv->domain = rocket_iommu_domain_create(rdev->cores[0].dev);
+ if (IS_ERR(rocket_priv->domain)) {
+ ret = PTR_ERR(rocket_priv->domain);
+ goto err_free;
+ }
+
+ file->driver_priv = rocket_priv;
+
+ start = rocket_priv->domain->domain->geometry.aperture_start;
+ end = rocket_priv->domain->domain->geometry.aperture_end;
+ drm_mm_init(&rocket_priv->mm, start, end - start + 1);
+ mutex_init(&rocket_priv->mm_lock);
+
+ ret = rocket_job_open(rocket_priv);
+ if (ret)
+ goto err_mm_takedown;
+
+ return 0;
+
+err_mm_takedown:
+ mutex_destroy(&rocket_priv->mm_lock);
+ drm_mm_takedown(&rocket_priv->mm);
+ rocket_iommu_domain_put(rocket_priv->domain);
+err_free:
+ kfree(rocket_priv);
+err_put_mod:
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static void
+rocket_postclose(struct drm_device *dev, struct drm_file *file)
+{
+ struct rocket_file_priv *rocket_priv = file->driver_priv;
+
+ rocket_job_close(rocket_priv);
+ mutex_destroy(&rocket_priv->mm_lock);
+ drm_mm_takedown(&rocket_priv->mm);
+ rocket_iommu_domain_put(rocket_priv->domain);
+ kfree(rocket_priv);
+ module_put(THIS_MODULE);
+}
+
+static const struct drm_ioctl_desc rocket_drm_driver_ioctls[] = {
+#define ROCKET_IOCTL(n, func) \
+ DRM_IOCTL_DEF_DRV(ROCKET_##n, rocket_ioctl_##func, 0)
+
+ ROCKET_IOCTL(CREATE_BO, create_bo),
+ ROCKET_IOCTL(SUBMIT, submit),
+ ROCKET_IOCTL(PREP_BO, prep_bo),
+ ROCKET_IOCTL(FINI_BO, fini_bo),
+};
+
+DEFINE_DRM_ACCEL_FOPS(rocket_accel_driver_fops);
+
+/*
+ * Rocket driver version:
+ * - 1.0 - initial interface
+ */
+static const struct drm_driver rocket_drm_driver = {
+ .driver_features = DRIVER_COMPUTE_ACCEL | DRIVER_GEM,
+ .open = rocket_open,
+ .postclose = rocket_postclose,
+ .gem_create_object = rocket_gem_create_object,
+ .ioctls = rocket_drm_driver_ioctls,
+ .num_ioctls = ARRAY_SIZE(rocket_drm_driver_ioctls),
+ .fops = &rocket_accel_driver_fops,
+ .name = "rocket",
+ .desc = "rocket DRM",
+};
+
+static int rocket_probe(struct platform_device *pdev)
+{
+ if (rdev == NULL) {
+ /* First core probing, initialize DRM device. */
+ rdev = rocket_device_init(drm_dev, &rocket_drm_driver);
+ if (IS_ERR(rdev)) {
+ dev_err(&pdev->dev, "failed to initialize rocket device\n");
+ return PTR_ERR(rdev);
+ }
+ }
+
+ unsigned int core = rdev->num_cores;
+
+ dev_set_drvdata(&pdev->dev, rdev);
+
+ rdev->cores[core].rdev = rdev;
+ rdev->cores[core].dev = &pdev->dev;
+ rdev->cores[core].index = core;
+
+ rdev->num_cores++;
+
+ return rocket_core_init(&rdev->cores[core]);
+}
+
+static void rocket_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+
+ for (unsigned int core = 0; core < rdev->num_cores; core++) {
+ if (rdev->cores[core].dev == dev) {
+ rocket_core_fini(&rdev->cores[core]);
+ rdev->num_cores--;
+ break;
+ }
+ }
+
+ if (rdev->num_cores == 0) {
+ /* Last core removed, deinitialize DRM device. */
+ rocket_device_fini(rdev);
+ rdev = NULL;
+ }
+}
+
+static const struct of_device_id dt_match[] = {
+ { .compatible = "rockchip,rk3588-rknn-core" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static int find_core_for_dev(struct device *dev)
+{
+ struct rocket_device *rdev = dev_get_drvdata(dev);
+
+ for (unsigned int core = 0; core < rdev->num_cores; core++) {
+ if (dev == rdev->cores[core].dev)
+ return core;
+ }
+
+ return -1;
+}
+
+static int rocket_device_runtime_resume(struct device *dev)
+{
+ struct rocket_device *rdev = dev_get_drvdata(dev);
+ int core = find_core_for_dev(dev);
+ int err = 0;
+
+ if (core < 0)
+ return -ENODEV;
+
+ err = clk_bulk_prepare_enable(ARRAY_SIZE(rdev->cores[core].clks), rdev->cores[core].clks);
+ if (err) {
+ dev_err(dev, "failed to enable (%d) clocks for core %d\n", err, core);
+ return err;
+ }
+
+ return 0;
+}
+
+static int rocket_device_runtime_suspend(struct device *dev)
+{
+ struct rocket_device *rdev = dev_get_drvdata(dev);
+ int core = find_core_for_dev(dev);
+
+ if (core < 0)
+ return -ENODEV;
+
+ if (!rocket_job_is_idle(&rdev->cores[core]))
+ return -EBUSY;
+
+ clk_bulk_disable_unprepare(ARRAY_SIZE(rdev->cores[core].clks), rdev->cores[core].clks);
+
+ return 0;
+}
+
+EXPORT_GPL_DEV_PM_OPS(rocket_pm_ops) = {
+ RUNTIME_PM_OPS(rocket_device_runtime_suspend, rocket_device_runtime_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+};
+
+static struct platform_driver rocket_driver = {
+ .probe = rocket_probe,
+ .remove = rocket_remove,
+ .driver = {
+ .name = "rocket",
+ .pm = pm_ptr(&rocket_pm_ops),
+ .of_match_table = dt_match,
+ },
+};
+
+static int __init rocket_register(void)
+{
+ drm_dev = platform_device_register_simple("rknn", -1, NULL, 0);
+ if (IS_ERR(drm_dev))
+ return PTR_ERR(drm_dev);
+
+ return platform_driver_register(&rocket_driver);
+}
+
+static void __exit rocket_unregister(void)
+{
+ platform_driver_unregister(&rocket_driver);
+
+ platform_device_unregister(drm_dev);
+}
+
+module_init(rocket_register);
+module_exit(rocket_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DRM driver for the Rockchip NPU IP");
+MODULE_AUTHOR("Tomeu Vizoso");
diff --git a/drivers/accel/rocket/rocket_drv.h b/drivers/accel/rocket/rocket_drv.h
new file mode 100644
index 000000000000..2c673bb99ccc
--- /dev/null
+++ b/drivers/accel/rocket/rocket_drv.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#ifndef __ROCKET_DRV_H__
+#define __ROCKET_DRV_H__
+
+#include <drm/drm_mm.h>
+#include <drm/gpu_scheduler.h>
+
+#include "rocket_device.h"
+
+extern const struct dev_pm_ops rocket_pm_ops;
+
+struct rocket_iommu_domain {
+ struct iommu_domain *domain;
+ struct kref kref;
+};
+
+struct rocket_file_priv {
+ struct rocket_device *rdev;
+
+ struct rocket_iommu_domain *domain;
+ struct drm_mm mm;
+ struct mutex mm_lock;
+
+ struct drm_sched_entity sched_entity;
+};
+
+struct rocket_iommu_domain *rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv);
+void rocket_iommu_domain_put(struct rocket_iommu_domain *domain);
+
+#endif
diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
new file mode 100644
index 000000000000..624c4ecf5a34
--- /dev/null
+++ b/drivers/accel/rocket/rocket_gem.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_utils.h>
+#include <drm/rocket_accel.h>
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+
+#include "rocket_drv.h"
+#include "rocket_gem.h"
+
+static void rocket_gem_bo_free(struct drm_gem_object *obj)
+{
+ struct rocket_gem_object *bo = to_rocket_bo(obj);
+ struct rocket_file_priv *rocket_priv = bo->driver_priv;
+ size_t unmapped;
+
+ drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
+
+ unmapped = iommu_unmap(bo->domain->domain, bo->mm.start, bo->size);
+ drm_WARN_ON(obj->dev, unmapped != bo->size);
+
+ mutex_lock(&rocket_priv->mm_lock);
+ drm_mm_remove_node(&bo->mm);
+ mutex_unlock(&rocket_priv->mm_lock);
+
+ rocket_iommu_domain_put(bo->domain);
+ bo->domain = NULL;
+
+ drm_gem_shmem_free(&bo->base);
+}
+
+static const struct drm_gem_object_funcs rocket_gem_funcs = {
+ .free = rocket_gem_bo_free,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = drm_gem_shmem_object_mmap,
+ .vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t size)
+{
+ struct rocket_gem_object *obj;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ obj->base.base.funcs = &rocket_gem_funcs;
+
+ return &obj->base.base;
+}
+
+int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct rocket_file_priv *rocket_priv = file->driver_priv;
+ struct drm_rocket_create_bo *args = data;
+ struct drm_gem_shmem_object *shmem_obj;
+ struct rocket_gem_object *rkt_obj;
+ struct drm_gem_object *gem_obj;
+ struct sg_table *sgt;
+ int ret;
+
+ shmem_obj = drm_gem_shmem_create(dev, args->size);
+ if (IS_ERR(shmem_obj))
+ return PTR_ERR(shmem_obj);
+
+ gem_obj = &shmem_obj->base;
+ rkt_obj = to_rocket_bo(gem_obj);
+
+ rkt_obj->driver_priv = rocket_priv;
+ rkt_obj->domain = rocket_iommu_domain_get(rocket_priv);
+ rkt_obj->size = args->size;
+ rkt_obj->offset = 0;
+
+ ret = drm_gem_handle_create(file, gem_obj, &args->handle);
+ drm_gem_object_put(gem_obj);
+ if (ret)
+ goto err;
+
+ sgt = drm_gem_shmem_get_pages_sgt(shmem_obj);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto err;
+ }
+
+ mutex_lock(&rocket_priv->mm_lock);
+ ret = drm_mm_insert_node_generic(&rocket_priv->mm, &rkt_obj->mm,
+ rkt_obj->size, PAGE_SIZE,
+ 0, 0);
+ mutex_unlock(&rocket_priv->mm_lock);
+
+ ret = iommu_map_sgtable(rocket_priv->domain->domain,
+ rkt_obj->mm.start,
+ shmem_obj->sgt,
+ IOMMU_READ | IOMMU_WRITE);
+ if (ret < 0 || ret < args->size) {
+ drm_err(dev, "failed to map buffer: size=%d request_size=%u\n",
+ ret, args->size);
+ ret = -ENOMEM;
+ goto err_remove_node;
+ }
+
+ /* iommu_map_sgtable might have aligned the size */
+ rkt_obj->size = ret;
+ args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+ args->dma_address = rkt_obj->mm.start;
+
+ return 0;
+
+err_remove_node:
+ mutex_lock(&rocket_priv->mm_lock);
+ drm_mm_remove_node(&rkt_obj->mm);
+ mutex_unlock(&rocket_priv->mm_lock);
+
+err:
+ drm_gem_shmem_object_free(gem_obj);
+
+ return ret;
+}
+
+int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_rocket_prep_bo *args = data;
+ unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+ struct drm_gem_object *gem_obj;
+ struct drm_gem_shmem_object *shmem_obj;
+ long ret = 0;
+
+ if (args->reserved != 0) {
+ drm_dbg(dev, "Reserved field in drm_rocket_prep_bo struct should be 0.\n");
+ return -EINVAL;
+ }
+
+ gem_obj = drm_gem_object_lookup(file, args->handle);
+ if (!gem_obj)
+ return -ENOENT;
+
+ ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_WRITE, true, timeout);
+ if (!ret)
+ ret = timeout ? -ETIMEDOUT : -EBUSY;
+
+ shmem_obj = &to_rocket_bo(gem_obj)->base;
+
+ dma_sync_sgtable_for_cpu(dev->dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+
+ drm_gem_object_put(gem_obj);
+
+ return ret;
+}
+
+int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_rocket_fini_bo *args = data;
+ struct drm_gem_shmem_object *shmem_obj;
+ struct rocket_gem_object *rkt_obj;
+ struct drm_gem_object *gem_obj;
+
+ if (args->reserved != 0) {
+ drm_dbg(dev, "Reserved field in drm_rocket_fini_bo struct should be 0.\n");
+ return -EINVAL;
+ }
+
+ gem_obj = drm_gem_object_lookup(file, args->handle);
+ if (!gem_obj)
+ return -ENOENT;
+
+ rkt_obj = to_rocket_bo(gem_obj);
+ shmem_obj = &rkt_obj->base;
+
+ dma_sync_sgtable_for_device(dev->dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+
+ drm_gem_object_put(gem_obj);
+
+ return 0;
+}
diff --git a/drivers/accel/rocket/rocket_gem.h b/drivers/accel/rocket/rocket_gem.h
new file mode 100644
index 000000000000..240430334509
--- /dev/null
+++ b/drivers/accel/rocket/rocket_gem.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#ifndef __ROCKET_GEM_H__
+#define __ROCKET_GEM_H__
+
+#include <drm/drm_gem_shmem_helper.h>
+
+struct rocket_gem_object {
+ struct drm_gem_shmem_object base;
+
+ struct rocket_file_priv *driver_priv;
+
+ struct rocket_iommu_domain *domain;
+ struct drm_mm_node mm;
+ size_t size;
+ u32 offset;
+};
+
+struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t size);
+
+int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file);
+
+int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file);
+
+int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file);
+
+static inline
+struct rocket_gem_object *to_rocket_bo(struct drm_gem_object *obj)
+{
+ return container_of(to_drm_gem_shmem_obj(obj), struct rocket_gem_object, base);
+}
+
+#endif
diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c
new file mode 100644
index 000000000000..acd606160dc9
--- /dev/null
+++ b/drivers/accel/rocket/rocket_job.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#include <drm/drm_print.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/rocket_accel.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "rocket_core.h"
+#include "rocket_device.h"
+#include "rocket_drv.h"
+#include "rocket_job.h"
+#include "rocket_registers.h"
+
+#define JOB_TIMEOUT_MS 500
+
+static struct rocket_job *
+to_rocket_job(struct drm_sched_job *sched_job)
+{
+ return container_of(sched_job, struct rocket_job, base);
+}
+
+static const char *rocket_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "rocket";
+}
+
+static const char *rocket_fence_get_timeline_name(struct dma_fence *fence)
+{
+ return "rockchip-npu";
+}
+
+static const struct dma_fence_ops rocket_fence_ops = {
+ .get_driver_name = rocket_fence_get_driver_name,
+ .get_timeline_name = rocket_fence_get_timeline_name,
+};
+
+static struct dma_fence *rocket_fence_create(struct rocket_core *core)
+{
+ struct dma_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return ERR_PTR(-ENOMEM);
+
+ dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock,
+ core->fence_context, ++core->emit_seqno);
+
+ return fence;
+}
+
+static int
+rocket_copy_tasks(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_rocket_job *job,
+ struct rocket_job *rjob)
+{
+ int ret = 0;
+
+ if (job->task_struct_size < sizeof(struct drm_rocket_task))
+ return -EINVAL;
+
+ rjob->task_count = job->task_count;
+
+ if (!rjob->task_count)
+ return 0;
+
+ rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL);
+ if (!rjob->tasks) {
+ drm_dbg(dev, "Failed to allocate task array\n");
+ return -ENOMEM;
+ }
+
+ for (int i = 0; i < rjob->task_count; i++) {
+ struct drm_rocket_task task = {0};
+
+ if (copy_from_user(&task,
+ u64_to_user_ptr(job->tasks) + i * job->task_struct_size,
+ sizeof(task))) {
+ drm_dbg(dev, "Failed to copy incoming tasks\n");
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ if (task.regcmd_count == 0) {
+ drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ rjob->tasks[i].regcmd = task.regcmd;
+ rjob->tasks[i].regcmd_count = task.regcmd_count;
+ }
+
+ return 0;
+
+fail:
+ kvfree(rjob->tasks);
+ return ret;
+}
+
+static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job)
+{
+ struct rocket_task *task;
+ unsigned int extra_bit;
+
+ /* Don't queue the job if a reset is in progress */
+ if (atomic_read(&core->reset.pending))
+ return;
+
+ /* GO ! */
+
+ task = &job->tasks[job->next_task_idx];
+ job->next_task_idx++;
+
+ rocket_pc_writel(core, BASE_ADDRESS, 0x1);
+
+ /* From rknpu, in the TRM this bit is marked as reserved */
+ extra_bit = 0x10000000 * core->index;
+ rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) |
+ CNA_S_POINTER_EXECUTER_PP_EN(1) |
+ CNA_S_POINTER_POINTER_PP_MODE(1) |
+ extra_bit);
+
+ rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) |
+ CORE_S_POINTER_EXECUTER_PP_EN(1) |
+ CORE_S_POINTER_POINTER_PP_MODE(1) |
+ extra_bit);
+
+ rocket_pc_writel(core, BASE_ADDRESS, task->regcmd);
+ rocket_pc_writel(core, REGISTER_AMOUNTS,
+ PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1));
+
+ rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1);
+ rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1);
+
+ rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) |
+ PC_TASK_CON_TASK_COUNT_CLEAR(1) |
+ PC_TASK_CON_TASK_NUMBER(1) |
+ PC_TASK_CON_TASK_PP_EN(1));
+
+ rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0));
+
+ rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1));
+
+ dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d", task->regcmd, core->index);
+}
+
+static int rocket_acquire_object_fences(struct drm_gem_object **bos,
+ int bo_count,
+ struct drm_sched_job *job,
+ bool is_write)
+{
+ int i, ret;
+
+ for (i = 0; i < bo_count; i++) {
+ ret = dma_resv_reserve_fences(bos[i]->resv, 1);
+ if (ret)
+ return ret;
+
+ ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
+ is_write);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void rocket_attach_object_fences(struct drm_gem_object **bos,
+ int bo_count,
+ struct dma_fence *fence)
+{
+ int i;
+
+ for (i = 0; i < bo_count; i++)
+ dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
+}
+
+static int rocket_job_push(struct rocket_job *job)
+{
+ struct rocket_device *rdev = job->rdev;
+ struct drm_gem_object **bos;
+ struct ww_acquire_ctx acquire_ctx;
+ int ret = 0;
+
+ bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *),
+ GFP_KERNEL);
+ memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *));
+ memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *));
+
+ ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
+ if (ret)
+ goto err;
+
+ scoped_guard(mutex, &rdev->sched_lock) {
+ drm_sched_job_arm(&job->base);
+
+ job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+ ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false);
+ if (ret)
+ goto err_unlock;
+
+ ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true);
+ if (ret)
+ goto err_unlock;
+
+ kref_get(&job->refcount); /* put by scheduler job completion */
+
+ drm_sched_entity_push_job(&job->base);
+ }
+
+ rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence);
+
+err_unlock:
+ drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
+err:
+ kvfree(bos);
+
+ return ret;
+}
+
+static void rocket_job_cleanup(struct kref *ref)
+{
+ struct rocket_job *job = container_of(ref, struct rocket_job,
+ refcount);
+ unsigned int i;
+
+ rocket_iommu_domain_put(job->domain);
+
+ dma_fence_put(job->done_fence);
+ dma_fence_put(job->inference_done_fence);
+
+ if (job->in_bos) {
+ for (i = 0; i < job->in_bo_count; i++)
+ drm_gem_object_put(job->in_bos[i]);
+
+ kvfree(job->in_bos);
+ }
+
+ if (job->out_bos) {
+ for (i = 0; i < job->out_bo_count; i++)
+ drm_gem_object_put(job->out_bos[i]);
+
+ kvfree(job->out_bos);
+ }
+
+ kvfree(job->tasks);
+
+ kfree(job);
+}
+
+static void rocket_job_put(struct rocket_job *job)
+{
+ kref_put(&job->refcount, rocket_job_cleanup);
+}
+
+static void rocket_job_free(struct drm_sched_job *sched_job)
+{
+ struct rocket_job *job = to_rocket_job(sched_job);
+
+ drm_sched_job_cleanup(sched_job);
+
+ rocket_job_put(job);
+}
+
+static struct rocket_core *sched_to_core(struct rocket_device *rdev,
+ struct drm_gpu_scheduler *sched)
+{
+ unsigned int core;
+
+ for (core = 0; core < rdev->num_cores; core++) {
+ if (&rdev->cores[core].sched == sched)
+ return &rdev->cores[core];
+ }
+
+ return NULL;
+}
+
+static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
+{
+ struct rocket_job *job = to_rocket_job(sched_job);
+ struct rocket_device *rdev = job->rdev;
+ struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
+ struct dma_fence *fence = NULL;
+ int ret;
+
+ if (unlikely(job->base.s_fence->finished.error))
+ return NULL;
+
+ /*
+ * Nothing to execute: can happen if the job has finished while
+ * we were resetting the NPU.
+ */
+ if (job->next_task_idx == job->task_count)
+ return NULL;
+
+ fence = rocket_fence_create(core);
+ if (IS_ERR(fence))
+ return fence;
+
+ if (job->done_fence)
+ dma_fence_put(job->done_fence);
+ job->done_fence = dma_fence_get(fence);
+
+ ret = pm_runtime_get_sync(core->dev);
+ if (ret < 0)
+ return fence;
+
+ ret = iommu_attach_group(job->domain->domain, core->iommu_group);
+ if (ret < 0)
+ return fence;
+
+ scoped_guard(mutex, &core->job_lock) {
+ core->in_flight_job = job;
+ rocket_job_hw_submit(core, job);
+ }
+
+ return fence;
+}
+
+static void rocket_job_handle_irq(struct rocket_core *core)
+{
+ pm_runtime_mark_last_busy(core->dev);
+
+ rocket_pc_writel(core, OPERATION_ENABLE, 0x0);
+ rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff);
+
+ scoped_guard(mutex, &core->job_lock)
+ if (core->in_flight_job) {
+ if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) {
+ rocket_job_hw_submit(core, core->in_flight_job);
+ return;
+ }
+
+ iommu_detach_group(NULL, iommu_group_get(core->dev));
+ dma_fence_signal(core->in_flight_job->done_fence);
+ pm_runtime_put_autosuspend(core->dev);
+ core->in_flight_job = NULL;
+ }
+}
+
+static void
+rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
+{
+ if (!atomic_read(&core->reset.pending))
+ return;
+
+ drm_sched_stop(&core->sched, bad);
+
+ /*
+ * Remaining interrupts have been handled, but we might still have
+ * stuck jobs. Let's make sure the PM counters stay balanced by
+ * manually calling pm_runtime_put_noidle().
+ */
+ scoped_guard(mutex, &core->job_lock) {
+ if (core->in_flight_job)
+ pm_runtime_put_noidle(core->dev);
+
+ iommu_detach_group(NULL, core->iommu_group);
+
+ core->in_flight_job = NULL;
+ }
+
+ /* Proceed with reset now. */
+ rocket_core_reset(core);
+
+ /* NPU has been reset, we can clear the reset pending bit. */
+ atomic_set(&core->reset.pending, 0);
+
+ /* Restart the scheduler */
+ drm_sched_start(&core->sched, 0);
+}
+
+static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct rocket_job *job = to_rocket_job(sched_job);
+ struct rocket_device *rdev = job->rdev;
+ struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
+
+ dev_err(core->dev, "NPU job timed out");
+
+ atomic_set(&core->reset.pending, 1);
+ rocket_reset(core, sched_job);
+
+ return DRM_GPU_SCHED_STAT_RESET;
+}
+
+static void rocket_reset_work(struct work_struct *work)
+{
+ struct rocket_core *core;
+
+ core = container_of(work, struct rocket_core, reset.work);
+ rocket_reset(core, NULL);
+}
+
+static const struct drm_sched_backend_ops rocket_sched_ops = {
+ .run_job = rocket_job_run,
+ .timedout_job = rocket_job_timedout,
+ .free_job = rocket_job_free
+};
+
+static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
+{
+ struct rocket_core *core = data;
+
+ rocket_job_handle_irq(core);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rocket_job_irq_handler(int irq, void *data)
+{
+ struct rocket_core *core = data;
+ u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);
+
+ WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
+ WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR);
+
+ if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
+ raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
+ return IRQ_NONE;
+
+ rocket_pc_writel(core, INTERRUPT_MASK, 0x0);
+
+ return IRQ_WAKE_THREAD;
+}
+
+int rocket_job_init(struct rocket_core *core)
+{
+ struct drm_sched_init_args args = {
+ .ops = &rocket_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .credit_limit = 1,
+ .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+ .name = dev_name(core->dev),
+ .dev = core->dev,
+ };
+ int ret;
+
+ INIT_WORK(&core->reset.work, rocket_reset_work);
+ spin_lock_init(&core->fence_lock);
+ mutex_init(&core->job_lock);
+
+ core->irq = platform_get_irq(to_platform_device(core->dev), 0);
+ if (core->irq < 0)
+ return core->irq;
+
+ ret = devm_request_threaded_irq(core->dev, core->irq,
+ rocket_job_irq_handler,
+ rocket_job_irq_handler_thread,
+ IRQF_SHARED, dev_name(core->dev),
+ core);
+ if (ret) {
+ dev_err(core->dev, "failed to request job irq");
+ return ret;
+ }
+
+ core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index);
+ if (!core->reset.wq)
+ return -ENOMEM;
+
+ core->fence_context = dma_fence_context_alloc(1);
+
+ args.timeout_wq = core->reset.wq;
+ ret = drm_sched_init(&core->sched, &args);
+ if (ret) {
+ dev_err(core->dev, "Failed to create scheduler: %d.", ret);
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ drm_sched_fini(&core->sched);
+
+ destroy_workqueue(core->reset.wq);
+ return ret;
+}
+
+void rocket_job_fini(struct rocket_core *core)
+{
+ drm_sched_fini(&core->sched);
+
+ cancel_work_sync(&core->reset.work);
+ destroy_workqueue(core->reset.wq);
+}
+
+int rocket_job_open(struct rocket_file_priv *rocket_priv)
+{
+ struct rocket_device *rdev = rocket_priv->rdev;
+ struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores,
+ sizeof(*scheds),
+ GFP_KERNEL);
+ unsigned int core;
+ int ret;
+
+ for (core = 0; core < rdev->num_cores; core++)
+ scheds[core] = &rdev->cores[core].sched;
+
+ ret = drm_sched_entity_init(&rocket_priv->sched_entity,
+ DRM_SCHED_PRIORITY_NORMAL,
+ scheds,
+ rdev->num_cores, NULL);
+ if (WARN_ON(ret))
+ return ret;
+
+ return 0;
+}
+
+void rocket_job_close(struct rocket_file_priv *rocket_priv)
+{
+ struct drm_sched_entity *entity = &rocket_priv->sched_entity;
+
+ kfree(entity->sched_list);
+ drm_sched_entity_destroy(entity);
+}
+
+int rocket_job_is_idle(struct rocket_core *core)
+{
+ /* If there are any jobs in this HW queue, we're not idle */
+ if (atomic_read(&core->sched.credit_count))
+ return false;
+
+ return true;
+}
+
+static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
+ struct drm_rocket_job *job)
+{
+ struct rocket_device *rdev = to_rocket_device(dev);
+ struct rocket_file_priv *file_priv = file->driver_priv;
+ struct rocket_job *rjob = NULL;
+ int ret = 0;
+
+ if (job->task_count == 0)
+ return -EINVAL;
+
+ rjob = kzalloc(sizeof(*rjob), GFP_KERNEL);
+ if (!rjob)
+ return -ENOMEM;
+
+ kref_init(&rjob->refcount);
+
+ rjob->rdev = rdev;
+
+ ret = drm_sched_job_init(&rjob->base,
+ &file_priv->sched_entity,
+ 1, NULL, file->client_id);
+ if (ret)
+ goto out_put_job;
+
+ ret = rocket_copy_tasks(dev, file, job, rjob);
+ if (ret)
+ goto out_cleanup_job;
+
+ ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles),
+ job->in_bo_handle_count, &rjob->in_bos);
+ if (ret)
+ goto out_cleanup_job;
+
+ rjob->in_bo_count = job->in_bo_handle_count;
+
+ ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles),
+ job->out_bo_handle_count, &rjob->out_bos);
+ if (ret)
+ goto out_cleanup_job;
+
+ rjob->out_bo_count = job->out_bo_handle_count;
+
+ rjob->domain = rocket_iommu_domain_get(file_priv);
+
+ ret = rocket_job_push(rjob);
+ if (ret)
+ goto out_cleanup_job;
+
+out_cleanup_job:
+ if (ret)
+ drm_sched_job_cleanup(&rjob->base);
+out_put_job:
+ rocket_job_put(rjob);
+
+ return ret;
+}
+
+int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_rocket_submit *args = data;
+ struct drm_rocket_job *jobs;
+ int ret = 0;
+ unsigned int i = 0;
+
+ if (args->job_count == 0)
+ return 0;
+
+ if (args->job_struct_size < sizeof(struct drm_rocket_job)) {
+ drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n");
+ return -EINVAL;
+ }
+
+ if (args->reserved != 0) {
+ drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n");
+ return -EINVAL;
+ }
+
+ jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
+ if (!jobs) {
+ drm_dbg(dev, "Failed to allocate incoming job array\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < args->job_count; i++) {
+ if (copy_from_user(&jobs[i],
+ u64_to_user_ptr(args->jobs) + i * args->job_struct_size,
+ sizeof(*jobs))) {
+ ret = -EFAULT;
+ drm_dbg(dev, "Failed to copy incoming job array\n");
+ goto exit;
+ }
+ }
+
+
+ for (i = 0; i < args->job_count; i++)
+ rocket_ioctl_submit_job(dev, file, &jobs[i]);
+
+exit:
+ kvfree(jobs);
+
+ return ret;
+}
diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h
new file mode 100644
index 000000000000..4ae00feec3b9
--- /dev/null
+++ b/drivers/accel/rocket/rocket_job.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#ifndef __ROCKET_JOB_H__
+#define __ROCKET_JOB_H__
+
+#include <drm/drm_drv.h>
+#include <drm/gpu_scheduler.h>
+
+#include "rocket_core.h"
+#include "rocket_drv.h"
+
+struct rocket_task {
+ u64 regcmd;
+ u32 regcmd_count;
+};
+
+struct rocket_job {
+ struct drm_sched_job base;
+
+ struct rocket_device *rdev;
+
+ struct drm_gem_object **in_bos;
+ struct drm_gem_object **out_bos;
+
+ u32 in_bo_count;
+ u32 out_bo_count;
+
+ struct rocket_task *tasks;
+ u32 task_count;
+ u32 next_task_idx;
+
+ /* Fence to be signaled by drm-sched once its done with the job */
+ struct dma_fence *inference_done_fence;
+
+ /* Fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *done_fence;
+
+ struct rocket_iommu_domain *domain;
+
+ struct kref refcount;
+};
+
+int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file);
+
+int rocket_job_init(struct rocket_core *core);
+void rocket_job_fini(struct rocket_core *core);
+int rocket_job_open(struct rocket_file_priv *rocket_priv);
+void rocket_job_close(struct rocket_file_priv *rocket_priv);
+int rocket_job_is_idle(struct rocket_core *core);
+
+#endif
diff --git a/drivers/accel/rocket/rocket_registers.h b/drivers/accel/rocket/rocket_registers.h
new file mode 100644
index 000000000000..9aef614c3470
--- /dev/null
+++ b/drivers/accel/rocket/rocket_registers.h
@@ -0,0 +1,4404 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+
+#ifndef __ROCKET_REGISTERS_XML__
+#define __ROCKET_REGISTERS_XML__
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng gen_header.py tool in this git repository:
+http://gitlab.freedesktop.org/mesa/mesa/
+git clone https://gitlab.freedesktop.org/mesa/mesa.git
+
+The rules-ng-ng source files this header was generated from are:
+
+- /home/tomeu/src/mesa/src/gallium/drivers/rocket/registers.xml ( 60076 bytes, from Wed Jun 12 10:02:25 2024)
+
+Copyright (C) 2024-2025 by the following authors:
+- Tomeu Vizoso <tomeu@tomeuvizoso.net>
+*/
+
+#define REG_PC_VERSION 0x00000000
+#define PC_VERSION_VERSION__MASK 0xffffffff
+#define PC_VERSION_VERSION__SHIFT 0
+static inline uint32_t PC_VERSION_VERSION(uint32_t val)
+{
+ return ((val) << PC_VERSION_VERSION__SHIFT) & PC_VERSION_VERSION__MASK;
+}
+
+#define REG_PC_VERSION_NUM 0x00000004
+#define PC_VERSION_NUM_VERSION_NUM__MASK 0xffffffff
+#define PC_VERSION_NUM_VERSION_NUM__SHIFT 0
+static inline uint32_t PC_VERSION_NUM_VERSION_NUM(uint32_t val)
+{
+ return ((val) << PC_VERSION_NUM_VERSION_NUM__SHIFT) & PC_VERSION_NUM_VERSION_NUM__MASK;
+}
+
+#define REG_PC_OPERATION_ENABLE 0x00000008
+#define PC_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define PC_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t PC_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_OPERATION_ENABLE_RESERVED_0__SHIFT) & PC_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define PC_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define PC_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t PC_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << PC_OPERATION_ENABLE_OP_EN__SHIFT) & PC_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_PC_BASE_ADDRESS 0x00000010
+#define PC_BASE_ADDRESS_PC_SOURCE_ADDR__MASK 0xfffffff0
+#define PC_BASE_ADDRESS_PC_SOURCE_ADDR__SHIFT 4
+static inline uint32_t PC_BASE_ADDRESS_PC_SOURCE_ADDR(uint32_t val)
+{
+ return ((val) << PC_BASE_ADDRESS_PC_SOURCE_ADDR__SHIFT) & PC_BASE_ADDRESS_PC_SOURCE_ADDR__MASK;
+}
+#define PC_BASE_ADDRESS_RESERVED_0__MASK 0x0000000e
+#define PC_BASE_ADDRESS_RESERVED_0__SHIFT 1
+static inline uint32_t PC_BASE_ADDRESS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_BASE_ADDRESS_RESERVED_0__SHIFT) & PC_BASE_ADDRESS_RESERVED_0__MASK;
+}
+#define PC_BASE_ADDRESS_PC_SEL__MASK 0x00000001
+#define PC_BASE_ADDRESS_PC_SEL__SHIFT 0
+static inline uint32_t PC_BASE_ADDRESS_PC_SEL(uint32_t val)
+{
+ return ((val) << PC_BASE_ADDRESS_PC_SEL__SHIFT) & PC_BASE_ADDRESS_PC_SEL__MASK;
+}
+
+#define REG_PC_REGISTER_AMOUNTS 0x00000014
+#define PC_REGISTER_AMOUNTS_RESERVED_0__MASK 0xffff0000
+#define PC_REGISTER_AMOUNTS_RESERVED_0__SHIFT 16
+static inline uint32_t PC_REGISTER_AMOUNTS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_REGISTER_AMOUNTS_RESERVED_0__SHIFT) & PC_REGISTER_AMOUNTS_RESERVED_0__MASK;
+}
+#define PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__MASK 0x0000ffff
+#define PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__SHIFT 0
+static inline uint32_t PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT(uint32_t val)
+{
+ return ((val) << PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__SHIFT) & PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__MASK;
+}
+
+#define REG_PC_INTERRUPT_MASK 0x00000020
+#define PC_INTERRUPT_MASK_RESERVED_0__MASK 0xffffc000
+#define PC_INTERRUPT_MASK_RESERVED_0__SHIFT 14
+static inline uint32_t PC_INTERRUPT_MASK_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_INTERRUPT_MASK_RESERVED_0__SHIFT) & PC_INTERRUPT_MASK_RESERVED_0__MASK;
+}
+#define PC_INTERRUPT_MASK_DMA_WRITE_ERROR 0x00002000
+#define PC_INTERRUPT_MASK_DMA_READ_ERROR 0x00001000
+#define PC_INTERRUPT_MASK_PPU_1 0x00000800
+#define PC_INTERRUPT_MASK_PPU_0 0x00000400
+#define PC_INTERRUPT_MASK_DPU_1 0x00000200
+#define PC_INTERRUPT_MASK_DPU_0 0x00000100
+#define PC_INTERRUPT_MASK_CORE_1 0x00000080
+#define PC_INTERRUPT_MASK_CORE_0 0x00000040
+#define PC_INTERRUPT_MASK_CNA_CSC_1 0x00000020
+#define PC_INTERRUPT_MASK_CNA_CSC_0 0x00000010
+#define PC_INTERRUPT_MASK_CNA_WEIGHT_1 0x00000008
+#define PC_INTERRUPT_MASK_CNA_WEIGHT_0 0x00000004
+#define PC_INTERRUPT_MASK_CNA_FEATURE_1 0x00000002
+#define PC_INTERRUPT_MASK_CNA_FEATURE_0 0x00000001
+
+#define REG_PC_INTERRUPT_CLEAR 0x00000024
+#define PC_INTERRUPT_CLEAR_RESERVED_0__MASK 0xffffc000
+#define PC_INTERRUPT_CLEAR_RESERVED_0__SHIFT 14
+static inline uint32_t PC_INTERRUPT_CLEAR_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_INTERRUPT_CLEAR_RESERVED_0__SHIFT) & PC_INTERRUPT_CLEAR_RESERVED_0__MASK;
+}
+#define PC_INTERRUPT_CLEAR_DMA_WRITE_ERROR 0x00002000
+#define PC_INTERRUPT_CLEAR_DMA_READ_ERROR 0x00001000
+#define PC_INTERRUPT_CLEAR_PPU_1 0x00000800
+#define PC_INTERRUPT_CLEAR_PPU_0 0x00000400
+#define PC_INTERRUPT_CLEAR_DPU_1 0x00000200
+#define PC_INTERRUPT_CLEAR_DPU_0 0x00000100
+#define PC_INTERRUPT_CLEAR_CORE_1 0x00000080
+#define PC_INTERRUPT_CLEAR_CORE_0 0x00000040
+#define PC_INTERRUPT_CLEAR_CNA_CSC_1 0x00000020
+#define PC_INTERRUPT_CLEAR_CNA_CSC_0 0x00000010
+#define PC_INTERRUPT_CLEAR_CNA_WEIGHT_1 0x00000008
+#define PC_INTERRUPT_CLEAR_CNA_WEIGHT_0 0x00000004
+#define PC_INTERRUPT_CLEAR_CNA_FEATURE_1 0x00000002
+#define PC_INTERRUPT_CLEAR_CNA_FEATURE_0 0x00000001
+
+#define REG_PC_INTERRUPT_STATUS 0x00000028
+#define PC_INTERRUPT_STATUS_RESERVED_0__MASK 0xffffc000
+#define PC_INTERRUPT_STATUS_RESERVED_0__SHIFT 14
+static inline uint32_t PC_INTERRUPT_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_INTERRUPT_STATUS_RESERVED_0__SHIFT) & PC_INTERRUPT_STATUS_RESERVED_0__MASK;
+}
+#define PC_INTERRUPT_STATUS_DMA_WRITE_ERROR 0x00002000
+#define PC_INTERRUPT_STATUS_DMA_READ_ERROR 0x00001000
+#define PC_INTERRUPT_STATUS_PPU_1 0x00000800
+#define PC_INTERRUPT_STATUS_PPU_0 0x00000400
+#define PC_INTERRUPT_STATUS_DPU_1 0x00000200
+#define PC_INTERRUPT_STATUS_DPU_0 0x00000100
+#define PC_INTERRUPT_STATUS_CORE_1 0x00000080
+#define PC_INTERRUPT_STATUS_CORE_0 0x00000040
+#define PC_INTERRUPT_STATUS_CNA_CSC_1 0x00000020
+#define PC_INTERRUPT_STATUS_CNA_CSC_0 0x00000010
+#define PC_INTERRUPT_STATUS_CNA_WEIGHT_1 0x00000008
+#define PC_INTERRUPT_STATUS_CNA_WEIGHT_0 0x00000004
+#define PC_INTERRUPT_STATUS_CNA_FEATURE_1 0x00000002
+#define PC_INTERRUPT_STATUS_CNA_FEATURE_0 0x00000001
+
+#define REG_PC_INTERRUPT_RAW_STATUS 0x0000002c
+#define PC_INTERRUPT_RAW_STATUS_RESERVED_0__MASK 0xffffc000
+#define PC_INTERRUPT_RAW_STATUS_RESERVED_0__SHIFT 14
+static inline uint32_t PC_INTERRUPT_RAW_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_INTERRUPT_RAW_STATUS_RESERVED_0__SHIFT) & PC_INTERRUPT_RAW_STATUS_RESERVED_0__MASK;
+}
+#define PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR 0x00002000
+#define PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR 0x00001000
+#define PC_INTERRUPT_RAW_STATUS_PPU_1 0x00000800
+#define PC_INTERRUPT_RAW_STATUS_PPU_0 0x00000400
+#define PC_INTERRUPT_RAW_STATUS_DPU_1 0x00000200
+#define PC_INTERRUPT_RAW_STATUS_DPU_0 0x00000100
+#define PC_INTERRUPT_RAW_STATUS_CORE_1 0x00000080
+#define PC_INTERRUPT_RAW_STATUS_CORE_0 0x00000040
+#define PC_INTERRUPT_RAW_STATUS_CNA_CSC_1 0x00000020
+#define PC_INTERRUPT_RAW_STATUS_CNA_CSC_0 0x00000010
+#define PC_INTERRUPT_RAW_STATUS_CNA_WEIGHT_1 0x00000008
+#define PC_INTERRUPT_RAW_STATUS_CNA_WEIGHT_0 0x00000004
+#define PC_INTERRUPT_RAW_STATUS_CNA_FEATURE_1 0x00000002
+#define PC_INTERRUPT_RAW_STATUS_CNA_FEATURE_0 0x00000001
+
+#define REG_PC_TASK_CON 0x00000030
+#define PC_TASK_CON_RESERVED_0__MASK 0xffffc000
+#define PC_TASK_CON_RESERVED_0__SHIFT 14
+static inline uint32_t PC_TASK_CON_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_TASK_CON_RESERVED_0__SHIFT) & PC_TASK_CON_RESERVED_0__MASK;
+}
+#define PC_TASK_CON_TASK_COUNT_CLEAR__MASK 0x00002000
+#define PC_TASK_CON_TASK_COUNT_CLEAR__SHIFT 13
+static inline uint32_t PC_TASK_CON_TASK_COUNT_CLEAR(uint32_t val)
+{
+ return ((val) << PC_TASK_CON_TASK_COUNT_CLEAR__SHIFT) & PC_TASK_CON_TASK_COUNT_CLEAR__MASK;
+}
+#define PC_TASK_CON_TASK_PP_EN__MASK 0x00001000
+#define PC_TASK_CON_TASK_PP_EN__SHIFT 12
+static inline uint32_t PC_TASK_CON_TASK_PP_EN(uint32_t val)
+{
+ return ((val) << PC_TASK_CON_TASK_PP_EN__SHIFT) & PC_TASK_CON_TASK_PP_EN__MASK;
+}
+#define PC_TASK_CON_TASK_NUMBER__MASK 0x00000fff
+#define PC_TASK_CON_TASK_NUMBER__SHIFT 0
+static inline uint32_t PC_TASK_CON_TASK_NUMBER(uint32_t val)
+{
+ return ((val) << PC_TASK_CON_TASK_NUMBER__SHIFT) & PC_TASK_CON_TASK_NUMBER__MASK;
+}
+
+#define REG_PC_TASK_DMA_BASE_ADDR 0x00000034
+#define PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__MASK 0xfffffff0
+#define PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__SHIFT 4
+static inline uint32_t PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(uint32_t val)
+{
+ return ((val) << PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__SHIFT) & PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__MASK;
+}
+#define PC_TASK_DMA_BASE_ADDR_RESERVED_0__MASK 0x0000000f
+#define PC_TASK_DMA_BASE_ADDR_RESERVED_0__SHIFT 0
+static inline uint32_t PC_TASK_DMA_BASE_ADDR_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_TASK_DMA_BASE_ADDR_RESERVED_0__SHIFT) & PC_TASK_DMA_BASE_ADDR_RESERVED_0__MASK;
+}
+
+#define REG_PC_TASK_STATUS 0x0000003c
+#define PC_TASK_STATUS_RESERVED_0__MASK 0xf0000000
+#define PC_TASK_STATUS_RESERVED_0__SHIFT 28
+static inline uint32_t PC_TASK_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PC_TASK_STATUS_RESERVED_0__SHIFT) & PC_TASK_STATUS_RESERVED_0__MASK;
+}
+#define PC_TASK_STATUS_TASK_STATUS__MASK 0x0fffffff
+#define PC_TASK_STATUS_TASK_STATUS__SHIFT 0
+static inline uint32_t PC_TASK_STATUS_TASK_STATUS(uint32_t val)
+{
+ return ((val) << PC_TASK_STATUS_TASK_STATUS__SHIFT) & PC_TASK_STATUS_TASK_STATUS__MASK;
+}
+
+#define REG_CNA_S_STATUS 0x00001000
+#define CNA_S_STATUS_RESERVED_0__MASK 0xfffc0000
+#define CNA_S_STATUS_RESERVED_0__SHIFT 18
+static inline uint32_t CNA_S_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_S_STATUS_RESERVED_0__SHIFT) & CNA_S_STATUS_RESERVED_0__MASK;
+}
+#define CNA_S_STATUS_STATUS_1__MASK 0x00030000
+#define CNA_S_STATUS_STATUS_1__SHIFT 16
+static inline uint32_t CNA_S_STATUS_STATUS_1(uint32_t val)
+{
+ return ((val) << CNA_S_STATUS_STATUS_1__SHIFT) & CNA_S_STATUS_STATUS_1__MASK;
+}
+#define CNA_S_STATUS_RESERVED_1__MASK 0x0000fffc
+#define CNA_S_STATUS_RESERVED_1__SHIFT 2
+static inline uint32_t CNA_S_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_S_STATUS_RESERVED_1__SHIFT) & CNA_S_STATUS_RESERVED_1__MASK;
+}
+#define CNA_S_STATUS_STATUS_0__MASK 0x00000003
+#define CNA_S_STATUS_STATUS_0__SHIFT 0
+static inline uint32_t CNA_S_STATUS_STATUS_0(uint32_t val)
+{
+ return ((val) << CNA_S_STATUS_STATUS_0__SHIFT) & CNA_S_STATUS_STATUS_0__MASK;
+}
+
+#define REG_CNA_S_POINTER 0x00001004
+#define CNA_S_POINTER_RESERVED_0__MASK 0xfffe0000
+#define CNA_S_POINTER_RESERVED_0__SHIFT 17
+static inline uint32_t CNA_S_POINTER_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_RESERVED_0__SHIFT) & CNA_S_POINTER_RESERVED_0__MASK;
+}
+#define CNA_S_POINTER_EXECUTER__MASK 0x00010000
+#define CNA_S_POINTER_EXECUTER__SHIFT 16
+static inline uint32_t CNA_S_POINTER_EXECUTER(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_EXECUTER__SHIFT) & CNA_S_POINTER_EXECUTER__MASK;
+}
+#define CNA_S_POINTER_RESERVED_1__MASK 0x0000ffc0
+#define CNA_S_POINTER_RESERVED_1__SHIFT 6
+static inline uint32_t CNA_S_POINTER_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_RESERVED_1__SHIFT) & CNA_S_POINTER_RESERVED_1__MASK;
+}
+#define CNA_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020
+#define CNA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5
+static inline uint32_t CNA_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & CNA_S_POINTER_EXECUTER_PP_CLEAR__MASK;
+}
+#define CNA_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010
+#define CNA_S_POINTER_POINTER_PP_CLEAR__SHIFT 4
+static inline uint32_t CNA_S_POINTER_POINTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_POINTER_PP_CLEAR__SHIFT) & CNA_S_POINTER_POINTER_PP_CLEAR__MASK;
+}
+#define CNA_S_POINTER_POINTER_PP_MODE__MASK 0x00000008
+#define CNA_S_POINTER_POINTER_PP_MODE__SHIFT 3
+static inline uint32_t CNA_S_POINTER_POINTER_PP_MODE(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_POINTER_PP_MODE__SHIFT) & CNA_S_POINTER_POINTER_PP_MODE__MASK;
+}
+#define CNA_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004
+#define CNA_S_POINTER_EXECUTER_PP_EN__SHIFT 2
+static inline uint32_t CNA_S_POINTER_EXECUTER_PP_EN(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_EXECUTER_PP_EN__SHIFT) & CNA_S_POINTER_EXECUTER_PP_EN__MASK;
+}
+#define CNA_S_POINTER_POINTER_PP_EN__MASK 0x00000002
+#define CNA_S_POINTER_POINTER_PP_EN__SHIFT 1
+static inline uint32_t CNA_S_POINTER_POINTER_PP_EN(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_POINTER_PP_EN__SHIFT) & CNA_S_POINTER_POINTER_PP_EN__MASK;
+}
+#define CNA_S_POINTER_POINTER__MASK 0x00000001
+#define CNA_S_POINTER_POINTER__SHIFT 0
+static inline uint32_t CNA_S_POINTER_POINTER(uint32_t val)
+{
+ return ((val) << CNA_S_POINTER_POINTER__SHIFT) & CNA_S_POINTER_POINTER__MASK;
+}
+
+#define REG_CNA_OPERATION_ENABLE 0x00001008
+#define CNA_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define CNA_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t CNA_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_OPERATION_ENABLE_RESERVED_0__SHIFT) & CNA_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define CNA_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define CNA_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t CNA_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << CNA_OPERATION_ENABLE_OP_EN__SHIFT) & CNA_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_CNA_CONV_CON1 0x0000100c
+#define CNA_CONV_CON1_RESERVED_0__MASK 0x80000000
+#define CNA_CONV_CON1_RESERVED_0__SHIFT 31
+static inline uint32_t CNA_CONV_CON1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_RESERVED_0__SHIFT) & CNA_CONV_CON1_RESERVED_0__MASK;
+}
+#define CNA_CONV_CON1_NONALIGN_DMA__MASK 0x40000000
+#define CNA_CONV_CON1_NONALIGN_DMA__SHIFT 30
+static inline uint32_t CNA_CONV_CON1_NONALIGN_DMA(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_NONALIGN_DMA__SHIFT) & CNA_CONV_CON1_NONALIGN_DMA__MASK;
+}
+#define CNA_CONV_CON1_GROUP_LINE_OFF__MASK 0x20000000
+#define CNA_CONV_CON1_GROUP_LINE_OFF__SHIFT 29
+static inline uint32_t CNA_CONV_CON1_GROUP_LINE_OFF(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_GROUP_LINE_OFF__SHIFT) & CNA_CONV_CON1_GROUP_LINE_OFF__MASK;
+}
+#define CNA_CONV_CON1_RESERVED_1__MASK 0x1ffe0000
+#define CNA_CONV_CON1_RESERVED_1__SHIFT 17
+static inline uint32_t CNA_CONV_CON1_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_RESERVED_1__SHIFT) & CNA_CONV_CON1_RESERVED_1__MASK;
+}
+#define CNA_CONV_CON1_DECONV__MASK 0x00010000
+#define CNA_CONV_CON1_DECONV__SHIFT 16
+static inline uint32_t CNA_CONV_CON1_DECONV(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_DECONV__SHIFT) & CNA_CONV_CON1_DECONV__MASK;
+}
+#define CNA_CONV_CON1_ARGB_IN__MASK 0x0000f000
+#define CNA_CONV_CON1_ARGB_IN__SHIFT 12
+static inline uint32_t CNA_CONV_CON1_ARGB_IN(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_ARGB_IN__SHIFT) & CNA_CONV_CON1_ARGB_IN__MASK;
+}
+#define CNA_CONV_CON1_RESERVED_2__MASK 0x00000c00
+#define CNA_CONV_CON1_RESERVED_2__SHIFT 10
+static inline uint32_t CNA_CONV_CON1_RESERVED_2(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_RESERVED_2__SHIFT) & CNA_CONV_CON1_RESERVED_2__MASK;
+}
+#define CNA_CONV_CON1_PROC_PRECISION__MASK 0x00000380
+#define CNA_CONV_CON1_PROC_PRECISION__SHIFT 7
+static inline uint32_t CNA_CONV_CON1_PROC_PRECISION(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_PROC_PRECISION__SHIFT) & CNA_CONV_CON1_PROC_PRECISION__MASK;
+}
+#define CNA_CONV_CON1_IN_PRECISION__MASK 0x00000070
+#define CNA_CONV_CON1_IN_PRECISION__SHIFT 4
+static inline uint32_t CNA_CONV_CON1_IN_PRECISION(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_IN_PRECISION__SHIFT) & CNA_CONV_CON1_IN_PRECISION__MASK;
+}
+#define CNA_CONV_CON1_CONV_MODE__MASK 0x0000000f
+#define CNA_CONV_CON1_CONV_MODE__SHIFT 0
+static inline uint32_t CNA_CONV_CON1_CONV_MODE(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON1_CONV_MODE__SHIFT) & CNA_CONV_CON1_CONV_MODE__MASK;
+}
+
+#define REG_CNA_CONV_CON2 0x00001010
+#define CNA_CONV_CON2_RESERVED_0__MASK 0xff000000
+#define CNA_CONV_CON2_RESERVED_0__SHIFT 24
+static inline uint32_t CNA_CONV_CON2_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_RESERVED_0__SHIFT) & CNA_CONV_CON2_RESERVED_0__MASK;
+}
+#define CNA_CONV_CON2_KERNEL_GROUP__MASK 0x00ff0000
+#define CNA_CONV_CON2_KERNEL_GROUP__SHIFT 16
+static inline uint32_t CNA_CONV_CON2_KERNEL_GROUP(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_KERNEL_GROUP__SHIFT) & CNA_CONV_CON2_KERNEL_GROUP__MASK;
+}
+#define CNA_CONV_CON2_RESERVED_1__MASK 0x0000c000
+#define CNA_CONV_CON2_RESERVED_1__SHIFT 14
+static inline uint32_t CNA_CONV_CON2_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_RESERVED_1__SHIFT) & CNA_CONV_CON2_RESERVED_1__MASK;
+}
+#define CNA_CONV_CON2_FEATURE_GRAINS__MASK 0x00003ff0
+#define CNA_CONV_CON2_FEATURE_GRAINS__SHIFT 4
+static inline uint32_t CNA_CONV_CON2_FEATURE_GRAINS(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_FEATURE_GRAINS__SHIFT) & CNA_CONV_CON2_FEATURE_GRAINS__MASK;
+}
+#define CNA_CONV_CON2_RESERVED_2__MASK 0x00000008
+#define CNA_CONV_CON2_RESERVED_2__SHIFT 3
+static inline uint32_t CNA_CONV_CON2_RESERVED_2(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_RESERVED_2__SHIFT) & CNA_CONV_CON2_RESERVED_2__MASK;
+}
+#define CNA_CONV_CON2_CSC_WO_EN__MASK 0x00000004
+#define CNA_CONV_CON2_CSC_WO_EN__SHIFT 2
+static inline uint32_t CNA_CONV_CON2_CSC_WO_EN(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_CSC_WO_EN__SHIFT) & CNA_CONV_CON2_CSC_WO_EN__MASK;
+}
+#define CNA_CONV_CON2_CSC_DO_EN__MASK 0x00000002
+#define CNA_CONV_CON2_CSC_DO_EN__SHIFT 1
+static inline uint32_t CNA_CONV_CON2_CSC_DO_EN(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_CSC_DO_EN__SHIFT) & CNA_CONV_CON2_CSC_DO_EN__MASK;
+}
+#define CNA_CONV_CON2_CMD_FIFO_SRST__MASK 0x00000001
+#define CNA_CONV_CON2_CMD_FIFO_SRST__SHIFT 0
+static inline uint32_t CNA_CONV_CON2_CMD_FIFO_SRST(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON2_CMD_FIFO_SRST__SHIFT) & CNA_CONV_CON2_CMD_FIFO_SRST__MASK;
+}
+
+#define REG_CNA_CONV_CON3 0x00001014
+#define CNA_CONV_CON3_RESERVED_0__MASK 0x80000000
+#define CNA_CONV_CON3_RESERVED_0__SHIFT 31
+static inline uint32_t CNA_CONV_CON3_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_RESERVED_0__SHIFT) & CNA_CONV_CON3_RESERVED_0__MASK;
+}
+#define CNA_CONV_CON3_NN_MODE__MASK 0x70000000
+#define CNA_CONV_CON3_NN_MODE__SHIFT 28
+static inline uint32_t CNA_CONV_CON3_NN_MODE(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_NN_MODE__SHIFT) & CNA_CONV_CON3_NN_MODE__MASK;
+}
+#define CNA_CONV_CON3_RESERVED_1__MASK 0x0c000000
+#define CNA_CONV_CON3_RESERVED_1__SHIFT 26
+static inline uint32_t CNA_CONV_CON3_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_RESERVED_1__SHIFT) & CNA_CONV_CON3_RESERVED_1__MASK;
+}
+#define CNA_CONV_CON3_ATROUS_Y_DILATION__MASK 0x03e00000
+#define CNA_CONV_CON3_ATROUS_Y_DILATION__SHIFT 21
+static inline uint32_t CNA_CONV_CON3_ATROUS_Y_DILATION(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_ATROUS_Y_DILATION__SHIFT) & CNA_CONV_CON3_ATROUS_Y_DILATION__MASK;
+}
+#define CNA_CONV_CON3_ATROUS_X_DILATION__MASK 0x001f0000
+#define CNA_CONV_CON3_ATROUS_X_DILATION__SHIFT 16
+static inline uint32_t CNA_CONV_CON3_ATROUS_X_DILATION(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_ATROUS_X_DILATION__SHIFT) & CNA_CONV_CON3_ATROUS_X_DILATION__MASK;
+}
+#define CNA_CONV_CON3_RESERVED_2__MASK 0x0000c000
+#define CNA_CONV_CON3_RESERVED_2__SHIFT 14
+static inline uint32_t CNA_CONV_CON3_RESERVED_2(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_RESERVED_2__SHIFT) & CNA_CONV_CON3_RESERVED_2__MASK;
+}
+#define CNA_CONV_CON3_DECONV_Y_STRIDE__MASK 0x00003800
+#define CNA_CONV_CON3_DECONV_Y_STRIDE__SHIFT 11
+static inline uint32_t CNA_CONV_CON3_DECONV_Y_STRIDE(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_DECONV_Y_STRIDE__SHIFT) & CNA_CONV_CON3_DECONV_Y_STRIDE__MASK;
+}
+#define CNA_CONV_CON3_DECONV_X_STRIDE__MASK 0x00000700
+#define CNA_CONV_CON3_DECONV_X_STRIDE__SHIFT 8
+static inline uint32_t CNA_CONV_CON3_DECONV_X_STRIDE(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_DECONV_X_STRIDE__SHIFT) & CNA_CONV_CON3_DECONV_X_STRIDE__MASK;
+}
+#define CNA_CONV_CON3_RESERVED_3__MASK 0x000000c0
+#define CNA_CONV_CON3_RESERVED_3__SHIFT 6
+static inline uint32_t CNA_CONV_CON3_RESERVED_3(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_RESERVED_3__SHIFT) & CNA_CONV_CON3_RESERVED_3__MASK;
+}
+#define CNA_CONV_CON3_CONV_Y_STRIDE__MASK 0x00000038
+#define CNA_CONV_CON3_CONV_Y_STRIDE__SHIFT 3
+static inline uint32_t CNA_CONV_CON3_CONV_Y_STRIDE(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_CONV_Y_STRIDE__SHIFT) & CNA_CONV_CON3_CONV_Y_STRIDE__MASK;
+}
+#define CNA_CONV_CON3_CONV_X_STRIDE__MASK 0x00000007
+#define CNA_CONV_CON3_CONV_X_STRIDE__SHIFT 0
+static inline uint32_t CNA_CONV_CON3_CONV_X_STRIDE(uint32_t val)
+{
+ return ((val) << CNA_CONV_CON3_CONV_X_STRIDE__SHIFT) & CNA_CONV_CON3_CONV_X_STRIDE__MASK;
+}
+
+#define REG_CNA_DATA_SIZE0 0x00001020
+#define CNA_DATA_SIZE0_RESERVED_0__MASK 0xf8000000
+#define CNA_DATA_SIZE0_RESERVED_0__SHIFT 27
+static inline uint32_t CNA_DATA_SIZE0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE0_RESERVED_0__SHIFT) & CNA_DATA_SIZE0_RESERVED_0__MASK;
+}
+#define CNA_DATA_SIZE0_DATAIN_WIDTH__MASK 0x07ff0000
+#define CNA_DATA_SIZE0_DATAIN_WIDTH__SHIFT 16
+static inline uint32_t CNA_DATA_SIZE0_DATAIN_WIDTH(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE0_DATAIN_WIDTH__SHIFT) & CNA_DATA_SIZE0_DATAIN_WIDTH__MASK;
+}
+#define CNA_DATA_SIZE0_RESERVED_1__MASK 0x0000f800
+#define CNA_DATA_SIZE0_RESERVED_1__SHIFT 11
+static inline uint32_t CNA_DATA_SIZE0_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE0_RESERVED_1__SHIFT) & CNA_DATA_SIZE0_RESERVED_1__MASK;
+}
+#define CNA_DATA_SIZE0_DATAIN_HEIGHT__MASK 0x000007ff
+#define CNA_DATA_SIZE0_DATAIN_HEIGHT__SHIFT 0
+static inline uint32_t CNA_DATA_SIZE0_DATAIN_HEIGHT(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE0_DATAIN_HEIGHT__SHIFT) & CNA_DATA_SIZE0_DATAIN_HEIGHT__MASK;
+}
+
+#define REG_CNA_DATA_SIZE1 0x00001024
+#define CNA_DATA_SIZE1_RESERVED_0__MASK 0xc0000000
+#define CNA_DATA_SIZE1_RESERVED_0__SHIFT 30
+static inline uint32_t CNA_DATA_SIZE1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE1_RESERVED_0__SHIFT) & CNA_DATA_SIZE1_RESERVED_0__MASK;
+}
+#define CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__MASK 0x3fff0000
+#define CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__SHIFT 16
+static inline uint32_t CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__SHIFT) & CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__MASK;
+}
+#define CNA_DATA_SIZE1_DATAIN_CHANNEL__MASK 0x0000ffff
+#define CNA_DATA_SIZE1_DATAIN_CHANNEL__SHIFT 0
+static inline uint32_t CNA_DATA_SIZE1_DATAIN_CHANNEL(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE1_DATAIN_CHANNEL__SHIFT) & CNA_DATA_SIZE1_DATAIN_CHANNEL__MASK;
+}
+
+#define REG_CNA_DATA_SIZE2 0x00001028
+#define CNA_DATA_SIZE2_RESERVED_0__MASK 0xfffff800
+#define CNA_DATA_SIZE2_RESERVED_0__SHIFT 11
+static inline uint32_t CNA_DATA_SIZE2_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE2_RESERVED_0__SHIFT) & CNA_DATA_SIZE2_RESERVED_0__MASK;
+}
+#define CNA_DATA_SIZE2_DATAOUT_WIDTH__MASK 0x000007ff
+#define CNA_DATA_SIZE2_DATAOUT_WIDTH__SHIFT 0
+static inline uint32_t CNA_DATA_SIZE2_DATAOUT_WIDTH(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE2_DATAOUT_WIDTH__SHIFT) & CNA_DATA_SIZE2_DATAOUT_WIDTH__MASK;
+}
+
+#define REG_CNA_DATA_SIZE3 0x0000102c
+#define CNA_DATA_SIZE3_RESERVED_0__MASK 0xff000000
+#define CNA_DATA_SIZE3_RESERVED_0__SHIFT 24
+static inline uint32_t CNA_DATA_SIZE3_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE3_RESERVED_0__SHIFT) & CNA_DATA_SIZE3_RESERVED_0__MASK;
+}
+#define CNA_DATA_SIZE3_SURF_MODE__MASK 0x00c00000
+#define CNA_DATA_SIZE3_SURF_MODE__SHIFT 22
+static inline uint32_t CNA_DATA_SIZE3_SURF_MODE(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE3_SURF_MODE__SHIFT) & CNA_DATA_SIZE3_SURF_MODE__MASK;
+}
+#define CNA_DATA_SIZE3_DATAOUT_ATOMICS__MASK 0x003fffff
+#define CNA_DATA_SIZE3_DATAOUT_ATOMICS__SHIFT 0
+static inline uint32_t CNA_DATA_SIZE3_DATAOUT_ATOMICS(uint32_t val)
+{
+ return ((val) << CNA_DATA_SIZE3_DATAOUT_ATOMICS__SHIFT) & CNA_DATA_SIZE3_DATAOUT_ATOMICS__MASK;
+}
+
+#define REG_CNA_WEIGHT_SIZE0 0x00001030
+#define CNA_WEIGHT_SIZE0_WEIGHT_BYTES__MASK 0xffffffff
+#define CNA_WEIGHT_SIZE0_WEIGHT_BYTES__SHIFT 0
+static inline uint32_t CNA_WEIGHT_SIZE0_WEIGHT_BYTES(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE0_WEIGHT_BYTES__SHIFT) & CNA_WEIGHT_SIZE0_WEIGHT_BYTES__MASK;
+}
+
+#define REG_CNA_WEIGHT_SIZE1 0x00001034
+#define CNA_WEIGHT_SIZE1_RESERVED_0__MASK 0xfff80000
+#define CNA_WEIGHT_SIZE1_RESERVED_0__SHIFT 19
+static inline uint32_t CNA_WEIGHT_SIZE1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE1_RESERVED_0__SHIFT) & CNA_WEIGHT_SIZE1_RESERVED_0__MASK;
+}
+#define CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__MASK 0x0007ffff
+#define CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__SHIFT 0
+static inline uint32_t CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__SHIFT) & CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__MASK;
+}
+
+#define REG_CNA_WEIGHT_SIZE2 0x00001038
+#define CNA_WEIGHT_SIZE2_RESERVED_0__MASK 0xe0000000
+#define CNA_WEIGHT_SIZE2_RESERVED_0__SHIFT 29
+static inline uint32_t CNA_WEIGHT_SIZE2_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE2_RESERVED_0__SHIFT) & CNA_WEIGHT_SIZE2_RESERVED_0__MASK;
+}
+#define CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__MASK 0x1f000000
+#define CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__SHIFT 24
+static inline uint32_t CNA_WEIGHT_SIZE2_WEIGHT_WIDTH(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__SHIFT) & CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__MASK;
+}
+#define CNA_WEIGHT_SIZE2_RESERVED_1__MASK 0x00e00000
+#define CNA_WEIGHT_SIZE2_RESERVED_1__SHIFT 21
+static inline uint32_t CNA_WEIGHT_SIZE2_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE2_RESERVED_1__SHIFT) & CNA_WEIGHT_SIZE2_RESERVED_1__MASK;
+}
+#define CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__MASK 0x001f0000
+#define CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__SHIFT 16
+static inline uint32_t CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__SHIFT) & CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__MASK;
+}
+#define CNA_WEIGHT_SIZE2_RESERVED_2__MASK 0x0000c000
+#define CNA_WEIGHT_SIZE2_RESERVED_2__SHIFT 14
+static inline uint32_t CNA_WEIGHT_SIZE2_RESERVED_2(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE2_RESERVED_2__SHIFT) & CNA_WEIGHT_SIZE2_RESERVED_2__MASK;
+}
+#define CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__MASK 0x00003fff
+#define CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__SHIFT 0
+static inline uint32_t CNA_WEIGHT_SIZE2_WEIGHT_KERNELS(uint32_t val)
+{
+ return ((val) << CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__SHIFT) & CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__MASK;
+}
+
+#define REG_CNA_CBUF_CON0 0x00001040
+#define CNA_CBUF_CON0_RESERVED_0__MASK 0xffffc000
+#define CNA_CBUF_CON0_RESERVED_0__SHIFT 14
+static inline uint32_t CNA_CBUF_CON0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_RESERVED_0__SHIFT) & CNA_CBUF_CON0_RESERVED_0__MASK;
+}
+#define CNA_CBUF_CON0_WEIGHT_REUSE__MASK 0x00002000
+#define CNA_CBUF_CON0_WEIGHT_REUSE__SHIFT 13
+static inline uint32_t CNA_CBUF_CON0_WEIGHT_REUSE(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_WEIGHT_REUSE__SHIFT) & CNA_CBUF_CON0_WEIGHT_REUSE__MASK;
+}
+#define CNA_CBUF_CON0_DATA_REUSE__MASK 0x00001000
+#define CNA_CBUF_CON0_DATA_REUSE__SHIFT 12
+static inline uint32_t CNA_CBUF_CON0_DATA_REUSE(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_DATA_REUSE__SHIFT) & CNA_CBUF_CON0_DATA_REUSE__MASK;
+}
+#define CNA_CBUF_CON0_RESERVED_1__MASK 0x00000800
+#define CNA_CBUF_CON0_RESERVED_1__SHIFT 11
+static inline uint32_t CNA_CBUF_CON0_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_RESERVED_1__SHIFT) & CNA_CBUF_CON0_RESERVED_1__MASK;
+}
+#define CNA_CBUF_CON0_FC_DATA_BANK__MASK 0x00000700
+#define CNA_CBUF_CON0_FC_DATA_BANK__SHIFT 8
+static inline uint32_t CNA_CBUF_CON0_FC_DATA_BANK(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_FC_DATA_BANK__SHIFT) & CNA_CBUF_CON0_FC_DATA_BANK__MASK;
+}
+#define CNA_CBUF_CON0_WEIGHT_BANK__MASK 0x000000f0
+#define CNA_CBUF_CON0_WEIGHT_BANK__SHIFT 4
+static inline uint32_t CNA_CBUF_CON0_WEIGHT_BANK(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_WEIGHT_BANK__SHIFT) & CNA_CBUF_CON0_WEIGHT_BANK__MASK;
+}
+#define CNA_CBUF_CON0_DATA_BANK__MASK 0x0000000f
+#define CNA_CBUF_CON0_DATA_BANK__SHIFT 0
+static inline uint32_t CNA_CBUF_CON0_DATA_BANK(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON0_DATA_BANK__SHIFT) & CNA_CBUF_CON0_DATA_BANK__MASK;
+}
+
+#define REG_CNA_CBUF_CON1 0x00001044
+#define CNA_CBUF_CON1_RESERVED_0__MASK 0xffffc000
+#define CNA_CBUF_CON1_RESERVED_0__SHIFT 14
+static inline uint32_t CNA_CBUF_CON1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON1_RESERVED_0__SHIFT) & CNA_CBUF_CON1_RESERVED_0__MASK;
+}
+#define CNA_CBUF_CON1_DATA_ENTRIES__MASK 0x00003fff
+#define CNA_CBUF_CON1_DATA_ENTRIES__SHIFT 0
+static inline uint32_t CNA_CBUF_CON1_DATA_ENTRIES(uint32_t val)
+{
+ return ((val) << CNA_CBUF_CON1_DATA_ENTRIES__SHIFT) & CNA_CBUF_CON1_DATA_ENTRIES__MASK;
+}
+
+#define REG_CNA_CVT_CON0 0x0000104c
+#define CNA_CVT_CON0_RESERVED_0__MASK 0xf0000000
+#define CNA_CVT_CON0_RESERVED_0__SHIFT 28
+static inline uint32_t CNA_CVT_CON0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_RESERVED_0__SHIFT) & CNA_CVT_CON0_RESERVED_0__MASK;
+}
+#define CNA_CVT_CON0_CVT_TRUNCATE_3__MASK 0x0fc00000
+#define CNA_CVT_CON0_CVT_TRUNCATE_3__SHIFT 22
+static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_3(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_3__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_3__MASK;
+}
+#define CNA_CVT_CON0_CVT_TRUNCATE_2__MASK 0x003f0000
+#define CNA_CVT_CON0_CVT_TRUNCATE_2__SHIFT 16
+static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_2(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_2__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_2__MASK;
+}
+#define CNA_CVT_CON0_CVT_TRUNCATE_1__MASK 0x0000fc00
+#define CNA_CVT_CON0_CVT_TRUNCATE_1__SHIFT 10
+static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_1(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_1__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_1__MASK;
+}
+#define CNA_CVT_CON0_CVT_TRUNCATE_0__MASK 0x000003f0
+#define CNA_CVT_CON0_CVT_TRUNCATE_0__SHIFT 4
+static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_0(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_0__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_0__MASK;
+}
+#define CNA_CVT_CON0_DATA_SIGN__MASK 0x00000008
+#define CNA_CVT_CON0_DATA_SIGN__SHIFT 3
+static inline uint32_t CNA_CVT_CON0_DATA_SIGN(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_DATA_SIGN__SHIFT) & CNA_CVT_CON0_DATA_SIGN__MASK;
+}
+#define CNA_CVT_CON0_ROUND_TYPE__MASK 0x00000004
+#define CNA_CVT_CON0_ROUND_TYPE__SHIFT 2
+static inline uint32_t CNA_CVT_CON0_ROUND_TYPE(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_ROUND_TYPE__SHIFT) & CNA_CVT_CON0_ROUND_TYPE__MASK;
+}
+#define CNA_CVT_CON0_CVT_TYPE__MASK 0x00000002
+#define CNA_CVT_CON0_CVT_TYPE__SHIFT 1
+static inline uint32_t CNA_CVT_CON0_CVT_TYPE(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_CVT_TYPE__SHIFT) & CNA_CVT_CON0_CVT_TYPE__MASK;
+}
+#define CNA_CVT_CON0_CVT_BYPASS__MASK 0x00000001
+#define CNA_CVT_CON0_CVT_BYPASS__SHIFT 0
+static inline uint32_t CNA_CVT_CON0_CVT_BYPASS(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON0_CVT_BYPASS__SHIFT) & CNA_CVT_CON0_CVT_BYPASS__MASK;
+}
+
+#define REG_CNA_CVT_CON1 0x00001050
+#define CNA_CVT_CON1_CVT_SCALE0__MASK 0xffff0000
+#define CNA_CVT_CON1_CVT_SCALE0__SHIFT 16
+static inline uint32_t CNA_CVT_CON1_CVT_SCALE0(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON1_CVT_SCALE0__SHIFT) & CNA_CVT_CON1_CVT_SCALE0__MASK;
+}
+#define CNA_CVT_CON1_CVT_OFFSET0__MASK 0x0000ffff
+#define CNA_CVT_CON1_CVT_OFFSET0__SHIFT 0
+static inline uint32_t CNA_CVT_CON1_CVT_OFFSET0(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON1_CVT_OFFSET0__SHIFT) & CNA_CVT_CON1_CVT_OFFSET0__MASK;
+}
+
+#define REG_CNA_CVT_CON2 0x00001054
+#define CNA_CVT_CON2_CVT_SCALE1__MASK 0xffff0000
+#define CNA_CVT_CON2_CVT_SCALE1__SHIFT 16
+static inline uint32_t CNA_CVT_CON2_CVT_SCALE1(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON2_CVT_SCALE1__SHIFT) & CNA_CVT_CON2_CVT_SCALE1__MASK;
+}
+#define CNA_CVT_CON2_CVT_OFFSET1__MASK 0x0000ffff
+#define CNA_CVT_CON2_CVT_OFFSET1__SHIFT 0
+static inline uint32_t CNA_CVT_CON2_CVT_OFFSET1(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON2_CVT_OFFSET1__SHIFT) & CNA_CVT_CON2_CVT_OFFSET1__MASK;
+}
+
+#define REG_CNA_CVT_CON3 0x00001058
+#define CNA_CVT_CON3_CVT_SCALE2__MASK 0xffff0000
+#define CNA_CVT_CON3_CVT_SCALE2__SHIFT 16
+static inline uint32_t CNA_CVT_CON3_CVT_SCALE2(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON3_CVT_SCALE2__SHIFT) & CNA_CVT_CON3_CVT_SCALE2__MASK;
+}
+#define CNA_CVT_CON3_CVT_OFFSET2__MASK 0x0000ffff
+#define CNA_CVT_CON3_CVT_OFFSET2__SHIFT 0
+static inline uint32_t CNA_CVT_CON3_CVT_OFFSET2(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON3_CVT_OFFSET2__SHIFT) & CNA_CVT_CON3_CVT_OFFSET2__MASK;
+}
+
+#define REG_CNA_CVT_CON4 0x0000105c
+#define CNA_CVT_CON4_CVT_SCALE3__MASK 0xffff0000
+#define CNA_CVT_CON4_CVT_SCALE3__SHIFT 16
+static inline uint32_t CNA_CVT_CON4_CVT_SCALE3(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON4_CVT_SCALE3__SHIFT) & CNA_CVT_CON4_CVT_SCALE3__MASK;
+}
+#define CNA_CVT_CON4_CVT_OFFSET3__MASK 0x0000ffff
+#define CNA_CVT_CON4_CVT_OFFSET3__SHIFT 0
+static inline uint32_t CNA_CVT_CON4_CVT_OFFSET3(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON4_CVT_OFFSET3__SHIFT) & CNA_CVT_CON4_CVT_OFFSET3__MASK;
+}
+
+#define REG_CNA_FC_CON0 0x00001060
+#define CNA_FC_CON0_FC_SKIP_DATA__MASK 0xffff0000
+#define CNA_FC_CON0_FC_SKIP_DATA__SHIFT 16
+static inline uint32_t CNA_FC_CON0_FC_SKIP_DATA(uint32_t val)
+{
+ return ((val) << CNA_FC_CON0_FC_SKIP_DATA__SHIFT) & CNA_FC_CON0_FC_SKIP_DATA__MASK;
+}
+#define CNA_FC_CON0_RESERVED_0__MASK 0x0000fffe
+#define CNA_FC_CON0_RESERVED_0__SHIFT 1
+static inline uint32_t CNA_FC_CON0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_FC_CON0_RESERVED_0__SHIFT) & CNA_FC_CON0_RESERVED_0__MASK;
+}
+#define CNA_FC_CON0_FC_SKIP_EN__MASK 0x00000001
+#define CNA_FC_CON0_FC_SKIP_EN__SHIFT 0
+static inline uint32_t CNA_FC_CON0_FC_SKIP_EN(uint32_t val)
+{
+ return ((val) << CNA_FC_CON0_FC_SKIP_EN__SHIFT) & CNA_FC_CON0_FC_SKIP_EN__MASK;
+}
+
+#define REG_CNA_FC_CON1 0x00001064
+#define CNA_FC_CON1_RESERVED_0__MASK 0xfffe0000
+#define CNA_FC_CON1_RESERVED_0__SHIFT 17
+static inline uint32_t CNA_FC_CON1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_FC_CON1_RESERVED_0__SHIFT) & CNA_FC_CON1_RESERVED_0__MASK;
+}
+#define CNA_FC_CON1_DATA_OFFSET__MASK 0x0001ffff
+#define CNA_FC_CON1_DATA_OFFSET__SHIFT 0
+static inline uint32_t CNA_FC_CON1_DATA_OFFSET(uint32_t val)
+{
+ return ((val) << CNA_FC_CON1_DATA_OFFSET__SHIFT) & CNA_FC_CON1_DATA_OFFSET__MASK;
+}
+
+#define REG_CNA_PAD_CON0 0x00001068
+#define CNA_PAD_CON0_RESERVED_0__MASK 0xffffff00
+#define CNA_PAD_CON0_RESERVED_0__SHIFT 8
+static inline uint32_t CNA_PAD_CON0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_PAD_CON0_RESERVED_0__SHIFT) & CNA_PAD_CON0_RESERVED_0__MASK;
+}
+#define CNA_PAD_CON0_PAD_LEFT__MASK 0x000000f0
+#define CNA_PAD_CON0_PAD_LEFT__SHIFT 4
+static inline uint32_t CNA_PAD_CON0_PAD_LEFT(uint32_t val)
+{
+ return ((val) << CNA_PAD_CON0_PAD_LEFT__SHIFT) & CNA_PAD_CON0_PAD_LEFT__MASK;
+}
+#define CNA_PAD_CON0_PAD_TOP__MASK 0x0000000f
+#define CNA_PAD_CON0_PAD_TOP__SHIFT 0
+static inline uint32_t CNA_PAD_CON0_PAD_TOP(uint32_t val)
+{
+ return ((val) << CNA_PAD_CON0_PAD_TOP__SHIFT) & CNA_PAD_CON0_PAD_TOP__MASK;
+}
+
+#define REG_CNA_FEATURE_DATA_ADDR 0x00001070
+#define CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__MASK 0xffffffff
+#define CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__SHIFT 0
+static inline uint32_t CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR(uint32_t val)
+{
+ return ((val) << CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__SHIFT) & CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__MASK;
+}
+
+#define REG_CNA_FC_CON2 0x00001074
+#define CNA_FC_CON2_RESERVED_0__MASK 0xfffe0000
+#define CNA_FC_CON2_RESERVED_0__SHIFT 17
+static inline uint32_t CNA_FC_CON2_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_FC_CON2_RESERVED_0__SHIFT) & CNA_FC_CON2_RESERVED_0__MASK;
+}
+#define CNA_FC_CON2_WEIGHT_OFFSET__MASK 0x0001ffff
+#define CNA_FC_CON2_WEIGHT_OFFSET__SHIFT 0
+static inline uint32_t CNA_FC_CON2_WEIGHT_OFFSET(uint32_t val)
+{
+ return ((val) << CNA_FC_CON2_WEIGHT_OFFSET__SHIFT) & CNA_FC_CON2_WEIGHT_OFFSET__MASK;
+}
+
+#define REG_CNA_DMA_CON0 0x00001078
+#define CNA_DMA_CON0_OV4K_BYPASS__MASK 0x80000000
+#define CNA_DMA_CON0_OV4K_BYPASS__SHIFT 31
+static inline uint32_t CNA_DMA_CON0_OV4K_BYPASS(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON0_OV4K_BYPASS__SHIFT) & CNA_DMA_CON0_OV4K_BYPASS__MASK;
+}
+#define CNA_DMA_CON0_RESERVED_0__MASK 0x7ff00000
+#define CNA_DMA_CON0_RESERVED_0__SHIFT 20
+static inline uint32_t CNA_DMA_CON0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON0_RESERVED_0__SHIFT) & CNA_DMA_CON0_RESERVED_0__MASK;
+}
+#define CNA_DMA_CON0_WEIGHT_BURST_LEN__MASK 0x000f0000
+#define CNA_DMA_CON0_WEIGHT_BURST_LEN__SHIFT 16
+static inline uint32_t CNA_DMA_CON0_WEIGHT_BURST_LEN(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON0_WEIGHT_BURST_LEN__SHIFT) & CNA_DMA_CON0_WEIGHT_BURST_LEN__MASK;
+}
+#define CNA_DMA_CON0_RESERVED_1__MASK 0x0000fff0
+#define CNA_DMA_CON0_RESERVED_1__SHIFT 4
+static inline uint32_t CNA_DMA_CON0_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON0_RESERVED_1__SHIFT) & CNA_DMA_CON0_RESERVED_1__MASK;
+}
+#define CNA_DMA_CON0_DATA_BURST_LEN__MASK 0x0000000f
+#define CNA_DMA_CON0_DATA_BURST_LEN__SHIFT 0
+static inline uint32_t CNA_DMA_CON0_DATA_BURST_LEN(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON0_DATA_BURST_LEN__SHIFT) & CNA_DMA_CON0_DATA_BURST_LEN__MASK;
+}
+
+#define REG_CNA_DMA_CON1 0x0000107c
+#define CNA_DMA_CON1_RESERVED_0__MASK 0xf0000000
+#define CNA_DMA_CON1_RESERVED_0__SHIFT 28
+static inline uint32_t CNA_DMA_CON1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON1_RESERVED_0__SHIFT) & CNA_DMA_CON1_RESERVED_0__MASK;
+}
+#define CNA_DMA_CON1_LINE_STRIDE__MASK 0x0fffffff
+#define CNA_DMA_CON1_LINE_STRIDE__SHIFT 0
+static inline uint32_t CNA_DMA_CON1_LINE_STRIDE(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON1_LINE_STRIDE__SHIFT) & CNA_DMA_CON1_LINE_STRIDE__MASK;
+}
+
+#define REG_CNA_DMA_CON2 0x00001080
+#define CNA_DMA_CON2_RESERVED_0__MASK 0xf0000000
+#define CNA_DMA_CON2_RESERVED_0__SHIFT 28
+static inline uint32_t CNA_DMA_CON2_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON2_RESERVED_0__SHIFT) & CNA_DMA_CON2_RESERVED_0__MASK;
+}
+#define CNA_DMA_CON2_SURF_STRIDE__MASK 0x0fffffff
+#define CNA_DMA_CON2_SURF_STRIDE__SHIFT 0
+static inline uint32_t CNA_DMA_CON2_SURF_STRIDE(uint32_t val)
+{
+ return ((val) << CNA_DMA_CON2_SURF_STRIDE__SHIFT) & CNA_DMA_CON2_SURF_STRIDE__MASK;
+}
+
+#define REG_CNA_FC_DATA_SIZE0 0x00001084
+#define CNA_FC_DATA_SIZE0_RESERVED_0__MASK 0xc0000000
+#define CNA_FC_DATA_SIZE0_RESERVED_0__SHIFT 30
+static inline uint32_t CNA_FC_DATA_SIZE0_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_FC_DATA_SIZE0_RESERVED_0__SHIFT) & CNA_FC_DATA_SIZE0_RESERVED_0__MASK;
+}
+#define CNA_FC_DATA_SIZE0_DMA_WIDTH__MASK 0x3fff0000
+#define CNA_FC_DATA_SIZE0_DMA_WIDTH__SHIFT 16
+static inline uint32_t CNA_FC_DATA_SIZE0_DMA_WIDTH(uint32_t val)
+{
+ return ((val) << CNA_FC_DATA_SIZE0_DMA_WIDTH__SHIFT) & CNA_FC_DATA_SIZE0_DMA_WIDTH__MASK;
+}
+#define CNA_FC_DATA_SIZE0_RESERVED_1__MASK 0x0000f800
+#define CNA_FC_DATA_SIZE0_RESERVED_1__SHIFT 11
+static inline uint32_t CNA_FC_DATA_SIZE0_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_FC_DATA_SIZE0_RESERVED_1__SHIFT) & CNA_FC_DATA_SIZE0_RESERVED_1__MASK;
+}
+#define CNA_FC_DATA_SIZE0_DMA_HEIGHT__MASK 0x000007ff
+#define CNA_FC_DATA_SIZE0_DMA_HEIGHT__SHIFT 0
+static inline uint32_t CNA_FC_DATA_SIZE0_DMA_HEIGHT(uint32_t val)
+{
+ return ((val) << CNA_FC_DATA_SIZE0_DMA_HEIGHT__SHIFT) & CNA_FC_DATA_SIZE0_DMA_HEIGHT__MASK;
+}
+
+#define REG_CNA_FC_DATA_SIZE1 0x00001088
+#define CNA_FC_DATA_SIZE1_RESERVED_0__MASK 0xffff0000
+#define CNA_FC_DATA_SIZE1_RESERVED_0__SHIFT 16
+static inline uint32_t CNA_FC_DATA_SIZE1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_FC_DATA_SIZE1_RESERVED_0__SHIFT) & CNA_FC_DATA_SIZE1_RESERVED_0__MASK;
+}
+#define CNA_FC_DATA_SIZE1_DMA_CHANNEL__MASK 0x0000ffff
+#define CNA_FC_DATA_SIZE1_DMA_CHANNEL__SHIFT 0
+static inline uint32_t CNA_FC_DATA_SIZE1_DMA_CHANNEL(uint32_t val)
+{
+ return ((val) << CNA_FC_DATA_SIZE1_DMA_CHANNEL__SHIFT) & CNA_FC_DATA_SIZE1_DMA_CHANNEL__MASK;
+}
+
+#define REG_CNA_CLK_GATE 0x00001090
+#define CNA_CLK_GATE_RESERVED_0__MASK 0xffffffe0
+#define CNA_CLK_GATE_RESERVED_0__SHIFT 5
+static inline uint32_t CNA_CLK_GATE_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_CLK_GATE_RESERVED_0__SHIFT) & CNA_CLK_GATE_RESERVED_0__MASK;
+}
+#define CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__MASK 0x00000010
+#define CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__SHIFT 4
+static inline uint32_t CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE(uint32_t val)
+{
+ return ((val) << CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__MASK;
+}
+#define CNA_CLK_GATE_RESERVED_1__MASK 0x00000008
+#define CNA_CLK_GATE_RESERVED_1__SHIFT 3
+static inline uint32_t CNA_CLK_GATE_RESERVED_1(uint32_t val)
+{
+ return ((val) << CNA_CLK_GATE_RESERVED_1__SHIFT) & CNA_CLK_GATE_RESERVED_1__MASK;
+}
+#define CNA_CLK_GATE_CSC_DISABLE_CLKGATE__MASK 0x00000004
+#define CNA_CLK_GATE_CSC_DISABLE_CLKGATE__SHIFT 2
+static inline uint32_t CNA_CLK_GATE_CSC_DISABLE_CLKGATE(uint32_t val)
+{
+ return ((val) << CNA_CLK_GATE_CSC_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CSC_DISABLE_CLKGATE__MASK;
+}
+#define CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__MASK 0x00000002
+#define CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__SHIFT 1
+static inline uint32_t CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE(uint32_t val)
+{
+ return ((val) << CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__MASK;
+}
+#define CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__MASK 0x00000001
+#define CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__SHIFT 0
+static inline uint32_t CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE(uint32_t val)
+{
+ return ((val) << CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__MASK;
+}
+
+#define REG_CNA_DCOMP_CTRL 0x00001100
+#define CNA_DCOMP_CTRL_RESERVED_0__MASK 0xfffffff0
+#define CNA_DCOMP_CTRL_RESERVED_0__SHIFT 4
+static inline uint32_t CNA_DCOMP_CTRL_RESERVED_0(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_CTRL_RESERVED_0__SHIFT) & CNA_DCOMP_CTRL_RESERVED_0__MASK;
+}
+#define CNA_DCOMP_CTRL_WT_DEC_BYPASS__MASK 0x00000008
+#define CNA_DCOMP_CTRL_WT_DEC_BYPASS__SHIFT 3
+static inline uint32_t CNA_DCOMP_CTRL_WT_DEC_BYPASS(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_CTRL_WT_DEC_BYPASS__SHIFT) & CNA_DCOMP_CTRL_WT_DEC_BYPASS__MASK;
+}
+#define CNA_DCOMP_CTRL_DECOMP_CONTROL__MASK 0x00000007
+#define CNA_DCOMP_CTRL_DECOMP_CONTROL__SHIFT 0
+static inline uint32_t CNA_DCOMP_CTRL_DECOMP_CONTROL(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_CTRL_DECOMP_CONTROL__SHIFT) & CNA_DCOMP_CTRL_DECOMP_CONTROL__MASK;
+}
+
+#define REG_CNA_DCOMP_REGNUM 0x00001104
+#define CNA_DCOMP_REGNUM_DCOMP_REGNUM__MASK 0xffffffff
+#define CNA_DCOMP_REGNUM_DCOMP_REGNUM__SHIFT 0
+static inline uint32_t CNA_DCOMP_REGNUM_DCOMP_REGNUM(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_REGNUM_DCOMP_REGNUM__SHIFT) & CNA_DCOMP_REGNUM_DCOMP_REGNUM__MASK;
+}
+
+#define REG_CNA_DCOMP_ADDR0 0x00001110
+#define CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__MASK 0xffffffff
+#define CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__SHIFT 0
+static inline uint32_t CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__SHIFT) & CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT0 0x00001140
+#define CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__SHIFT) & CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT1 0x00001144
+#define CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__SHIFT) & CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT2 0x00001148
+#define CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__SHIFT) & CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT3 0x0000114c
+#define CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__SHIFT) & CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT4 0x00001150
+#define CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__SHIFT) & CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT5 0x00001154
+#define CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__SHIFT) & CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT6 0x00001158
+#define CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__SHIFT) & CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT7 0x0000115c
+#define CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__SHIFT) & CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT8 0x00001160
+#define CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__SHIFT) & CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT9 0x00001164
+#define CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__SHIFT) & CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT10 0x00001168
+#define CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__SHIFT) & CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT11 0x0000116c
+#define CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__SHIFT) & CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT12 0x00001170
+#define CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__SHIFT) & CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT13 0x00001174
+#define CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__SHIFT) & CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT14 0x00001178
+#define CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__SHIFT) & CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__MASK;
+}
+
+#define REG_CNA_DCOMP_AMOUNT15 0x0000117c
+#define CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__MASK 0xffffffff
+#define CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__SHIFT 0
+static inline uint32_t CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15(uint32_t val)
+{
+ return ((val) << CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__SHIFT) & CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__MASK;
+}
+
+#define REG_CNA_CVT_CON5 0x00001180
+#define CNA_CVT_CON5_PER_CHANNEL_CVT_EN__MASK 0xffffffff
+#define CNA_CVT_CON5_PER_CHANNEL_CVT_EN__SHIFT 0
+static inline uint32_t CNA_CVT_CON5_PER_CHANNEL_CVT_EN(uint32_t val)
+{
+ return ((val) << CNA_CVT_CON5_PER_CHANNEL_CVT_EN__SHIFT) & CNA_CVT_CON5_PER_CHANNEL_CVT_EN__MASK;
+}
+
+#define REG_CNA_PAD_CON1 0x00001184
+#define CNA_PAD_CON1_PAD_VALUE__MASK 0xffffffff
+#define CNA_PAD_CON1_PAD_VALUE__SHIFT 0
+static inline uint32_t CNA_PAD_CON1_PAD_VALUE(uint32_t val)
+{
+ return ((val) << CNA_PAD_CON1_PAD_VALUE__SHIFT) & CNA_PAD_CON1_PAD_VALUE__MASK;
+}
+
+#define REG_CORE_S_STATUS 0x00003000
+#define CORE_S_STATUS_RESERVED_0__MASK 0xfffc0000
+#define CORE_S_STATUS_RESERVED_0__SHIFT 18
+static inline uint32_t CORE_S_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_S_STATUS_RESERVED_0__SHIFT) & CORE_S_STATUS_RESERVED_0__MASK;
+}
+#define CORE_S_STATUS_STATUS_1__MASK 0x00030000
+#define CORE_S_STATUS_STATUS_1__SHIFT 16
+static inline uint32_t CORE_S_STATUS_STATUS_1(uint32_t val)
+{
+ return ((val) << CORE_S_STATUS_STATUS_1__SHIFT) & CORE_S_STATUS_STATUS_1__MASK;
+}
+#define CORE_S_STATUS_RESERVED_1__MASK 0x0000fffc
+#define CORE_S_STATUS_RESERVED_1__SHIFT 2
+static inline uint32_t CORE_S_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << CORE_S_STATUS_RESERVED_1__SHIFT) & CORE_S_STATUS_RESERVED_1__MASK;
+}
+#define CORE_S_STATUS_STATUS_0__MASK 0x00000003
+#define CORE_S_STATUS_STATUS_0__SHIFT 0
+static inline uint32_t CORE_S_STATUS_STATUS_0(uint32_t val)
+{
+ return ((val) << CORE_S_STATUS_STATUS_0__SHIFT) & CORE_S_STATUS_STATUS_0__MASK;
+}
+
+#define REG_CORE_S_POINTER 0x00003004
+#define CORE_S_POINTER_RESERVED_0__MASK 0xfffe0000
+#define CORE_S_POINTER_RESERVED_0__SHIFT 17
+static inline uint32_t CORE_S_POINTER_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_RESERVED_0__SHIFT) & CORE_S_POINTER_RESERVED_0__MASK;
+}
+#define CORE_S_POINTER_EXECUTER__MASK 0x00010000
+#define CORE_S_POINTER_EXECUTER__SHIFT 16
+static inline uint32_t CORE_S_POINTER_EXECUTER(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_EXECUTER__SHIFT) & CORE_S_POINTER_EXECUTER__MASK;
+}
+#define CORE_S_POINTER_RESERVED_1__MASK 0x0000ffc0
+#define CORE_S_POINTER_RESERVED_1__SHIFT 6
+static inline uint32_t CORE_S_POINTER_RESERVED_1(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_RESERVED_1__SHIFT) & CORE_S_POINTER_RESERVED_1__MASK;
+}
+#define CORE_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020
+#define CORE_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5
+static inline uint32_t CORE_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & CORE_S_POINTER_EXECUTER_PP_CLEAR__MASK;
+}
+#define CORE_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010
+#define CORE_S_POINTER_POINTER_PP_CLEAR__SHIFT 4
+static inline uint32_t CORE_S_POINTER_POINTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_POINTER_PP_CLEAR__SHIFT) & CORE_S_POINTER_POINTER_PP_CLEAR__MASK;
+}
+#define CORE_S_POINTER_POINTER_PP_MODE__MASK 0x00000008
+#define CORE_S_POINTER_POINTER_PP_MODE__SHIFT 3
+static inline uint32_t CORE_S_POINTER_POINTER_PP_MODE(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_POINTER_PP_MODE__SHIFT) & CORE_S_POINTER_POINTER_PP_MODE__MASK;
+}
+#define CORE_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004
+#define CORE_S_POINTER_EXECUTER_PP_EN__SHIFT 2
+static inline uint32_t CORE_S_POINTER_EXECUTER_PP_EN(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_EXECUTER_PP_EN__SHIFT) & CORE_S_POINTER_EXECUTER_PP_EN__MASK;
+}
+#define CORE_S_POINTER_POINTER_PP_EN__MASK 0x00000002
+#define CORE_S_POINTER_POINTER_PP_EN__SHIFT 1
+static inline uint32_t CORE_S_POINTER_POINTER_PP_EN(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_POINTER_PP_EN__SHIFT) & CORE_S_POINTER_POINTER_PP_EN__MASK;
+}
+#define CORE_S_POINTER_POINTER__MASK 0x00000001
+#define CORE_S_POINTER_POINTER__SHIFT 0
+static inline uint32_t CORE_S_POINTER_POINTER(uint32_t val)
+{
+ return ((val) << CORE_S_POINTER_POINTER__SHIFT) & CORE_S_POINTER_POINTER__MASK;
+}
+
+#define REG_CORE_OPERATION_ENABLE 0x00003008
+#define CORE_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define CORE_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t CORE_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_OPERATION_ENABLE_RESERVED_0__SHIFT) & CORE_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define CORE_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define CORE_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t CORE_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << CORE_OPERATION_ENABLE_OP_EN__SHIFT) & CORE_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_CORE_MAC_GATING 0x0000300c
+#define CORE_MAC_GATING_RESERVED_0__MASK 0xf8000000
+#define CORE_MAC_GATING_RESERVED_0__SHIFT 27
+static inline uint32_t CORE_MAC_GATING_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_MAC_GATING_RESERVED_0__SHIFT) & CORE_MAC_GATING_RESERVED_0__MASK;
+}
+#define CORE_MAC_GATING_SLCG_OP_EN__MASK 0x07ffffff
+#define CORE_MAC_GATING_SLCG_OP_EN__SHIFT 0
+static inline uint32_t CORE_MAC_GATING_SLCG_OP_EN(uint32_t val)
+{
+ return ((val) << CORE_MAC_GATING_SLCG_OP_EN__SHIFT) & CORE_MAC_GATING_SLCG_OP_EN__MASK;
+}
+
+#define REG_CORE_MISC_CFG 0x00003010
+#define CORE_MISC_CFG_RESERVED_0__MASK 0xfff00000
+#define CORE_MISC_CFG_RESERVED_0__SHIFT 20
+static inline uint32_t CORE_MISC_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_RESERVED_0__SHIFT) & CORE_MISC_CFG_RESERVED_0__MASK;
+}
+#define CORE_MISC_CFG_SOFT_GATING__MASK 0x000fc000
+#define CORE_MISC_CFG_SOFT_GATING__SHIFT 14
+static inline uint32_t CORE_MISC_CFG_SOFT_GATING(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_SOFT_GATING__SHIFT) & CORE_MISC_CFG_SOFT_GATING__MASK;
+}
+#define CORE_MISC_CFG_RESERVED_1__MASK 0x00003800
+#define CORE_MISC_CFG_RESERVED_1__SHIFT 11
+static inline uint32_t CORE_MISC_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_RESERVED_1__SHIFT) & CORE_MISC_CFG_RESERVED_1__MASK;
+}
+#define CORE_MISC_CFG_PROC_PRECISION__MASK 0x00000700
+#define CORE_MISC_CFG_PROC_PRECISION__SHIFT 8
+static inline uint32_t CORE_MISC_CFG_PROC_PRECISION(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_PROC_PRECISION__SHIFT) & CORE_MISC_CFG_PROC_PRECISION__MASK;
+}
+#define CORE_MISC_CFG_RESERVED_2__MASK 0x000000fc
+#define CORE_MISC_CFG_RESERVED_2__SHIFT 2
+static inline uint32_t CORE_MISC_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_RESERVED_2__SHIFT) & CORE_MISC_CFG_RESERVED_2__MASK;
+}
+#define CORE_MISC_CFG_DW_EN__MASK 0x00000002
+#define CORE_MISC_CFG_DW_EN__SHIFT 1
+static inline uint32_t CORE_MISC_CFG_DW_EN(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_DW_EN__SHIFT) & CORE_MISC_CFG_DW_EN__MASK;
+}
+#define CORE_MISC_CFG_QD_EN__MASK 0x00000001
+#define CORE_MISC_CFG_QD_EN__SHIFT 0
+static inline uint32_t CORE_MISC_CFG_QD_EN(uint32_t val)
+{
+ return ((val) << CORE_MISC_CFG_QD_EN__SHIFT) & CORE_MISC_CFG_QD_EN__MASK;
+}
+
+#define REG_CORE_DATAOUT_SIZE_0 0x00003014
+#define CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__MASK 0xffff0000
+#define CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__SHIFT 16
+static inline uint32_t CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT(uint32_t val)
+{
+ return ((val) << CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__SHIFT) & CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__MASK;
+}
+#define CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__MASK 0x0000ffff
+#define CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__SHIFT 0
+static inline uint32_t CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH(uint32_t val)
+{
+ return ((val) << CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__SHIFT) & CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__MASK;
+}
+
+#define REG_CORE_DATAOUT_SIZE_1 0x00003018
+#define CORE_DATAOUT_SIZE_1_RESERVED_0__MASK 0xffff0000
+#define CORE_DATAOUT_SIZE_1_RESERVED_0__SHIFT 16
+static inline uint32_t CORE_DATAOUT_SIZE_1_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_DATAOUT_SIZE_1_RESERVED_0__SHIFT) & CORE_DATAOUT_SIZE_1_RESERVED_0__MASK;
+}
+#define CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__MASK 0x0000ffff
+#define CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__SHIFT 0
+static inline uint32_t CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL(uint32_t val)
+{
+ return ((val) << CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__SHIFT) & CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__MASK;
+}
+
+#define REG_CORE_CLIP_TRUNCATE 0x0000301c
+#define CORE_CLIP_TRUNCATE_RESERVED_0__MASK 0xffffff80
+#define CORE_CLIP_TRUNCATE_RESERVED_0__SHIFT 7
+static inline uint32_t CORE_CLIP_TRUNCATE_RESERVED_0(uint32_t val)
+{
+ return ((val) << CORE_CLIP_TRUNCATE_RESERVED_0__SHIFT) & CORE_CLIP_TRUNCATE_RESERVED_0__MASK;
+}
+#define CORE_CLIP_TRUNCATE_ROUND_TYPE__MASK 0x00000040
+#define CORE_CLIP_TRUNCATE_ROUND_TYPE__SHIFT 6
+static inline uint32_t CORE_CLIP_TRUNCATE_ROUND_TYPE(uint32_t val)
+{
+ return ((val) << CORE_CLIP_TRUNCATE_ROUND_TYPE__SHIFT) & CORE_CLIP_TRUNCATE_ROUND_TYPE__MASK;
+}
+#define CORE_CLIP_TRUNCATE_RESERVED_1__MASK 0x00000020
+#define CORE_CLIP_TRUNCATE_RESERVED_1__SHIFT 5
+static inline uint32_t CORE_CLIP_TRUNCATE_RESERVED_1(uint32_t val)
+{
+ return ((val) << CORE_CLIP_TRUNCATE_RESERVED_1__SHIFT) & CORE_CLIP_TRUNCATE_RESERVED_1__MASK;
+}
+#define CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__MASK 0x0000001f
+#define CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__SHIFT 0
+static inline uint32_t CORE_CLIP_TRUNCATE_CLIP_TRUNCATE(uint32_t val)
+{
+ return ((val) << CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__SHIFT) & CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__MASK;
+}
+
+#define REG_DPU_S_STATUS 0x00004000
+#define DPU_S_STATUS_RESERVED_0__MASK 0xfffc0000
+#define DPU_S_STATUS_RESERVED_0__SHIFT 18
+static inline uint32_t DPU_S_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_S_STATUS_RESERVED_0__SHIFT) & DPU_S_STATUS_RESERVED_0__MASK;
+}
+#define DPU_S_STATUS_STATUS_1__MASK 0x00030000
+#define DPU_S_STATUS_STATUS_1__SHIFT 16
+static inline uint32_t DPU_S_STATUS_STATUS_1(uint32_t val)
+{
+ return ((val) << DPU_S_STATUS_STATUS_1__SHIFT) & DPU_S_STATUS_STATUS_1__MASK;
+}
+#define DPU_S_STATUS_RESERVED_1__MASK 0x0000fffc
+#define DPU_S_STATUS_RESERVED_1__SHIFT 2
+static inline uint32_t DPU_S_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_S_STATUS_RESERVED_1__SHIFT) & DPU_S_STATUS_RESERVED_1__MASK;
+}
+#define DPU_S_STATUS_STATUS_0__MASK 0x00000003
+#define DPU_S_STATUS_STATUS_0__SHIFT 0
+static inline uint32_t DPU_S_STATUS_STATUS_0(uint32_t val)
+{
+ return ((val) << DPU_S_STATUS_STATUS_0__SHIFT) & DPU_S_STATUS_STATUS_0__MASK;
+}
+
+#define REG_DPU_S_POINTER 0x00004004
+#define DPU_S_POINTER_RESERVED_0__MASK 0xfffe0000
+#define DPU_S_POINTER_RESERVED_0__SHIFT 17
+static inline uint32_t DPU_S_POINTER_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_RESERVED_0__SHIFT) & DPU_S_POINTER_RESERVED_0__MASK;
+}
+#define DPU_S_POINTER_EXECUTER__MASK 0x00010000
+#define DPU_S_POINTER_EXECUTER__SHIFT 16
+static inline uint32_t DPU_S_POINTER_EXECUTER(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_EXECUTER__SHIFT) & DPU_S_POINTER_EXECUTER__MASK;
+}
+#define DPU_S_POINTER_RESERVED_1__MASK 0x0000ffc0
+#define DPU_S_POINTER_RESERVED_1__SHIFT 6
+static inline uint32_t DPU_S_POINTER_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_RESERVED_1__SHIFT) & DPU_S_POINTER_RESERVED_1__MASK;
+}
+#define DPU_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020
+#define DPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5
+static inline uint32_t DPU_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & DPU_S_POINTER_EXECUTER_PP_CLEAR__MASK;
+}
+#define DPU_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010
+#define DPU_S_POINTER_POINTER_PP_CLEAR__SHIFT 4
+static inline uint32_t DPU_S_POINTER_POINTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_POINTER_PP_CLEAR__SHIFT) & DPU_S_POINTER_POINTER_PP_CLEAR__MASK;
+}
+#define DPU_S_POINTER_POINTER_PP_MODE__MASK 0x00000008
+#define DPU_S_POINTER_POINTER_PP_MODE__SHIFT 3
+static inline uint32_t DPU_S_POINTER_POINTER_PP_MODE(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_POINTER_PP_MODE__SHIFT) & DPU_S_POINTER_POINTER_PP_MODE__MASK;
+}
+#define DPU_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004
+#define DPU_S_POINTER_EXECUTER_PP_EN__SHIFT 2
+static inline uint32_t DPU_S_POINTER_EXECUTER_PP_EN(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_EXECUTER_PP_EN__SHIFT) & DPU_S_POINTER_EXECUTER_PP_EN__MASK;
+}
+#define DPU_S_POINTER_POINTER_PP_EN__MASK 0x00000002
+#define DPU_S_POINTER_POINTER_PP_EN__SHIFT 1
+static inline uint32_t DPU_S_POINTER_POINTER_PP_EN(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_POINTER_PP_EN__SHIFT) & DPU_S_POINTER_POINTER_PP_EN__MASK;
+}
+#define DPU_S_POINTER_POINTER__MASK 0x00000001
+#define DPU_S_POINTER_POINTER__SHIFT 0
+static inline uint32_t DPU_S_POINTER_POINTER(uint32_t val)
+{
+ return ((val) << DPU_S_POINTER_POINTER__SHIFT) & DPU_S_POINTER_POINTER__MASK;
+}
+
+#define REG_DPU_OPERATION_ENABLE 0x00004008
+#define DPU_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define DPU_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t DPU_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_OPERATION_ENABLE_RESERVED_0__SHIFT) & DPU_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define DPU_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define DPU_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t DPU_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << DPU_OPERATION_ENABLE_OP_EN__SHIFT) & DPU_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_DPU_FEATURE_MODE_CFG 0x0000400c
+#define DPU_FEATURE_MODE_CFG_COMB_USE__MASK 0x80000000
+#define DPU_FEATURE_MODE_CFG_COMB_USE__SHIFT 31
+static inline uint32_t DPU_FEATURE_MODE_CFG_COMB_USE(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_COMB_USE__SHIFT) & DPU_FEATURE_MODE_CFG_COMB_USE__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_TP_EN__MASK 0x40000000
+#define DPU_FEATURE_MODE_CFG_TP_EN__SHIFT 30
+static inline uint32_t DPU_FEATURE_MODE_CFG_TP_EN(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_TP_EN__SHIFT) & DPU_FEATURE_MODE_CFG_TP_EN__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_RGP_TYPE__MASK 0x3c000000
+#define DPU_FEATURE_MODE_CFG_RGP_TYPE__SHIFT 26
+static inline uint32_t DPU_FEATURE_MODE_CFG_RGP_TYPE(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_RGP_TYPE__SHIFT) & DPU_FEATURE_MODE_CFG_RGP_TYPE__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_NONALIGN__MASK 0x02000000
+#define DPU_FEATURE_MODE_CFG_NONALIGN__SHIFT 25
+static inline uint32_t DPU_FEATURE_MODE_CFG_NONALIGN(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_NONALIGN__SHIFT) & DPU_FEATURE_MODE_CFG_NONALIGN__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_SURF_LEN__MASK 0x01fffe00
+#define DPU_FEATURE_MODE_CFG_SURF_LEN__SHIFT 9
+static inline uint32_t DPU_FEATURE_MODE_CFG_SURF_LEN(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_SURF_LEN__SHIFT) & DPU_FEATURE_MODE_CFG_SURF_LEN__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_BURST_LEN__MASK 0x000001e0
+#define DPU_FEATURE_MODE_CFG_BURST_LEN__SHIFT 5
+static inline uint32_t DPU_FEATURE_MODE_CFG_BURST_LEN(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_BURST_LEN__SHIFT) & DPU_FEATURE_MODE_CFG_BURST_LEN__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_CONV_MODE__MASK 0x00000018
+#define DPU_FEATURE_MODE_CFG_CONV_MODE__SHIFT 3
+static inline uint32_t DPU_FEATURE_MODE_CFG_CONV_MODE(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_CONV_MODE__SHIFT) & DPU_FEATURE_MODE_CFG_CONV_MODE__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_OUTPUT_MODE__MASK 0x00000006
+#define DPU_FEATURE_MODE_CFG_OUTPUT_MODE__SHIFT 1
+static inline uint32_t DPU_FEATURE_MODE_CFG_OUTPUT_MODE(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_OUTPUT_MODE__SHIFT) & DPU_FEATURE_MODE_CFG_OUTPUT_MODE__MASK;
+}
+#define DPU_FEATURE_MODE_CFG_FLYING_MODE__MASK 0x00000001
+#define DPU_FEATURE_MODE_CFG_FLYING_MODE__SHIFT 0
+static inline uint32_t DPU_FEATURE_MODE_CFG_FLYING_MODE(uint32_t val)
+{
+ return ((val) << DPU_FEATURE_MODE_CFG_FLYING_MODE__SHIFT) & DPU_FEATURE_MODE_CFG_FLYING_MODE__MASK;
+}
+
+#define REG_DPU_DATA_FORMAT 0x00004010
+#define DPU_DATA_FORMAT_OUT_PRECISION__MASK 0xe0000000
+#define DPU_DATA_FORMAT_OUT_PRECISION__SHIFT 29
+static inline uint32_t DPU_DATA_FORMAT_OUT_PRECISION(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_OUT_PRECISION__SHIFT) & DPU_DATA_FORMAT_OUT_PRECISION__MASK;
+}
+#define DPU_DATA_FORMAT_IN_PRECISION__MASK 0x1c000000
+#define DPU_DATA_FORMAT_IN_PRECISION__SHIFT 26
+static inline uint32_t DPU_DATA_FORMAT_IN_PRECISION(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_IN_PRECISION__SHIFT) & DPU_DATA_FORMAT_IN_PRECISION__MASK;
+}
+#define DPU_DATA_FORMAT_EW_TRUNCATE_NEG__MASK 0x03ff0000
+#define DPU_DATA_FORMAT_EW_TRUNCATE_NEG__SHIFT 16
+static inline uint32_t DPU_DATA_FORMAT_EW_TRUNCATE_NEG(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_EW_TRUNCATE_NEG__SHIFT) & DPU_DATA_FORMAT_EW_TRUNCATE_NEG__MASK;
+}
+#define DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__MASK 0x0000fc00
+#define DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__SHIFT 10
+static inline uint32_t DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__SHIFT) & DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__MASK;
+}
+#define DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__MASK 0x000003f0
+#define DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__SHIFT 4
+static inline uint32_t DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__SHIFT) & DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__MASK;
+}
+#define DPU_DATA_FORMAT_MC_SURF_OUT__MASK 0x00000008
+#define DPU_DATA_FORMAT_MC_SURF_OUT__SHIFT 3
+static inline uint32_t DPU_DATA_FORMAT_MC_SURF_OUT(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_MC_SURF_OUT__SHIFT) & DPU_DATA_FORMAT_MC_SURF_OUT__MASK;
+}
+#define DPU_DATA_FORMAT_PROC_PRECISION__MASK 0x00000007
+#define DPU_DATA_FORMAT_PROC_PRECISION__SHIFT 0
+static inline uint32_t DPU_DATA_FORMAT_PROC_PRECISION(uint32_t val)
+{
+ return ((val) << DPU_DATA_FORMAT_PROC_PRECISION__SHIFT) & DPU_DATA_FORMAT_PROC_PRECISION__MASK;
+}
+
+#define REG_DPU_OFFSET_PEND 0x00004014
+#define DPU_OFFSET_PEND_RESERVED_0__MASK 0xffff0000
+#define DPU_OFFSET_PEND_RESERVED_0__SHIFT 16
+static inline uint32_t DPU_OFFSET_PEND_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_OFFSET_PEND_RESERVED_0__SHIFT) & DPU_OFFSET_PEND_RESERVED_0__MASK;
+}
+#define DPU_OFFSET_PEND_OFFSET_PEND__MASK 0x0000ffff
+#define DPU_OFFSET_PEND_OFFSET_PEND__SHIFT 0
+static inline uint32_t DPU_OFFSET_PEND_OFFSET_PEND(uint32_t val)
+{
+ return ((val) << DPU_OFFSET_PEND_OFFSET_PEND__SHIFT) & DPU_OFFSET_PEND_OFFSET_PEND__MASK;
+}
+
+#define REG_DPU_DST_BASE_ADDR 0x00004020
+#define DPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK 0xffffffff
+#define DPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT 0
+static inline uint32_t DPU_DST_BASE_ADDR_DST_BASE_ADDR(uint32_t val)
+{
+ return ((val) << DPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT) & DPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK;
+}
+
+#define REG_DPU_DST_SURF_STRIDE 0x00004024
+#define DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK 0xfffffff0
+#define DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT 4
+static inline uint32_t DPU_DST_SURF_STRIDE_DST_SURF_STRIDE(uint32_t val)
+{
+ return ((val) << DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT) & DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK;
+}
+#define DPU_DST_SURF_STRIDE_RESERVED_0__MASK 0x0000000f
+#define DPU_DST_SURF_STRIDE_RESERVED_0__SHIFT 0
+static inline uint32_t DPU_DST_SURF_STRIDE_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_DST_SURF_STRIDE_RESERVED_0__SHIFT) & DPU_DST_SURF_STRIDE_RESERVED_0__MASK;
+}
+
+#define REG_DPU_DATA_CUBE_WIDTH 0x00004030
+#define DPU_DATA_CUBE_WIDTH_RESERVED_0__MASK 0xffffe000
+#define DPU_DATA_CUBE_WIDTH_RESERVED_0__SHIFT 13
+static inline uint32_t DPU_DATA_CUBE_WIDTH_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_WIDTH_RESERVED_0__SHIFT) & DPU_DATA_CUBE_WIDTH_RESERVED_0__MASK;
+}
+#define DPU_DATA_CUBE_WIDTH_WIDTH__MASK 0x00001fff
+#define DPU_DATA_CUBE_WIDTH_WIDTH__SHIFT 0
+static inline uint32_t DPU_DATA_CUBE_WIDTH_WIDTH(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_WIDTH_WIDTH__SHIFT) & DPU_DATA_CUBE_WIDTH_WIDTH__MASK;
+}
+
+#define REG_DPU_DATA_CUBE_HEIGHT 0x00004034
+#define DPU_DATA_CUBE_HEIGHT_RESERVED_0__MASK 0xfe000000
+#define DPU_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT 25
+static inline uint32_t DPU_DATA_CUBE_HEIGHT_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT) & DPU_DATA_CUBE_HEIGHT_RESERVED_0__MASK;
+}
+#define DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__MASK 0x01c00000
+#define DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__SHIFT 22
+static inline uint32_t DPU_DATA_CUBE_HEIGHT_MINMAX_CTL(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__SHIFT) & DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__MASK;
+}
+#define DPU_DATA_CUBE_HEIGHT_RESERVED_1__MASK 0x003fe000
+#define DPU_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT 13
+static inline uint32_t DPU_DATA_CUBE_HEIGHT_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT) & DPU_DATA_CUBE_HEIGHT_RESERVED_1__MASK;
+}
+#define DPU_DATA_CUBE_HEIGHT_HEIGHT__MASK 0x00001fff
+#define DPU_DATA_CUBE_HEIGHT_HEIGHT__SHIFT 0
+static inline uint32_t DPU_DATA_CUBE_HEIGHT_HEIGHT(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_HEIGHT_HEIGHT__SHIFT) & DPU_DATA_CUBE_HEIGHT_HEIGHT__MASK;
+}
+
+#define REG_DPU_DATA_CUBE_NOTCH_ADDR 0x00004038
+#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__MASK 0xe0000000
+#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__SHIFT 29
+static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__MASK;
+}
+#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__MASK 0x1fff0000
+#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__SHIFT 16
+static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__MASK;
+}
+#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__MASK 0x0000e000
+#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__SHIFT 13
+static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__MASK;
+}
+#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__MASK 0x00001fff
+#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__SHIFT 0
+static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__MASK;
+}
+
+#define REG_DPU_DATA_CUBE_CHANNEL 0x0000403c
+#define DPU_DATA_CUBE_CHANNEL_RESERVED_0__MASK 0xe0000000
+#define DPU_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT 29
+static inline uint32_t DPU_DATA_CUBE_CHANNEL_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT) & DPU_DATA_CUBE_CHANNEL_RESERVED_0__MASK;
+}
+#define DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__MASK 0x1fff0000
+#define DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__SHIFT 16
+static inline uint32_t DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__SHIFT) & DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__MASK;
+}
+#define DPU_DATA_CUBE_CHANNEL_RESERVED_1__MASK 0x0000e000
+#define DPU_DATA_CUBE_CHANNEL_RESERVED_1__SHIFT 13
+static inline uint32_t DPU_DATA_CUBE_CHANNEL_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_CHANNEL_RESERVED_1__SHIFT) & DPU_DATA_CUBE_CHANNEL_RESERVED_1__MASK;
+}
+#define DPU_DATA_CUBE_CHANNEL_CHANNEL__MASK 0x00001fff
+#define DPU_DATA_CUBE_CHANNEL_CHANNEL__SHIFT 0
+static inline uint32_t DPU_DATA_CUBE_CHANNEL_CHANNEL(uint32_t val)
+{
+ return ((val) << DPU_DATA_CUBE_CHANNEL_CHANNEL__SHIFT) & DPU_DATA_CUBE_CHANNEL_CHANNEL__MASK;
+}
+
+#define REG_DPU_BS_CFG 0x00004040
+#define DPU_BS_CFG_RESERVED_0__MASK 0xfff00000
+#define DPU_BS_CFG_RESERVED_0__SHIFT 20
+static inline uint32_t DPU_BS_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_RESERVED_0__SHIFT) & DPU_BS_CFG_RESERVED_0__MASK;
+}
+#define DPU_BS_CFG_BS_ALU_ALGO__MASK 0x000f0000
+#define DPU_BS_CFG_BS_ALU_ALGO__SHIFT 16
+static inline uint32_t DPU_BS_CFG_BS_ALU_ALGO(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_ALU_ALGO__SHIFT) & DPU_BS_CFG_BS_ALU_ALGO__MASK;
+}
+#define DPU_BS_CFG_RESERVED_1__MASK 0x0000fe00
+#define DPU_BS_CFG_RESERVED_1__SHIFT 9
+static inline uint32_t DPU_BS_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_RESERVED_1__SHIFT) & DPU_BS_CFG_RESERVED_1__MASK;
+}
+#define DPU_BS_CFG_BS_ALU_SRC__MASK 0x00000100
+#define DPU_BS_CFG_BS_ALU_SRC__SHIFT 8
+static inline uint32_t DPU_BS_CFG_BS_ALU_SRC(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_ALU_SRC__SHIFT) & DPU_BS_CFG_BS_ALU_SRC__MASK;
+}
+#define DPU_BS_CFG_BS_RELUX_EN__MASK 0x00000080
+#define DPU_BS_CFG_BS_RELUX_EN__SHIFT 7
+static inline uint32_t DPU_BS_CFG_BS_RELUX_EN(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_RELUX_EN__SHIFT) & DPU_BS_CFG_BS_RELUX_EN__MASK;
+}
+#define DPU_BS_CFG_BS_RELU_BYPASS__MASK 0x00000040
+#define DPU_BS_CFG_BS_RELU_BYPASS__SHIFT 6
+static inline uint32_t DPU_BS_CFG_BS_RELU_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_RELU_BYPASS__SHIFT) & DPU_BS_CFG_BS_RELU_BYPASS__MASK;
+}
+#define DPU_BS_CFG_BS_MUL_PRELU__MASK 0x00000020
+#define DPU_BS_CFG_BS_MUL_PRELU__SHIFT 5
+static inline uint32_t DPU_BS_CFG_BS_MUL_PRELU(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_MUL_PRELU__SHIFT) & DPU_BS_CFG_BS_MUL_PRELU__MASK;
+}
+#define DPU_BS_CFG_BS_MUL_BYPASS__MASK 0x00000010
+#define DPU_BS_CFG_BS_MUL_BYPASS__SHIFT 4
+static inline uint32_t DPU_BS_CFG_BS_MUL_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_MUL_BYPASS__SHIFT) & DPU_BS_CFG_BS_MUL_BYPASS__MASK;
+}
+#define DPU_BS_CFG_RESERVED_2__MASK 0x0000000c
+#define DPU_BS_CFG_RESERVED_2__SHIFT 2
+static inline uint32_t DPU_BS_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_RESERVED_2__SHIFT) & DPU_BS_CFG_RESERVED_2__MASK;
+}
+#define DPU_BS_CFG_BS_ALU_BYPASS__MASK 0x00000002
+#define DPU_BS_CFG_BS_ALU_BYPASS__SHIFT 1
+static inline uint32_t DPU_BS_CFG_BS_ALU_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_ALU_BYPASS__SHIFT) & DPU_BS_CFG_BS_ALU_BYPASS__MASK;
+}
+#define DPU_BS_CFG_BS_BYPASS__MASK 0x00000001
+#define DPU_BS_CFG_BS_BYPASS__SHIFT 0
+static inline uint32_t DPU_BS_CFG_BS_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BS_CFG_BS_BYPASS__SHIFT) & DPU_BS_CFG_BS_BYPASS__MASK;
+}
+
+#define REG_DPU_BS_ALU_CFG 0x00004044
+#define DPU_BS_ALU_CFG_BS_ALU_OPERAND__MASK 0xffffffff
+#define DPU_BS_ALU_CFG_BS_ALU_OPERAND__SHIFT 0
+static inline uint32_t DPU_BS_ALU_CFG_BS_ALU_OPERAND(uint32_t val)
+{
+ return ((val) << DPU_BS_ALU_CFG_BS_ALU_OPERAND__SHIFT) & DPU_BS_ALU_CFG_BS_ALU_OPERAND__MASK;
+}
+
+#define REG_DPU_BS_MUL_CFG 0x00004048
+#define DPU_BS_MUL_CFG_BS_MUL_OPERAND__MASK 0xffff0000
+#define DPU_BS_MUL_CFG_BS_MUL_OPERAND__SHIFT 16
+static inline uint32_t DPU_BS_MUL_CFG_BS_MUL_OPERAND(uint32_t val)
+{
+ return ((val) << DPU_BS_MUL_CFG_BS_MUL_OPERAND__SHIFT) & DPU_BS_MUL_CFG_BS_MUL_OPERAND__MASK;
+}
+#define DPU_BS_MUL_CFG_RESERVED_0__MASK 0x0000c000
+#define DPU_BS_MUL_CFG_RESERVED_0__SHIFT 14
+static inline uint32_t DPU_BS_MUL_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_BS_MUL_CFG_RESERVED_0__SHIFT) & DPU_BS_MUL_CFG_RESERVED_0__MASK;
+}
+#define DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__MASK 0x00003f00
+#define DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__SHIFT 8
+static inline uint32_t DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE(uint32_t val)
+{
+ return ((val) << DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__SHIFT) & DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__MASK;
+}
+#define DPU_BS_MUL_CFG_RESERVED_1__MASK 0x000000fc
+#define DPU_BS_MUL_CFG_RESERVED_1__SHIFT 2
+static inline uint32_t DPU_BS_MUL_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_BS_MUL_CFG_RESERVED_1__SHIFT) & DPU_BS_MUL_CFG_RESERVED_1__MASK;
+}
+#define DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__MASK 0x00000002
+#define DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__SHIFT 1
+static inline uint32_t DPU_BS_MUL_CFG_BS_TRUNCATE_SRC(uint32_t val)
+{
+ return ((val) << DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__SHIFT) & DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__MASK;
+}
+#define DPU_BS_MUL_CFG_BS_MUL_SRC__MASK 0x00000001
+#define DPU_BS_MUL_CFG_BS_MUL_SRC__SHIFT 0
+static inline uint32_t DPU_BS_MUL_CFG_BS_MUL_SRC(uint32_t val)
+{
+ return ((val) << DPU_BS_MUL_CFG_BS_MUL_SRC__SHIFT) & DPU_BS_MUL_CFG_BS_MUL_SRC__MASK;
+}
+
+#define REG_DPU_BS_RELUX_CMP_VALUE 0x0000404c
+#define DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__MASK 0xffffffff
+#define DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__SHIFT 0
+static inline uint32_t DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT(uint32_t val)
+{
+ return ((val) << DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__SHIFT) & DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__MASK;
+}
+
+#define REG_DPU_BS_OW_CFG 0x00004050
+#define DPU_BS_OW_CFG_RGP_CNTER__MASK 0xf0000000
+#define DPU_BS_OW_CFG_RGP_CNTER__SHIFT 28
+static inline uint32_t DPU_BS_OW_CFG_RGP_CNTER(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_RGP_CNTER__SHIFT) & DPU_BS_OW_CFG_RGP_CNTER__MASK;
+}
+#define DPU_BS_OW_CFG_TP_ORG_EN__MASK 0x08000000
+#define DPU_BS_OW_CFG_TP_ORG_EN__SHIFT 27
+static inline uint32_t DPU_BS_OW_CFG_TP_ORG_EN(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_TP_ORG_EN__SHIFT) & DPU_BS_OW_CFG_TP_ORG_EN__MASK;
+}
+#define DPU_BS_OW_CFG_RESERVED_0__MASK 0x07fff800
+#define DPU_BS_OW_CFG_RESERVED_0__SHIFT 11
+static inline uint32_t DPU_BS_OW_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_RESERVED_0__SHIFT) & DPU_BS_OW_CFG_RESERVED_0__MASK;
+}
+#define DPU_BS_OW_CFG_SIZE_E_2__MASK 0x00000700
+#define DPU_BS_OW_CFG_SIZE_E_2__SHIFT 8
+static inline uint32_t DPU_BS_OW_CFG_SIZE_E_2(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_SIZE_E_2__SHIFT) & DPU_BS_OW_CFG_SIZE_E_2__MASK;
+}
+#define DPU_BS_OW_CFG_SIZE_E_1__MASK 0x000000e0
+#define DPU_BS_OW_CFG_SIZE_E_1__SHIFT 5
+static inline uint32_t DPU_BS_OW_CFG_SIZE_E_1(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_SIZE_E_1__SHIFT) & DPU_BS_OW_CFG_SIZE_E_1__MASK;
+}
+#define DPU_BS_OW_CFG_SIZE_E_0__MASK 0x0000001c
+#define DPU_BS_OW_CFG_SIZE_E_0__SHIFT 2
+static inline uint32_t DPU_BS_OW_CFG_SIZE_E_0(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_SIZE_E_0__SHIFT) & DPU_BS_OW_CFG_SIZE_E_0__MASK;
+}
+#define DPU_BS_OW_CFG_OD_BYPASS__MASK 0x00000002
+#define DPU_BS_OW_CFG_OD_BYPASS__SHIFT 1
+static inline uint32_t DPU_BS_OW_CFG_OD_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_OD_BYPASS__SHIFT) & DPU_BS_OW_CFG_OD_BYPASS__MASK;
+}
+#define DPU_BS_OW_CFG_OW_SRC__MASK 0x00000001
+#define DPU_BS_OW_CFG_OW_SRC__SHIFT 0
+static inline uint32_t DPU_BS_OW_CFG_OW_SRC(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_CFG_OW_SRC__SHIFT) & DPU_BS_OW_CFG_OW_SRC__MASK;
+}
+
+#define REG_DPU_BS_OW_OP 0x00004054
+#define DPU_BS_OW_OP_RESERVED_0__MASK 0xffff0000
+#define DPU_BS_OW_OP_RESERVED_0__SHIFT 16
+static inline uint32_t DPU_BS_OW_OP_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_OP_RESERVED_0__SHIFT) & DPU_BS_OW_OP_RESERVED_0__MASK;
+}
+#define DPU_BS_OW_OP_OW_OP__MASK 0x0000ffff
+#define DPU_BS_OW_OP_OW_OP__SHIFT 0
+static inline uint32_t DPU_BS_OW_OP_OW_OP(uint32_t val)
+{
+ return ((val) << DPU_BS_OW_OP_OW_OP__SHIFT) & DPU_BS_OW_OP_OW_OP__MASK;
+}
+
+#define REG_DPU_WDMA_SIZE_0 0x00004058
+#define DPU_WDMA_SIZE_0_RESERVED_0__MASK 0xf0000000
+#define DPU_WDMA_SIZE_0_RESERVED_0__SHIFT 28
+static inline uint32_t DPU_WDMA_SIZE_0_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_0_RESERVED_0__SHIFT) & DPU_WDMA_SIZE_0_RESERVED_0__MASK;
+}
+#define DPU_WDMA_SIZE_0_TP_PRECISION__MASK 0x08000000
+#define DPU_WDMA_SIZE_0_TP_PRECISION__SHIFT 27
+static inline uint32_t DPU_WDMA_SIZE_0_TP_PRECISION(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_0_TP_PRECISION__SHIFT) & DPU_WDMA_SIZE_0_TP_PRECISION__MASK;
+}
+#define DPU_WDMA_SIZE_0_SIZE_C_WDMA__MASK 0x07ff0000
+#define DPU_WDMA_SIZE_0_SIZE_C_WDMA__SHIFT 16
+static inline uint32_t DPU_WDMA_SIZE_0_SIZE_C_WDMA(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_0_SIZE_C_WDMA__SHIFT) & DPU_WDMA_SIZE_0_SIZE_C_WDMA__MASK;
+}
+#define DPU_WDMA_SIZE_0_RESERVED_1__MASK 0x0000e000
+#define DPU_WDMA_SIZE_0_RESERVED_1__SHIFT 13
+static inline uint32_t DPU_WDMA_SIZE_0_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_0_RESERVED_1__SHIFT) & DPU_WDMA_SIZE_0_RESERVED_1__MASK;
+}
+#define DPU_WDMA_SIZE_0_CHANNEL_WDMA__MASK 0x00001fff
+#define DPU_WDMA_SIZE_0_CHANNEL_WDMA__SHIFT 0
+static inline uint32_t DPU_WDMA_SIZE_0_CHANNEL_WDMA(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_0_CHANNEL_WDMA__SHIFT) & DPU_WDMA_SIZE_0_CHANNEL_WDMA__MASK;
+}
+
+#define REG_DPU_WDMA_SIZE_1 0x0000405c
+#define DPU_WDMA_SIZE_1_RESERVED_0__MASK 0xe0000000
+#define DPU_WDMA_SIZE_1_RESERVED_0__SHIFT 29
+static inline uint32_t DPU_WDMA_SIZE_1_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_1_RESERVED_0__SHIFT) & DPU_WDMA_SIZE_1_RESERVED_0__MASK;
+}
+#define DPU_WDMA_SIZE_1_HEIGHT_WDMA__MASK 0x1fff0000
+#define DPU_WDMA_SIZE_1_HEIGHT_WDMA__SHIFT 16
+static inline uint32_t DPU_WDMA_SIZE_1_HEIGHT_WDMA(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_1_HEIGHT_WDMA__SHIFT) & DPU_WDMA_SIZE_1_HEIGHT_WDMA__MASK;
+}
+#define DPU_WDMA_SIZE_1_RESERVED_1__MASK 0x0000e000
+#define DPU_WDMA_SIZE_1_RESERVED_1__SHIFT 13
+static inline uint32_t DPU_WDMA_SIZE_1_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_1_RESERVED_1__SHIFT) & DPU_WDMA_SIZE_1_RESERVED_1__MASK;
+}
+#define DPU_WDMA_SIZE_1_WIDTH_WDMA__MASK 0x00001fff
+#define DPU_WDMA_SIZE_1_WIDTH_WDMA__SHIFT 0
+static inline uint32_t DPU_WDMA_SIZE_1_WIDTH_WDMA(uint32_t val)
+{
+ return ((val) << DPU_WDMA_SIZE_1_WIDTH_WDMA__SHIFT) & DPU_WDMA_SIZE_1_WIDTH_WDMA__MASK;
+}
+
+#define REG_DPU_BN_CFG 0x00004060
+#define DPU_BN_CFG_RESERVED_0__MASK 0xfff00000
+#define DPU_BN_CFG_RESERVED_0__SHIFT 20
+static inline uint32_t DPU_BN_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_RESERVED_0__SHIFT) & DPU_BN_CFG_RESERVED_0__MASK;
+}
+#define DPU_BN_CFG_BN_ALU_ALGO__MASK 0x000f0000
+#define DPU_BN_CFG_BN_ALU_ALGO__SHIFT 16
+static inline uint32_t DPU_BN_CFG_BN_ALU_ALGO(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_ALU_ALGO__SHIFT) & DPU_BN_CFG_BN_ALU_ALGO__MASK;
+}
+#define DPU_BN_CFG_RESERVED_1__MASK 0x0000fe00
+#define DPU_BN_CFG_RESERVED_1__SHIFT 9
+static inline uint32_t DPU_BN_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_RESERVED_1__SHIFT) & DPU_BN_CFG_RESERVED_1__MASK;
+}
+#define DPU_BN_CFG_BN_ALU_SRC__MASK 0x00000100
+#define DPU_BN_CFG_BN_ALU_SRC__SHIFT 8
+static inline uint32_t DPU_BN_CFG_BN_ALU_SRC(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_ALU_SRC__SHIFT) & DPU_BN_CFG_BN_ALU_SRC__MASK;
+}
+#define DPU_BN_CFG_BN_RELUX_EN__MASK 0x00000080
+#define DPU_BN_CFG_BN_RELUX_EN__SHIFT 7
+static inline uint32_t DPU_BN_CFG_BN_RELUX_EN(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_RELUX_EN__SHIFT) & DPU_BN_CFG_BN_RELUX_EN__MASK;
+}
+#define DPU_BN_CFG_BN_RELU_BYPASS__MASK 0x00000040
+#define DPU_BN_CFG_BN_RELU_BYPASS__SHIFT 6
+static inline uint32_t DPU_BN_CFG_BN_RELU_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_RELU_BYPASS__SHIFT) & DPU_BN_CFG_BN_RELU_BYPASS__MASK;
+}
+#define DPU_BN_CFG_BN_MUL_PRELU__MASK 0x00000020
+#define DPU_BN_CFG_BN_MUL_PRELU__SHIFT 5
+static inline uint32_t DPU_BN_CFG_BN_MUL_PRELU(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_MUL_PRELU__SHIFT) & DPU_BN_CFG_BN_MUL_PRELU__MASK;
+}
+#define DPU_BN_CFG_BN_MUL_BYPASS__MASK 0x00000010
+#define DPU_BN_CFG_BN_MUL_BYPASS__SHIFT 4
+static inline uint32_t DPU_BN_CFG_BN_MUL_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_MUL_BYPASS__SHIFT) & DPU_BN_CFG_BN_MUL_BYPASS__MASK;
+}
+#define DPU_BN_CFG_RESERVED_2__MASK 0x0000000c
+#define DPU_BN_CFG_RESERVED_2__SHIFT 2
+static inline uint32_t DPU_BN_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_RESERVED_2__SHIFT) & DPU_BN_CFG_RESERVED_2__MASK;
+}
+#define DPU_BN_CFG_BN_ALU_BYPASS__MASK 0x00000002
+#define DPU_BN_CFG_BN_ALU_BYPASS__SHIFT 1
+static inline uint32_t DPU_BN_CFG_BN_ALU_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_ALU_BYPASS__SHIFT) & DPU_BN_CFG_BN_ALU_BYPASS__MASK;
+}
+#define DPU_BN_CFG_BN_BYPASS__MASK 0x00000001
+#define DPU_BN_CFG_BN_BYPASS__SHIFT 0
+static inline uint32_t DPU_BN_CFG_BN_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_BN_CFG_BN_BYPASS__SHIFT) & DPU_BN_CFG_BN_BYPASS__MASK;
+}
+
+#define REG_DPU_BN_ALU_CFG 0x00004064
+#define DPU_BN_ALU_CFG_BN_ALU_OPERAND__MASK 0xffffffff
+#define DPU_BN_ALU_CFG_BN_ALU_OPERAND__SHIFT 0
+static inline uint32_t DPU_BN_ALU_CFG_BN_ALU_OPERAND(uint32_t val)
+{
+ return ((val) << DPU_BN_ALU_CFG_BN_ALU_OPERAND__SHIFT) & DPU_BN_ALU_CFG_BN_ALU_OPERAND__MASK;
+}
+
+#define REG_DPU_BN_MUL_CFG 0x00004068
+#define DPU_BN_MUL_CFG_BN_MUL_OPERAND__MASK 0xffff0000
+#define DPU_BN_MUL_CFG_BN_MUL_OPERAND__SHIFT 16
+static inline uint32_t DPU_BN_MUL_CFG_BN_MUL_OPERAND(uint32_t val)
+{
+ return ((val) << DPU_BN_MUL_CFG_BN_MUL_OPERAND__SHIFT) & DPU_BN_MUL_CFG_BN_MUL_OPERAND__MASK;
+}
+#define DPU_BN_MUL_CFG_RESERVED_0__MASK 0x0000c000
+#define DPU_BN_MUL_CFG_RESERVED_0__SHIFT 14
+static inline uint32_t DPU_BN_MUL_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_BN_MUL_CFG_RESERVED_0__SHIFT) & DPU_BN_MUL_CFG_RESERVED_0__MASK;
+}
+#define DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__MASK 0x00003f00
+#define DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__SHIFT 8
+static inline uint32_t DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE(uint32_t val)
+{
+ return ((val) << DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__SHIFT) & DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__MASK;
+}
+#define DPU_BN_MUL_CFG_RESERVED_1__MASK 0x000000fc
+#define DPU_BN_MUL_CFG_RESERVED_1__SHIFT 2
+static inline uint32_t DPU_BN_MUL_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_BN_MUL_CFG_RESERVED_1__SHIFT) & DPU_BN_MUL_CFG_RESERVED_1__MASK;
+}
+#define DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__MASK 0x00000002
+#define DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__SHIFT 1
+static inline uint32_t DPU_BN_MUL_CFG_BN_TRUNCATE_SRC(uint32_t val)
+{
+ return ((val) << DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__SHIFT) & DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__MASK;
+}
+#define DPU_BN_MUL_CFG_BN_MUL_SRC__MASK 0x00000001
+#define DPU_BN_MUL_CFG_BN_MUL_SRC__SHIFT 0
+static inline uint32_t DPU_BN_MUL_CFG_BN_MUL_SRC(uint32_t val)
+{
+ return ((val) << DPU_BN_MUL_CFG_BN_MUL_SRC__SHIFT) & DPU_BN_MUL_CFG_BN_MUL_SRC__MASK;
+}
+
+#define REG_DPU_BN_RELUX_CMP_VALUE 0x0000406c
+#define DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__MASK 0xffffffff
+#define DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__SHIFT 0
+static inline uint32_t DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT(uint32_t val)
+{
+ return ((val) << DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__SHIFT) & DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__MASK;
+}
+
+#define REG_DPU_EW_CFG 0x00004070
+#define DPU_EW_CFG_EW_CVT_TYPE__MASK 0x80000000
+#define DPU_EW_CFG_EW_CVT_TYPE__SHIFT 31
+static inline uint32_t DPU_EW_CFG_EW_CVT_TYPE(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_CVT_TYPE__SHIFT) & DPU_EW_CFG_EW_CVT_TYPE__MASK;
+}
+#define DPU_EW_CFG_EW_CVT_ROUND__MASK 0x40000000
+#define DPU_EW_CFG_EW_CVT_ROUND__SHIFT 30
+static inline uint32_t DPU_EW_CFG_EW_CVT_ROUND(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_CVT_ROUND__SHIFT) & DPU_EW_CFG_EW_CVT_ROUND__MASK;
+}
+#define DPU_EW_CFG_EW_DATA_MODE__MASK 0x30000000
+#define DPU_EW_CFG_EW_DATA_MODE__SHIFT 28
+static inline uint32_t DPU_EW_CFG_EW_DATA_MODE(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_DATA_MODE__SHIFT) & DPU_EW_CFG_EW_DATA_MODE__MASK;
+}
+#define DPU_EW_CFG_RESERVED_0__MASK 0x0f000000
+#define DPU_EW_CFG_RESERVED_0__SHIFT 24
+static inline uint32_t DPU_EW_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_RESERVED_0__SHIFT) & DPU_EW_CFG_RESERVED_0__MASK;
+}
+#define DPU_EW_CFG_EDATA_SIZE__MASK 0x00c00000
+#define DPU_EW_CFG_EDATA_SIZE__SHIFT 22
+static inline uint32_t DPU_EW_CFG_EDATA_SIZE(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EDATA_SIZE__SHIFT) & DPU_EW_CFG_EDATA_SIZE__MASK;
+}
+#define DPU_EW_CFG_EW_EQUAL_EN__MASK 0x00200000
+#define DPU_EW_CFG_EW_EQUAL_EN__SHIFT 21
+static inline uint32_t DPU_EW_CFG_EW_EQUAL_EN(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_EQUAL_EN__SHIFT) & DPU_EW_CFG_EW_EQUAL_EN__MASK;
+}
+#define DPU_EW_CFG_EW_BINARY_EN__MASK 0x00100000
+#define DPU_EW_CFG_EW_BINARY_EN__SHIFT 20
+static inline uint32_t DPU_EW_CFG_EW_BINARY_EN(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_BINARY_EN__SHIFT) & DPU_EW_CFG_EW_BINARY_EN__MASK;
+}
+#define DPU_EW_CFG_EW_ALU_ALGO__MASK 0x000f0000
+#define DPU_EW_CFG_EW_ALU_ALGO__SHIFT 16
+static inline uint32_t DPU_EW_CFG_EW_ALU_ALGO(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_ALU_ALGO__SHIFT) & DPU_EW_CFG_EW_ALU_ALGO__MASK;
+}
+#define DPU_EW_CFG_RESERVED_1__MASK 0x0000f800
+#define DPU_EW_CFG_RESERVED_1__SHIFT 11
+static inline uint32_t DPU_EW_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_RESERVED_1__SHIFT) & DPU_EW_CFG_RESERVED_1__MASK;
+}
+#define DPU_EW_CFG_EW_RELUX_EN__MASK 0x00000400
+#define DPU_EW_CFG_EW_RELUX_EN__SHIFT 10
+static inline uint32_t DPU_EW_CFG_EW_RELUX_EN(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_RELUX_EN__SHIFT) & DPU_EW_CFG_EW_RELUX_EN__MASK;
+}
+#define DPU_EW_CFG_EW_RELU_BYPASS__MASK 0x00000200
+#define DPU_EW_CFG_EW_RELU_BYPASS__SHIFT 9
+static inline uint32_t DPU_EW_CFG_EW_RELU_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_RELU_BYPASS__SHIFT) & DPU_EW_CFG_EW_RELU_BYPASS__MASK;
+}
+#define DPU_EW_CFG_EW_OP_CVT_BYPASS__MASK 0x00000100
+#define DPU_EW_CFG_EW_OP_CVT_BYPASS__SHIFT 8
+static inline uint32_t DPU_EW_CFG_EW_OP_CVT_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_OP_CVT_BYPASS__SHIFT) & DPU_EW_CFG_EW_OP_CVT_BYPASS__MASK;
+}
+#define DPU_EW_CFG_EW_LUT_BYPASS__MASK 0x00000080
+#define DPU_EW_CFG_EW_LUT_BYPASS__SHIFT 7
+static inline uint32_t DPU_EW_CFG_EW_LUT_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_LUT_BYPASS__SHIFT) & DPU_EW_CFG_EW_LUT_BYPASS__MASK;
+}
+#define DPU_EW_CFG_EW_OP_SRC__MASK 0x00000040
+#define DPU_EW_CFG_EW_OP_SRC__SHIFT 6
+static inline uint32_t DPU_EW_CFG_EW_OP_SRC(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_OP_SRC__SHIFT) & DPU_EW_CFG_EW_OP_SRC__MASK;
+}
+#define DPU_EW_CFG_EW_MUL_PRELU__MASK 0x00000020
+#define DPU_EW_CFG_EW_MUL_PRELU__SHIFT 5
+static inline uint32_t DPU_EW_CFG_EW_MUL_PRELU(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_MUL_PRELU__SHIFT) & DPU_EW_CFG_EW_MUL_PRELU__MASK;
+}
+#define DPU_EW_CFG_RESERVED_2__MASK 0x00000018
+#define DPU_EW_CFG_RESERVED_2__SHIFT 3
+static inline uint32_t DPU_EW_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_RESERVED_2__SHIFT) & DPU_EW_CFG_RESERVED_2__MASK;
+}
+#define DPU_EW_CFG_EW_OP_TYPE__MASK 0x00000004
+#define DPU_EW_CFG_EW_OP_TYPE__SHIFT 2
+static inline uint32_t DPU_EW_CFG_EW_OP_TYPE(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_OP_TYPE__SHIFT) & DPU_EW_CFG_EW_OP_TYPE__MASK;
+}
+#define DPU_EW_CFG_EW_OP_BYPASS__MASK 0x00000002
+#define DPU_EW_CFG_EW_OP_BYPASS__SHIFT 1
+static inline uint32_t DPU_EW_CFG_EW_OP_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_OP_BYPASS__SHIFT) & DPU_EW_CFG_EW_OP_BYPASS__MASK;
+}
+#define DPU_EW_CFG_EW_BYPASS__MASK 0x00000001
+#define DPU_EW_CFG_EW_BYPASS__SHIFT 0
+static inline uint32_t DPU_EW_CFG_EW_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_EW_CFG_EW_BYPASS__SHIFT) & DPU_EW_CFG_EW_BYPASS__MASK;
+}
+
+#define REG_DPU_EW_CVT_OFFSET_VALUE 0x00004074
+#define DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__MASK 0xffffffff
+#define DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__SHIFT 0
+static inline uint32_t DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET(uint32_t val)
+{
+ return ((val) << DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__SHIFT) & DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__MASK;
+}
+
+#define REG_DPU_EW_CVT_SCALE_VALUE 0x00004078
+#define DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__MASK 0xffc00000
+#define DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__SHIFT 22
+static inline uint32_t DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE(uint32_t val)
+{
+ return ((val) << DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__SHIFT) & DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__MASK;
+}
+#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__MASK 0x003f0000
+#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__SHIFT 16
+static inline uint32_t DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT(uint32_t val)
+{
+ return ((val) << DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__SHIFT) & DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__MASK;
+}
+#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__MASK 0x0000ffff
+#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__SHIFT 0
+static inline uint32_t DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE(uint32_t val)
+{
+ return ((val) << DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__SHIFT) & DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__MASK;
+}
+
+#define REG_DPU_EW_RELUX_CMP_VALUE 0x0000407c
+#define DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__MASK 0xffffffff
+#define DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__SHIFT 0
+static inline uint32_t DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT(uint32_t val)
+{
+ return ((val) << DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__SHIFT) & DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__MASK;
+}
+
+#define REG_DPU_OUT_CVT_OFFSET 0x00004080
+#define DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__MASK 0xffffffff
+#define DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__SHIFT 0
+static inline uint32_t DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__SHIFT) & DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__MASK;
+}
+
+#define REG_DPU_OUT_CVT_SCALE 0x00004084
+#define DPU_OUT_CVT_SCALE_RESERVED_0__MASK 0xfffe0000
+#define DPU_OUT_CVT_SCALE_RESERVED_0__SHIFT 17
+static inline uint32_t DPU_OUT_CVT_SCALE_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SCALE_RESERVED_0__SHIFT) & DPU_OUT_CVT_SCALE_RESERVED_0__MASK;
+}
+#define DPU_OUT_CVT_SCALE_FP32TOFP16_EN__MASK 0x00010000
+#define DPU_OUT_CVT_SCALE_FP32TOFP16_EN__SHIFT 16
+static inline uint32_t DPU_OUT_CVT_SCALE_FP32TOFP16_EN(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SCALE_FP32TOFP16_EN__SHIFT) & DPU_OUT_CVT_SCALE_FP32TOFP16_EN__MASK;
+}
+#define DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__MASK 0x0000ffff
+#define DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__SHIFT 0
+static inline uint32_t DPU_OUT_CVT_SCALE_OUT_CVT_SCALE(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__SHIFT) & DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__MASK;
+}
+
+#define REG_DPU_OUT_CVT_SHIFT 0x00004088
+#define DPU_OUT_CVT_SHIFT_CVT_TYPE__MASK 0x80000000
+#define DPU_OUT_CVT_SHIFT_CVT_TYPE__SHIFT 31
+static inline uint32_t DPU_OUT_CVT_SHIFT_CVT_TYPE(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SHIFT_CVT_TYPE__SHIFT) & DPU_OUT_CVT_SHIFT_CVT_TYPE__MASK;
+}
+#define DPU_OUT_CVT_SHIFT_CVT_ROUND__MASK 0x40000000
+#define DPU_OUT_CVT_SHIFT_CVT_ROUND__SHIFT 30
+static inline uint32_t DPU_OUT_CVT_SHIFT_CVT_ROUND(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SHIFT_CVT_ROUND__SHIFT) & DPU_OUT_CVT_SHIFT_CVT_ROUND__MASK;
+}
+#define DPU_OUT_CVT_SHIFT_RESERVED_0__MASK 0x3ff00000
+#define DPU_OUT_CVT_SHIFT_RESERVED_0__SHIFT 20
+static inline uint32_t DPU_OUT_CVT_SHIFT_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SHIFT_RESERVED_0__SHIFT) & DPU_OUT_CVT_SHIFT_RESERVED_0__MASK;
+}
+#define DPU_OUT_CVT_SHIFT_MINUS_EXP__MASK 0x000ff000
+#define DPU_OUT_CVT_SHIFT_MINUS_EXP__SHIFT 12
+static inline uint32_t DPU_OUT_CVT_SHIFT_MINUS_EXP(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SHIFT_MINUS_EXP__SHIFT) & DPU_OUT_CVT_SHIFT_MINUS_EXP__MASK;
+}
+#define DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__MASK 0x00000fff
+#define DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__SHIFT 0
+static inline uint32_t DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT(uint32_t val)
+{
+ return ((val) << DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__SHIFT) & DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_0 0x00004090
+#define DPU_EW_OP_VALUE_0_EW_OPERAND_0__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_0_EW_OPERAND_0__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_0_EW_OPERAND_0(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_0_EW_OPERAND_0__SHIFT) & DPU_EW_OP_VALUE_0_EW_OPERAND_0__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_1 0x00004094
+#define DPU_EW_OP_VALUE_1_EW_OPERAND_1__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_1_EW_OPERAND_1__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_1_EW_OPERAND_1(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_1_EW_OPERAND_1__SHIFT) & DPU_EW_OP_VALUE_1_EW_OPERAND_1__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_2 0x00004098
+#define DPU_EW_OP_VALUE_2_EW_OPERAND_2__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_2_EW_OPERAND_2__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_2_EW_OPERAND_2(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_2_EW_OPERAND_2__SHIFT) & DPU_EW_OP_VALUE_2_EW_OPERAND_2__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_3 0x0000409c
+#define DPU_EW_OP_VALUE_3_EW_OPERAND_3__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_3_EW_OPERAND_3__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_3_EW_OPERAND_3(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_3_EW_OPERAND_3__SHIFT) & DPU_EW_OP_VALUE_3_EW_OPERAND_3__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_4 0x000040a0
+#define DPU_EW_OP_VALUE_4_EW_OPERAND_4__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_4_EW_OPERAND_4__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_4_EW_OPERAND_4(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_4_EW_OPERAND_4__SHIFT) & DPU_EW_OP_VALUE_4_EW_OPERAND_4__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_5 0x000040a4
+#define DPU_EW_OP_VALUE_5_EW_OPERAND_5__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_5_EW_OPERAND_5__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_5_EW_OPERAND_5(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_5_EW_OPERAND_5__SHIFT) & DPU_EW_OP_VALUE_5_EW_OPERAND_5__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_6 0x000040a8
+#define DPU_EW_OP_VALUE_6_EW_OPERAND_6__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_6_EW_OPERAND_6__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_6_EW_OPERAND_6(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_6_EW_OPERAND_6__SHIFT) & DPU_EW_OP_VALUE_6_EW_OPERAND_6__MASK;
+}
+
+#define REG_DPU_EW_OP_VALUE_7 0x000040ac
+#define DPU_EW_OP_VALUE_7_EW_OPERAND_7__MASK 0xffffffff
+#define DPU_EW_OP_VALUE_7_EW_OPERAND_7__SHIFT 0
+static inline uint32_t DPU_EW_OP_VALUE_7_EW_OPERAND_7(uint32_t val)
+{
+ return ((val) << DPU_EW_OP_VALUE_7_EW_OPERAND_7__SHIFT) & DPU_EW_OP_VALUE_7_EW_OPERAND_7__MASK;
+}
+
+#define REG_DPU_SURFACE_ADD 0x000040c0
+#define DPU_SURFACE_ADD_SURF_ADD__MASK 0xfffffff0
+#define DPU_SURFACE_ADD_SURF_ADD__SHIFT 4
+static inline uint32_t DPU_SURFACE_ADD_SURF_ADD(uint32_t val)
+{
+ return ((val) << DPU_SURFACE_ADD_SURF_ADD__SHIFT) & DPU_SURFACE_ADD_SURF_ADD__MASK;
+}
+#define DPU_SURFACE_ADD_RESERVED_0__MASK 0x0000000f
+#define DPU_SURFACE_ADD_RESERVED_0__SHIFT 0
+static inline uint32_t DPU_SURFACE_ADD_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_SURFACE_ADD_RESERVED_0__SHIFT) & DPU_SURFACE_ADD_RESERVED_0__MASK;
+}
+
+#define REG_DPU_LUT_ACCESS_CFG 0x00004100
+#define DPU_LUT_ACCESS_CFG_RESERVED_0__MASK 0xfffc0000
+#define DPU_LUT_ACCESS_CFG_RESERVED_0__SHIFT 18
+static inline uint32_t DPU_LUT_ACCESS_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_CFG_RESERVED_0__SHIFT) & DPU_LUT_ACCESS_CFG_RESERVED_0__MASK;
+}
+#define DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__MASK 0x00020000
+#define DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__SHIFT 17
+static inline uint32_t DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__SHIFT) & DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__MASK;
+}
+#define DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__MASK 0x00010000
+#define DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__SHIFT 16
+static inline uint32_t DPU_LUT_ACCESS_CFG_LUT_TABLE_ID(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__SHIFT) & DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__MASK;
+}
+#define DPU_LUT_ACCESS_CFG_RESERVED_1__MASK 0x0000fc00
+#define DPU_LUT_ACCESS_CFG_RESERVED_1__SHIFT 10
+static inline uint32_t DPU_LUT_ACCESS_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_CFG_RESERVED_1__SHIFT) & DPU_LUT_ACCESS_CFG_RESERVED_1__MASK;
+}
+#define DPU_LUT_ACCESS_CFG_LUT_ADDR__MASK 0x000003ff
+#define DPU_LUT_ACCESS_CFG_LUT_ADDR__SHIFT 0
+static inline uint32_t DPU_LUT_ACCESS_CFG_LUT_ADDR(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_CFG_LUT_ADDR__SHIFT) & DPU_LUT_ACCESS_CFG_LUT_ADDR__MASK;
+}
+
+#define REG_DPU_LUT_ACCESS_DATA 0x00004104
+#define DPU_LUT_ACCESS_DATA_RESERVED_0__MASK 0xffff0000
+#define DPU_LUT_ACCESS_DATA_RESERVED_0__SHIFT 16
+static inline uint32_t DPU_LUT_ACCESS_DATA_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_DATA_RESERVED_0__SHIFT) & DPU_LUT_ACCESS_DATA_RESERVED_0__MASK;
+}
+#define DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__MASK 0x0000ffff
+#define DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__SHIFT 0
+static inline uint32_t DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA(uint32_t val)
+{
+ return ((val) << DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__SHIFT) & DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__MASK;
+}
+
+#define REG_DPU_LUT_CFG 0x00004108
+#define DPU_LUT_CFG_RESERVED_0__MASK 0xffffff00
+#define DPU_LUT_CFG_RESERVED_0__SHIFT 8
+static inline uint32_t DPU_LUT_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_RESERVED_0__SHIFT) & DPU_LUT_CFG_RESERVED_0__MASK;
+}
+#define DPU_LUT_CFG_LUT_CAL_SEL__MASK 0x00000080
+#define DPU_LUT_CFG_LUT_CAL_SEL__SHIFT 7
+static inline uint32_t DPU_LUT_CFG_LUT_CAL_SEL(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_CAL_SEL__SHIFT) & DPU_LUT_CFG_LUT_CAL_SEL__MASK;
+}
+#define DPU_LUT_CFG_LUT_HYBRID_PRIORITY__MASK 0x00000040
+#define DPU_LUT_CFG_LUT_HYBRID_PRIORITY__SHIFT 6
+static inline uint32_t DPU_LUT_CFG_LUT_HYBRID_PRIORITY(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_HYBRID_PRIORITY__SHIFT) & DPU_LUT_CFG_LUT_HYBRID_PRIORITY__MASK;
+}
+#define DPU_LUT_CFG_LUT_OFLOW_PRIORITY__MASK 0x00000020
+#define DPU_LUT_CFG_LUT_OFLOW_PRIORITY__SHIFT 5
+static inline uint32_t DPU_LUT_CFG_LUT_OFLOW_PRIORITY(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_OFLOW_PRIORITY__SHIFT) & DPU_LUT_CFG_LUT_OFLOW_PRIORITY__MASK;
+}
+#define DPU_LUT_CFG_LUT_UFLOW_PRIORITY__MASK 0x00000010
+#define DPU_LUT_CFG_LUT_UFLOW_PRIORITY__SHIFT 4
+static inline uint32_t DPU_LUT_CFG_LUT_UFLOW_PRIORITY(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_UFLOW_PRIORITY__SHIFT) & DPU_LUT_CFG_LUT_UFLOW_PRIORITY__MASK;
+}
+#define DPU_LUT_CFG_LUT_LO_LE_MUX__MASK 0x0000000c
+#define DPU_LUT_CFG_LUT_LO_LE_MUX__SHIFT 2
+static inline uint32_t DPU_LUT_CFG_LUT_LO_LE_MUX(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_LO_LE_MUX__SHIFT) & DPU_LUT_CFG_LUT_LO_LE_MUX__MASK;
+}
+#define DPU_LUT_CFG_LUT_EXPAND_EN__MASK 0x00000002
+#define DPU_LUT_CFG_LUT_EXPAND_EN__SHIFT 1
+static inline uint32_t DPU_LUT_CFG_LUT_EXPAND_EN(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_EXPAND_EN__SHIFT) & DPU_LUT_CFG_LUT_EXPAND_EN__MASK;
+}
+#define DPU_LUT_CFG_LUT_ROAD_SEL__MASK 0x00000001
+#define DPU_LUT_CFG_LUT_ROAD_SEL__SHIFT 0
+static inline uint32_t DPU_LUT_CFG_LUT_ROAD_SEL(uint32_t val)
+{
+ return ((val) << DPU_LUT_CFG_LUT_ROAD_SEL__SHIFT) & DPU_LUT_CFG_LUT_ROAD_SEL__MASK;
+}
+
+#define REG_DPU_LUT_INFO 0x0000410c
+#define DPU_LUT_INFO_RESERVED_0__MASK 0xff000000
+#define DPU_LUT_INFO_RESERVED_0__SHIFT 24
+static inline uint32_t DPU_LUT_INFO_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_LUT_INFO_RESERVED_0__SHIFT) & DPU_LUT_INFO_RESERVED_0__MASK;
+}
+#define DPU_LUT_INFO_LUT_LO_INDEX_SELECT__MASK 0x00ff0000
+#define DPU_LUT_INFO_LUT_LO_INDEX_SELECT__SHIFT 16
+static inline uint32_t DPU_LUT_INFO_LUT_LO_INDEX_SELECT(uint32_t val)
+{
+ return ((val) << DPU_LUT_INFO_LUT_LO_INDEX_SELECT__SHIFT) & DPU_LUT_INFO_LUT_LO_INDEX_SELECT__MASK;
+}
+#define DPU_LUT_INFO_LUT_LE_INDEX_SELECT__MASK 0x0000ff00
+#define DPU_LUT_INFO_LUT_LE_INDEX_SELECT__SHIFT 8
+static inline uint32_t DPU_LUT_INFO_LUT_LE_INDEX_SELECT(uint32_t val)
+{
+ return ((val) << DPU_LUT_INFO_LUT_LE_INDEX_SELECT__SHIFT) & DPU_LUT_INFO_LUT_LE_INDEX_SELECT__MASK;
+}
+#define DPU_LUT_INFO_RESERVED_1__MASK 0x000000ff
+#define DPU_LUT_INFO_RESERVED_1__SHIFT 0
+static inline uint32_t DPU_LUT_INFO_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_LUT_INFO_RESERVED_1__SHIFT) & DPU_LUT_INFO_RESERVED_1__MASK;
+}
+
+#define REG_DPU_LUT_LE_START 0x00004110
+#define DPU_LUT_LE_START_LUT_LE_START__MASK 0xffffffff
+#define DPU_LUT_LE_START_LUT_LE_START__SHIFT 0
+static inline uint32_t DPU_LUT_LE_START_LUT_LE_START(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_START_LUT_LE_START__SHIFT) & DPU_LUT_LE_START_LUT_LE_START__MASK;
+}
+
+#define REG_DPU_LUT_LE_END 0x00004114
+#define DPU_LUT_LE_END_LUT_LE_END__MASK 0xffffffff
+#define DPU_LUT_LE_END_LUT_LE_END__SHIFT 0
+static inline uint32_t DPU_LUT_LE_END_LUT_LE_END(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_END_LUT_LE_END__SHIFT) & DPU_LUT_LE_END_LUT_LE_END__MASK;
+}
+
+#define REG_DPU_LUT_LO_START 0x00004118
+#define DPU_LUT_LO_START_LUT_LO_START__MASK 0xffffffff
+#define DPU_LUT_LO_START_LUT_LO_START__SHIFT 0
+static inline uint32_t DPU_LUT_LO_START_LUT_LO_START(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_START_LUT_LO_START__SHIFT) & DPU_LUT_LO_START_LUT_LO_START__MASK;
+}
+
+#define REG_DPU_LUT_LO_END 0x0000411c
+#define DPU_LUT_LO_END_LUT_LO_END__MASK 0xffffffff
+#define DPU_LUT_LO_END_LUT_LO_END__SHIFT 0
+static inline uint32_t DPU_LUT_LO_END_LUT_LO_END(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_END_LUT_LO_END__SHIFT) & DPU_LUT_LO_END_LUT_LO_END__MASK;
+}
+
+#define REG_DPU_LUT_LE_SLOPE_SCALE 0x00004120
+#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__MASK 0xffff0000
+#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__SHIFT 16
+static inline uint32_t DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__SHIFT) & DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__MASK;
+}
+#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__MASK 0x0000ffff
+#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__SHIFT 0
+static inline uint32_t DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__SHIFT) & DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__MASK;
+}
+
+#define REG_DPU_LUT_LE_SLOPE_SHIFT 0x00004124
+#define DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__MASK 0xfffffc00
+#define DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__SHIFT 10
+static inline uint32_t DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__SHIFT) & DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__MASK;
+}
+#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__MASK 0x000003e0
+#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__SHIFT 5
+static inline uint32_t DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__SHIFT) & DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__MASK;
+}
+#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__MASK 0x0000001f
+#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__SHIFT 0
+static inline uint32_t DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT(uint32_t val)
+{
+ return ((val) << DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__SHIFT) & DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__MASK;
+}
+
+#define REG_DPU_LUT_LO_SLOPE_SCALE 0x00004128
+#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__MASK 0xffff0000
+#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__SHIFT 16
+static inline uint32_t DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__SHIFT) & DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__MASK;
+}
+#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__MASK 0x0000ffff
+#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__SHIFT 0
+static inline uint32_t DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__SHIFT) & DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__MASK;
+}
+
+#define REG_DPU_LUT_LO_SLOPE_SHIFT 0x0000412c
+#define DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__MASK 0xfffffc00
+#define DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__SHIFT 10
+static inline uint32_t DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__SHIFT) & DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__MASK;
+}
+#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__MASK 0x000003e0
+#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__SHIFT 5
+static inline uint32_t DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__SHIFT) & DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__MASK;
+}
+#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__MASK 0x0000001f
+#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__SHIFT 0
+static inline uint32_t DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT(uint32_t val)
+{
+ return ((val) << DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__SHIFT) & DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_S_STATUS 0x00005000
+#define DPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK 0xfffc0000
+#define DPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT 18
+static inline uint32_t DPU_RDMA_RDMA_S_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK 0x00030000
+#define DPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT 16
+static inline uint32_t DPU_RDMA_RDMA_S_STATUS_STATUS_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT) & DPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK;
+}
+#define DPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK 0x0000fffc
+#define DPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT 2
+static inline uint32_t DPU_RDMA_RDMA_S_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK;
+}
+#define DPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK 0x00000003
+#define DPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_S_STATUS_STATUS_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT) & DPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_S_POINTER 0x00005004
+#define DPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK 0xfffe0000
+#define DPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT 17
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK 0x00010000
+#define DPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT 16
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_EXECUTER(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT) & DPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK 0x0000ffc0
+#define DPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT 6
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020
+#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010
+#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK 0x00000008
+#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT 3
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004
+#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT 2
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT) & DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK 0x00000002
+#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT 1
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK;
+}
+#define DPU_RDMA_RDMA_S_POINTER_POINTER__MASK 0x00000001
+#define DPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_OPERATION_ENABLE 0x00005008
+#define DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT) & DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_DATA_CUBE_WIDTH 0x0000500c
+#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__MASK 0xffffe000
+#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__SHIFT 13
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__MASK 0x00001fff
+#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_DATA_CUBE_HEIGHT 0x00005010
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__MASK 0xe0000000
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT 29
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__MASK 0x1fff0000
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__SHIFT 16
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__MASK;
+}
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__MASK 0x0000e000
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT 13
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__MASK;
+}
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__MASK 0x00001fff
+#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_DATA_CUBE_CHANNEL 0x00005014
+#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__MASK 0xffffe000
+#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT 13
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__MASK 0x00001fff
+#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_SRC_BASE_ADDR 0x00005018
+#define DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK 0xffffffff
+#define DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_BRDMA_CFG 0x0000501c
+#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__MASK 0xffffffe0
+#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__SHIFT 5
+static inline uint32_t DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__MASK 0x0000001e
+#define DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__SHIFT 1
+static inline uint32_t DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__SHIFT) & DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__MASK;
+}
+#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__MASK 0x00000001
+#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_BS_BASE_ADDR 0x00005020
+#define DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__MASK 0xffffffff
+#define DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_NRDMA_CFG 0x00005028
+#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__MASK 0xffffffe0
+#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__SHIFT 5
+static inline uint32_t DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__MASK 0x0000001e
+#define DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__SHIFT 1
+static inline uint32_t DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__SHIFT) & DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__MASK;
+}
+#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__MASK 0x00000001
+#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_BN_BASE_ADDR 0x0000502c
+#define DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__MASK 0xffffffff
+#define DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_ERDMA_CFG 0x00005034
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__MASK 0xc0000000
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__SHIFT 30
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__MASK;
+}
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__MASK 0x20000000
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__SHIFT 29
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__MASK;
+}
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__MASK 0x10000000
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__SHIFT 28
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__MASK;
+}
+#define DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__MASK 0x0ffffff0
+#define DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__MASK 0x0000000c
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__SHIFT 2
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__MASK;
+}
+#define DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__MASK 0x00000002
+#define DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__SHIFT 1
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__MASK;
+}
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__MASK 0x00000001
+#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_EW_BASE_ADDR 0x00005038
+#define DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__MASK 0xffffffff
+#define DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_EW_SURF_STRIDE 0x00005040
+#define DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__MASK 0xfffffff0
+#define DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__SHIFT) & DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__MASK;
+}
+#define DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__MASK 0x0000000f
+#define DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_FEATURE_MODE_CFG 0x00005044
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__MASK 0xfffc0000
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__SHIFT 18
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__MASK 0x00038000
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__SHIFT 15
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__MASK 0x00007800
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__SHIFT 11
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__MASK 0x00000700
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__SHIFT 8
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__MASK 0x000000e0
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__SHIFT 5
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__MASK 0x00000010
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__MASK 0x00000008
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__SHIFT 3
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__MASK 0x00000006
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__SHIFT 1
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__MASK;
+}
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__MASK 0x00000001
+#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_SRC_DMA_CFG 0x00005048
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__MASK 0xfff80000
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__SHIFT 19
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__MASK 0x0007c000
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__SHIFT 14
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__MASK 0x00002000
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__SHIFT 13
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__MASK 0x00001000
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__SHIFT 12
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__MASK 0x00000e00
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__SHIFT 9
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__MASK 0x000001c0
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__SHIFT 6
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__MASK 0x00000038
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__SHIFT 3
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__MASK;
+}
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__MASK 0x00000007
+#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_SURF_NOTCH 0x0000504c
+#define DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__MASK 0xfffffff0
+#define DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__SHIFT) & DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__MASK;
+}
+#define DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__MASK 0x0000000f
+#define DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_PAD_CFG 0x00005064
+#define DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__MASK 0xffff0000
+#define DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__SHIFT 16
+static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__MASK;
+}
+#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__MASK 0x0000ff80
+#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__SHIFT 7
+static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__MASK;
+}
+#define DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__MASK 0x00000070
+#define DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_PAD_TOP(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__MASK;
+}
+#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__MASK 0x00000008
+#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__SHIFT 3
+static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__MASK;
+}
+#define DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__MASK 0x00000007
+#define DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_WEIGHT 0x00005068
+#define DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__MASK 0xff000000
+#define DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__SHIFT 24
+static inline uint32_t DPU_RDMA_RDMA_WEIGHT_E_WEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__MASK;
+}
+#define DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__MASK 0x00ff0000
+#define DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__SHIFT 16
+static inline uint32_t DPU_RDMA_RDMA_WEIGHT_N_WEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__MASK;
+}
+#define DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__MASK 0x0000ff00
+#define DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__SHIFT 8
+static inline uint32_t DPU_RDMA_RDMA_WEIGHT_B_WEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__MASK;
+}
+#define DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__MASK 0x000000ff
+#define DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_WEIGHT_M_WEIGHT(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__MASK;
+}
+
+#define REG_DPU_RDMA_RDMA_EW_SURF_NOTCH 0x0000506c
+#define DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__MASK 0xfffffff0
+#define DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__SHIFT 4
+static inline uint32_t DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__SHIFT) & DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__MASK;
+}
+#define DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__MASK 0x0000000f
+#define DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__SHIFT 0
+static inline uint32_t DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0(uint32_t val)
+{
+ return ((val) << DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__MASK;
+}
+
+#define REG_PPU_S_STATUS 0x00006000
+#define PPU_S_STATUS_RESERVED_0__MASK 0xfffc0000
+#define PPU_S_STATUS_RESERVED_0__SHIFT 18
+static inline uint32_t PPU_S_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_S_STATUS_RESERVED_0__SHIFT) & PPU_S_STATUS_RESERVED_0__MASK;
+}
+#define PPU_S_STATUS_STATUS_1__MASK 0x00030000
+#define PPU_S_STATUS_STATUS_1__SHIFT 16
+static inline uint32_t PPU_S_STATUS_STATUS_1(uint32_t val)
+{
+ return ((val) << PPU_S_STATUS_STATUS_1__SHIFT) & PPU_S_STATUS_STATUS_1__MASK;
+}
+#define PPU_S_STATUS_RESERVED_1__MASK 0x0000fffc
+#define PPU_S_STATUS_RESERVED_1__SHIFT 2
+static inline uint32_t PPU_S_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_S_STATUS_RESERVED_1__SHIFT) & PPU_S_STATUS_RESERVED_1__MASK;
+}
+#define PPU_S_STATUS_STATUS_0__MASK 0x00000003
+#define PPU_S_STATUS_STATUS_0__SHIFT 0
+static inline uint32_t PPU_S_STATUS_STATUS_0(uint32_t val)
+{
+ return ((val) << PPU_S_STATUS_STATUS_0__SHIFT) & PPU_S_STATUS_STATUS_0__MASK;
+}
+
+#define REG_PPU_S_POINTER 0x00006004
+#define PPU_S_POINTER_RESERVED_0__MASK 0xfffe0000
+#define PPU_S_POINTER_RESERVED_0__SHIFT 17
+static inline uint32_t PPU_S_POINTER_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_RESERVED_0__SHIFT) & PPU_S_POINTER_RESERVED_0__MASK;
+}
+#define PPU_S_POINTER_EXECUTER__MASK 0x00010000
+#define PPU_S_POINTER_EXECUTER__SHIFT 16
+static inline uint32_t PPU_S_POINTER_EXECUTER(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_EXECUTER__SHIFT) & PPU_S_POINTER_EXECUTER__MASK;
+}
+#define PPU_S_POINTER_RESERVED_1__MASK 0x0000ffc0
+#define PPU_S_POINTER_RESERVED_1__SHIFT 6
+static inline uint32_t PPU_S_POINTER_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_RESERVED_1__SHIFT) & PPU_S_POINTER_RESERVED_1__MASK;
+}
+#define PPU_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020
+#define PPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5
+static inline uint32_t PPU_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & PPU_S_POINTER_EXECUTER_PP_CLEAR__MASK;
+}
+#define PPU_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010
+#define PPU_S_POINTER_POINTER_PP_CLEAR__SHIFT 4
+static inline uint32_t PPU_S_POINTER_POINTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_POINTER_PP_CLEAR__SHIFT) & PPU_S_POINTER_POINTER_PP_CLEAR__MASK;
+}
+#define PPU_S_POINTER_POINTER_PP_MODE__MASK 0x00000008
+#define PPU_S_POINTER_POINTER_PP_MODE__SHIFT 3
+static inline uint32_t PPU_S_POINTER_POINTER_PP_MODE(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_POINTER_PP_MODE__SHIFT) & PPU_S_POINTER_POINTER_PP_MODE__MASK;
+}
+#define PPU_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004
+#define PPU_S_POINTER_EXECUTER_PP_EN__SHIFT 2
+static inline uint32_t PPU_S_POINTER_EXECUTER_PP_EN(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_EXECUTER_PP_EN__SHIFT) & PPU_S_POINTER_EXECUTER_PP_EN__MASK;
+}
+#define PPU_S_POINTER_POINTER_PP_EN__MASK 0x00000002
+#define PPU_S_POINTER_POINTER_PP_EN__SHIFT 1
+static inline uint32_t PPU_S_POINTER_POINTER_PP_EN(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_POINTER_PP_EN__SHIFT) & PPU_S_POINTER_POINTER_PP_EN__MASK;
+}
+#define PPU_S_POINTER_POINTER__MASK 0x00000001
+#define PPU_S_POINTER_POINTER__SHIFT 0
+static inline uint32_t PPU_S_POINTER_POINTER(uint32_t val)
+{
+ return ((val) << PPU_S_POINTER_POINTER__SHIFT) & PPU_S_POINTER_POINTER__MASK;
+}
+
+#define REG_PPU_OPERATION_ENABLE 0x00006008
+#define PPU_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define PPU_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t PPU_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_ENABLE_RESERVED_0__SHIFT) & PPU_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define PPU_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define PPU_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t PPU_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_ENABLE_OP_EN__SHIFT) & PPU_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_PPU_DATA_CUBE_IN_WIDTH 0x0000600c
+#define PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__MASK 0xffffe000
+#define PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_DATA_CUBE_IN_WIDTH_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__SHIFT) & PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__MASK;
+}
+#define PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK 0x00001fff
+#define PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT 0
+static inline uint32_t PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT) & PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK;
+}
+
+#define REG_PPU_DATA_CUBE_IN_HEIGHT 0x00006010
+#define PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__MASK 0xffffe000
+#define PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT) & PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__MASK;
+}
+#define PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK 0x00001fff
+#define PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT 0
+static inline uint32_t PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT) & PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK;
+}
+
+#define REG_PPU_DATA_CUBE_IN_CHANNEL 0x00006014
+#define PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__MASK 0xffffe000
+#define PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT) & PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__MASK;
+}
+#define PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK 0x00001fff
+#define PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT 0
+static inline uint32_t PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT) & PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK;
+}
+
+#define REG_PPU_DATA_CUBE_OUT_WIDTH 0x00006018
+#define PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__MASK 0xffffe000
+#define PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__SHIFT) & PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__MASK;
+}
+#define PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__MASK 0x00001fff
+#define PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__SHIFT 0
+static inline uint32_t PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__SHIFT) & PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__MASK;
+}
+
+#define REG_PPU_DATA_CUBE_OUT_HEIGHT 0x0000601c
+#define PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__MASK 0xffffe000
+#define PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__SHIFT) & PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__MASK;
+}
+#define PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__MASK 0x00001fff
+#define PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__SHIFT 0
+static inline uint32_t PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__SHIFT) & PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__MASK;
+}
+
+#define REG_PPU_DATA_CUBE_OUT_CHANNEL 0x00006020
+#define PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__MASK 0xffffe000
+#define PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__SHIFT) & PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__MASK;
+}
+#define PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__MASK 0x00001fff
+#define PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__SHIFT 0
+static inline uint32_t PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL(uint32_t val)
+{
+ return ((val) << PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__SHIFT) & PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__MASK;
+}
+
+#define REG_PPU_OPERATION_MODE_CFG 0x00006024
+#define PPU_OPERATION_MODE_CFG_RESERVED_0__MASK 0x80000000
+#define PPU_OPERATION_MODE_CFG_RESERVED_0__SHIFT 31
+static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_0__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_0__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_INDEX_EN__MASK 0x40000000
+#define PPU_OPERATION_MODE_CFG_INDEX_EN__SHIFT 30
+static inline uint32_t PPU_OPERATION_MODE_CFG_INDEX_EN(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_INDEX_EN__SHIFT) & PPU_OPERATION_MODE_CFG_INDEX_EN__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_RESERVED_1__MASK 0x20000000
+#define PPU_OPERATION_MODE_CFG_RESERVED_1__SHIFT 29
+static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_1__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_1__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_NOTCH_ADDR__MASK 0x1fff0000
+#define PPU_OPERATION_MODE_CFG_NOTCH_ADDR__SHIFT 16
+static inline uint32_t PPU_OPERATION_MODE_CFG_NOTCH_ADDR(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_NOTCH_ADDR__SHIFT) & PPU_OPERATION_MODE_CFG_NOTCH_ADDR__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_RESERVED_2__MASK 0x0000ff00
+#define PPU_OPERATION_MODE_CFG_RESERVED_2__SHIFT 8
+static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_2__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_2__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_USE_CNT__MASK 0x000000e0
+#define PPU_OPERATION_MODE_CFG_USE_CNT__SHIFT 5
+static inline uint32_t PPU_OPERATION_MODE_CFG_USE_CNT(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_USE_CNT__SHIFT) & PPU_OPERATION_MODE_CFG_USE_CNT__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_FLYING_MODE__MASK 0x00000010
+#define PPU_OPERATION_MODE_CFG_FLYING_MODE__SHIFT 4
+static inline uint32_t PPU_OPERATION_MODE_CFG_FLYING_MODE(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_FLYING_MODE__SHIFT) & PPU_OPERATION_MODE_CFG_FLYING_MODE__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_RESERVED_3__MASK 0x0000000c
+#define PPU_OPERATION_MODE_CFG_RESERVED_3__SHIFT 2
+static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_3(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_3__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_3__MASK;
+}
+#define PPU_OPERATION_MODE_CFG_POOLING_METHOD__MASK 0x00000003
+#define PPU_OPERATION_MODE_CFG_POOLING_METHOD__SHIFT 0
+static inline uint32_t PPU_OPERATION_MODE_CFG_POOLING_METHOD(uint32_t val)
+{
+ return ((val) << PPU_OPERATION_MODE_CFG_POOLING_METHOD__SHIFT) & PPU_OPERATION_MODE_CFG_POOLING_METHOD__MASK;
+}
+
+#define REG_PPU_POOLING_KERNEL_CFG 0x00006034
+#define PPU_POOLING_KERNEL_CFG_RESERVED_0__MASK 0xff000000
+#define PPU_POOLING_KERNEL_CFG_RESERVED_0__SHIFT 24
+static inline uint32_t PPU_POOLING_KERNEL_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_RESERVED_0__SHIFT) & PPU_POOLING_KERNEL_CFG_RESERVED_0__MASK;
+}
+#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__MASK 0x00f00000
+#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__SHIFT 20
+static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__MASK;
+}
+#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__MASK 0x000f0000
+#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__SHIFT 16
+static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__MASK;
+}
+#define PPU_POOLING_KERNEL_CFG_RESERVED_1__MASK 0x0000f000
+#define PPU_POOLING_KERNEL_CFG_RESERVED_1__SHIFT 12
+static inline uint32_t PPU_POOLING_KERNEL_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_RESERVED_1__SHIFT) & PPU_POOLING_KERNEL_CFG_RESERVED_1__MASK;
+}
+#define PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__MASK 0x00000f00
+#define PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__SHIFT 8
+static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__MASK;
+}
+#define PPU_POOLING_KERNEL_CFG_RESERVED_2__MASK 0x000000f0
+#define PPU_POOLING_KERNEL_CFG_RESERVED_2__SHIFT 4
+static inline uint32_t PPU_POOLING_KERNEL_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_RESERVED_2__SHIFT) & PPU_POOLING_KERNEL_CFG_RESERVED_2__MASK;
+}
+#define PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__MASK 0x0000000f
+#define PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__SHIFT 0
+static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH(uint32_t val)
+{
+ return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__MASK;
+}
+
+#define REG_PPU_RECIP_KERNEL_WIDTH 0x00006038
+#define PPU_RECIP_KERNEL_WIDTH_RESERVED_0__MASK 0xfffe0000
+#define PPU_RECIP_KERNEL_WIDTH_RESERVED_0__SHIFT 17
+static inline uint32_t PPU_RECIP_KERNEL_WIDTH_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RECIP_KERNEL_WIDTH_RESERVED_0__SHIFT) & PPU_RECIP_KERNEL_WIDTH_RESERVED_0__MASK;
+}
+#define PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__MASK 0x0001ffff
+#define PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__SHIFT 0
+static inline uint32_t PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH(uint32_t val)
+{
+ return ((val) << PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__SHIFT) & PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__MASK;
+}
+
+#define REG_PPU_RECIP_KERNEL_HEIGHT 0x0000603c
+#define PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__MASK 0xfffe0000
+#define PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__SHIFT 17
+static inline uint32_t PPU_RECIP_KERNEL_HEIGHT_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__SHIFT) & PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__MASK;
+}
+#define PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__MASK 0x0001ffff
+#define PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__SHIFT 0
+static inline uint32_t PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT(uint32_t val)
+{
+ return ((val) << PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__SHIFT) & PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__MASK;
+}
+
+#define REG_PPU_POOLING_PADDING_CFG 0x00006040
+#define PPU_POOLING_PADDING_CFG_RESERVED_0__MASK 0xffff8000
+#define PPU_POOLING_PADDING_CFG_RESERVED_0__SHIFT 15
+static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_0__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_0__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_PAD_BOTTOM__MASK 0x00007000
+#define PPU_POOLING_PADDING_CFG_PAD_BOTTOM__SHIFT 12
+static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_BOTTOM(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_PAD_BOTTOM__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_BOTTOM__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_RESERVED_1__MASK 0x00000800
+#define PPU_POOLING_PADDING_CFG_RESERVED_1__SHIFT 11
+static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_1__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_1__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_PAD_RIGHT__MASK 0x00000700
+#define PPU_POOLING_PADDING_CFG_PAD_RIGHT__SHIFT 8
+static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_RIGHT(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_PAD_RIGHT__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_RIGHT__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_RESERVED_2__MASK 0x00000080
+#define PPU_POOLING_PADDING_CFG_RESERVED_2__SHIFT 7
+static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_2(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_2__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_2__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_PAD_TOP__MASK 0x00000070
+#define PPU_POOLING_PADDING_CFG_PAD_TOP__SHIFT 4
+static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_TOP(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_PAD_TOP__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_TOP__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_RESERVED_3__MASK 0x00000008
+#define PPU_POOLING_PADDING_CFG_RESERVED_3__SHIFT 3
+static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_3(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_3__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_3__MASK;
+}
+#define PPU_POOLING_PADDING_CFG_PAD_LEFT__MASK 0x00000007
+#define PPU_POOLING_PADDING_CFG_PAD_LEFT__SHIFT 0
+static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_LEFT(uint32_t val)
+{
+ return ((val) << PPU_POOLING_PADDING_CFG_PAD_LEFT__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_LEFT__MASK;
+}
+
+#define REG_PPU_PADDING_VALUE_1_CFG 0x00006044
+#define PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__MASK 0xffffffff
+#define PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__SHIFT 0
+static inline uint32_t PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0(uint32_t val)
+{
+ return ((val) << PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__SHIFT) & PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__MASK;
+}
+
+#define REG_PPU_PADDING_VALUE_2_CFG 0x00006048
+#define PPU_PADDING_VALUE_2_CFG_RESERVED_0__MASK 0xfffffff8
+#define PPU_PADDING_VALUE_2_CFG_RESERVED_0__SHIFT 3
+static inline uint32_t PPU_PADDING_VALUE_2_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_PADDING_VALUE_2_CFG_RESERVED_0__SHIFT) & PPU_PADDING_VALUE_2_CFG_RESERVED_0__MASK;
+}
+#define PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__MASK 0x00000007
+#define PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__SHIFT 0
+static inline uint32_t PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1(uint32_t val)
+{
+ return ((val) << PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__SHIFT) & PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__MASK;
+}
+
+#define REG_PPU_DST_BASE_ADDR 0x00006070
+#define PPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK 0xfffffff0
+#define PPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT 4
+static inline uint32_t PPU_DST_BASE_ADDR_DST_BASE_ADDR(uint32_t val)
+{
+ return ((val) << PPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT) & PPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK;
+}
+#define PPU_DST_BASE_ADDR_RESERVED_0__MASK 0x0000000f
+#define PPU_DST_BASE_ADDR_RESERVED_0__SHIFT 0
+static inline uint32_t PPU_DST_BASE_ADDR_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DST_BASE_ADDR_RESERVED_0__SHIFT) & PPU_DST_BASE_ADDR_RESERVED_0__MASK;
+}
+
+#define REG_PPU_DST_SURF_STRIDE 0x0000607c
+#define PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK 0xfffffff0
+#define PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT 4
+static inline uint32_t PPU_DST_SURF_STRIDE_DST_SURF_STRIDE(uint32_t val)
+{
+ return ((val) << PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT) & PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK;
+}
+#define PPU_DST_SURF_STRIDE_RESERVED_0__MASK 0x0000000f
+#define PPU_DST_SURF_STRIDE_RESERVED_0__SHIFT 0
+static inline uint32_t PPU_DST_SURF_STRIDE_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_DST_SURF_STRIDE_RESERVED_0__SHIFT) & PPU_DST_SURF_STRIDE_RESERVED_0__MASK;
+}
+
+#define REG_PPU_DATA_FORMAT 0x00006084
+#define PPU_DATA_FORMAT_INDEX_ADD__MASK 0xfffffff0
+#define PPU_DATA_FORMAT_INDEX_ADD__SHIFT 4
+static inline uint32_t PPU_DATA_FORMAT_INDEX_ADD(uint32_t val)
+{
+ return ((val) << PPU_DATA_FORMAT_INDEX_ADD__SHIFT) & PPU_DATA_FORMAT_INDEX_ADD__MASK;
+}
+#define PPU_DATA_FORMAT_DPU_FLYIN__MASK 0x00000008
+#define PPU_DATA_FORMAT_DPU_FLYIN__SHIFT 3
+static inline uint32_t PPU_DATA_FORMAT_DPU_FLYIN(uint32_t val)
+{
+ return ((val) << PPU_DATA_FORMAT_DPU_FLYIN__SHIFT) & PPU_DATA_FORMAT_DPU_FLYIN__MASK;
+}
+#define PPU_DATA_FORMAT_PROC_PRECISION__MASK 0x00000007
+#define PPU_DATA_FORMAT_PROC_PRECISION__SHIFT 0
+static inline uint32_t PPU_DATA_FORMAT_PROC_PRECISION(uint32_t val)
+{
+ return ((val) << PPU_DATA_FORMAT_PROC_PRECISION__SHIFT) & PPU_DATA_FORMAT_PROC_PRECISION__MASK;
+}
+
+#define REG_PPU_MISC_CTRL 0x000060dc
+#define PPU_MISC_CTRL_SURF_LEN__MASK 0xffff0000
+#define PPU_MISC_CTRL_SURF_LEN__SHIFT 16
+static inline uint32_t PPU_MISC_CTRL_SURF_LEN(uint32_t val)
+{
+ return ((val) << PPU_MISC_CTRL_SURF_LEN__SHIFT) & PPU_MISC_CTRL_SURF_LEN__MASK;
+}
+#define PPU_MISC_CTRL_RESERVED_0__MASK 0x0000fe00
+#define PPU_MISC_CTRL_RESERVED_0__SHIFT 9
+static inline uint32_t PPU_MISC_CTRL_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_MISC_CTRL_RESERVED_0__SHIFT) & PPU_MISC_CTRL_RESERVED_0__MASK;
+}
+#define PPU_MISC_CTRL_MC_SURF_OUT__MASK 0x00000100
+#define PPU_MISC_CTRL_MC_SURF_OUT__SHIFT 8
+static inline uint32_t PPU_MISC_CTRL_MC_SURF_OUT(uint32_t val)
+{
+ return ((val) << PPU_MISC_CTRL_MC_SURF_OUT__SHIFT) & PPU_MISC_CTRL_MC_SURF_OUT__MASK;
+}
+#define PPU_MISC_CTRL_NONALIGN__MASK 0x00000080
+#define PPU_MISC_CTRL_NONALIGN__SHIFT 7
+static inline uint32_t PPU_MISC_CTRL_NONALIGN(uint32_t val)
+{
+ return ((val) << PPU_MISC_CTRL_NONALIGN__SHIFT) & PPU_MISC_CTRL_NONALIGN__MASK;
+}
+#define PPU_MISC_CTRL_RESERVED_1__MASK 0x00000070
+#define PPU_MISC_CTRL_RESERVED_1__SHIFT 4
+static inline uint32_t PPU_MISC_CTRL_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_MISC_CTRL_RESERVED_1__SHIFT) & PPU_MISC_CTRL_RESERVED_1__MASK;
+}
+#define PPU_MISC_CTRL_BURST_LEN__MASK 0x0000000f
+#define PPU_MISC_CTRL_BURST_LEN__SHIFT 0
+static inline uint32_t PPU_MISC_CTRL_BURST_LEN(uint32_t val)
+{
+ return ((val) << PPU_MISC_CTRL_BURST_LEN__SHIFT) & PPU_MISC_CTRL_BURST_LEN__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_S_STATUS 0x00007000
+#define PPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK 0xfffc0000
+#define PPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT 18
+static inline uint32_t PPU_RDMA_RDMA_S_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK 0x00030000
+#define PPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT 16
+static inline uint32_t PPU_RDMA_RDMA_S_STATUS_STATUS_1(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT) & PPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK;
+}
+#define PPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK 0x0000fffc
+#define PPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT 2
+static inline uint32_t PPU_RDMA_RDMA_S_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT) & PPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK;
+}
+#define PPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK 0x00000003
+#define PPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_S_STATUS_STATUS_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT) & PPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_S_POINTER 0x00007004
+#define PPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK 0xfffe0000
+#define PPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT 17
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK 0x00010000
+#define PPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT 16
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_EXECUTER(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT) & PPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK 0x0000ffc0
+#define PPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT 6
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_RESERVED_1(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT) & PPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020
+#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010
+#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT 4
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK 0x00000008
+#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT 3
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004
+#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT 2
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT) & PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK 0x00000002
+#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT 1
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK;
+}
+#define PPU_RDMA_RDMA_S_POINTER_POINTER__MASK 0x00000001
+#define PPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_OPERATION_ENABLE 0x00007008
+#define PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe
+#define PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT 1
+static inline uint32_t PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK 0x00000001
+#define PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT) & PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_CUBE_IN_WIDTH 0x0000700c
+#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__MASK 0xffffe000
+#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK 0x00001fff
+#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_CUBE_IN_HEIGHT 0x00007010
+#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__MASK 0xffffe000
+#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK 0x00001fff
+#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_CUBE_IN_CHANNEL 0x00007014
+#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__MASK 0xffffe000
+#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT 13
+static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK 0x00001fff
+#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_SRC_BASE_ADDR 0x0000701c
+#define PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK 0xffffffff
+#define PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT) & PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_SRC_LINE_STRIDE 0x00007024
+#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__MASK 0xfffffff0
+#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__SHIFT 4
+static inline uint32_t PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__SHIFT) & PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__MASK;
+}
+#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__MASK 0x0000000f
+#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_SRC_SURF_STRIDE 0x00007028
+#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__MASK 0xfffffff0
+#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__SHIFT 4
+static inline uint32_t PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__SHIFT) & PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__MASK;
+}
+#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__MASK 0x0000000f
+#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__MASK;
+}
+
+#define REG_PPU_RDMA_RDMA_DATA_FORMAT 0x00007030
+#define PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__MASK 0xfffffffc
+#define PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__SHIFT 2
+static inline uint32_t PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__MASK;
+}
+#define PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__MASK 0x00000003
+#define PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__SHIFT 0
+static inline uint32_t PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION(uint32_t val)
+{
+ return ((val) << PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__SHIFT) & PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__MASK;
+}
+
+#define REG_DDMA_CFG_OUTSTANDING 0x00008000
+#define DDMA_CFG_OUTSTANDING_RESERVED_0__MASK 0xffff0000
+#define DDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT 16
+static inline uint32_t DDMA_CFG_OUTSTANDING_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT) & DDMA_CFG_OUTSTANDING_RESERVED_0__MASK;
+}
+#define DDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK 0x0000ff00
+#define DDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT 8
+static inline uint32_t DDMA_CFG_OUTSTANDING_WR_OS_CNT(uint32_t val)
+{
+ return ((val) << DDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT) & DDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK;
+}
+#define DDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK 0x000000ff
+#define DDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT 0
+static inline uint32_t DDMA_CFG_OUTSTANDING_RD_OS_CNT(uint32_t val)
+{
+ return ((val) << DDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT) & DDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK;
+}
+
+#define REG_DDMA_RD_WEIGHT_0 0x00008004
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK 0xff000000
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT 24
+static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP(uint32_t val)
+{
+ return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK;
+}
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK 0x00ff0000
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT 16
+static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU(uint32_t val)
+{
+ return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK;
+}
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK 0x0000ff00
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT 8
+static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL(uint32_t val)
+{
+ return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK;
+}
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK 0x000000ff
+#define DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT 0
+static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE(uint32_t val)
+{
+ return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK;
+}
+
+#define REG_DDMA_WR_WEIGHT_0 0x00008008
+#define DDMA_WR_WEIGHT_0_RESERVED_0__MASK 0xffff0000
+#define DDMA_WR_WEIGHT_0_RESERVED_0__SHIFT 16
+static inline uint32_t DDMA_WR_WEIGHT_0_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_WR_WEIGHT_0_RESERVED_0__SHIFT) & DDMA_WR_WEIGHT_0_RESERVED_0__MASK;
+}
+#define DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK 0x0000ff00
+#define DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT 8
+static inline uint32_t DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP(uint32_t val)
+{
+ return ((val) << DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT) & DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK;
+}
+#define DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK 0x000000ff
+#define DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT 0
+static inline uint32_t DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU(uint32_t val)
+{
+ return ((val) << DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT) & DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK;
+}
+
+#define REG_DDMA_CFG_ID_ERROR 0x0000800c
+#define DDMA_CFG_ID_ERROR_RESERVED_0__MASK 0xfffffc00
+#define DDMA_CFG_ID_ERROR_RESERVED_0__SHIFT 10
+static inline uint32_t DDMA_CFG_ID_ERROR_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_ID_ERROR_RESERVED_0__SHIFT) & DDMA_CFG_ID_ERROR_RESERVED_0__MASK;
+}
+#define DDMA_CFG_ID_ERROR_WR_RESP_ID__MASK 0x000003c0
+#define DDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT 6
+static inline uint32_t DDMA_CFG_ID_ERROR_WR_RESP_ID(uint32_t val)
+{
+ return ((val) << DDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT) & DDMA_CFG_ID_ERROR_WR_RESP_ID__MASK;
+}
+#define DDMA_CFG_ID_ERROR_RESERVED_1__MASK 0x00000020
+#define DDMA_CFG_ID_ERROR_RESERVED_1__SHIFT 5
+static inline uint32_t DDMA_CFG_ID_ERROR_RESERVED_1(uint32_t val)
+{
+ return ((val) << DDMA_CFG_ID_ERROR_RESERVED_1__SHIFT) & DDMA_CFG_ID_ERROR_RESERVED_1__MASK;
+}
+#define DDMA_CFG_ID_ERROR_RD_RESP_ID__MASK 0x0000001f
+#define DDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT 0
+static inline uint32_t DDMA_CFG_ID_ERROR_RD_RESP_ID(uint32_t val)
+{
+ return ((val) << DDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT) & DDMA_CFG_ID_ERROR_RD_RESP_ID__MASK;
+}
+
+#define REG_DDMA_RD_WEIGHT_1 0x00008010
+#define DDMA_RD_WEIGHT_1_RESERVED_0__MASK 0xffffff00
+#define DDMA_RD_WEIGHT_1_RESERVED_0__SHIFT 8
+static inline uint32_t DDMA_RD_WEIGHT_1_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_RD_WEIGHT_1_RESERVED_0__SHIFT) & DDMA_RD_WEIGHT_1_RESERVED_0__MASK;
+}
+#define DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK 0x000000ff
+#define DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT 0
+static inline uint32_t DDMA_RD_WEIGHT_1_RD_WEIGHT_PC(uint32_t val)
+{
+ return ((val) << DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT) & DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK;
+}
+
+#define REG_DDMA_CFG_DMA_FIFO_CLR 0x00008014
+#define DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK 0xfffffffe
+#define DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT 1
+static inline uint32_t DDMA_CFG_DMA_FIFO_CLR_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT) & DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK;
+}
+#define DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK 0x00000001
+#define DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT 0
+static inline uint32_t DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT) & DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK;
+}
+
+#define REG_DDMA_CFG_DMA_ARB 0x00008018
+#define DDMA_CFG_DMA_ARB_RESERVED_0__MASK 0xfffffc00
+#define DDMA_CFG_DMA_ARB_RESERVED_0__SHIFT 10
+static inline uint32_t DDMA_CFG_DMA_ARB_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_RESERVED_0__SHIFT) & DDMA_CFG_DMA_ARB_RESERVED_0__MASK;
+}
+#define DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK 0x00000200
+#define DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT 9
+static inline uint32_t DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT) & DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK;
+}
+#define DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK 0x00000100
+#define DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT 8
+static inline uint32_t DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT) & DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK;
+}
+#define DDMA_CFG_DMA_ARB_RESERVED_1__MASK 0x00000080
+#define DDMA_CFG_DMA_ARB_RESERVED_1__SHIFT 7
+static inline uint32_t DDMA_CFG_DMA_ARB_RESERVED_1(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_RESERVED_1__SHIFT) & DDMA_CFG_DMA_ARB_RESERVED_1__MASK;
+}
+#define DDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK 0x00000070
+#define DDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT 4
+static inline uint32_t DDMA_CFG_DMA_ARB_WR_FIX_ARB(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT) & DDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK;
+}
+#define DDMA_CFG_DMA_ARB_RESERVED_2__MASK 0x00000008
+#define DDMA_CFG_DMA_ARB_RESERVED_2__SHIFT 3
+static inline uint32_t DDMA_CFG_DMA_ARB_RESERVED_2(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_RESERVED_2__SHIFT) & DDMA_CFG_DMA_ARB_RESERVED_2__MASK;
+}
+#define DDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK 0x00000007
+#define DDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT 0
+static inline uint32_t DDMA_CFG_DMA_ARB_RD_FIX_ARB(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT) & DDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK;
+}
+
+#define REG_DDMA_CFG_DMA_RD_QOS 0x00008020
+#define DDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK 0xfffffc00
+#define DDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT 10
+static inline uint32_t DDMA_CFG_DMA_RD_QOS_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT) & DDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK;
+}
+#define DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK 0x00000300
+#define DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT 8
+static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_PC_QOS(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK;
+}
+#define DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK 0x000000c0
+#define DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT 6
+static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK;
+}
+#define DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK 0x00000030
+#define DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT 4
+static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK;
+}
+#define DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK 0x0000000c
+#define DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT 2
+static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK;
+}
+#define DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK 0x00000003
+#define DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT 0
+static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK;
+}
+
+#define REG_DDMA_CFG_DMA_RD_CFG 0x00008024
+#define DDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK 0xffffe000
+#define DDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT 13
+static inline uint32_t DDMA_CFG_DMA_RD_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT) & DDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK;
+}
+#define DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK 0x00001000
+#define DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT 12
+static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARLOCK(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK;
+}
+#define DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK 0x00000f00
+#define DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT 8
+static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARCACHE(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK;
+}
+#define DDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK 0x000000e0
+#define DDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT 5
+static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARPROT(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK;
+}
+#define DDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK 0x00000018
+#define DDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT 3
+static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARBURST(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK;
+}
+#define DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK 0x00000007
+#define DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT 0
+static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARSIZE(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK;
+}
+
+#define REG_DDMA_CFG_DMA_WR_CFG 0x00008028
+#define DDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK 0xffffe000
+#define DDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT 13
+static inline uint32_t DDMA_CFG_DMA_WR_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT) & DDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK;
+}
+#define DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK 0x00001000
+#define DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT 12
+static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWLOCK(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK;
+}
+#define DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK 0x00000f00
+#define DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT 8
+static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWCACHE(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK;
+}
+#define DDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK 0x000000e0
+#define DDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT 5
+static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWPROT(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK;
+}
+#define DDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK 0x00000018
+#define DDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT 3
+static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWBURST(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK;
+}
+#define DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK 0x00000007
+#define DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT 0
+static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWSIZE(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK;
+}
+
+#define REG_DDMA_CFG_DMA_WSTRB 0x0000802c
+#define DDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK 0xffffffff
+#define DDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT 0
+static inline uint32_t DDMA_CFG_DMA_WSTRB_WR_WSTRB(uint32_t val)
+{
+ return ((val) << DDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT) & DDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK;
+}
+
+#define REG_DDMA_CFG_STATUS 0x00008030
+#define DDMA_CFG_STATUS_RESERVED_0__MASK 0xfffffe00
+#define DDMA_CFG_STATUS_RESERVED_0__SHIFT 9
+static inline uint32_t DDMA_CFG_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << DDMA_CFG_STATUS_RESERVED_0__SHIFT) & DDMA_CFG_STATUS_RESERVED_0__MASK;
+}
+#define DDMA_CFG_STATUS_IDEL__MASK 0x00000100
+#define DDMA_CFG_STATUS_IDEL__SHIFT 8
+static inline uint32_t DDMA_CFG_STATUS_IDEL(uint32_t val)
+{
+ return ((val) << DDMA_CFG_STATUS_IDEL__SHIFT) & DDMA_CFG_STATUS_IDEL__MASK;
+}
+#define DDMA_CFG_STATUS_RESERVED_1__MASK 0x000000ff
+#define DDMA_CFG_STATUS_RESERVED_1__SHIFT 0
+static inline uint32_t DDMA_CFG_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << DDMA_CFG_STATUS_RESERVED_1__SHIFT) & DDMA_CFG_STATUS_RESERVED_1__MASK;
+}
+
+#define REG_SDMA_CFG_OUTSTANDING 0x00009000
+#define SDMA_CFG_OUTSTANDING_RESERVED_0__MASK 0xffff0000
+#define SDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT 16
+static inline uint32_t SDMA_CFG_OUTSTANDING_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT) & SDMA_CFG_OUTSTANDING_RESERVED_0__MASK;
+}
+#define SDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK 0x0000ff00
+#define SDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT 8
+static inline uint32_t SDMA_CFG_OUTSTANDING_WR_OS_CNT(uint32_t val)
+{
+ return ((val) << SDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT) & SDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK;
+}
+#define SDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK 0x000000ff
+#define SDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT 0
+static inline uint32_t SDMA_CFG_OUTSTANDING_RD_OS_CNT(uint32_t val)
+{
+ return ((val) << SDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT) & SDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK;
+}
+
+#define REG_SDMA_RD_WEIGHT_0 0x00009004
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK 0xff000000
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT 24
+static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP(uint32_t val)
+{
+ return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK;
+}
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK 0x00ff0000
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT 16
+static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU(uint32_t val)
+{
+ return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK;
+}
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK 0x0000ff00
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT 8
+static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL(uint32_t val)
+{
+ return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK;
+}
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK 0x000000ff
+#define SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT 0
+static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE(uint32_t val)
+{
+ return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK;
+}
+
+#define REG_SDMA_WR_WEIGHT_0 0x00009008
+#define SDMA_WR_WEIGHT_0_RESERVED_0__MASK 0xffff0000
+#define SDMA_WR_WEIGHT_0_RESERVED_0__SHIFT 16
+static inline uint32_t SDMA_WR_WEIGHT_0_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_WR_WEIGHT_0_RESERVED_0__SHIFT) & SDMA_WR_WEIGHT_0_RESERVED_0__MASK;
+}
+#define SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK 0x0000ff00
+#define SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT 8
+static inline uint32_t SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP(uint32_t val)
+{
+ return ((val) << SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT) & SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK;
+}
+#define SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK 0x000000ff
+#define SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT 0
+static inline uint32_t SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU(uint32_t val)
+{
+ return ((val) << SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT) & SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK;
+}
+
+#define REG_SDMA_CFG_ID_ERROR 0x0000900c
+#define SDMA_CFG_ID_ERROR_RESERVED_0__MASK 0xfffffc00
+#define SDMA_CFG_ID_ERROR_RESERVED_0__SHIFT 10
+static inline uint32_t SDMA_CFG_ID_ERROR_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_ID_ERROR_RESERVED_0__SHIFT) & SDMA_CFG_ID_ERROR_RESERVED_0__MASK;
+}
+#define SDMA_CFG_ID_ERROR_WR_RESP_ID__MASK 0x000003c0
+#define SDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT 6
+static inline uint32_t SDMA_CFG_ID_ERROR_WR_RESP_ID(uint32_t val)
+{
+ return ((val) << SDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT) & SDMA_CFG_ID_ERROR_WR_RESP_ID__MASK;
+}
+#define SDMA_CFG_ID_ERROR_RESERVED_1__MASK 0x00000020
+#define SDMA_CFG_ID_ERROR_RESERVED_1__SHIFT 5
+static inline uint32_t SDMA_CFG_ID_ERROR_RESERVED_1(uint32_t val)
+{
+ return ((val) << SDMA_CFG_ID_ERROR_RESERVED_1__SHIFT) & SDMA_CFG_ID_ERROR_RESERVED_1__MASK;
+}
+#define SDMA_CFG_ID_ERROR_RD_RESP_ID__MASK 0x0000001f
+#define SDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT 0
+static inline uint32_t SDMA_CFG_ID_ERROR_RD_RESP_ID(uint32_t val)
+{
+ return ((val) << SDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT) & SDMA_CFG_ID_ERROR_RD_RESP_ID__MASK;
+}
+
+#define REG_SDMA_RD_WEIGHT_1 0x00009010
+#define SDMA_RD_WEIGHT_1_RESERVED_0__MASK 0xffffff00
+#define SDMA_RD_WEIGHT_1_RESERVED_0__SHIFT 8
+static inline uint32_t SDMA_RD_WEIGHT_1_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_RD_WEIGHT_1_RESERVED_0__SHIFT) & SDMA_RD_WEIGHT_1_RESERVED_0__MASK;
+}
+#define SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK 0x000000ff
+#define SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT 0
+static inline uint32_t SDMA_RD_WEIGHT_1_RD_WEIGHT_PC(uint32_t val)
+{
+ return ((val) << SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT) & SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK;
+}
+
+#define REG_SDMA_CFG_DMA_FIFO_CLR 0x00009014
+#define SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK 0xfffffffe
+#define SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT 1
+static inline uint32_t SDMA_CFG_DMA_FIFO_CLR_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT) & SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK;
+}
+#define SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK 0x00000001
+#define SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT 0
+static inline uint32_t SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT) & SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK;
+}
+
+#define REG_SDMA_CFG_DMA_ARB 0x00009018
+#define SDMA_CFG_DMA_ARB_RESERVED_0__MASK 0xfffffc00
+#define SDMA_CFG_DMA_ARB_RESERVED_0__SHIFT 10
+static inline uint32_t SDMA_CFG_DMA_ARB_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_RESERVED_0__SHIFT) & SDMA_CFG_DMA_ARB_RESERVED_0__MASK;
+}
+#define SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK 0x00000200
+#define SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT 9
+static inline uint32_t SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT) & SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK;
+}
+#define SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK 0x00000100
+#define SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT 8
+static inline uint32_t SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT) & SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK;
+}
+#define SDMA_CFG_DMA_ARB_RESERVED_1__MASK 0x00000080
+#define SDMA_CFG_DMA_ARB_RESERVED_1__SHIFT 7
+static inline uint32_t SDMA_CFG_DMA_ARB_RESERVED_1(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_RESERVED_1__SHIFT) & SDMA_CFG_DMA_ARB_RESERVED_1__MASK;
+}
+#define SDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK 0x00000070
+#define SDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT 4
+static inline uint32_t SDMA_CFG_DMA_ARB_WR_FIX_ARB(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT) & SDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK;
+}
+#define SDMA_CFG_DMA_ARB_RESERVED_2__MASK 0x00000008
+#define SDMA_CFG_DMA_ARB_RESERVED_2__SHIFT 3
+static inline uint32_t SDMA_CFG_DMA_ARB_RESERVED_2(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_RESERVED_2__SHIFT) & SDMA_CFG_DMA_ARB_RESERVED_2__MASK;
+}
+#define SDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK 0x00000007
+#define SDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT 0
+static inline uint32_t SDMA_CFG_DMA_ARB_RD_FIX_ARB(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT) & SDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK;
+}
+
+#define REG_SDMA_CFG_DMA_RD_QOS 0x00009020
+#define SDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK 0xfffffc00
+#define SDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT 10
+static inline uint32_t SDMA_CFG_DMA_RD_QOS_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT) & SDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK;
+}
+#define SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK 0x00000300
+#define SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT 8
+static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_PC_QOS(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK;
+}
+#define SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK 0x000000c0
+#define SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT 6
+static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK;
+}
+#define SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK 0x00000030
+#define SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT 4
+static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK;
+}
+#define SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK 0x0000000c
+#define SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT 2
+static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK;
+}
+#define SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK 0x00000003
+#define SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT 0
+static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK;
+}
+
+#define REG_SDMA_CFG_DMA_RD_CFG 0x00009024
+#define SDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK 0xffffe000
+#define SDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT 13
+static inline uint32_t SDMA_CFG_DMA_RD_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT) & SDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK;
+}
+#define SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK 0x00001000
+#define SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT 12
+static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARLOCK(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK;
+}
+#define SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK 0x00000f00
+#define SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT 8
+static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARCACHE(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK;
+}
+#define SDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK 0x000000e0
+#define SDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT 5
+static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARPROT(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK;
+}
+#define SDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK 0x00000018
+#define SDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT 3
+static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARBURST(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK;
+}
+#define SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK 0x00000007
+#define SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT 0
+static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARSIZE(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK;
+}
+
+#define REG_SDMA_CFG_DMA_WR_CFG 0x00009028
+#define SDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK 0xffffe000
+#define SDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT 13
+static inline uint32_t SDMA_CFG_DMA_WR_CFG_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT) & SDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK;
+}
+#define SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK 0x00001000
+#define SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT 12
+static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWLOCK(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK;
+}
+#define SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK 0x00000f00
+#define SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT 8
+static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWCACHE(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK;
+}
+#define SDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK 0x000000e0
+#define SDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT 5
+static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWPROT(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK;
+}
+#define SDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK 0x00000018
+#define SDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT 3
+static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWBURST(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK;
+}
+#define SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK 0x00000007
+#define SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT 0
+static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWSIZE(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK;
+}
+
+#define REG_SDMA_CFG_DMA_WSTRB 0x0000902c
+#define SDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK 0xffffffff
+#define SDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT 0
+static inline uint32_t SDMA_CFG_DMA_WSTRB_WR_WSTRB(uint32_t val)
+{
+ return ((val) << SDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT) & SDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK;
+}
+
+#define REG_SDMA_CFG_STATUS 0x00009030
+#define SDMA_CFG_STATUS_RESERVED_0__MASK 0xfffffe00
+#define SDMA_CFG_STATUS_RESERVED_0__SHIFT 9
+static inline uint32_t SDMA_CFG_STATUS_RESERVED_0(uint32_t val)
+{
+ return ((val) << SDMA_CFG_STATUS_RESERVED_0__SHIFT) & SDMA_CFG_STATUS_RESERVED_0__MASK;
+}
+#define SDMA_CFG_STATUS_IDEL__MASK 0x00000100
+#define SDMA_CFG_STATUS_IDEL__SHIFT 8
+static inline uint32_t SDMA_CFG_STATUS_IDEL(uint32_t val)
+{
+ return ((val) << SDMA_CFG_STATUS_IDEL__SHIFT) & SDMA_CFG_STATUS_IDEL__MASK;
+}
+#define SDMA_CFG_STATUS_RESERVED_1__MASK 0x000000ff
+#define SDMA_CFG_STATUS_RESERVED_1__SHIFT 0
+static inline uint32_t SDMA_CFG_STATUS_RESERVED_1(uint32_t val)
+{
+ return ((val) << SDMA_CFG_STATUS_RESERVED_1__SHIFT) & SDMA_CFG_STATUS_RESERVED_1__MASK;
+}
+
+#define REG_GLOBAL_OPERATION_ENABLE 0x0000f008
+#define GLOBAL_OPERATION_ENABLE_RESERVED_0__MASK 0xffffff80
+#define GLOBAL_OPERATION_ENABLE_RESERVED_0__SHIFT 7
+static inline uint32_t GLOBAL_OPERATION_ENABLE_RESERVED_0(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_RESERVED_0__SHIFT) & GLOBAL_OPERATION_ENABLE_RESERVED_0__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__MASK 0x00000040
+#define GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__SHIFT 6
+static inline uint32_t GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_PPU_OP_EN__MASK 0x00000020
+#define GLOBAL_OPERATION_ENABLE_PPU_OP_EN__SHIFT 5
+static inline uint32_t GLOBAL_OPERATION_ENABLE_PPU_OP_EN(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_PPU_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_PPU_OP_EN__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__MASK 0x00000010
+#define GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__SHIFT 4
+static inline uint32_t GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_DPU_OP_EN__MASK 0x00000008
+#define GLOBAL_OPERATION_ENABLE_DPU_OP_EN__SHIFT 3
+static inline uint32_t GLOBAL_OPERATION_ENABLE_DPU_OP_EN(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_DPU_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_DPU_OP_EN__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_CORE_OP_EN__MASK 0x00000004
+#define GLOBAL_OPERATION_ENABLE_CORE_OP_EN__SHIFT 2
+static inline uint32_t GLOBAL_OPERATION_ENABLE_CORE_OP_EN(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_CORE_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_CORE_OP_EN__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_RESERVED_1__MASK 0x00000002
+#define GLOBAL_OPERATION_ENABLE_RESERVED_1__SHIFT 1
+static inline uint32_t GLOBAL_OPERATION_ENABLE_RESERVED_1(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_RESERVED_1__SHIFT) & GLOBAL_OPERATION_ENABLE_RESERVED_1__MASK;
+}
+#define GLOBAL_OPERATION_ENABLE_CNA_OP_EN__MASK 0x00000001
+#define GLOBAL_OPERATION_ENABLE_CNA_OP_EN__SHIFT 0
+static inline uint32_t GLOBAL_OPERATION_ENABLE_CNA_OP_EN(uint32_t val)
+{
+ return ((val) << GLOBAL_OPERATION_ENABLE_CNA_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_CNA_OP_EN__MASK;
+}
+
+#endif /* __ROCKET_REGISTERS_XML__ */