27 files changed, 8477 insertions, 0 deletions
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
new file mode 100644
index 000000000000..9bdf168bf1d0
--- /dev/null
+++ b/drivers/accel/ivpu/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_IVPU
+	tristate "Intel VPU for Meteor Lake and newer"
+	depends on DRM_ACCEL
+	depends on X86_64 && !UML
+	depends on PCI && PCI_MSI
+	select FW_LOADER
+	select SHMEM
+	help
+	  Choose this option if you have a system that has a 14th generation Intel CPU
+	  or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
+	  inference accelerator for Computer Vision and Deep Learning applications.
+
+	  If "M" is selected, the module will be called intel_vpu.
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
new file mode 100644
index 000000000000..80f1fb3548ae
--- /dev/null
+++ b/drivers/accel/ivpu/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2023 Intel Corporation
+
+intel_vpu-y := \
+	ivpu_drv.o \
+	ivpu_fw.o \
+	ivpu_gem.o \
+	ivpu_hw_mtl.o \
+	ivpu_ipc.o \
+	ivpu_job.o \
+	ivpu_jsm_msg.o \
+	ivpu_mmu.o \
+	ivpu_mmu_context.o \
+	ivpu_pm.o
+
+obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
+\ No newline at end of file
diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO
new file mode 100644
index 000000000000..9077217ae10f
--- /dev/null
+++ b/drivers/accel/ivpu/TODO
@@ -0,0 +1,11 @@
+- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler()
+- Implement support for BLOB IDs
+- Add debugfs support to improve debugging and testing
+- Add tracing events for performance debugging
+- Implement HW based scheduling support
+- Use syncobjs for submit/sync
+- Refactor IPC protocol to improve message latency
+- Implement BO cache and MADVISE IOCTL
+- Add support for user allocated buffers using prime import and dma-buf heaps
+- Refactor struct ivpu_bo to use struct drm_gem_shmem_object
+- Add driver/device documentation
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
new file mode 100644
index 000000000000..231f29bb5025
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_prime.h>
+
+#include "vpu_boot_api.h"
+#include "ivpu_drv.h"
+#include "ivpu_fw.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_ipc.h"
+#include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+#include "ivpu_pm.h"
+
+#ifndef DRIVER_VERSION_STR /* default below is used only when not already defined */
+#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \
+			   __stringify(DRM_IVPU_DRIVER_MINOR) "."
+#endif /* NOTE(review): default ends with a trailing "." - confirm this is intended */
+
+static const struct drm_driver driver; /* initialized further down in this file */
+
+static struct lock_class_key submitted_jobs_xa_lock_class_key; /* for submitted_jobs_xa.xa_lock */
+
+int ivpu_dbg_mask; /* bitmask of IVPU_DBG_* categories printed by ivpu_dbg() */
+module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
+MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
+
+int ivpu_test_mode; /* compared against the IVPU_TEST_MODE_* values */
+module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
+MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
+
+u8 ivpu_pll_min_ratio; /* zero unless set through the module parameter */
+module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
+MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency");
+
+u8 ivpu_pll_max_ratio = U8_MAX; /* defaults to the largest u8 value */
+module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
+MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency");
+
+/* Take an additional reference on @file_priv and hand the same pointer back. */
+struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
+{
+	kref_get(&file_priv->ref);
+
+	/* Logged after the increment, so the printed refcount includes this get */
+	ivpu_dbg(file_priv->vdev, KREF, "file_priv get: ctx %u refcount %u\n",
+		 file_priv->ctx.id, kref_read(&file_priv->ref));
+
+	return file_priv;
+}
+
+/* Look up context @id; returns it with a new reference held, or NULL. */
+struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id)
+{
+	struct ivpu_file_priv *found;
+
+	xa_lock_irq(&vdev->context_xa);
+	found = xa_load(&vdev->context_xa, id);
+	/* An entry whose refcount already hit zero is inside file_priv_release(): skip it */
+	if (found && !kref_get_unless_zero(&found->ref))
+		found = NULL;
+	xa_unlock_irq(&vdev->context_xa);
+
+	if (found)
+		ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n",
+			 found->ctx.id, kref_read(&found->ref));
+	return found;
+}
+
+static void file_priv_release(struct kref *ref)
+{
+	struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref);
+	struct ivpu_device *vdev = file_priv->vdev;
+
+	ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
+	/* kref release callback (see ivpu_file_priv_put()): tear the context down, then free it */
+	ivpu_cmdq_release_all(file_priv);
+	ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
+	ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+	drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
+	mutex_destroy(&file_priv->lock);
+	kfree(file_priv);
+}
+
+void ivpu_file_priv_put(struct ivpu_file_priv **link)
+{
+	struct ivpu_file_priv *file_priv = *link;
+
+	/* Check before any dereference: vdev cannot be fetched from a NULL file_priv */
+	if (WARN_ON(!file_priv))
+		return;
+
+	ivpu_dbg(file_priv->vdev, KREF, "file_priv put: ctx %u refcount %u\n",
+		 file_priv->ctx.id, kref_read(&file_priv->ref));
+	*link = NULL; /* the caller's pointer is consumed together with its reference */
+	kref_put(&file_priv->ref, file_priv_release);
+}
+
+static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
+	struct drm_ivpu_param *args = data;
+	int ret = 0;
+
+	switch (args->param) { /* each case fills args->value or sets ret */
+	case DRM_IVPU_PARAM_DEVICE_ID:
+		args->value = pdev->device;
+		break;
+	case DRM_IVPU_PARAM_DEVICE_REVISION:
+		args->value = pdev->revision;
+		break;
+	case DRM_IVPU_PARAM_PLATFORM_TYPE:
+		args->value = vdev->platform;
+		break;
+	case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
+		args->value = ivpu_hw_reg_pll_freq_get(vdev);
+		break;
+	case DRM_IVPU_PARAM_NUM_CONTEXTS:
+		args->value = ivpu_get_context_count(vdev);
+		break;
+	case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
+		args->value = vdev->hw->ranges.user_low.start;
+		break;
+	case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
+		args->value = file_priv->priority;
+		break;
+	case DRM_IVPU_PARAM_CONTEXT_ID:
+		args->value = file_priv->ctx.id;
+		break;
+	case DRM_IVPU_PARAM_FW_API_VERSION:
+		if (args->index < VPU_FW_API_VER_NUM) {
+			struct vpu_firmware_header *fw_hdr;
+			/* The firmware file data is read as a struct vpu_firmware_header */
+			fw_hdr = (struct vpu_firmware_header *)vdev->fw->file->data;
+			args->value = fw_hdr->api_version[args->index];
+		} else {
+			ret = -EINVAL;
+		}
+		break;
+	case DRM_IVPU_PARAM_ENGINE_HEARTBEAT:
+		ret = ivpu_jsm_get_heartbeat(vdev, args->index, &args->value);
+		break;
+	case DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID:
+		args->value = (u64)atomic64_inc_return(&vdev->unique_id_counter); /* bumped per query */
+		break;
+	case DRM_IVPU_PARAM_TILE_CONFIG:
+		args->value = vdev->hw->tile_fuse;
+		break;
+	case DRM_IVPU_PARAM_SKU:
+		args->value = vdev->hw->sku;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	/* 0, -EINVAL, or whatever ivpu_jsm_get_heartbeat() returned */
+	return ret;
+}
+
+/*
+ * IVPU_SET_PARAM handler. DRM_IVPU_PARAM_CONTEXT_PRIORITY is the only
+ * parameter accepted; every other request gets -EINVAL.
+ */
+static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct drm_ivpu_param *args = data;
+
+	if (args->param != DRM_IVPU_PARAM_CONTEXT_PRIORITY)
+		return -EINVAL;
+
+	/* Values above DRM_IVPU_CONTEXT_PRIORITY_REALTIME are out of range */
+	if (args->value > DRM_IVPU_CONTEXT_PRIORITY_REALTIME)
+		return -EINVAL;
+
+	file_priv->priority = args->value;
+	return 0;
+}
+
+static int ivpu_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct ivpu_file_priv *file_priv;
+	u32 ctx_id;
+	void *old;
+	int ret;
+	/* DRM open hook: allocate a context id, then build and publish the file_priv for it */
+	ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL);
+	if (ret) {
+		ivpu_err(vdev, "Failed to allocate context id: %d\n", ret);
+		return ret;
+	}
+
+	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+	if (!file_priv) {
+		ret = -ENOMEM;
+		goto err_xa_erase;
+	}
+
+	file_priv->vdev = vdev;
+	file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL;
+	kref_init(&file_priv->ref); /* initial reference, dropped in ivpu_postclose() */
+	mutex_init(&file_priv->lock);
+
+	ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
+	if (ret)
+		goto err_mutex_destroy;
+	/* Replace the NULL entry stored by xa_alloc_irq() above with the real file_priv */
+	old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL);
+	if (xa_is_err(old)) {
+		ret = xa_err(old);
+		ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret);
+		goto err_ctx_fini;
+	}
+
+	ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n",
+		 ctx_id, current->comm, task_pid_nr(current));
+
+	file->driver_priv = file_priv;
+	return 0;
+	/* Error unwind: each label undoes the steps that succeeded before the failure */
+err_ctx_fini:
+	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+err_mutex_destroy:
+	mutex_destroy(&file_priv->lock);
+	kfree(file_priv);
+err_xa_erase:
+	xa_erase_irq(&vdev->context_xa, ctx_id);
+	return ret;
+}
+
+/* DRM postclose hook: drop the reference set up for this file in ivpu_open(). */
+static void ivpu_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+
+	ivpu_dbg(to_ivpu_device(dev), FILE, "file_priv close: ctx %u process %s pid %d\n",
+		 file_priv->ctx.id, current->comm, task_pid_nr(current));
+
+	ivpu_file_priv_put(&file_priv);
+}
+
+static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { /* all entries use flags 0 */
+	DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0),
+};
+
+static int ivpu_wait_for_ready(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_consumer cons;
+	struct ivpu_ipc_hdr ipc_hdr;
+	unsigned long timeout;
+	int ret;
+
+	if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST) /* no ready message is awaited in this mode */
+		return 0;
+
+	ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
+	/* Poll: ivpu_boot() enables the IRQ only after this returns, so run the IPC handler by hand */
+	timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
+	while (1) {
+		ret = ivpu_ipc_irq_handler(vdev);
+		if (ret)
+			break;
+		ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
+		if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
+			break;
+
+		cond_resched();
+	}
+
+	ivpu_ipc_consumer_del(vdev, &cons);
+	/* ipc_hdr is only inspected when the receive succeeded (ret == 0) */
+	if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) {
+		ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n",
+			 ipc_hdr.data_addr);
+		return -EIO;
+	}
+
+	if (!ret)
+		ivpu_info(vdev, "VPU ready message received successfully\n");
+	else
+		ivpu_hw_diagnose_failure(vdev);
+
+	return ret;
+}
+
+/**
+ * ivpu_boot() - Start VPU firmware
+ * @vdev: VPU device
+ *
+ * Paired with ivpu_shutdown(), but it doesn't power up the VPU: power up is
+ * done earlier, from ivpu_dev_init(). Return: 0 or the failing step's error.
+ */
+int ivpu_boot(struct ivpu_device *vdev)
+{
+	int ret;
+
+	/* Update boot params located at first 4KB of FW memory */
+	ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
+
+	ret = ivpu_hw_boot_fw(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to start the firmware: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_wait_for_ready(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
+		return ret;
+	}
+	/* Firmware reported ready: clear HW IRQs, then enable the IRQ line, HW IRQs and IPC */
+	ivpu_hw_irq_clear(vdev);
+	enable_irq(vdev->irq);
+	ivpu_hw_irq_enable(vdev);
+	ivpu_ipc_enable(vdev);
+	return 0;
+}
+
+int ivpu_shutdown(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ivpu_hw_irq_disable(vdev);
+	disable_irq(vdev->irq);
+	ivpu_ipc_disable(vdev);
+	ivpu_mmu_disable(vdev);
+	/* Power down last; a failure is only warned about and passed back to the caller */
+	ret = ivpu_hw_power_down(vdev);
+	if (ret)
+		ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
+
+	return ret;
+}
+
+static const struct file_operations ivpu_fops = {
+	.owner		= THIS_MODULE,
+	DRM_ACCEL_FOPS, /* the remaining operations come from this DRM accel macro */
+};
+
+static const struct drm_driver driver = {
+	.driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
+	/* Per-file open/close hooks and PRIME handle/fd callbacks */
+	.open = ivpu_open,
+	.postclose = ivpu_postclose,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import = ivpu_gem_prime_import,
+	.gem_prime_mmap = drm_gem_prime_mmap,
+	/* Driver ioctl table and file operations defined above */
+	.ioctls = ivpu_drm_ioctls,
+	.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
+	.fops = &ivpu_fops,
+	/* Identification strings and version numbers */
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRM_IVPU_DRIVER_MAJOR,
+	.minor = DRM_IVPU_DRIVER_MINOR,
+};
+
+static int ivpu_irq_init(struct ivpu_device *vdev)
+{
+	struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
+	int ret;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); /* exactly one vector */
+	if (ret < 0) {
+		ivpu_err(vdev, "Failed to allocate a MSI IRQ: %d\n", ret);
+		return ret;
+	}
+
+	vdev->irq = pci_irq_vector(pdev, 0);
+	/* IRQF_NO_AUTOEN: handler is installed but the IRQ stays off until enable_irq() in ivpu_boot() */
+	ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
+			       IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
+
+	return ret;
+}
+
+static int ivpu_pci_init(struct ivpu_device *vdev)
+{
+	struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
+	struct resource *bar0 = &pdev->resource[0];
+	struct resource *bar4 = &pdev->resource[4];
+	int ret;
+
+	ivpu_dbg(vdev, MISC, "Mapping BAR0 (RegV) %pR\n", bar0);
+	vdev->regv = devm_ioremap_resource(vdev->drm.dev, bar0);
+	if (IS_ERR(vdev->regv)) {
+		ivpu_err(vdev, "Failed to map bar 0: %pe\n", vdev->regv);
+		return PTR_ERR(vdev->regv);
+	}
+
+	ivpu_dbg(vdev, MISC, "Mapping BAR4 (RegB) %pR\n", bar4);
+	vdev->regb = devm_ioremap_resource(vdev->drm.dev, bar4);
+	if (IS_ERR(vdev->regb)) {
+		ivpu_err(vdev, "Failed to map bar 4: %pe\n", vdev->regb);
+		return PTR_ERR(vdev->regb);
+	}
+	/* 38-bit DMA mask for both streaming and coherent mappings */
+	ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38));
+	if (ret) {
+		ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret);
+		return ret;
+	}
+	dma_set_max_seg_size(vdev->drm.dev, UINT_MAX);
+
+	/* Clear any pending errors */
+	pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f);
+	/* NOTE(review): BARs are mapped and DEVSTA is written before the device is enabled - confirm */
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret);
+		return ret;
+	}
+
+	pci_set_master(pdev);
+
+	return 0;
+}
+
+static int ivpu_dev_init(struct ivpu_device *vdev)
+{
+	int ret;
+	/* Allocate the sub-structures, then bring up PCI, IRQ, HW, MMU, FW, IPC, PM and boot the FW */
+	vdev->hw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->hw), GFP_KERNEL);
+	if (!vdev->hw)
+		return -ENOMEM;
+
+	vdev->mmu = drmm_kzalloc(&vdev->drm, sizeof(*vdev->mmu), GFP_KERNEL);
+	if (!vdev->mmu)
+		return -ENOMEM;
+
+	vdev->fw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->fw), GFP_KERNEL);
+	if (!vdev->fw)
+		return -ENOMEM;
+
+	vdev->ipc = drmm_kzalloc(&vdev->drm, sizeof(*vdev->ipc), GFP_KERNEL);
+	if (!vdev->ipc)
+		return -ENOMEM;
+
+	vdev->pm = drmm_kzalloc(&vdev->drm, sizeof(*vdev->pm), GFP_KERNEL);
+	if (!vdev->pm)
+		return -ENOMEM;
+	/* Defaults: MTL HW ops, platform not yet known, user context ids above the global SSID */
+	vdev->hw->ops = &ivpu_hw_mtl_ops;
+	vdev->platform = IVPU_PLATFORM_INVALID;
+	vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1;
+	vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT;
+	atomic64_set(&vdev->unique_id_counter, 0);
+	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
+	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
+
+	ret = ivpu_pci_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
+		goto err_xa_destroy;
+	}
+
+	ret = ivpu_irq_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
+		goto err_xa_destroy;
+	}
+
+	/* Init basic HW info based on buttress registers which are accessible before power up */
+	ret = ivpu_hw_info_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
+		goto err_xa_destroy;
+	}
+
+	/* Power up early so the rest of init code can access VPU registers */
+	ret = ivpu_hw_power_up(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
+		goto err_xa_destroy;
+	}
+
+	ret = ivpu_mmu_global_context_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
+		goto err_power_down;
+	}
+
+	ret = ivpu_mmu_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
+		goto err_mmu_gctx_fini;
+	}
+
+	ret = ivpu_fw_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
+		goto err_mmu_gctx_fini;
+	}
+
+	ret = ivpu_ipc_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
+		goto err_fw_fini;
+	}
+
+	ret = ivpu_pm_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
+		goto err_ipc_fini;
+	}
+
+	ret = ivpu_job_done_thread_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
+		goto err_ipc_fini;
+	}
+
+	ret = ivpu_fw_load(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
+		goto err_job_done_thread_fini;
+	}
+
+	ret = ivpu_boot(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to boot: %d\n", ret);
+		goto err_job_done_thread_fini;
+	}
+
+	ivpu_pm_enable(vdev);
+
+	return 0;
+	/* Unwind in reverse order; a jump to one label also runs every label below it */
+err_job_done_thread_fini:
+	ivpu_job_done_thread_fini(vdev);
+err_ipc_fini:
+	ivpu_ipc_fini(vdev);
+err_fw_fini:
+	ivpu_fw_fini(vdev);
+err_mmu_gctx_fini:
+	ivpu_mmu_global_context_fini(vdev);
+err_power_down:
+	ivpu_hw_power_down(vdev);
+err_xa_destroy:
+	xa_destroy(&vdev->submitted_jobs_xa);
+	xa_destroy(&vdev->context_xa);
+	return ret;
+}
+
+static void ivpu_dev_fini(struct ivpu_device *vdev)
+{
+	ivpu_pm_disable(vdev);
+	ivpu_shutdown(vdev);
+	ivpu_job_done_thread_fini(vdev);
+	ivpu_ipc_fini(vdev);
+	ivpu_fw_fini(vdev);
+	ivpu_mmu_global_context_fini(vdev);
+	/* Both xarrays are expected to be empty by now; warn if not, then destroy them */
+	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+	xa_destroy(&vdev->submitted_jobs_xa);
+	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa));
+	xa_destroy(&vdev->context_xa);
+}
+
+static const struct pci_device_id ivpu_pci_ids[] = { /* read-only match table */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
+	{ } /* zeroed entry terminates the table */
+};
+MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
+
+/* PCI probe: allocate the DRM device, initialize the VPU, then register with DRM. */
+static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ivpu_device *vdev;
+	int err;
+
+	vdev = devm_drm_dev_alloc(&pdev->dev, &driver, struct ivpu_device, drm);
+	if (IS_ERR(vdev))
+		return PTR_ERR(vdev);
+	pci_set_drvdata(pdev, vdev);
+
+	err = ivpu_dev_init(vdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", err);
+		return err;
+	}
+
+	err = drm_dev_register(&vdev->drm, 0);
+	if (!err)
+		return 0;
+	/* Registration failed: undo ivpu_dev_init() before reporting the error */
+	dev_err(&pdev->dev, "Failed to register DRM device: %d\n", err);
+	ivpu_dev_fini(vdev);
+	return err;
+}
+
+static void ivpu_remove(struct pci_dev *pdev)
+{
+	struct ivpu_device *vdev = pci_get_drvdata(pdev); /* stored by ivpu_probe() */
+
+	drm_dev_unregister(&vdev->drm); /* unregister from DRM before tearing the device down */
+	ivpu_dev_fini(vdev);
+}
+
+static const struct dev_pm_ops ivpu_drv_pci_pm = { /* system sleep and runtime PM callbacks */
+	SET_SYSTEM_SLEEP_PM_OPS(ivpu_pm_suspend_cb, ivpu_pm_resume_cb)
+	SET_RUNTIME_PM_OPS(ivpu_pm_runtime_suspend_cb, ivpu_pm_runtime_resume_cb, NULL)
+};
+
+static const struct pci_error_handlers ivpu_drv_pci_err = { /* only the reset hooks are set */
+	.reset_prepare = ivpu_pm_reset_prepare_cb,
+	.reset_done = ivpu_pm_reset_done_cb,
+};
+
+static struct pci_driver ivpu_pci_driver = { /* registered by module_pci_driver() below */
+	.name = KBUILD_MODNAME,
+	.id_table = ivpu_pci_ids,
+	.probe = ivpu_probe,
+	.remove = ivpu_remove,
+	.driver = {
+		.pm = &ivpu_drv_pci_pm,
+	},
+	.err_handler = &ivpu_drv_pci_err,
+};
+
+module_pci_driver(ivpu_pci_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights"); /* NOTE(review): SPDX line says GPL-2.0-only - confirm */
+MODULE_VERSION(DRIVER_VERSION_STR);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
new file mode 100644
index 000000000000..f47b4965db2e
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_DRV_H__
+#define __IVPU_DRV_H__
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_mm.h>
+#include <drm/drm_print.h>
+
+#include <linux/pci.h>
+#include <linux/xarray.h>
+#include <uapi/drm/ivpu_accel.h>
+
+#include "ivpu_mmu_context.h"
+
+#define DRIVER_NAME "intel_vpu"
+#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
+#define DRIVER_DATE "20230117"
+/* PCI device id compared in ivpu_is_mtl() */
+#define PCI_DEVICE_ID_MTL   0x7d1d
+/* Context ids: 0 is the global context SSID, IVPU_CONTEXT_LIMIT is the upper bound */
+#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
+#define IVPU_CONTEXT_LIMIT	     64
+#define IVPU_NUM_ENGINES	     2
+/* Values of ivpu_device.platform; ivpu_get_platform() warns on INVALID */
+#define IVPU_PLATFORM_SILICON 0
+#define IVPU_PLATFORM_SIMICS  2
+#define IVPU_PLATFORM_FPGA    3
+#define IVPU_PLATFORM_INVALID 8
+/* ivpu_dbg() categories, one bit each in ivpu_dbg_mask */
+#define IVPU_DBG_REG	 BIT(0)
+#define IVPU_DBG_IRQ	 BIT(1)
+#define IVPU_DBG_MMU	 BIT(2)
+#define IVPU_DBG_FILE	 BIT(3)
+#define IVPU_DBG_MISC	 BIT(4)
+#define IVPU_DBG_FW_BOOT BIT(5)
+#define IVPU_DBG_PM	 BIT(6)
+#define IVPU_DBG_IPC	 BIT(7)
+#define IVPU_DBG_BO	 BIT(8)
+#define IVPU_DBG_JOB	 BIT(9)
+#define IVPU_DBG_JSM	 BIT(10)
+#define IVPU_DBG_KREF	 BIT(11)
+#define IVPU_DBG_RPM	 BIT(12)
+
+#define ivpu_err(vdev, fmt, ...) \
+	drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
+/* Same "%s(): " function-name prefix as ivpu_err(), rate limited */
+#define ivpu_err_ratelimited(vdev, fmt, ...) \
+	drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
+/* Warning level, prefixed with the calling function's name */
+#define ivpu_warn(vdev, fmt, ...) \
+	drm_warn(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
+/* NOTE(review): expands to drm_err_ratelimited(), i.e. error level, not warning - confirm */
+#define ivpu_warn_ratelimited(vdev, fmt, ...) \
+	drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
+/* Info level; unlike the macros above, no function-name prefix is added */
+#define ivpu_info(vdev, fmt, ...) drm_info(&(vdev)->drm, fmt, ##__VA_ARGS__)
+/* Printed only when the IVPU_DBG_<type> bit is set in ivpu_dbg_mask; output is tagged "[<type>] " */
+#define ivpu_dbg(vdev, type, fmt, args...) do {                                \
+	if (unlikely(IVPU_DBG_##type & ivpu_dbg_mask))                         \
+		dev_dbg((vdev)->drm.dev, "[%s] " fmt, #type, ##args);          \
+} while (0)
+
+#define IVPU_WA(wa_name) (vdev->wa.wa_name) /* relies on a variable named "vdev" in the caller */
+/* Flags read through IVPU_WA(); stored in ivpu_device.wa */
+struct ivpu_wa_table {
+	bool punit_disabled;
+	bool clear_runtime_mem;
+};
+
+struct ivpu_hw_info;
+struct ivpu_mmu_info;
+struct ivpu_fw_info;
+struct ivpu_ipc_info;
+struct ivpu_pm_info;
+
+struct ivpu_device {
+	struct drm_device drm; /* embedded DRM device, see to_ivpu_device() */
+	void __iomem *regb; /* BAR4 mapping ("RegB"), set in ivpu_pci_init() */
+	void __iomem *regv; /* BAR0 mapping ("RegV"), set in ivpu_pci_init() */
+	u32 platform; /* one of IVPU_PLATFORM_* */
+	u32 irq; /* IRQ number obtained from pci_irq_vector() */
+
+	struct ivpu_wa_table wa; /* accessed through IVPU_WA() */
+	struct ivpu_hw_info *hw; /* these five are drmm_kzalloc()ed in ivpu_dev_init() */
+	struct ivpu_mmu_info *mmu;
+	struct ivpu_fw_info *fw;
+	struct ivpu_ipc_info *ipc;
+	struct ivpu_pm_info *pm;
+
+	struct ivpu_mmu_context gctx;
+	struct xarray context_xa; /* context id -> struct ivpu_file_priv */
+	struct xa_limit context_xa_limit; /* id range handed to xa_alloc_irq() in ivpu_open() */
+
+	struct xarray submitted_jobs_xa;
+	struct task_struct *job_done_thread;
+
+	atomic64_t unique_id_counter; /* source of DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID values */
+
+	struct {
+		int boot; /* milliseconds: fed to msecs_to_jiffies() in ivpu_wait_for_ready() */
+		int jsm;
+		int tdr;
+		int reschedule_suspend;
+	} timeout;
+};
+
+/*
+ * file_priv has its own refcount (ref) that allows user space to close the fd
+ * without blocking even if VPU is still processing some jobs.
+ */
+struct ivpu_file_priv {
+	struct kref ref; /* released through ivpu_file_priv_put() */
+	struct ivpu_device *vdev;
+	struct mutex lock; /* Protects cmdq */
+	struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; /* one slot per engine */
+	struct ivpu_mmu_context ctx; /* ctx.id is also the key in vdev->context_xa */
+	u32 priority; /* DRM_IVPU_CONTEXT_PRIORITY_*; NORMAL after ivpu_open() */
+	bool has_mmu_faults;
+};
+
+extern int ivpu_dbg_mask;
+extern u8 ivpu_pll_min_ratio;
+extern u8 ivpu_pll_max_ratio;
+
+#define IVPU_TEST_MODE_DISABLED  0
+#define IVPU_TEST_MODE_FW_TEST   1
+#define IVPU_TEST_MODE_NULL_HW   2
+extern int ivpu_test_mode;
+
+struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
+struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id);
+void ivpu_file_priv_put(struct ivpu_file_priv **link);
+
+int ivpu_boot(struct ivpu_device *vdev);
+int ivpu_shutdown(struct ivpu_device *vdev);
+
+static inline bool ivpu_is_mtl(struct ivpu_device *vdev)
+{
+	return to_pci_dev(vdev->drm.dev)->device == PCI_DEVICE_ID_MTL; /* PCI device id match */
+}
+
+static inline u8 ivpu_revision(struct ivpu_device *vdev)
+{
+	return to_pci_dev(vdev->drm.dev)->revision; /* revision field of the underlying pci_dev */
+}
+
+static inline u16 ivpu_device_id(struct ivpu_device *vdev)
+{
+	return to_pci_dev(vdev->drm.dev)->device; /* device field of the underlying pci_dev */
+}
+
+static inline struct ivpu_device *to_ivpu_device(struct drm_device *dev)
+{
+	return container_of(dev, struct ivpu_device, drm); /* dev must be the embedded drm member */
+}
+
+/* Number of ids in the inclusive [min, max] range of context_xa_limit. */
+static inline u32 ivpu_get_context_count(struct ivpu_device *vdev)
+{
+	return vdev->context_xa_limit.max -
+	       vdev->context_xa_limit.min + 1;
+}
+
+static inline u32 ivpu_get_platform(struct ivpu_device *vdev)
+{
+	WARN_ON_ONCE(vdev->platform == IVPU_PLATFORM_INVALID); /* warns once; the value is still returned */
+	return vdev->platform;
+}
+
+static inline bool ivpu_is_silicon(struct ivpu_device *vdev)
+{
+	return ivpu_get_platform(vdev) == IVPU_PLATFORM_SILICON; /* warns once if still INVALID */
+}
+
+static inline bool ivpu_is_simics(struct ivpu_device *vdev)
+{
+	return ivpu_get_platform(vdev) == IVPU_PLATFORM_SIMICS; /* warns once if still INVALID */
+}
+
+static inline bool ivpu_is_fpga(struct ivpu_device *vdev)
+{
+	return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA; /* warns once if still INVALID */
+}
+
+#endif /* __IVPU_DRV_H__ */
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
new file mode 100644
index 000000000000..f58951a0d81b
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/firmware.h>
+#include <linux/highmem.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+
+#include "vpu_boot_api.h"
+#include "ivpu_drv.h"
+#include "ivpu_fw.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_ipc.h"
+#include "ivpu_pm.h"
+
+#define FW_GLOBAL_MEM_START	(2ull * SZ_1G)
+#define FW_GLOBAL_MEM_END	(3ull * SZ_1G)
+#define FW_SHARED_MEM_SIZE	SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */
+#define FW_SHARED_MEM_ALIGNMENT	SZ_128K /* VPU MTRR limitation */
+#define FW_RUNTIME_MAX_SIZE	SZ_512M
+#define FW_SHAVE_NN_MAX_SIZE	SZ_2M
+#define FW_RUNTIME_MIN_ADDR	(FW_GLOBAL_MEM_START)
+#define FW_RUNTIME_MAX_ADDR	(FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
+#define FW_VERSION_HEADER_SIZE	SZ_4K
+#define FW_FILE_IMAGE_OFFSET	(VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
+
+#define WATCHDOG_MSS_REDIRECT	32
+#define WATCHDOG_NCE_REDIRECT	33
+
+#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
+
+/*
+ * Check one firmware API version entry with ivpu_fw_check_api().
+ * @name is pasted into VPU_<name>_API_VER_{INDEX,MAJOR,MINOR} and is also
+ * stringified for the log messages; @min_major is the lowest accepted major.
+ */
+#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \
+	ivpu_fw_check_api(vdev, fw_hdr, #name, \
+			  VPU_##name##_API_VER_INDEX, \
+			  VPU_##name##_API_VER_MAJOR, \
+			  VPU_##name##_API_VER_MINOR, min_major)
+
+/*
+ * Optional firmware file name override ("firmware" module parameter).
+ * When set, ivpu_fw_request() requests only this file and skips the
+ * built-in default names.
+ */
+static char *ivpu_firmware;
+module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
+MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
+
+/*
+ * Request the firmware binary and store it in vdev->fw->file.
+ *
+ * If the "firmware" module parameter is set, only that file is requested.
+ * Otherwise the built-in default names are tried in order and the first one
+ * that loads is used.
+ *
+ * Return: 0 on success, otherwise the error code of the last attempt.
+ */
+static int ivpu_fw_request(struct ivpu_device *vdev)
+{
+	static const char * const fw_names[] = {
+		"mtl_vpu.bin",
+		"intel/vpu/mtl_vpu_v0.0.bin"
+	};
+	int ret = -ENOENT;
+	int i = 0;
+
+	if (ivpu_firmware)
+		return request_firmware(&vdev->fw->file, ivpu_firmware, vdev->drm.dev);
+
+	while (i < ARRAY_SIZE(fw_names)) {
+		const char *name = fw_names[i++];
+
+		ret = firmware_request_nowarn(&vdev->fw->file, name, vdev->drm.dev);
+		if (ret == 0)
+			return 0;
+	}
+
+	ivpu_err(vdev, "Failed to request firmware: %d\n", ret);
+	return ret;
+}
+
+/*
+ * Compare one API version from the firmware header with what the driver
+ * was built against.
+ *
+ * The 32-bit entry at fw_hdr->api_version[@index] holds the major version in
+ * its upper 16 bits and the minor version in its lower 16 bits. A major
+ * version below @min_major is rejected; a major version that merely differs
+ * from @expected_major only produces a warning.
+ *
+ * Return: 0 if the version is acceptable, -EINVAL if it is too old.
+ */
+static int
+ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
+		  const char *str, int index, u16 expected_major, u16 expected_minor,
+		  u16 min_major)
+{
+	u16 major = (u16)(fw_hdr->api_version[index] >> 16);
+	u16 minor = (u16)(fw_hdr->api_version[index]);
+
+	if (major < min_major) {
+		ivpu_err(vdev, "Incompatible FW %s API version: %d.%d, required %d.0 or later\n",
+			 str, major, minor, min_major);
+		return -EINVAL;
+	}
+	if (major != expected_major) {
+		ivpu_warn(vdev, "Major FW %s API version different: %d.%d (expected %d.%d)\n",
+			  str, major, minor, expected_major, expected_minor);
+	}
+	ivpu_dbg(vdev, FW_BOOT, "FW %s API version: %d.%d (expected %d.%d)\n",
+		 str, major, minor, expected_major, expected_minor);
+
+	return 0;
+}
+
+/*
+ * Validate the header of the requested firmware file and cache its layout.
+ *
+ * Checks the file size, header version, runtime region address and size,
+ * image size and load address, SHAVE NN firmware size, entry point and the
+ * BOOT/JSM API versions. On success the derived values are stored in
+ * vdev->fw and the entry point is set to the cold boot entry point.
+ *
+ * The firmware file is untrusted input, so all range checks are written so
+ * that they cannot wrap around in 64-bit arithmetic.
+ *
+ * Return: 0 on success, -EINVAL if any header field is out of bounds.
+ */
+static int ivpu_fw_parse(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data;
+	u64 runtime_addr, image_load_addr, runtime_size, image_size;
+
+	if (fw->file->size <= FW_FILE_IMAGE_OFFSET) {
+		ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size);
+		return -EINVAL;
+	}
+
+	if (fw_hdr->header_version != VPU_FW_HEADER_VERSION) {
+		ivpu_err(vdev, "Invalid firmware header version: %u\n", fw_hdr->header_version);
+		return -EINVAL;
+	}
+
+	runtime_addr = fw_hdr->boot_params_load_address;
+	runtime_size = fw_hdr->runtime_size;
+	image_load_addr = fw_hdr->image_load_address;
+	image_size = fw_hdr->image_size;
+
+	if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) {
+		ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr);
+		return -EINVAL;
+	}
+
+	if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) {
+		ivpu_err(vdev, "Invalid firmware runtime size: %llu\n", runtime_size);
+		return -EINVAL;
+	}
+
+	/* file->size > FW_FILE_IMAGE_OFFSET was verified above, no underflow */
+	if (image_size > fw->file->size - FW_FILE_IMAGE_OFFSET) {
+		ivpu_err(vdev, "Invalid image size: %llu\n", image_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * image_size < file size <= runtime_size holds at this point, so the
+	 * subtractions below cannot underflow and, unlike
+	 * "image_load_addr + image_size", cannot wrap for a huge load address.
+	 */
+	if (image_load_addr < runtime_addr ||
+	    image_load_addr - runtime_addr > runtime_size - image_size) {
+		ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n",
+			 image_load_addr, image_size);
+		return -EINVAL;
+	}
+
+	if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) {
+		ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size);
+		return -EINVAL;
+	}
+
+	/* The image range was validated above, so this sum cannot overflow */
+	if (fw_hdr->entry_point < image_load_addr ||
+	    fw_hdr->entry_point >= image_load_addr + image_size) {
+		ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point);
+		return -EINVAL;
+	}
+	ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n",
+		 fw_hdr->header_version, fw_hdr->image_format);
+	/* Bound the print: the version area may lack a NUL terminator */
+	ivpu_dbg(vdev, FW_BOOT, "FW version: %.*s\n", (int)FW_VERSION_HEADER_SIZE,
+		 (const char *)fw_hdr + VPU_FW_HEADER_SIZE);
+
+	if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3))
+		return -EINVAL;
+	if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3))
+		return -EINVAL;
+
+	fw->runtime_addr = runtime_addr;
+	fw->runtime_size = runtime_size;
+	fw->image_load_offset = image_load_addr - runtime_addr;
+	fw->image_size = image_size;
+	fw->shave_nn_size = PAGE_ALIGN(fw_hdr->shave_nn_fw_size);
+
+	fw->cold_boot_entry_point = fw_hdr->entry_point;
+	fw->entry_point = fw->cold_boot_entry_point;
+
+	ivpu_dbg(vdev, FW_BOOT, "Size: file %zu image %u runtime %u shavenn %u\n",
+		 fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
+	ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
+		 fw->runtime_addr, image_load_addr, fw->entry_point);
+
+	return 0;
+}
+
+/* Release the firmware file held in vdev->fw->file. */
+static void ivpu_fw_release(struct ivpu_device *vdev)
+{
+	const struct firmware *file = vdev->fw->file;
+
+	release_firmware(file);
+}
+
+/*
+ * Place the shared memory region directly after the firmware runtime area.
+ *
+ * The region starts at the end of the runtime area rounded up to
+ * FW_SHARED_MEM_ALIGNMENT, is FW_SHARED_MEM_SIZE bytes long and becomes the
+ * hw->ranges.global_low range.
+ *
+ * Return: 0 on success, -EINVAL if the region would end past
+ * FW_GLOBAL_MEM_END.
+ */
+static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT);
+	u64 size = FW_SHARED_MEM_SIZE;
+
+	if (start + size > FW_GLOBAL_MEM_END) {
+		/* Both values are u64: print them unsigned, the address in hex */
+		ivpu_err(vdev, "No space for shared region, start 0x%llx, size %llu\n",
+			 start, size);
+		return -EINVAL;
+	}
+
+	ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size);
+	return 0;
+}
+
+/*
+ * Allocate the buffers the firmware runs from.
+ *
+ * Sets up the shared region range, then allocates the runtime buffer at
+ * fw->runtime_addr and, if the firmware header declared a SHAVE NN firmware
+ * size, a second buffer at the start of the global_high range.
+ *
+ * Return: 0 on success, -EINVAL if the shared region does not fit,
+ * -ENOMEM if an allocation fails. Nothing stays allocated on failure.
+ */
+static int ivpu_fw_mem_init(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	int ret;
+
+	ret = ivpu_fw_update_global_range(vdev);
+	if (ret)
+		return ret;
+
+	fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
+	if (!fw->mem) {
+		ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
+		return -ENOMEM;
+	}
+
+	if (fw->shave_nn_size) {
+		fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start,
+							  fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
+		if (!fw->mem_shave_nn) {
+			ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
+			ivpu_bo_free_internal(fw->mem);
+			/* Do not leave a dangling pointer to the freed BO */
+			fw->mem = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/* Free the SHAVE NN buffer (if one was allocated) and the runtime buffer. */
+static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	struct ivpu_bo *shave_nn = fw->mem_shave_nn;
+
+	if (shave_nn) {
+		ivpu_bo_free_internal(shave_nn);
+		fw->mem_shave_nn = NULL;
+	}
+
+	ivpu_bo_free_internal(fw->mem);
+	fw->mem = NULL;
+}
+
+/*
+ * Request the firmware file, validate its header and allocate the firmware
+ * buffers. The firmware file is released again if parsing or allocation
+ * fails.
+ *
+ * Return: 0 on success or a negative error code.
+ */
+int ivpu_fw_init(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ret = ivpu_fw_request(vdev);
+	if (ret)
+		return ret;
+
+	ret = ivpu_fw_parse(vdev);
+	if (!ret)
+		ret = ivpu_fw_mem_init(vdev);
+
+	if (ret)
+		ivpu_fw_release(vdev);
+
+	return ret;
+}
+
+/* Undo ivpu_fw_init(): free the firmware buffers, then release the file. */
+void ivpu_fw_fini(struct ivpu_device *vdev)
+{
+	ivpu_fw_mem_fini(vdev);
+	ivpu_fw_release(vdev);
+}
+
+/*
+ * Copy the firmware image from the firmware file into the runtime buffer.
+ *
+ * The part of the buffer before the image is zeroed, the image is copied to
+ * fw->image_load_offset and, when the clear_runtime_mem workaround is
+ * enabled, the rest of the buffer after the image is zeroed as well.
+ *
+ * Return: always 0.
+ */
+int ivpu_fw_load(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	u8 *runtime = fw->mem->kvaddr;
+	u64 image_end = fw->image_load_offset + fw->image_size;
+
+	memset(runtime, 0, fw->image_load_offset);
+	memcpy(runtime + fw->image_load_offset,
+	       fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size);
+
+	if (IVPU_WA(clear_runtime_mem)) {
+		u64 tail_size = fw->mem->base.size - image_end;
+
+		memset(runtime + image_end, 0, tail_size);
+	}
+
+	wmb(); /* Flush WC buffers after writing fw->mem */
+
+	return 0;
+}
+
+/*
+ * Dump the fields of @boot_params, one per line, through the FW_BOOT debug
+ * channel. Purely diagnostic: nothing is modified.
+ */
+static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
+{
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.magic = 0x%x\n",
+		 boot_params->magic);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_id = 0x%x\n",
+		 boot_params->vpu_id);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_count = 0x%x\n",
+		 boot_params->vpu_count);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.frequency = %u\n",
+		 boot_params->frequency);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.perf_clk_frequency = %u\n",
+		 boot_params->perf_clk_frequency);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_start = 0x%llx\n",
+		 boot_params->ipc_header_area_start);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_size = 0x%x\n",
+		 boot_params->ipc_header_area_size);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_base = 0x%llx\n",
+		 boot_params->shared_region_base);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_size = 0x%x\n",
+		 boot_params->shared_region_size);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_start = 0x%llx\n",
+		 boot_params->ipc_payload_area_start);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_size = 0x%x\n",
+		 boot_params->ipc_payload_area_size);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_base = 0x%llx\n",
+		 boot_params->global_aliased_pio_base);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_size = 0x%x\n",
+		 boot_params->global_aliased_pio_size);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.autoconfig = 0x%x\n",
+		 boot_params->autoconfig);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 0x%x\n",
+		 boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 0x%x\n",
+		 boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n",
+		 boot_params->global_memory_allocator_base);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n",
+		 boot_params->global_memory_allocator_size);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n",
+		 boot_params->shave_nn_fw_base);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_mss = 0x%x\n",
+		 boot_params->watchdog_irq_mss);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n",
+		 boot_params->watchdog_irq_nce);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n",
+		 boot_params->host_to_vpu_irq);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n",
+		 boot_params->job_done_irq);
+
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n",
+		 boot_params->host_version_id);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.si_stepping = 0x%x\n",
+		 boot_params->si_stepping);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.device_id = 0x%llx\n",
+		 boot_params->device_id);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.feature_exclusion = 0x%llx\n",
+		 boot_params->feature_exclusion);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.sku = 0x%llx\n",
+		 boot_params->sku);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.min_freq_pll_ratio = 0x%x\n",
+		 boot_params->min_freq_pll_ratio);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.pn_freq_pll_ratio = 0x%x\n",
+		 boot_params->pn_freq_pll_ratio);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.max_freq_pll_ratio = 0x%x\n",
+		 boot_params->max_freq_pll_ratio);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.default_trace_level = 0x%x\n",
+		 boot_params->default_trace_level);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.tracing_buff_message_format_mask = 0x%llx\n",
+		 boot_params->tracing_buff_message_format_mask);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_destination_mask = 0x%x\n",
+		 boot_params->trace_destination_mask);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_hw_component_mask = 0x%llx\n",
+		 boot_params->trace_hw_component_mask);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.boot_type = 0x%x\n",
+		 boot_params->boot_type);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_base = 0x%llx\n",
+		 boot_params->punit_telemetry_sram_base);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_size = 0x%llx\n",
+		 boot_params->punit_telemetry_sram_size);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
+		 boot_params->vpu_telemetry_enable);
+}
+
+/*
+ * Fill in the boot parameters handed to the firmware.
+ *
+ * On a warm boot (the next entry point differs from the cold boot entry
+ * point) only save_restore_ret_address is cleared and pm->is_warmboot is
+ * set. On a cold boot every field written below is populated from the
+ * device, hardware and IPC state, a write barrier is issued and the result
+ * is dumped to the debug log.
+ */
+void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
+{
+	struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
+
+	/* In case of warm boot we only have to reset the entrypoint addr */
+	if (!ivpu_fw_is_cold_boot(vdev)) {
+		boot_params->save_restore_ret_address = 0;
+		vdev->pm->is_warmboot = true;
+		return;
+	}
+
+	vdev->pm->is_warmboot = false;
+
+	boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
+	boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
+	boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
+
+	/*
+	 * Uncached region of VPU address space, covers IPC buffers, job queues
+	 * and log buffers, programmable to L2$ Uncached by VPU MTRR
+	 */
+	boot_params->shared_region_base = vdev->hw->ranges.global_low.start;
+	boot_params->shared_region_size = vdev->hw->ranges.global_low.end -
+					  vdev->hw->ranges.global_low.start;
+
+	/* The IPC RX buffer is split in half: headers first, payloads second */
+	boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
+	boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
+
+	boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
+	boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
+
+	boot_params->global_aliased_pio_base =
+		vdev->hw->ranges.global_aliased_pio.start;
+	boot_params->global_aliased_pio_size =
+		ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio);
+
+	/* Allow configuration for L2C_PAGE_TABLE with boot param value */
+	boot_params->autoconfig = 1;
+
+	/* Enable L2 cache for first 2GB of high memory */
+	boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1;
+	boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg =
+		ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start);
+
+	/* The SHAVE NN buffer only exists when the firmware declared a size */
+	if (vdev->fw->mem_shave_nn)
+		boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr;
+
+	boot_params->watchdog_irq_mss = WATCHDOG_MSS_REDIRECT;
+	boot_params->watchdog_irq_nce = WATCHDOG_NCE_REDIRECT;
+	boot_params->si_stepping = ivpu_revision(vdev);
+	boot_params->device_id = ivpu_device_id(vdev);
+	boot_params->feature_exclusion = vdev->hw->tile_fuse;
+	boot_params->sku = vdev->hw->sku;
+
+	boot_params->min_freq_pll_ratio = vdev->hw->pll.min_ratio;
+	boot_params->pn_freq_pll_ratio = vdev->hw->pll.pn_ratio;
+	boot_params->max_freq_pll_ratio = vdev->hw->pll.max_ratio;
+
+	boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
+	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
+	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+
+	wmb(); /* Flush WC buffers after writing bootparams */
+
+	ivpu_fw_boot_params_print(vdev, boot_params);
+}
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
new file mode 100644
index 000000000000..8d275c802d1c
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_FW_H__
+#define __IVPU_FW_H__
+
+struct ivpu_device;
+struct ivpu_bo;
+struct vpu_boot_params;
+
+/* Firmware file, the buffers it is loaded into and its parsed layout. */
+struct ivpu_fw_info {
+	const struct firmware *file; /* Firmware binary as returned by the firmware loader */
+	struct ivpu_bo *mem; /* Runtime buffer, mapped at runtime_addr */
+	struct ivpu_bo *mem_shave_nn; /* NULL when shave_nn_size is 0 */
+	struct ivpu_bo *mem_log_crit;
+	struct ivpu_bo *mem_log_verb;
+	u64 runtime_addr; /* VPU address of the runtime area */
+	u32 runtime_size; /* Size of the runtime area in bytes */
+	u64 image_load_offset; /* Offset of the image within the runtime area */
+	u32 image_size; /* Size of the firmware image in bytes */
+	u32 shave_nn_size; /* Page aligned SHAVE NN firmware size */
+	u64 entry_point; /* Cold or warm boot entry point for next boot */
+	u64 cold_boot_entry_point;
+};
+
+int ivpu_fw_init(struct ivpu_device *vdev);
+void ivpu_fw_fini(struct ivpu_device *vdev);
+int ivpu_fw_load(struct ivpu_device *vdev);
+void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
+
+/* True when the next boot will start at the cold boot entry point. */
+static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
+{
+	const struct ivpu_fw_info *fw = vdev->fw;
+
+	return fw->entry_point == fw->cold_boot_entry_point;
+}
+
+#endif /* __IVPU_FW_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
new file mode 100644
index 000000000000..01d47d3bad5b
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -0,0 +1,749 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/set_memory.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_cache.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static const struct drm_gem_object_funcs ivpu_gem_funcs;
+
+static struct lock_class_key prime_bo_lock_class_key;
+
+/* alloc_pages op for imported BOs: nothing to allocate, always returns 0. */
+static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
+{
+	/* Pages are managed by the underlying dma-buf */
+	return 0;
+}
+
+/* free_pages op for imported BOs: intentionally a no-op. */
+static void prime_free_pages_locked(struct ivpu_bo *bo)
+{
+	/* Pages are managed by the underlying dma-buf */
+}
+
+/*
+ * map_pages op for imported BOs: map the dma-buf attachment and store the
+ * resulting sg table in bo->sgt.
+ *
+ * Return: 0 on success or the error reported by the dma-buf mapping.
+ */
+static int prime_map_pages_locked(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct sg_table *sgt;
+
+	sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
+	if (!IS_ERR(sgt)) {
+		bo->sgt = sgt;
+		return 0;
+	}
+
+	ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
+	return PTR_ERR(sgt);
+}
+
+/* unmap_pages op for imported BOs: unmap the attachment and clear bo->sgt. */
+static void prime_unmap_pages_locked(struct ivpu_bo *bo)
+{
+	struct dma_buf_attachment *attach = bo->base.import_attach;
+
+	dma_buf_unmap_attachment_unlocked(attach, bo->sgt, DMA_BIDIRECTIONAL);
+	bo->sgt = NULL;
+}
+
+/* Backing-store operations for BOs imported from a dma-buf. */
+static const struct ivpu_bo_ops prime_ops = {
+	.type = IVPU_BO_TYPE_PRIME,
+	.name = "prime",
+	.alloc_pages = prime_alloc_pages_locked,
+	.free_pages = prime_free_pages_locked,
+	.map_pages = prime_map_pages_locked,
+	.unmap_pages = prime_unmap_pages_locked,
+};
+
+/*
+ * alloc_pages op for shmem BOs: get the backing pages from the GEM object
+ * and switch them to write-combined or uncached if the BO flags ask for it.
+ *
+ * Return: 0 on success or the error from drm_gem_get_pages().
+ */
+static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
+{
+	struct page **pages = drm_gem_get_pages(&bo->base);
+	int npages;
+
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	npages = bo->base.size >> PAGE_SHIFT;
+
+	if (bo->flags & DRM_IVPU_BO_WC)
+		set_pages_array_wc(pages, npages);
+	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+		set_pages_array_uc(pages, npages);
+
+	bo->pages = pages;
+	return 0;
+}
+
+/*
+ * free_pages op for shmem BOs: restore write-back caching on pages whose
+ * cache mode was changed, then hand the pages back to the GEM object.
+ */
+static void shmem_free_pages_locked(struct ivpu_bo *bo)
+{
+	bool cached = ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
+
+	if (!cached)
+		set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
+	drm_gem_put_pages(&bo->base, bo->pages, true, false);
+	bo->pages = NULL;
+}
+
+/*
+ * map_pages op for shmem and internal BOs: build an sg table from
+ * bo->pages, DMA-map it for the device and store it in bo->sgt.
+ *
+ * Return: 0 on success or a negative error code. On failure bo->sgt is left
+ * untouched and the sg table is fully released.
+ */
+static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
+{
+	int npages = bo->base.size >> PAGE_SHIFT;
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct sg_table *sgt;
+	int ret;
+
+	sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
+	if (IS_ERR(sgt)) {
+		ivpu_err(vdev, "Failed to allocate sgtable\n");
+		return PTR_ERR(sgt);
+	}
+
+	ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
+	if (ret) {
+		ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+		goto err_free_sgt;
+	}
+
+	bo->sgt = sgt;
+	return 0;
+
+err_free_sgt:
+	/* Free the scatterlist entries too, kfree() alone would leak them */
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ret;
+}
+
+/*
+ * unmap_pages op for shmem and internal BOs: DMA-unmap the sg table, free
+ * it and clear bo->sgt.
+ */
+static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct sg_table *sgt = bo->sgt;
+
+	dma_unmap_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/* Backing-store operations for BOs backed by GEM-managed pages. */
+static const struct ivpu_bo_ops shmem_ops = {
+	.type = IVPU_BO_TYPE_SHMEM,
+	.name = "shmem",
+	.alloc_pages = shmem_alloc_pages_locked,
+	.free_pages = shmem_free_pages_locked,
+	.map_pages = ivpu_bo_map_pages_locked,
+	.unmap_pages = ivpu_bo_unmap_pages_locked,
+};
+
+/*
+ * alloc_pages op for internal BOs: allocate one zeroed page at a time and
+ * collect them in a page array stored in bo->pages.
+ *
+ * Return: 0 on success, -ENOMEM if the array or any page cannot be
+ * allocated; pages allocated so far are released in that case.
+ */
+static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
+{
+	unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+	struct page **pages;
+
+	pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < npages; i++) {
+		struct page *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+
+		if (!page)
+			goto err_free_pages;
+
+		pages[i] = page;
+		cond_resched();
+	}
+
+	bo->pages = pages;
+	return 0;
+
+err_free_pages:
+	/* Drop the pages obtained before the failing iteration */
+	while (i--)
+		put_page(pages[i]);
+	kvfree(pages);
+	return -ENOMEM;
+}
+
+/* free_pages op for internal BOs: put every page and free the page array. */
+static void internal_free_pages_locked(struct ivpu_bo *bo)
+{
+	unsigned int npages = bo->base.size >> PAGE_SHIFT;
+	unsigned int idx = 0;
+
+	while (idx < npages) {
+		put_page(bo->pages[idx]);
+		idx++;
+	}
+
+	kvfree(bo->pages);
+	bo->pages = NULL;
+}
+
+/* Backing-store operations for driver-internal BOs using alloc_page(). */
+static const struct ivpu_bo_ops internal_ops = {
+	.type = IVPU_BO_TYPE_INTERNAL,
+	.name = "internal",
+	.alloc_pages = internal_alloc_pages_locked,
+	.free_pages = internal_free_pages_locked,
+	.map_pages = ivpu_bo_map_pages_locked,
+	.unmap_pages = ivpu_bo_unmap_pages_locked,
+};
+
+/*
+ * Allocate the backing pages of @bo and map them through the BO's ops.
+ *
+ * Must be called with bo->lock held and with no sg table attached yet.
+ * If mapping fails, the pages allocated here are freed again.
+ *
+ * Return: 0 on success or the error returned by the failing op.
+ */
+static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	int ret;
+
+	lockdep_assert_held(&bo->lock);
+	drm_WARN_ON(&vdev->drm, bo->sgt);
+
+	ret = bo->ops->alloc_pages(bo);
+	if (ret) {
+		ivpu_err(vdev, "Failed to allocate pages for BO: %d\n", ret);
+		return ret;
+	}
+
+	ret = bo->ops->map_pages(bo);
+	if (ret) {
+		ivpu_err(vdev, "Failed to map pages for BO: %d\n", ret);
+		goto err_free_pages;
+	}
+	return ret;
+
+err_free_pages:
+	bo->ops->free_pages(bo);
+	return ret;
+}
+
+/*
+ * Unmap and free the backing pages of @bo under bo->lock.
+ *
+ * The WARN_ONs check the expected state transitions: an sg table exists on
+ * entry, is gone after unmap_pages, and bo->pages is NULL after free_pages.
+ */
+static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
+{
+	mutex_lock(&bo->lock);
+
+	WARN_ON(!bo->sgt);
+	bo->ops->unmap_pages(bo);
+	WARN_ON(bo->sgt);
+	bo->ops->free_pages(bo);
+	WARN_ON(bo->pages);
+
+	mutex_unlock(&bo->lock);
+}
+
+/*
+ * ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
+ *
+ * This function pins physical memory pages, then maps the physical pages
+ * to IOMMU address space and finally updates the VPU MMU page tables
+ * to allow the VPU to translate VPU address to IOMMU address.
+ *
+ * Steps already done by an earlier call (pages mapped, MMU mapped) are
+ * skipped. All work is done under bo->lock.
+ *
+ * Return: 0 on success, -EINVAL if the BO has no VPU address assigned,
+ * or the error of the failing allocation/mapping step.
+ */
+int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	int ret = 0;
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->vpu_addr) {
+		/*
+		 * vpu_addr and ctx are assigned and cleared together, so ctx is
+		 * normally NULL here and must not be dereferenced blindly.
+		 */
+		if (bo->ctx)
+			ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
+				 bo->ctx->id, bo->handle);
+		else
+			ivpu_err(vdev, "vpu_addr not set for BO without context, handle: %d\n",
+				 bo->handle);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	if (!bo->sgt) {
+		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+		if (ret)
+			goto unlock;
+	}
+
+	if (!bo->mmu_mapped) {
+		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+					       ivpu_bo_is_snooped(bo));
+		if (ret) {
+			ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
+			goto unlock;
+		}
+		bo->mmu_mapped = true;
+	}
+
+unlock:
+	mutex_unlock(&bo->lock);
+
+	return ret;
+}
+
+/*
+ * Reserve a VPU address for @bo in @ctx and link the BO into the context.
+ *
+ * When @range is NULL the user_high or user_low range is picked depending
+ * on DRM_IVPU_BO_HIGH_MEM. On success bo->ctx and bo->vpu_addr are set and
+ * the BO is appended to ctx->bo_list, all under ctx->lock.
+ *
+ * Return: 0 on success or the error from the address space allocator.
+ */
+static int
+ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
+		       const struct ivpu_addr_range *range)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	int ret;
+
+	if (!range)
+		range = (bo->flags & DRM_IVPU_BO_HIGH_MEM) ? &vdev->hw->ranges.user_high :
+							     &vdev->hw->ranges.user_low;
+
+	mutex_lock(&ctx->lock);
+	ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
+	if (ret == 0) {
+		bo->ctx = ctx;
+		bo->vpu_addr = bo->mm_node.start;
+		list_add_tail(&bo->ctx_node, &ctx->bo_list);
+	}
+	mutex_unlock(&ctx->lock);
+
+	return ret;
+}
+
+/*
+ * Detach @bo from its MMU context.
+ *
+ * Removes the MMU mapping if there is one, unlinks the BO from the context
+ * BO list, clears vpu_addr/ctx and returns the address range to the context.
+ * Lock order: bo->lock is taken first, ctx->lock nested inside it.
+ * The caller must ensure bo->ctx is not NULL.
+ */
+static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+{
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct ivpu_mmu_context *ctx = bo->ctx;
+
+	ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
+		 ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+
+	mutex_lock(&bo->lock);
+
+	if (bo->mmu_mapped) {
+		drm_WARN_ON(&vdev->drm, !bo->sgt);
+		ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+		bo->mmu_mapped = false;
+	}
+
+	mutex_lock(&ctx->lock);
+	list_del(&bo->ctx_node);
+	bo->vpu_addr = 0;
+	bo->ctx = NULL;
+	ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
+	mutex_unlock(&ctx->lock);
+
+	mutex_unlock(&bo->lock);
+}
+
+/*
+ * Detach every BO on ctx->bo_list from @ctx.
+ *
+ * The _safe iterator is required because ivpu_bo_free_vpu_addr() unlinks
+ * the current entry. The list walk itself is done without ctx->lock
+ * (ivpu_bo_free_vpu_addr() takes it internally).
+ * NOTE(review): presumably callers guarantee no concurrent list users - confirm.
+ */
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+{
+	struct ivpu_bo *bo, *tmp;
+
+	list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
+		ivpu_bo_free_vpu_addr(bo);
+}
+
+/*
+ * Allocate and initialize a BO of @size bytes using backing-store @ops.
+ *
+ * @size must be non-zero and page aligned, and the cache bits of @flags must
+ * select exactly one known cache mode. Shmem BOs get a GEM object with
+ * backing storage, other types a private GEM object. An mmap offset is
+ * created for DRM_IVPU_BO_MAPPABLE BOs. When @mmu_context is not NULL a VPU
+ * address is reserved in it (from @range, or a default range if NULL).
+ * No backing pages are allocated here.
+ *
+ * Return: the new BO or an ERR_PTR() on failure.
+ */
+static struct ivpu_bo *
+ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
+	      u64 size, u32 flags, const struct ivpu_bo_ops *ops,
+	      const struct ivpu_addr_range *range, u64 user_ptr)
+{
+	struct ivpu_bo *bo;
+	int ret = 0;
+
+	if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
+		return ERR_PTR(-EINVAL);
+
+	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
+	case DRM_IVPU_BO_CACHED:
+	case DRM_IVPU_BO_UNCACHED:
+	case DRM_IVPU_BO_WC:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&bo->lock);
+	bo->base.funcs = &ivpu_gem_funcs;
+	bo->flags = flags;
+	bo->ops = ops;
+	bo->user_ptr = user_ptr;
+
+	/* Only the shmem variant can fail; ret stays 0 for private objects */
+	if (ops->type == IVPU_BO_TYPE_SHMEM)
+		ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
+	else
+		drm_gem_private_object_init(&vdev->drm, &bo->base, size);
+
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize drm object\n");
+		goto err_free;
+	}
+
+	if (flags & DRM_IVPU_BO_MAPPABLE) {
+		ret = drm_gem_create_mmap_offset(&bo->base);
+		if (ret) {
+			ivpu_err(vdev, "Failed to allocate mmap offset\n");
+			goto err_release;
+		}
+	}
+
+	if (mmu_context) {
+		ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
+		if (ret) {
+			ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
+			goto err_release;
+		}
+	}
+
+	return bo;
+
+err_release:
+	drm_gem_object_release(&bo->base);
+err_free:
+	kfree(bo);
+	return ERR_PTR(ret);
+}
+
+/*
+ * GEM .free callback: tear down a BO in reverse order of its setup.
+ *
+ * Drops the kernel mapping, detaches the BO from its MMU context (if it
+ * still has one), unmaps and frees the backing pages, destroys the prime
+ * import state for imported BOs and finally releases the GEM object and
+ * the BO itself.
+ */
+static void ivpu_bo_free(struct drm_gem_object *obj)
+{
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+	if (bo->ctx)
+		ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
+			 bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+	else
+		ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
+			 (bool)bo->sgt, bo->mmu_mapped);
+
+	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+
+	vunmap(bo->kvaddr);
+
+	if (bo->ctx)
+		ivpu_bo_free_vpu_addr(bo);
+
+	if (bo->sgt)
+		ivpu_bo_unmap_and_free_pages(bo);
+
+	/* bo->sgt is NULL by now: it was either never set or cleared above */
+	if (bo->base.import_attach)
+		drm_prime_gem_destroy(&bo->base, bo->sgt);
+
+	drm_gem_object_release(&bo->base);
+
+	mutex_destroy(&bo->lock);
+	kfree(bo);
+}
+
+/*
+ * GEM .mmap callback.
+ *
+ * Imported BOs are forwarded to the exporter through dma_buf_mmap(). For
+ * all other BOs the VMA is set up for PFN mappings with the page protection
+ * matching the BO cache mode; pages are inserted later by the fault handler.
+ *
+ * Return: 0 on success or the error from dma_buf_mmap().
+ */
+static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+	/* A BO may have no context (e.g. freshly imported), don't deref NULL */
+	if (bo->ctx)
+		ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s\n",
+			 bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
+	else
+		ivpu_dbg(vdev, BO, "mmap: ctx (none) handle %u size %zu type %s\n",
+			 bo->handle, bo->base.size, bo->ops->name);
+
+	if (obj->import_attach) {
+		/* Drop the reference drm_gem_mmap_obj() acquired.*/
+		drm_gem_object_put(obj);
+		vma->vm_private_data = NULL;
+		return dma_buf_mmap(obj->dma_buf, vma, 0);
+	}
+
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
+
+	return 0;
+}
+
+/*
+ * GEM .get_sg_table callback: make sure the BO has backing pages, then
+ * build a new sg table from them.
+ *
+ * Return: the sg table or an ERR_PTR() on failure.
+ */
+static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
+{
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	loff_t npages = obj->size >> PAGE_SHIFT;
+	int err = 0;
+
+	mutex_lock(&bo->lock);
+	if (!bo->sgt)
+		err = ivpu_bo_alloc_and_map_pages_locked(bo);
+	mutex_unlock(&bo->lock);
+
+	return err ? ERR_PTR(err) : drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
+}
+
+/*
+ * Page fault handler for mmap()ed BOs.
+ *
+ * Allocates and maps the backing pages on first access, then inserts the
+ * PFN of the faulting page into the VMA. Faults beyond the end of the
+ * object yield VM_FAULT_SIGBUS.
+ */
+static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	loff_t npages = obj->size >> PAGE_SHIFT;
+	pgoff_t page_offset;
+	vm_fault_t ret;
+	int err;
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->sgt) {
+		err = ivpu_bo_alloc_and_map_pages_locked(bo);
+		if (err) {
+			ret = vmf_error(err);
+			goto unlock;
+		}
+	}
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	if (page_offset < npages) {
+		unsigned long pfn = page_to_pfn(bo->pages[page_offset]);
+
+		ret = vmf_insert_pfn(vma, vmf->address, pfn);
+	} else {
+		ret = VM_FAULT_SIGBUS;
+	}
+
+unlock:
+	mutex_unlock(&bo->lock);
+
+	return ret;
+}
+
+/* VMA operations for mmap()ed BOs; pages are populated lazily on fault. */
+static const struct vm_operations_struct ivpu_vm_ops = {
+	.fault = ivpu_vm_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+/* GEM object callbacks shared by all BO types of this driver. */
+static const struct drm_gem_object_funcs ivpu_gem_funcs = {
+	.free = ivpu_bo_free,
+	.mmap = ivpu_bo_mmap,
+	.vm_ops = &ivpu_vm_ops,
+	.get_sg_table = ivpu_bo_get_sg_table,
+};
+
+/*
+ * BO create ioctl: allocate a shmem BO in the caller's context.
+ *
+ * The requested size is rounded up to a full page. On success the GEM
+ * handle and the VPU address are returned in @data. The allocation
+ * reference is always dropped before returning, so afterwards the BO is
+ * kept alive only by the handle (or freed if handle creation failed).
+ *
+ * Return: 0 on success, -EINVAL for unknown flags or a zero size, or the
+ * error from BO allocation / handle creation.
+ */
+int
+ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct drm_ivpu_bo_create *args = data;
+	u64 size = PAGE_ALIGN(args->size);
+	struct ivpu_bo *bo;
+	int ret;
+
+	if (args->flags & ~DRM_IVPU_BO_FLAGS)
+		return -EINVAL;
+
+	if (size == 0)
+		return -EINVAL;
+
+	bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+	if (IS_ERR(bo)) {
+		ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)\n",
+			 bo, file_priv->ctx.id, args->size, args->flags);
+		return PTR_ERR(bo);
+	}
+
+	ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+	if (!ret) {
+		args->vpu_addr = bo->vpu_addr;
+		args->handle = bo->handle;
+	}
+
+	/*
+	 * Log before dropping our reference: once it is put the BO may be
+	 * freed (handle creation failed, or userspace closed the handle).
+	 */
+	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
+
+	drm_gem_object_put(&bo->base);
+
+	return ret;
+}
+
+/*
+ * Allocate a driver-internal BO in the global context, pin it and map it
+ * into kernel address space (bo->kvaddr).
+ *
+ * A non-zero @vpu_addr places the BO exactly at that VPU address; zero lets
+ * the allocator pick an address in the global_low range. @vpu_addr and
+ * @size are expected to be page aligned. Pages of BOs that are not
+ * DRM_IVPU_BO_CACHED are flushed from the CPU caches before the mapping.
+ *
+ * Return: the BO on success, NULL on any failure.
+ */
+struct ivpu_bo *
+ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags)
+{
+	const struct ivpu_addr_range *range;
+	struct ivpu_addr_range fixed_range;
+	struct ivpu_bo *bo;
+	pgprot_t prot;
+	int ret;
+
+	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
+	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
+
+	if (vpu_addr) {
+		/* A range of exactly @size bytes forces the requested address */
+		fixed_range.start = vpu_addr;
+		fixed_range.end = vpu_addr + size;
+		range = &fixed_range;
+	} else {
+		range = &vdev->hw->ranges.global_low;
+	}
+
+	bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+	if (IS_ERR(bo)) {
+		ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)\n",
+			 bo, vpu_addr, size, flags);
+		return NULL;
+	}
+
+	ret = ivpu_bo_pin(bo);
+	if (ret)
+		goto err_put;
+
+	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+		drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
+
+	prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
+	bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
+	if (!bo->kvaddr) {
+		ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+		goto err_put;
+	}
+
+	ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 bo->vpu_addr, bo->base.size, flags);
+
+	return bo;
+
+err_put:
+	drm_gem_object_put(&bo->base);
+	return NULL;
+}
+
+/*
+ * Drop the reference to a BO obtained from ivpu_bo_alloc_internal().
+ * @bo must not be NULL: it is dereferenced unconditionally.
+ */
+void ivpu_bo_free_internal(struct ivpu_bo *bo)
+{
+	drm_gem_object_put(&bo->base);
+}
+
+/*
+ * Import a dma-buf as a BO.
+ *
+ * Attaches the device to @buf, takes a reference on the dma-buf and wraps
+ * it in a mappable BO that has no MMU context and no VPU address yet. On
+ * failure the attachment and the dma-buf reference are released again.
+ *
+ * Return: the new GEM object or an ERR_PTR() on failure.
+ */
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
+{
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct dma_buf_attachment *attach;
+	struct ivpu_bo *bo;
+
+	attach = dma_buf_attach(buf, dev->dev);
+	if (IS_ERR(attach))
+		return ERR_CAST(attach);
+
+	get_dma_buf(buf);
+
+	bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
+	if (IS_ERR(bo)) {
+		/* buf->size is a size_t, hence %zu */
+		ivpu_err(vdev, "Failed to import BO: %pe (size %zu)\n", bo, buf->size);
+		goto err_detach;
+	}
+
+	/* Imported BOs get their own lockdep class for bo->lock */
+	lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
+
+	bo->base.import_attach = attach;
+
+	return &bo->base;
+
+err_detach:
+	dma_buf_detach(buf, attach);
+	dma_buf_put(buf);
+	return ERR_CAST(bo);
+}
+
+/*
+ * BO info ioctl: report flags, mmap offset, VPU address and size of a BO.
+ *
+ * A BO that is not attached to any context yet gets a VPU address in the
+ * caller's context first.
+ *
+ * Return: 0 on success, -ENOENT for an unknown handle, or the error from
+ * the VPU address allocation.
+ */
+int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct drm_ivpu_bo_info *args = data;
+	struct drm_gem_object *obj;
+	struct ivpu_bo *bo;
+	int ret = 0;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	bo = to_ivpu_bo(obj);
+
+	mutex_lock(&bo->lock);
+
+	if (!bo->ctx)
+		ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
+
+	if (ret) {
+		ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
+	} else {
+		args->flags = bo->flags;
+		args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
+		args->vpu_addr = bo->vpu_addr;
+		args->size = obj->size;
+	}
+
+	mutex_unlock(&bo->lock);
+	drm_gem_object_put(obj);
+	return ret;
+}
+
+/*
+ * BO wait ioctl: wait interruptibly until all fences on the BO's
+ * reservation object have signaled or the absolute timeout expires.
+ *
+ * Return: 0 on success (job_status is filled in), -ETIMEDOUT on timeout,
+ * -EINVAL for an unknown handle, or the negative error from the wait.
+ */
+int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_ivpu_bo_wait *args = data;
+	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+	struct drm_gem_object *obj;
+	long ret;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -EINVAL;
+
+	ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout);
+	if (ret > 0) {
+		ret = 0;
+		args->job_status = to_ivpu_bo(obj)->job_status;
+	} else if (ret == 0) {
+		ret = -ETIMEDOUT;
+	}
+
+	drm_gem_object_put(obj);
+
+	return ret;
+}
+
+/*
+ * Print one table row describing @bo to @p. The dma_refcount column is the
+ * file reference count of the BO's dma-buf, or 0 if it has none.
+ * bo->ctx must not be NULL.
+ */
+static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
+{
+	struct dma_buf *dmabuf = bo->base.dma_buf;
+	unsigned long dma_refcount = 0;
+
+	if (dmabuf && dmabuf->file)
+		dma_refcount = atomic_long_read(&dmabuf->file->f_count);
+
+	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
+		   bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
+		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+}
+
+/*
+ * Print a table of all BOs attached to a context: first those of the
+ * global context, then those of every user context found in context_xa.
+ * Each BO list is walked under the lock of the context that owns it.
+ */
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
+{
+	struct ivpu_device *vdev = to_ivpu_device(dev);
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+	struct ivpu_bo *bo;
+
+	drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
+		   "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+
+	mutex_lock(&vdev->gctx.lock);
+	list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+		ivpu_bo_print_info(bo, p);
+	mutex_unlock(&vdev->gctx.lock);
+
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+		/* Look the entry up again by ID to obtain a reference to it */
+		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
+		if (!file_priv)
+			continue;
+
+		mutex_lock(&file_priv->ctx.lock);
+		list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
+			ivpu_bo_print_info(bo, p);
+		mutex_unlock(&file_priv->ctx.lock);
+
+		ivpu_file_priv_put(&file_priv);
+	}
+}
+
+/* Print the BO table of @dev through a drm_info_printer on its device. */
+void ivpu_bo_list_print(struct drm_device *dev)
+{
+	struct drm_printer p = drm_info_printer(dev->dev);
+
+	ivpu_bo_list(dev, &p);
+}
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
new file mode 100644
index 000000000000..6b0ceda5f253
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+#ifndef __IVPU_GEM_H__
+#define __IVPU_GEM_H__
+
+#include <drm/drm_gem.h>
+#include <drm/drm_mm.h>
+
+struct dma_buf;
+struct ivpu_bo_ops;
+struct ivpu_file_priv;
+
+/* VPU buffer object; embeds the DRM GEM object it is built on. */
+struct ivpu_bo {
+	struct drm_gem_object base; /* Embedded GEM object, see to_ivpu_bo() */
+	const struct ivpu_bo_ops *ops; /* Type specific page callbacks and type name */
+
+	struct ivpu_mmu_context *ctx; /* Context the BO is attached to */
+	struct list_head ctx_node; /* Entry in the context's bo_list */
+	struct drm_mm_node mm_node;
+
+	struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
+	struct sg_table *sgt;
+	struct page **pages; /* Indexed by page number, see ivpu_bo_get_page() */
+	bool mmu_mapped;
+
+	void *kvaddr; /* CPU address of the BO start, see ivpu_to_cpu_addr() */
+	u64 vpu_addr; /* VPU address of the BO start */
+	u32 handle;
+	u32 flags; /* DRM_IVPU_BO_* flags, includes the cache mode bits */
+	uintptr_t user_ptr;
+	u32 job_status; /* Copied to the wait ioctl args after a successful wait */
+};
+
+/* Kind of buffer object, stored in ivpu_bo_ops::type. Values start at 1. */
+enum ivpu_bo_type {
+	IVPU_BO_TYPE_SHMEM = 1,
+	IVPU_BO_TYPE_INTERNAL,
+	IVPU_BO_TYPE_PRIME,
+};
+
+/* Per BO type operations; one instance is referenced by ivpu_bo::ops. */
+struct ivpu_bo_ops {
+	enum ivpu_bo_type type;
+	const char *name; /* Printed in the "type" column of the BO list */
+	/* Page handling callbacks; alloc/map return an int status */
+	int (*alloc_pages)(struct ivpu_bo *bo);
+	void (*free_pages)(struct ivpu_bo *bo);
+	int (*map_pages)(struct ivpu_bo *bo);
+	void (*unmap_pages)(struct ivpu_bo *bo);
+};
+
+/* BO pinning, context teardown and debug listing helpers */
+int ivpu_bo_pin(struct ivpu_bo *bo);
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
+
+/* Driver internal BOs and dma-buf import */
+struct ivpu_bo *
+ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+void ivpu_bo_free_internal(struct ivpu_bo *bo);
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
+void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
+
+/* IOCTL entry points (dev, ioctl data, DRM file) */
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+/* Convert a GEM object pointer to the struct ivpu_bo that embeds it. */
+static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
+{
+	struct ivpu_bo *bo = container_of(obj, struct ivpu_bo, base);
+
+	return bo;
+}
+
+/*
+ * Return the backing page that holds byte @offset of @bo.
+ *
+ * Returns NULL when the BO has no pages array or when @offset is not inside
+ * the object. Valid offsets are 0 .. size - 1: an offset equal to the size
+ * would select the entry just past the last page covering the object, so it
+ * is rejected as well.
+ */
+static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
+{
+	if (offset >= bo->base.size || !bo->pages)
+		return NULL;
+
+	return bo->pages[offset / PAGE_SIZE];
+}
+
+/* Extract the cache mode bits (DRM_IVPU_BO_CACHE_MASK) from the BO flags. */
+static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
+{
+	u32 cache_bits = bo->flags & DRM_IVPU_BO_CACHE_MASK;
+
+	return cache_bits;
+}
+
+/* True when the BO cache mode is DRM_IVPU_BO_CACHED. */
+static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
+{
+	u32 mode = ivpu_bo_cache_mode(bo);
+
+	return mode == DRM_IVPU_BO_CACHED;
+}
+
+/*
+ * Adjust page protection @prot according to the BO flags: write-combine when
+ * DRM_IVPU_BO_WC is set, otherwise non-cached when DRM_IVPU_BO_UNCACHED is
+ * set, otherwise @prot is returned unchanged.
+ */
+static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
+{
+	pgprot_t result = prot;
+
+	if (bo->flags & DRM_IVPU_BO_WC)
+		result = pgprot_writecombine(prot);
+	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+		result = pgprot_noncached(prot);
+
+	return result;
+}
+
+/* Return the ivpu device that owns the GEM object embedded in @bo. */
+static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
+{
+	struct drm_device *drm = bo->base.dev;
+
+	return to_ivpu_device(drm);
+}
+
+/*
+ * Translate VPU address @vpu_addr into the matching CPU address inside @bo.
+ * Returns NULL when @vpu_addr is outside [bo->vpu_addr, bo->vpu_addr + size).
+ */
+static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
+{
+	u64 bo_start = bo->vpu_addr;
+	u64 bo_end = bo_start + bo->base.size;
+
+	if (vpu_addr < bo_start || vpu_addr >= bo_end)
+		return NULL;
+
+	return bo->kvaddr + (vpu_addr - bo_start);
+}
+
+/*
+ * Translate CPU address @cpu_addr into the matching VPU address inside @bo.
+ * Returns 0 when @cpu_addr is outside [bo->kvaddr, bo->kvaddr + size).
+ */
+static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
+{
+	void *cpu_start = bo->kvaddr;
+	void *cpu_end = cpu_start + bo->base.size;
+
+	if (cpu_addr < cpu_start || cpu_addr >= cpu_end)
+		return 0;
+
+	return bo->vpu_addr + (cpu_addr - cpu_start);
+}
+
+#endif /* __IVPU_GEM_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
new file mode 100644
index 000000000000..50a9304ab09c
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_H__
+#define __IVPU_HW_H__
+
+#include "ivpu_drv.h"
+
+/*
+ * Hardware specific callbacks. They are reached through vdev->hw->ops by the
+ * ivpu_hw_*() inline wrappers in this header.
+ */
+struct ivpu_hw_ops {
+	/* Initialization, power and firmware boot */
+	int (*info_init)(struct ivpu_device *vdev);
+	int (*power_up)(struct ivpu_device *vdev);
+	int (*boot_fw)(struct ivpu_device *vdev);
+	int (*power_down)(struct ivpu_device *vdev);
+	bool (*is_idle)(struct ivpu_device *vdev);
+	void (*wdt_disable)(struct ivpu_device *vdev);
+	void (*diagnose_failure)(struct ivpu_device *vdev);
+	/* Register accessors */
+	u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+	u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
+	u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
+	u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
+	void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id);
+	u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev);
+	u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev);
+	void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr);
+	/* Interrupt control */
+	void (*irq_clear)(struct ivpu_device *vdev);
+	void (*irq_enable)(struct ivpu_device *vdev);
+	void (*irq_disable)(struct ivpu_device *vdev);
+	irqreturn_t (*irq_handler)(int irq, void *ptr);
+};
+
+/*
+ * Address range [start, end). ivpu_hw_init_range() sets end = start + size,
+ * so end is the first address past the range.
+ */
+struct ivpu_addr_range {
+	resource_size_t start;
+	resource_size_t end;
+};
+
+/* Per device hardware description, filled in by the info_init() callback. */
+struct ivpu_hw_info {
+	const struct ivpu_hw_ops *ops;
+	/* VPU address ranges, see ivpu_hw_init_range() */
+	struct {
+		struct ivpu_addr_range global_low;
+		struct ivpu_addr_range global_high;
+		struct ivpu_addr_range user_low;
+		struct ivpu_addr_range user_high;
+		struct ivpu_addr_range global_aliased_pio;
+	} ranges;
+	/* PLL ratio limits and related frequency settings */
+	struct {
+		u8 min_ratio;
+		u8 max_ratio;
+		/*
+		 * Pll ratio for the efficiency frequency. The VPU has optimum
+		 * performance to power ratio at this frequency.
+		 */
+		u8 pn_ratio;
+		u32 profiling_freq;
+	} pll;
+	u32 tile_fuse; /* SKU field read from the tile fuse register */
+	u32 sku;
+	u16 config; /* Work point configuration sent with PLL requests */
+};
+
+extern const struct ivpu_hw_ops ivpu_hw_mtl_ops;
+
+/* Call the info_init() HW callback and return its result. */
+static inline int ivpu_hw_info_init(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->info_init(vdev);
+}
+
+/* Log the transition and call the power_up() HW callback; returns its result. */
+static inline int ivpu_hw_power_up(struct ivpu_device *vdev)
+{
+	ivpu_dbg(vdev, PM, "HW power up\n");
+
+	return vdev->hw->ops->power_up(vdev);
+}
+
+/* Call the boot_fw() HW callback and return its result. */
+static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->boot_fw(vdev);
+}
+
+/* Call the is_idle() HW callback and return its result. */
+static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->is_idle(vdev);
+}
+
+/* Log the transition and call the power_down() HW callback; returns its result. */
+static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
+{
+	ivpu_dbg(vdev, PM, "HW power down\n");
+
+	return vdev->hw->ops->power_down(vdev);
+}
+
+/* Call the wdt_disable() HW callback. */
+static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
+{
+	vdev->hw->ops->wdt_disable(vdev);
+}
+
+/* Register indirect accesses */
+/* Return the value reported by the reg_pll_freq_get() HW callback. */
+static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->reg_pll_freq_get(vdev);
+}
+
+/* Return the value reported by the reg_telemetry_offset_get() HW callback. */
+static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->reg_telemetry_offset_get(vdev);
+}
+
+/* Return the value reported by the reg_telemetry_size_get() HW callback. */
+static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->reg_telemetry_size_get(vdev);
+}
+
+/* Return the value reported by the reg_telemetry_enable_get() HW callback. */
+static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->reg_telemetry_enable_get(vdev);
+}
+
+/* Pass @db_id to the reg_db_set() HW callback. */
+static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id)
+{
+	vdev->hw->ops->reg_db_set(vdev, db_id);
+}
+
+/* Return the value reported by the reg_ipc_rx_addr_get() HW callback. */
+static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->reg_ipc_rx_addr_get(vdev);
+}
+
+/* Return the value reported by the reg_ipc_rx_count_get() HW callback. */
+static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->reg_ipc_rx_count_get(vdev);
+}
+
+/* Pass @vpu_addr to the reg_ipc_tx_set() HW callback. */
+static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
+{
+	vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr);
+}
+
+/* Call the irq_clear() HW callback. */
+static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
+{
+	vdev->hw->ops->irq_clear(vdev);
+}
+
+/* Call the irq_enable() HW callback. */
+static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev)
+{
+	vdev->hw->ops->irq_enable(vdev);
+}
+
+/* Call the irq_disable() HW callback. */
+static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev)
+{
+	vdev->hw->ops->irq_disable(vdev);
+}
+
+/* Set @range to cover @size bytes starting at @start (end = start + size). */
+static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size)
+{
+	u64 range_end = start + size;
+
+	range->start = start;
+	range->end = range_end;
+}
+
+/* Return the length of @range, i.e. end minus start. */
+static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
+{
+	u64 length = range->end - range->start;
+
+	return length;
+}
+
+/* Call the diagnose_failure() HW callback. */
+static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev)
+{
+	const struct ivpu_hw_ops *hw_ops = vdev->hw->ops;
+
+	hw_ops->diagnose_failure(vdev);
+}
+
+#endif /* __IVPU_HW_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c
new file mode 100644
index 000000000000..62bfaa9081c4
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_mtl.c
@@ -0,0 +1,1084 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include "ivpu_drv.h"
+#include "ivpu_fw.h"
+#include "ivpu_hw_mtl_reg.h"
+#include "ivpu_hw_reg_io.h"
+#include "ivpu_hw.h"
+#include "ivpu_ipc.h"
+#include "ivpu_mmu.h"
+#include "ivpu_pm.h"
+
+/* Values of the SKU field of the tile fuse register */
+#define TILE_FUSE_ENABLE_BOTH	     0x0
+#define TILE_FUSE_ENABLE_UPPER	     0x1
+#define TILE_FUSE_ENABLE_LOWER	     0x2
+
+/* SKU identifiers stored in hw->sku for each tile fuse setting */
+#define TILE_SKU_BOTH_MTL	     0x3630
+#define TILE_SKU_LOWER_MTL	     0x3631
+#define TILE_SKU_UPPER_MTL	     0x3632
+
+/* Work point configuration values */
+#define WP_CONFIG_1_TILE_5_3_RATIO   0x0101
+#define WP_CONFIG_1_TILE_4_3_RATIO   0x0102
+#define WP_CONFIG_2_TILE_5_3_RATIO   0x0201
+#define WP_CONFIG_2_TILE_4_3_RATIO   0x0202
+#define WP_CONFIG_0_TILE_PLL_OFF     0x0000
+
+/* PLL frequencies in Hz; a PLL ratio is a multiple of the reference clock */
+#define PLL_REF_CLK_FREQ	     (50 * 1000000)
+#define PLL_SIMULATION_FREQ	     (10 * 1000000)
+#define PLL_RATIO_TO_FREQ(x)	     ((x) * PLL_REF_CLK_FREQ)
+#define PLL_DEFAULT_EPP_VALUE	     0x80
+
+/* Values written to the timer/watchdog registers by the WDT disable path */
+#define TIM_SAFE_ENABLE		     0xf1d0dead
+#define TIM_WATCHDOG_RESET_VALUE     0xffffffff
+
+/* Register polling timeouts, in microseconds */
+#define TIMEOUT_US		     (150 * USEC_PER_MSEC)
+#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
+#define PLL_TIMEOUT_US		     (1500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US		     (500 * USEC_PER_MSEC)
+
+/* Interrupt bits of ICB status register 0 used by the driver */
+#define ICB_0_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT)))
+
+/* Interrupt bits of ICB status register 1 used by the driver */
+#define ICB_1_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \
+			(REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT)))
+
+/* Combined 64-bit mask: ICB 1 bits in the upper half, ICB 0 bits in the lower half */
+#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK)
+
+/* Buttress interrupt status bits used by the driver */
+#define BUTTRESS_IRQ_MASK ((REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \
+			   (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
+			   (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR)))
+
+/*
+ * The enable value is the complement of BUTTRESS_IRQ_MASK and the disable
+ * value has every bit set (presumably a set bit masks the interrupt - confirm
+ * against the register description).
+ */
+#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK)
+#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1)
+
+/* Fields of the FW_SOC_IRQ_EN register grouped as firewall violation sources */
+#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \
+				     (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \
+				     (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \
+				     (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \
+				     (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \
+				     (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
+				     (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
+
+/* Map an IVPU_PLATFORM_* value to its name; unknown values give "Invalid platform". */
+static char *ivpu_platform_to_str(u32 platform)
+{
+	if (platform == IVPU_PLATFORM_SILICON)
+		return "IVPU_PLATFORM_SILICON";
+
+	if (platform == IVPU_PLATFORM_SIMICS)
+		return "IVPU_PLATFORM_SIMICS";
+
+	if (platform == IVPU_PLATFORM_FPGA)
+		return "IVPU_PLATFORM_FPGA";
+
+	return "Invalid platform";
+}
+
+/*
+ * Read the PS field of the GEN_CTRL register and store the platform type in
+ * vdev->platform. Anything other than SIMICS or FPGA is treated as silicon.
+ */
+static void ivpu_hw_read_platform(struct ivpu_device *vdev)
+{
+	u32 gen_ctrl = REGV_RD32(MTL_VPU_HOST_SS_GEN_CTRL);
+	u32 platform = REG_GET_FLD(MTL_VPU_HOST_SS_GEN_CTRL, PS, gen_ctrl);
+
+	switch (platform) {
+	case IVPU_PLATFORM_SIMICS:
+	case IVPU_PLATFORM_FPGA:
+		vdev->platform = platform;
+		break;
+	default:
+		vdev->platform = IVPU_PLATFORM_SILICON;
+		break;
+	}
+
+	ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n",
+		 ivpu_platform_to_str(vdev->platform), vdev->platform);
+}
+
+/* Set the workaround flags: punit_disabled on FPGA only, clear_runtime_mem off. */
+static void ivpu_hw_wa_init(struct ivpu_device *vdev)
+{
+	bool on_fpga = ivpu_is_fpga(vdev);
+
+	vdev->wa.punit_disabled = on_fpga;
+	vdev->wa.clear_runtime_mem = false;
+}
+
+/*
+ * Fill vdev->timeout. SIMICS and FPGA platforms get values larger than
+ * silicon (100x for boot, jsm and reschedule_suspend, 1000x for tdr).
+ */
+static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
+{
+	bool pre_silicon = ivpu_is_simics(vdev) || ivpu_is_fpga(vdev);
+
+	vdev->timeout.boot = pre_silicon ? 100000 : 1000;
+	vdev->timeout.jsm = pre_silicon ? 50000 : 500;
+	vdev->timeout.tdr = pre_silicon ? 2000000 : 2000;
+	vdev->timeout.reschedule_suspend = pre_silicon ? 1000 : 10;
+}
+
+/* Poll until the SEND field of WP_REQ_CMD reads 0 or PLL_TIMEOUT_US expires. */
+static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
+{
+	int ret = REGB_POLL_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US);
+
+	return ret;
+}
+
+/*
+ * Send KMD initiated workpoint change
+ *
+ * Waits for any previous request to be consumed (SEND == 0), programs the
+ * three payload registers with a read-modify-write each, sets SEND and waits
+ * for it to read 0 again. Returns 0 on success or the polling error.
+ */
+static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio,
+			     u16 target_ratio, u16 config)
+{
+	int ret;
+	u32 val;
+
+	ret = ivpu_pll_wait_for_cmd_send(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret);
+		return ret;
+	}
+
+	/* PAYLOAD0: allowed ratio window */
+	val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD0);
+	val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val);
+	val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val);
+	REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD0, val);
+
+	/* PAYLOAD1: requested ratio and the default EPP value */
+	val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD1);
+	val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val);
+	val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val);
+	REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD1, val);
+
+	/* PAYLOAD2: work point configuration */
+	val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD2);
+	val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val);
+	REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD2, val);
+
+	/* Trigger the request once all payload registers are written */
+	val = REGB_RD32(MTL_BUTTRESS_WP_REQ_CMD);
+	val = REG_SET_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, val);
+	REGB_WR32(MTL_BUTTRESS_WP_REQ_CMD, val);
+
+	ret = ivpu_pll_wait_for_cmd_send(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Poll the LOCK field of PLL_STATUS until it reads 1 (@enable) or 0
+ * (!@enable). Returns 0 right away when the punit_disabled workaround is set.
+ */
+static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable)
+{
+	u32 expected_lock;
+
+	if (enable)
+		expected_lock = 0x1;
+	else
+		expected_lock = 0x0;
+
+	if (IVPU_WA(punit_disabled))
+		return 0;
+
+	return REGB_POLL_FLD(MTL_BUTTRESS_PLL_STATUS, LOCK, expected_lock, PLL_TIMEOUT_US);
+}
+
+/*
+ * Poll the READY field of VPU_STATUS until it reads 1. Returns 0 right away
+ * when the punit_disabled workaround is set.
+ */
+static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev)
+{
+	int ret = 0;
+
+	if (!IVPU_WA(punit_disabled))
+		ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US);
+
+	return ret;
+}
+
+/*
+ * Compute hw->pll.{min,max,pn}_ratio from the fuse registers.
+ *
+ * The requested ivpu_pll_min_ratio / ivpu_pll_max_ratio values are clamped to
+ * the fused limits, and the fused PN ratio is clamped to the resulting
+ * [min_ratio, max_ratio] window.
+ */
+static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
+{
+	struct ivpu_hw_info *hw = vdev->hw;
+	u8 fused_min, fused_max, fused_pn;
+	u8 ratio_lo, ratio_hi;
+	u32 fuse_reg;
+
+	fuse_reg = REGB_RD32(MTL_BUTTRESS_FMIN_FUSE);
+	fused_min = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, MIN_RATIO, fuse_reg);
+	fused_pn = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, PN_RATIO, fuse_reg);
+
+	fuse_reg = REGB_RD32(MTL_BUTTRESS_FMAX_FUSE);
+	fused_max = REG_GET_FLD(MTL_BUTTRESS_FMAX_FUSE, MAX_RATIO, fuse_reg);
+
+	ratio_lo = clamp_t(u8, ivpu_pll_min_ratio, fused_min, fused_max);
+	ratio_hi = clamp_t(u8, ivpu_pll_max_ratio, ratio_lo, fused_max);
+
+	hw->pll.min_ratio = ratio_lo;
+	hw->pll.max_ratio = ratio_hi;
+	hw->pll.pn_ratio = clamp_t(u8, fused_pn, ratio_lo, ratio_hi);
+}
+
+/*
+ * Request a PLL work point and wait for it to take effect.
+ *
+ * With @enable the target is hw->pll.pn_ratio with hw->config, otherwise the
+ * target ratio and config are both 0. After the request is sent the PLL lock
+ * state is polled and, when enabling, the READY status as well. Nothing is
+ * done when the punit_disabled workaround is set.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
+{
+	struct ivpu_hw_info *hw = vdev->hw;
+	u16 target_ratio;
+	u16 config;
+	int ret;
+
+	if (IVPU_WA(punit_disabled)) {
+		ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n",
+			 ivpu_platform_to_str(vdev->platform));
+		return 0;
+	}
+
+	target_ratio = enable ? hw->pll.pn_ratio : 0;
+	config = enable ? hw->config : 0;
+
+	ivpu_dbg(vdev, PM, "PLL workpoint request: %d Hz\n", PLL_RATIO_TO_FREQ(target_ratio));
+
+	ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config);
+	if (ret) {
+		ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_pll_wait_for_lock(vdev, enable);
+	if (ret) {
+		ivpu_err(vdev, "Timed out waiting for PLL lock\n");
+		return ret;
+	}
+
+	/* The READY status is only awaited when the PLL is being turned on */
+	if (!enable)
+		return 0;
+
+	ret = ivpu_pll_wait_for_status_ready(vdev);
+	if (ret)
+		ivpu_err(vdev, "Timed out waiting for PLL ready status\n");
+
+	return ret;
+}
+
+/* Turn the PLL on; see ivpu_pll_drive(). */
+static int ivpu_pll_enable(struct ivpu_device *vdev)
+{
+	int ret = ivpu_pll_drive(vdev, true);
+
+	return ret;
+}
+
+/* Turn the PLL off; see ivpu_pll_drive(). */
+static int ivpu_pll_disable(struct ivpu_device *vdev)
+{
+	int ret = ivpu_pll_drive(vdev, false);
+
+	return ret;
+}
+
+/* Set the TOP_NOC, DSS_MAS and MSS_MAS fields of the CPR_RST_CLR register. */
+static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev)
+{
+	u32 rst_clr;
+
+	rst_clr = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_CLR);
+
+	rst_clr = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, TOP_NOC, rst_clr);
+	rst_clr = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, DSS_MAS, rst_clr);
+	rst_clr = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, MSS_MAS, rst_clr);
+
+	REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_CLR, rst_clr);
+}
+
+/*
+ * Set (@enable) or clear (!@enable) the TOP_NOC, DSS_MAS and MSS_MAS fields
+ * of the CPR_RST_SET register with a single read-modify-write.
+ */
+static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 rst_set = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_SET);
+
+	if (!enable) {
+		rst_set = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, rst_set);
+		rst_set = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, rst_set);
+		rst_set = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, rst_set);
+	} else {
+		rst_set = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, rst_set);
+		rst_set = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, rst_set);
+		rst_set = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, rst_set);
+	}
+
+	REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_SET, rst_set);
+}
+
+/*
+ * Set (@enable) or clear (!@enable) the TOP_NOC, DSS_MAS and MSS_MAS fields
+ * of the CPR_CLK_SET register with a single read-modify-write.
+ */
+static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 clk_set = REGV_RD32(MTL_VPU_HOST_SS_CPR_CLK_SET);
+
+	if (!enable) {
+		clk_set = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, clk_set);
+		clk_set = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, clk_set);
+		clk_set = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, clk_set);
+	} else {
+		clk_set = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, clk_set);
+		clk_set = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, clk_set);
+		clk_set = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, clk_set);
+	}
+
+	REGV_WR32(MTL_VPU_HOST_SS_CPR_CLK_SET, clk_set);
+}
+
+/* Return 0 when the TOP_SOCMMIO field of NOC_QREQN equals @exp_val, else -EIO. */
+static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+	u32 qreqn = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN);
+
+	if (REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, qreqn))
+		return 0;
+
+	return -EIO;
+}
+
+/* Return 0 when the TOP_SOCMMIO field of NOC_QACCEPTN equals @exp_val, else -EIO. */
+static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+	u32 qacceptn = REGV_RD32(MTL_VPU_HOST_SS_NOC_QACCEPTN);
+
+	if (REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, qacceptn))
+		return 0;
+
+	return -EIO;
+}
+
+/* Return 0 when the TOP_SOCMMIO field of NOC_QDENY equals @exp_val, else -EIO. */
+static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
+{
+	u32 qdeny = REGV_RD32(MTL_VPU_HOST_SS_NOC_QDENY);
+
+	if (REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, qdeny))
+		return 0;
+
+	return -EIO;
+}
+
+/*
+ * Return 0 when both the CPU_CTRL and HOSTIF_L2CACHE fields of TOP_NOC_QREQN
+ * equal @exp_val, else -EIO.
+ */
+static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+	u32 qreqn = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+
+	if (REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, qreqn) &&
+	    REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, qreqn))
+		return 0;
+
+	return -EIO;
+}
+
+/*
+ * Return 0 when both the CPU_CTRL and HOSTIF_L2CACHE fields of
+ * TOP_NOC_QACCEPTN equal @exp_val, else -EIO.
+ */
+static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
+{
+	u32 qacceptn = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN);
+
+	if (REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, qacceptn) &&
+	    REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, qacceptn))
+		return 0;
+
+	return -EIO;
+}
+
+/*
+ * Return 0 when both the CPU_CTRL and HOSTIF_L2CACHE fields of TOP_NOC_QDENY
+ * equal @exp_val, else -EIO.
+ */
+static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
+{
+	u32 qdeny = REGV_RD32(MTL_VPU_TOP_NOC_QDENY);
+
+	if (REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, qdeny) &&
+	    REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, qdeny))
+		return 0;
+
+	return -EIO;
+}
+
+/*
+ * Write the reset-clear bits and then verify that the NOC QREQN TOP_SOCMMIO
+ * field reads 0. Returns the result of that check.
+ */
+static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ivpu_boot_host_ss_rst_clr_assert(vdev);
+	ret = ivpu_boot_noc_qreqn_check(vdev, 0x0);
+
+	return ret;
+}
+
+/* Write 0 to the AON VPU_IDLE_GEN register. */
+static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev)
+{
+	REGV_WR32(MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN, 0x0);
+}
+
+/*
+ * Set or clear the TOP_SOCMMIO request bit in NOC_QREQN and verify the
+ * handshake: QACCEPTN must match the requested state and QDENY must be 0.
+ * Returns 0 on success or the error of the failing check.
+ */
+static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 qreqn;
+	int err;
+
+	qreqn = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN);
+	qreqn = enable ? REG_SET_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, qreqn) :
+			 REG_CLR_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, qreqn);
+	REGV_WR32(MTL_VPU_HOST_SS_NOC_QREQN, qreqn);
+
+	err = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
+	if (err) {
+		ivpu_err(vdev, "Failed qacceptn check: %d\n", err);
+		return err;
+	}
+
+	err = ivpu_boot_noc_qdeny_check(vdev, 0x0);
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed qdeny check: %d\n", err);
+	return err;
+}
+
+/* Enable the host SS AXI interface; see ivpu_boot_host_ss_axi_drive(). */
+static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev)
+{
+	int ret = ivpu_boot_host_ss_axi_drive(vdev, true);
+
+	return ret;
+}
+
+/* Disable the host SS AXI interface; see ivpu_boot_host_ss_axi_drive(). */
+static int ivpu_boot_host_ss_axi_disable(struct ivpu_device *vdev)
+{
+	int ret = ivpu_boot_host_ss_axi_drive(vdev, false);
+
+	return ret;
+}
+
+/*
+ * Set or clear the CPU_CTRL and HOSTIF_L2CACHE request bits in TOP_NOC_QREQN
+ * and verify the handshake: QACCEPTN must match the requested state and
+ * QDENY must be 0. Returns 0 on success or the error of the failing check.
+ */
+static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 qreqn;
+	int err;
+
+	qreqn = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+	if (!enable) {
+		qreqn = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, qreqn);
+		qreqn = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, qreqn);
+	} else {
+		qreqn = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, qreqn);
+		qreqn = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, qreqn);
+	}
+	REGV_WR32(MTL_VPU_TOP_NOC_QREQN, qreqn);
+
+	err = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
+	if (err) {
+		ivpu_err(vdev, "Failed qacceptn check: %d\n", err);
+		return err;
+	}
+
+	err = ivpu_boot_top_noc_qdeny_check(vdev, 0x0);
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed qdeny check: %d\n", err);
+	return err;
+}
+
+/* Enable the TOP NOC; see ivpu_boot_host_ss_top_noc_drive(). */
+static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev)
+{
+	int ret = ivpu_boot_host_ss_top_noc_drive(vdev, true);
+
+	return ret;
+}
+
+/* Disable the TOP NOC; see ivpu_boot_host_ss_top_noc_drive(). */
+static int ivpu_boot_host_ss_top_noc_disable(struct ivpu_device *vdev)
+{
+	int ret = ivpu_boot_host_ss_top_noc_drive(vdev, false);
+
+	return ret;
+}
+
+/* Set or clear the MSS_CPU field of the AON PWR_ISLAND_TRICKLE_EN0 register. */
+static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 trickle = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+	trickle = enable ?
+		  REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, trickle) :
+		  REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, trickle);
+
+	REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, trickle);
+}
+
+/* Set or clear the MSS_CPU field of the AON PWR_ISLAND_EN0 register. */
+static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 island = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0);
+
+	island = enable ?
+		 REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, island) :
+		 REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, island);
+
+	REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, island);
+}
+
+/*
+ * Poll the MSS_CPU field of PWR_ISLAND_STATUS0 until it equals @exp_val or
+ * PWR_ISLAND_STATUS_TIMEOUT_US expires.
+ */
+static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
+{
+	int ret = 0;
+
+	/* FPGA model (UPF) is not power aware, so Power Island polling is skipped */
+	if (!ivpu_is_fpga(vdev))
+		ret = REGV_POLL_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU,
+				    exp_val, PWR_ISLAND_STATUS_TIMEOUT_US);
+
+	return ret;
+}
+
+/* Set or clear the MSS_CPU field of the AON PWR_ISO_EN0 register. */
+static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 iso = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0);
+
+	iso = enable ?
+	      REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, iso) :
+	      REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, iso);
+
+	REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, iso);
+}
+
+/* Set or clear the DPU_ACTIVE field of the AON DPU_ACTIVE register. */
+static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 dpu = REGV_RD32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE);
+
+	dpu = enable ?
+	      REG_SET_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, dpu) :
+	      REG_CLR_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, dpu);
+
+	REGV_WR32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, dpu);
+}
+
+/*
+ * Power the domain down: clear DPU active, enable isolation, disable the
+ * trickle and main island enables, then wait for the island status to read 0.
+ * Returns the result of that wait.
+ */
+static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ivpu_boot_dpu_active_drive(vdev, false);
+	ivpu_boot_pwr_island_isolation_drive(vdev, true);
+	ivpu_boot_pwr_island_trickle_drive(vdev, false);
+	ivpu_boot_pwr_island_drive(vdev, false);
+
+	ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
+
+	return ret;
+}
+
+/*
+ * Power the domain up.
+ *
+ * Enables the trickle and main island enables, waits for the island status
+ * to read 1, checks that the TOP NOC QREQN fields read 0 and then enables
+ * the clocks, removes isolation, drives the reset-set bits and sets DPU
+ * active. Returns 0 on success or the error of the failing wait/check.
+ */
+static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ivpu_boot_pwr_island_trickle_drive(vdev, true);
+	ivpu_boot_pwr_island_drive(vdev, true);
+
+	ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1);
+	if (ret) {
+		ivpu_err(vdev, "Timed out waiting for power island status\n");
+		return ret;
+	}
+
+	ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0);
+	if (ret) {
+		ivpu_err(vdev, "Failed qrenqn check %d\n", ret);
+		return ret;
+	}
+
+	ivpu_boot_host_ss_clk_drive(vdev, true);
+	ivpu_boot_pwr_island_isolation_drive(vdev, false);
+	ivpu_boot_host_ss_rst_drive(vdev, true);
+	ivpu_boot_dpu_active_drive(vdev, true);
+
+	/* ret is 0 here: both checks above passed */
+	return ret;
+}
+
+/*
+ * Set the NOSNOOP_OVERRIDE_EN, AW_NOSNOOP_OVERRIDE and AR_NOSNOOP_OVERRIDE
+ * fields of the TCU_PTW_OVERRIDES register.
+ */
+static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
+{
+	u32 overrides;
+
+	overrides = REGV_RD32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES);
+
+	overrides = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, overrides);
+	overrides = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, overrides);
+	overrides = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, overrides);
+
+	REGV_WR32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, overrides);
+}
+
+/*
+ * Set the AW/AR MMUSSIDV fields in the TBU_MMUSSIDV register.
+ *
+ * On FPGA only the TBU0 and TBU2 fields are set; on other platforms the
+ * fields of TBU0 through TBU3 are set.
+ */
+static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev)
+{
+	u32 val = REGV_RD32(MTL_VPU_HOST_IF_TBU_MMUSSIDV);
+
+	if (ivpu_is_fpga(vdev)) {
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
+	} else {
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_AWMMUSSIDV, val);
+		val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_ARMMUSSIDV, val);
+	}
+
+	REGV_WR32(MTL_VPU_HOST_IF_TBU_MMUSSIDV, val);
+}
+
+/*
+ * Program the LEON_RT_VEC register sequence and the firmware loading address.
+ *
+ * The order of the register writes below is significant and must be kept.
+ */
+static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev)
+{
+	u32 val;
+
+	/* Set IRQI_RSTRUN0 and clear IRQI_RSTVEC in one write */
+	val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
+	val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
+
+	val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
+	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+
+	/* Pulse IRQI_RESUME0: set it with one write, clear it with the next */
+	val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+
+	val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+
+	/* The register takes the entry point shifted right by 9 bits */
+	val = vdev->fw->entry_point >> 9;
+	REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val);
+
+	/* Write the same address again with the DONE field set */
+	val = REG_SET_FLD(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, DONE, val);
+	REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val);
+
+	ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n",
+		 vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume");
+}
+
+/*
+ * Set (@enable) or clear (!@enable) the I3 field of the D0I3_CONTROL
+ * register. The INPROGRESS field is polled for 0 both before and after the
+ * write. Returns 0 on success or the polling error.
+ */
+static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 ctrl;
+	int err;
+
+	err = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
+	if (err) {
+		ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", err);
+		return err;
+	}
+
+	ctrl = REGB_RD32(MTL_BUTTRESS_VPU_D0I3_CONTROL);
+	ctrl = enable ? REG_SET_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, ctrl) :
+			REG_CLR_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, ctrl);
+	REGB_WR32(MTL_BUTTRESS_VPU_D0I3_CONTROL, ctrl);
+
+	err = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US);
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", err);
+	return err;
+}
+
+/*
+ * Fill vdev->hw from the tile fuse register and fixed address ranges.
+ *
+ * Sets tile_fuse, sku and config according to the fused SKU, initializes the
+ * PLL ratios and the VPU address ranges. Always returns 0; an invalid tile
+ * fuse is only reported with a warning.
+ */
+static int ivpu_hw_mtl_info_init(struct ivpu_device *vdev)
+{
+	struct ivpu_hw_info *hw = vdev->hw;
+	u32 tile_fuse;
+
+	tile_fuse = REGB_RD32(MTL_BUTTRESS_TILE_FUSE);
+	if (!REG_TEST_FLD(MTL_BUTTRESS_TILE_FUSE, VALID, tile_fuse))
+		ivpu_warn(vdev, "Tile Fuse: Invalid (0x%x)\n", tile_fuse);
+
+	hw->tile_fuse = REG_GET_FLD(MTL_BUTTRESS_TILE_FUSE, SKU, tile_fuse);
+	switch (hw->tile_fuse) {
+	case TILE_FUSE_ENABLE_LOWER:
+		hw->sku = TILE_SKU_LOWER_MTL;
+		hw->config = WP_CONFIG_1_TILE_5_3_RATIO;
+		ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Lower\n");
+		break;
+	case TILE_FUSE_ENABLE_UPPER:
+		hw->sku = TILE_SKU_UPPER_MTL;
+		hw->config = WP_CONFIG_1_TILE_4_3_RATIO;
+		ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Upper\n");
+		break;
+	case TILE_FUSE_ENABLE_BOTH:
+		hw->sku = TILE_SKU_BOTH_MTL;
+		hw->config = WP_CONFIG_2_TILE_5_3_RATIO;
+		ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Both\n");
+		break;
+	default:
+		/* Unknown fuse value: PLL-off config, hw->sku is left untouched */
+		hw->config = WP_CONFIG_0_TILE_PLL_OFF;
+		ivpu_dbg(vdev, MISC, "Tile Fuse: Disable\n");
+		break;
+	}
+
+	ivpu_pll_init_frequency_ratios(vdev);
+
+	/* Arguments are (range, start, size); global_high and user_high share a start */
+	ivpu_hw_init_range(&hw->ranges.global_low, 0x80000000, SZ_512M);
+	ivpu_hw_init_range(&hw->ranges.global_high, 0x180000000, SZ_2M);
+	ivpu_hw_init_range(&hw->ranges.user_low, 0xc0000000, 255 * SZ_1M);
+	ivpu_hw_init_range(&hw->ranges.user_high, 0x180000000, SZ_2G);
+	/* The aliased PIO range is a copy of user_low */
+	hw->ranges.global_aliased_pio = hw->ranges.user_low;
+
+	return 0;
+}
+
+/*
+ * Trigger an IP reset through the VPU_IP_RESET register.
+ *
+ * Waits for the TRIGGER field to read 0, sets it, and waits for it to read 0
+ * again. Does nothing when the punit_disabled workaround is set. Returns 0
+ * on success or the polling error.
+ */
+static int ivpu_hw_mtl_reset(struct ivpu_device *vdev)
+{
+	int ret;
+	u32 val;
+
+	if (IVPU_WA(punit_disabled))
+		return 0;
+
+	ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+	if (ret) {
+		ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
+		return ret;
+	}
+
+	val = REGB_RD32(MTL_BUTTRESS_VPU_IP_RESET);
+	val = REG_SET_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
+	REGB_WR32(MTL_BUTTRESS_VPU_IP_RESET, val);
+
+	/* TRIGGER reading 0 again is treated as reset completion */
+	ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+	if (ret)
+		ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+	return ret;
+}
+
+/*
+ * Enter D0i3. The 5 us delay is applied whether or not the request
+ * succeeded. Returns the result of ivpu_boot_d0i3_drive().
+ */
+static int ivpu_hw_mtl_d0i3_enable(struct ivpu_device *vdev)
+{
+	int err = ivpu_boot_d0i3_drive(vdev, true);
+
+	if (err)
+		ivpu_err(vdev, "Failed to enable D0i3: %d\n", err);
+
+	/* VPU requires 5 us to complete the transition */
+	udelay(5);
+
+	return err;
+}
+
+/* Leave D0i3. Returns the result of ivpu_boot_d0i3_drive(). */
+static int ivpu_hw_mtl_d0i3_disable(struct ivpu_device *vdev)
+{
+	int err = ivpu_boot_d0i3_drive(vdev, false);
+
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed to disable D0i3: %d\n", err);
+	return err;
+}
+
+/*
+ * Power up the VPU.
+ *
+ * Detects the platform and derives workarounds and timeouts from it, then
+ * resets the HW, leaves D0i3, enables the PLL, configures the host SS,
+ * enables the power domain and finally the AXI and TOP NOC interfaces.
+ * Returns 0 on success or the error of the first failing mandatory step.
+ */
+static int ivpu_hw_mtl_power_up(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ivpu_hw_read_platform(vdev);
+	ivpu_hw_wa_init(vdev);
+	ivpu_hw_timeouts_init(vdev);
+
+	/* Reset and D0i3 exit failures are only warned about; power up continues */
+	ret = ivpu_hw_mtl_reset(vdev);
+	if (ret)
+		ivpu_warn(vdev, "Failed to reset HW: %d\n", ret);
+
+	ret = ivpu_hw_mtl_d0i3_disable(vdev);
+	if (ret)
+		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
+
+	ret = ivpu_pll_enable(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to enable PLL: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_boot_host_ss_configure(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to configure host SS: %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * The control circuitry for vpu_idle indication logic powers up active.
+	 * To ensure unnecessary low power mode signal from LRT during bring up,
+	 * KMD disables the circuitry prior to bringing up the Main Power island.
+	 */
+	ivpu_boot_vpu_idle_gen_disable(vdev);
+
+	ret = ivpu_boot_pwr_domain_enable(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to enable power domain: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_boot_host_ss_axi_enable(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to enable AXI: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_boot_host_ss_top_noc_enable(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Start the firmware: program the no-snoop overrides and the TBU MMU bits,
+ * then run the CPU boot register sequence. Always returns 0.
+ */
+static int ivpu_hw_mtl_boot_fw(struct ivpu_device *vdev)
+{
+	ivpu_boot_no_snoop_enable(vdev);
+	ivpu_boot_tbu_mmu_enable(vdev);
+	ivpu_boot_soc_cpu_boot(vdev);
+
+	return 0;
+}
+
+/*
+ * Report whether the VPU is idle: both the READY and IDLE fields of
+ * VPU_STATUS must be set. Always true when the punit_disabled workaround is
+ * set.
+ */
+static bool ivpu_hw_mtl_is_idle(struct ivpu_device *vdev)
+{
+	u32 status;
+
+	if (IVPU_WA(punit_disabled))
+		return true;
+
+	status = REGB_RD32(MTL_BUTTRESS_VPU_STATUS);
+	if (!REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, READY, status))
+		return false;
+
+	return REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, IDLE, status);
+}
+
+/*
+ * Power-down sequence for the MTL VPU.
+ *
+ * Every step is attempted even if an earlier one failed. A failure of the
+ * TOP NOC / AXI disable (FPGA only), of the power domain disable or of the
+ * PLL disable is logged and makes the function return -EIO. A failure to
+ * enable D0i3 is only warned about and does not affect the return value.
+ *
+ * Returns 0 when all mandatory steps succeeded, -EIO otherwise.
+ */
+static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev)
+{
+	int status = 0;
+
+	/* FPGA requires manual clearing of IP_Reset bit by enabling quiescent state */
+	if (ivpu_is_fpga(vdev)) {
+		if (ivpu_boot_host_ss_top_noc_disable(vdev) != 0) {
+			ivpu_err(vdev, "Failed to disable TOP NOC\n");
+			status = -EIO;
+		}
+
+		if (ivpu_boot_host_ss_axi_disable(vdev) != 0) {
+			ivpu_err(vdev, "Failed to disable AXI\n");
+			status = -EIO;
+		}
+	}
+
+	if (ivpu_boot_pwr_domain_disable(vdev) != 0) {
+		ivpu_err(vdev, "Failed to disable power domain\n");
+		status = -EIO;
+	}
+
+	if (ivpu_pll_disable(vdev) != 0) {
+		ivpu_err(vdev, "Failed to disable PLL\n");
+		status = -EIO;
+	}
+
+	/* Best effort: a D0i3 failure is reported but not propagated */
+	if (ivpu_hw_mtl_d0i3_enable(vdev) != 0)
+		ivpu_warn(vdev, "Failed to enable D0I3\n");
+
+	return status;
+}
+
+/*
+ * Disable the CPU_SS watchdog timer.
+ *
+ * Each write to a watchdog register (TIM_WATCHDOG, TIM_WDOG_EN) is preceded by
+ * a TIM_SAFE_ENABLE write to TIM_SAFE. Afterwards the WDOG_TO_INT_CLR field of
+ * TIM_GEN_CONFIG is cleared with a read-modify-write.
+ */
+static void ivpu_hw_mtl_wdt_disable(struct ivpu_device *vdev)
+{
+	u32 val;
+
+	/* Enable writing and set non-zero WDT value */
+	REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+	REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
+
+	/* Enable writing and disable watchdog timer */
+	REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+	REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0);
+
+	/* Now clear the timeout interrupt */
+	val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG);
+	val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
+	REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val);
+}
+
+/* Register indirect accesses */
+/*
+ * Return the current PLL frequency.
+ *
+ * The ratio field of MTL_BUTTRESS_CURRENT_PLL is always read. On silicon it is
+ * converted with PLL_RATIO_TO_FREQ(); on any other platform the fixed
+ * PLL_SIMULATION_FREQ is returned instead.
+ */
+static u32 ivpu_hw_mtl_reg_pll_freq_get(struct ivpu_device *vdev)
+{
+	u32 ratio = REGB_RD32(MTL_BUTTRESS_CURRENT_PLL) & MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK;
+
+	if (ivpu_is_silicon(vdev))
+		return PLL_RATIO_TO_FREQ(ratio);
+
+	return PLL_SIMULATION_FREQ;
+}
+
+/* Return the raw 32-bit value of the MTL_BUTTRESS_VPU_TELEMETRY_OFFSET register. */
+static u32 ivpu_hw_mtl_reg_telemetry_offset_get(struct ivpu_device *vdev)
+{
+	return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_OFFSET);
+}
+
+/* Return the raw 32-bit value of the MTL_BUTTRESS_VPU_TELEMETRY_SIZE register. */
+static u32 ivpu_hw_mtl_reg_telemetry_size_get(struct ivpu_device *vdev)
+{
+	return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_SIZE);
+}
+
+/* Return the raw 32-bit value of the MTL_BUTTRESS_VPU_TELEMETRY_ENABLE register. */
+static u32 ivpu_hw_mtl_reg_telemetry_enable_get(struct ivpu_device *vdev)
+{
+	return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_ENABLE);
+}
+
+/*
+ * Write the SET bit to doorbell register number @db_id.
+ *
+ * Doorbell registers are addressed as DOORBELL_0 + db_id * stride, where the
+ * stride is the distance between DOORBELL_0 and DOORBELL_1.
+ */
+static void ivpu_hw_mtl_reg_db_set(struct ivpu_device *vdev, u32 db_id)
+{
+	const u32 db_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0;
+
+	REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, db_stride, db_id,
+		   REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET));
+}
+
+/*
+ * Return the value read from MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM.
+ * NOTE(review): presumably each read pops one received message address from
+ * the IPC FIFO - confirm against the hardware documentation.
+ */
+static u32 ivpu_hw_mtl_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
+{
+	return REGV_RD32(MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM);
+}
+
+/*
+ * Return the FILL_LEVEL field of MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT.
+ * The register is read with the silent accessor, so no debug trace is emitted.
+ */
+static u32 ivpu_hw_mtl_reg_ipc_rx_count_get(struct ivpu_device *vdev)
+{
+	u32 fifo_stat;
+
+	fifo_stat = REGV_RD32_SILENT(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT);
+
+	return REG_GET_FLD(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, fifo_stat);
+}
+
+/* Write @vpu_addr to the MTL_VPU_CPU_SS_TIM_IPC_FIFO register. */
+static void ivpu_hw_mtl_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
+{
+	REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+}
+
+/* Write ICB_0_1_IRQ_MASK to ICB_CLEAR_0 using a single 64-bit access. */
+static void ivpu_hw_mtl_irq_clear(struct ivpu_device *vdev)
+{
+	REGV_WR64(MTL_VPU_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK);
+}
+
+/*
+ * Enable interrupt delivery: program the firewall violation sources, the ICB
+ * enable mask and the buttress local mask, and finally write 0 to the buttress
+ * global interrupt mask.
+ */
+static void ivpu_hw_mtl_irq_enable(struct ivpu_device *vdev)
+{
+	REGV_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK);
+	REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK);
+	REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK);
+	REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0);
+}
+
+/*
+ * Disable interrupt delivery. This undoes ivpu_hw_mtl_irq_enable() in reverse
+ * order: set the buttress global mask, program the buttress local disable
+ * mask, then zero the ICB enable mask and the firewall violation sources.
+ */
+static void ivpu_hw_mtl_irq_disable(struct ivpu_device *vdev)
+{
+	REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1);
+	REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK);
+	REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, 0x0ull);
+	/* FW_SOC_IRQ_EN is a VPU register, so it must go through the REGV accessor */
+	REGV_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, 0x0);
+}
+
+/* WDT NCE interrupt: log it (rate limited) and schedule a recovery. */
+static void ivpu_hw_mtl_irq_wdt_nce_handler(struct ivpu_device *vdev)
+{
+	ivpu_err_ratelimited(vdev, "WDT NCE irq\n");
+
+	ivpu_pm_schedule_recovery(vdev);
+}
+
+/*
+ * WDT MSS interrupt: log it (rate limited), disable the watchdog through
+ * ivpu_hw_wdt_disable() and schedule a recovery.
+ */
+static void ivpu_hw_mtl_irq_wdt_mss_handler(struct ivpu_device *vdev)
+{
+	ivpu_err_ratelimited(vdev, "WDT MSS irq\n");
+
+	ivpu_hw_wdt_disable(vdev);
+	ivpu_pm_schedule_recovery(vdev);
+}
+
+/* NOC firewall interrupt: log it (rate limited) and schedule a recovery. */
+static void ivpu_hw_mtl_irq_noc_firewall_handler(struct ivpu_device *vdev)
+{
+	ivpu_err_ratelimited(vdev, "NOC Firewall irq\n");
+
+	ivpu_pm_schedule_recovery(vdev);
+}
+
+/*
+ * Handler for IRQs from VPU core (irqV).
+ *
+ * Reads the pending ICB_STATUS_0 bits (limited to ICB_0_IRQ_MASK). When none
+ * is set the function returns 0 without touching the hardware any further,
+ * in the same way the buttress handler does. Otherwise the pending bits are
+ * cleared first and each source is then dispatched to its handler.
+ *
+ * Returns the masked interrupt status that was handled (0 if nothing pending).
+ */
+static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq)
+{
+	u32 status = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
+
+	/* Nothing pending for us: skip the pointless clear write */
+	if (status == 0)
+		return 0;
+
+	REGV_WR32(MTL_VPU_HOST_SS_ICB_CLEAR_0, status);
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
+		ivpu_mmu_irq_evtq_handler(vdev);
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
+		ivpu_ipc_irq_handler(vdev);
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
+		ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status))
+		ivpu_mmu_irq_gerr_handler(vdev);
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status))
+		ivpu_hw_mtl_irq_wdt_mss_handler(vdev);
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status))
+		ivpu_hw_mtl_irq_wdt_nce_handler(vdev);
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
+		ivpu_hw_mtl_irq_noc_firewall_handler(vdev);
+
+	return status;
+}
+
+/*
+ * Handler for IRQs from Buttress core (irqB).
+ *
+ * Reads the pending buttress interrupt bits (limited to BUTTRESS_IRQ_MASK) and
+ * returns 0 right away when none is set. Otherwise the global interrupt is
+ * masked while each source is logged and cleared, the local status register
+ * is zeroed and the global interrupt is unmasked again. ATS and UFI errors
+ * additionally schedule a recovery once interrupts are re-enabled.
+ *
+ * Returns the masked interrupt status that was handled (0 if nothing pending).
+ */
+static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq)
+{
+	u32 status = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
+	bool schedule_recovery = false;
+
+	if (status == 0)
+		return 0;
+
+	/* Disable global interrupt before handling local buttress interrupts */
+	REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1);
+
+	if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
+		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x\n", REGB_RD32(MTL_BUTTRESS_CURRENT_PLL));
+
+	if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) {
+		ivpu_err(vdev, "ATS_ERR irq 0x%016llx\n", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0));
+		REGB_WR32(MTL_BUTTRESS_ATS_ERR_CLEAR, 0x1);
+		schedule_recovery = true;
+	}
+
+	if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) {
+		u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG);
+
+		ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx\n",
+			 ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log),
+			 REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log),
+			 REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log));
+		REGB_WR32(MTL_BUTTRESS_UFI_ERR_CLEAR, 0x1);
+		schedule_recovery = true;
+	}
+
+	/*
+	 * Clear local interrupt status by writing 0 to all bits.
+	 * This must be done after interrupts are cleared at the source.
+	 * Writing 1 triggers an interrupt, so we can't perform read update write.
+	 */
+	REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, 0x0);
+
+	/* Re-enable global interrupt */
+	REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0);
+
+	/* Recovery is requested only after interrupt state has been restored */
+	if (schedule_recovery)
+		ivpu_pm_schedule_recovery(vdev);
+
+	return status;
+}
+
+/*
+ * Top-level interrupt handler. @ptr is the struct ivpu_device the IRQ was
+ * registered with. The VPU core (irqV) sources are serviced first, then the
+ * buttress (irqB) sources; the interrupt is reported as handled when either
+ * of them found something pending.
+ */
+static irqreturn_t ivpu_hw_mtl_irq_handler(int irq, void *ptr)
+{
+	struct ivpu_device *vdev = ptr;
+	u32 handled;
+
+	handled = ivpu_hw_mtl_irqv_handler(vdev, irq);
+	handled |= ivpu_hw_mtl_irqb_handler(vdev, irq);
+
+	return IRQ_RETVAL(handled);
+}
+
+/*
+ * Log every error condition that can currently be observed in the hardware:
+ * a non-empty IPC RX FIFO and any pending WDT, NOC firewall, ATS or UFI
+ * interrupt status bit. Only reads registers and prints; nothing is cleared.
+ */
+static void ivpu_hw_mtl_diagnose_failure(struct ivpu_device *vdev)
+{
+	u32 irqv = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
+	u32 irqb = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
+
+	if (ivpu_hw_mtl_reg_ipc_rx_count_get(vdev))
+		ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv))
+		ivpu_err(vdev, "WDT MSS timeout detected\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv))
+		ivpu_err(vdev, "WDT NCE timeout detected\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv))
+		ivpu_err(vdev, "NOC Firewall irq detected\n");
+
+	if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb))
+		ivpu_err(vdev, "ATS_ERR irq 0x%016llx\n", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0));
+
+	if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) {
+		u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG);
+
+		ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx\n",
+			 ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log),
+			 REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log),
+			 REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log));
+	}
+}
+
+/*
+ * MTL implementation of the ivpu_hw_ops callback table: every hook is wired
+ * to its ivpu_hw_mtl_* counterpart. The table is not static, so it is visible
+ * to other translation units.
+ */
+const struct ivpu_hw_ops ivpu_hw_mtl_ops = {
+	.info_init = ivpu_hw_mtl_info_init,
+	.power_up = ivpu_hw_mtl_power_up,
+	.is_idle = ivpu_hw_mtl_is_idle,
+	.power_down = ivpu_hw_mtl_power_down,
+	.boot_fw = ivpu_hw_mtl_boot_fw,
+	.wdt_disable = ivpu_hw_mtl_wdt_disable,
+	.diagnose_failure = ivpu_hw_mtl_diagnose_failure,
+	.reg_pll_freq_get = ivpu_hw_mtl_reg_pll_freq_get,
+	.reg_telemetry_offset_get = ivpu_hw_mtl_reg_telemetry_offset_get,
+	.reg_telemetry_size_get = ivpu_hw_mtl_reg_telemetry_size_get,
+	.reg_telemetry_enable_get = ivpu_hw_mtl_reg_telemetry_enable_get,
+	.reg_db_set = ivpu_hw_mtl_reg_db_set,
+	.reg_ipc_rx_addr_get = ivpu_hw_mtl_reg_ipc_rx_addr_get,
+	.reg_ipc_rx_count_get = ivpu_hw_mtl_reg_ipc_rx_count_get,
+	.reg_ipc_tx_set = ivpu_hw_mtl_reg_ipc_tx_set,
+	.irq_clear = ivpu_hw_mtl_irq_clear,
+	.irq_enable = ivpu_hw_mtl_irq_enable,
+	.irq_disable = ivpu_hw_mtl_irq_disable,
+	.irq_handler = ivpu_hw_mtl_irq_handler,
+};
diff --git a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h
new file mode 100644
index 000000000000..d83ccfd9a871
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_MTL_REG_H__
+#define __IVPU_HW_MTL_REG_H__
+
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+/*
+ * Buttress registers.
+ *
+ * Each register offset is followed by the <REG>_<FIELD>_MASK definitions of
+ * its bit fields; the REG_*FLD* helpers in ivpu_hw_reg_io.h build these names
+ * by token pasting. The driver accesses these registers with the REGB_*
+ * accessors.
+ */
+#define MTL_BUTTRESS_INTERRUPT_TYPE					0x00000000u
+
+#define MTL_BUTTRESS_INTERRUPT_STAT					0x00000004u
+#define MTL_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK			BIT_MASK(0)
+#define MTL_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK			BIT_MASK(1)
+#define MTL_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK			BIT_MASK(2)
+
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD0					0x00000008u
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK			GENMASK(15, 0)
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK			GENMASK(31, 16)
+
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD1					0x0000000cu
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK			GENMASK(15, 0)
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK				GENMASK(31, 16)
+
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD2					0x00000010u
+#define MTL_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK			GENMASK(15, 0)
+
+#define MTL_BUTTRESS_WP_REQ_CMD						0x00000014u
+#define MTL_BUTTRESS_WP_REQ_CMD_SEND_MASK				BIT_MASK(0)
+
+#define MTL_BUTTRESS_WP_DOWNLOAD					0x00000018u
+#define MTL_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK			GENMASK(15, 0)
+
+#define MTL_BUTTRESS_CURRENT_PLL					0x0000001cu
+#define MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK				GENMASK(15, 0)
+
+#define MTL_BUTTRESS_PLL_ENABLE						0x00000020u
+
+#define MTL_BUTTRESS_FMIN_FUSE						0x00000024u
+#define MTL_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK				GENMASK(7, 0)
+#define MTL_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK				GENMASK(15, 8)
+
+#define MTL_BUTTRESS_FMAX_FUSE						0x00000028u
+#define MTL_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK				GENMASK(7, 0)
+
+#define MTL_BUTTRESS_TILE_FUSE						0x0000002cu
+#define MTL_BUTTRESS_TILE_FUSE_VALID_MASK				BIT_MASK(0)
+#define MTL_BUTTRESS_TILE_FUSE_SKU_MASK					GENMASK(3, 2)
+
+#define MTL_BUTTRESS_LOCAL_INT_MASK					0x00000030u
+#define MTL_BUTTRESS_GLOBAL_INT_MASK					0x00000034u
+
+#define MTL_BUTTRESS_PLL_STATUS						0x00000040u
+#define MTL_BUTTRESS_PLL_STATUS_LOCK_MASK				BIT_MASK(1)
+
+#define MTL_BUTTRESS_VPU_STATUS						0x00000044u
+#define MTL_BUTTRESS_VPU_STATUS_READY_MASK				BIT_MASK(0)
+#define MTL_BUTTRESS_VPU_STATUS_IDLE_MASK				BIT_MASK(1)
+
+/* Note: D0I3_CONTROL (0x60) is listed ahead of IP_RESET (0x50), i.e. out of address order */
+#define MTL_BUTTRESS_VPU_D0I3_CONTROL					0x00000060u
+#define MTL_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK			BIT_MASK(0)
+#define MTL_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK				BIT_MASK(2)
+
+#define MTL_BUTTRESS_VPU_IP_RESET					0x00000050u
+#define MTL_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK				BIT_MASK(0)
+
+#define MTL_BUTTRESS_VPU_TELEMETRY_OFFSET				0x00000080u
+#define MTL_BUTTRESS_VPU_TELEMETRY_SIZE					0x00000084u
+#define MTL_BUTTRESS_VPU_TELEMETRY_ENABLE				0x00000088u
+
+#define MTL_BUTTRESS_ATS_ERR_LOG_0					0x000000a0u
+#define MTL_BUTTRESS_ATS_ERR_LOG_1					0x000000a4u
+#define MTL_BUTTRESS_ATS_ERR_CLEAR					0x000000a8u
+
+#define MTL_BUTTRESS_UFI_ERR_LOG					0x000000b0u
+#define MTL_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK				GENMASK(11, 0)
+#define MTL_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK				GENMASK(19, 12)
+#define MTL_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK				GENMASK(24, 20)
+
+#define MTL_BUTTRESS_UFI_ERR_CLEAR					0x000000b4u
+
+/*
+ * VPU host subsystem (HOST_SS) and TOP NOC registers.
+ *
+ * Each register offset is followed by the <REG>_<FIELD>_MASK definitions of
+ * its bit fields. These offsets are meant for the REGV_* accessors.
+ */
+#define MTL_VPU_HOST_SS_CPR_CLK_SET					0x00000084u
+#define MTL_VPU_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK			BIT_MASK(1)
+#define MTL_VPU_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK			BIT_MASK(10)
+#define MTL_VPU_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK			BIT_MASK(11)
+
+#define MTL_VPU_HOST_SS_CPR_RST_SET					0x00000094u
+#define MTL_VPU_HOST_SS_CPR_RST_SET_TOP_NOC_MASK			BIT_MASK(1)
+#define MTL_VPU_HOST_SS_CPR_RST_SET_DSS_MAS_MASK			BIT_MASK(10)
+#define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK			BIT_MASK(11)
+
+#define MTL_VPU_HOST_SS_CPR_RST_CLR					0x00000098u
+#define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK			BIT_MASK(1)
+#define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK			BIT_MASK(10)
+#define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK			BIT_MASK(11)
+
+#define MTL_VPU_HOST_SS_HW_VERSION					0x00000108u
+#define MTL_VPU_HOST_SS_HW_VERSION_SOC_REVISION_MASK			GENMASK(7, 0)
+#define MTL_VPU_HOST_SS_HW_VERSION_SOC_NUMBER_MASK			GENMASK(15, 8)
+#define MTL_VPU_HOST_SS_HW_VERSION_VPU_GENERATION_MASK			GENMASK(23, 16)
+
+#define MTL_VPU_HOST_SS_GEN_CTRL					0x00000118u
+#define MTL_VPU_HOST_SS_GEN_CTRL_PS_MASK				GENMASK(31, 29)
+
+/* NOC quiescence handshake: request, accept and deny registers */
+#define MTL_VPU_HOST_SS_NOC_QREQN					0x00000154u
+#define MTL_VPU_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK			BIT_MASK(0)
+
+#define MTL_VPU_HOST_SS_NOC_QACCEPTN					0x00000158u
+#define MTL_VPU_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK			BIT_MASK(0)
+
+#define MTL_VPU_HOST_SS_NOC_QDENY					0x0000015cu
+#define MTL_VPU_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK			BIT_MASK(0)
+
+#define MTL_VPU_TOP_NOC_QREQN						0x00000160u
+#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK				BIT_MASK(0)
+#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+
+#define MTL_VPU_TOP_NOC_QACCEPTN					0x00000164u
+#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK				BIT_MASK(0)
+#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+
+#define MTL_VPU_TOP_NOC_QDENY						0x00000168u
+#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK				BIT_MASK(0)
+#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN					0x00000170u
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK			BIT_MASK(0)
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK			BIT_MASK(1)
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK			BIT_MASK(2)
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK			BIT_MASK(3)
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK			BIT_MASK(4)
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK			BIT_MASK(5)
+#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK			BIT_MASK(6)
+
+/* Interrupt control block (ICB): status, clear and enable registers */
+#define MTL_VPU_HOST_SS_ICB_STATUS_0					0x00010210u
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK			BIT_MASK(0)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK			BIT_MASK(1)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK			BIT_MASK(2)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK			BIT_MASK(3)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK		BIT_MASK(4)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK			BIT_MASK(5)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK			BIT_MASK(6)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK			BIT_MASK(7)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK		BIT_MASK(8)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK	BIT_MASK(30)
+#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK	BIT_MASK(31)
+
+#define MTL_VPU_HOST_SS_ICB_STATUS_1					0x00010214u
+#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK	BIT_MASK(0)
+#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK	BIT_MASK(1)
+#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK	BIT_MASK(2)
+
+#define MTL_VPU_HOST_SS_ICB_CLEAR_0					0x00010220u
+#define MTL_VPU_HOST_SS_ICB_CLEAR_1					0x00010224u
+#define MTL_VPU_HOST_SS_ICB_ENABLE_0					0x00010240u
+
+#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM				0x000200f4u
+
+#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT				0x000200fcu
+#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_READ_POINTER_MASK		GENMASK(7, 0)
+#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_WRITE_POINTER_MASK		GENMASK(15, 8)
+#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK		GENMASK(23, 16)
+#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK			GENMASK(31, 24)
+
+/* Always-on (AON) power island and idle indication registers */
+#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0					0x00030020u
+#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK			BIT_MASK(3)
+
+#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0				0x00030024u
+#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK			BIT_MASK(3)
+
+#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0			0x00030028u
+#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK		BIT_MASK(3)
+
+#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0				0x0003002cu
+#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK		BIT_MASK(3)
+
+#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN				0x00030200u
+#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN_EN_MASK			BIT_MASK(0)
+
+#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE					0x00030204u
+#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK			BIT_MASK(0)
+
+#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO				0x00041040u
+#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_DONE_MASK			BIT_MASK(0)
+#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK		GENMASK(2, 1)
+#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK		GENMASK(31, 3)
+
+#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR				0x00082020u
+#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK	GENMASK(15, 0)
+#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK		GENMASK(31, 16)
+
+/*
+ * Host MMU registers: identification, control, global error reporting and
+ * the stream table / command queue / event queue base and index registers.
+ */
+#define MTL_VPU_HOST_MMU_IDR0						0x00200000u
+#define MTL_VPU_HOST_MMU_IDR1						0x00200004u
+#define MTL_VPU_HOST_MMU_IDR3						0x0020000cu
+#define MTL_VPU_HOST_MMU_IDR5						0x00200014u
+#define MTL_VPU_HOST_MMU_CR0						0x00200020u
+#define MTL_VPU_HOST_MMU_CR0ACK						0x00200024u
+#define MTL_VPU_HOST_MMU_CR1						0x00200028u
+#define MTL_VPU_HOST_MMU_CR2						0x0020002cu
+#define MTL_VPU_HOST_MMU_IRQ_CTRL					0x00200050u
+#define MTL_VPU_HOST_MMU_IRQ_CTRLACK					0x00200054u
+
+#define MTL_VPU_HOST_MMU_GERROR						0x00200060u
+#define MTL_VPU_HOST_MMU_GERROR_CMDQ_MASK				BIT_MASK(0)
+#define MTL_VPU_HOST_MMU_GERROR_EVTQ_ABT_MASK				BIT_MASK(2)
+#define MTL_VPU_HOST_MMU_GERROR_PRIQ_ABT_MASK				BIT_MASK(3)
+#define MTL_VPU_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK			BIT_MASK(4)
+#define MTL_VPU_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK			BIT_MASK(5)
+#define MTL_VPU_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK			BIT_MASK(6)
+#define MTL_VPU_HOST_MMU_GERROR_MSI_ABT_MASK				BIT_MASK(7)
+
+#define MTL_VPU_HOST_MMU_GERRORN					0x00200064u
+
+#define MTL_VPU_HOST_MMU_STRTAB_BASE					0x00200080u
+#define MTL_VPU_HOST_MMU_STRTAB_BASE_CFG				0x00200088u
+#define MTL_VPU_HOST_MMU_CMDQ_BASE					0x00200090u
+#define MTL_VPU_HOST_MMU_CMDQ_PROD					0x00200098u
+#define MTL_VPU_HOST_MMU_CMDQ_CONS					0x0020009cu
+#define MTL_VPU_HOST_MMU_EVTQ_BASE					0x002000a0u
+#define MTL_VPU_HOST_MMU_EVTQ_PROD					0x002000a8u
+#define MTL_VPU_HOST_MMU_EVTQ_CONS					0x002000acu
+/* The _SEC variants are the EVTQ_PROD/EVTQ_CONS offsets shifted up by SZ_64K */
+#define MTL_VPU_HOST_MMU_EVTQ_PROD_SEC					(0x002000a8u + SZ_64K)
+#define MTL_VPU_HOST_MMU_EVTQ_CONS_SEC					(0x002000acu + SZ_64K)
+
+/*
+ * Host interface registers: TCU page-table-walk overrides and the per-TBU
+ * MMUSSIDV bits (one AW and one AR bit for each of TBU0..TBU4).
+ */
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES				0x00360000u
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK	BIT_MASK(0)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK		BIT_MASK(1)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK		BIT_MASK(2)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK	BIT_MASK(3)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK	BIT_MASK(4)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK	BIT_MASK(5)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK	GENMASK(10, 6)
+#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK	GENMASK(15, 11)
+
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV					0x00360004u
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK		BIT_MASK(0)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK		BIT_MASK(1)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK		BIT_MASK(2)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK		BIT_MASK(3)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK		BIT_MASK(4)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK		BIT_MASK(5)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK		BIT_MASK(6)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK		BIT_MASK(7)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK		BIT_MASK(8)
+#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK		BIT_MASK(9)
+
+/*
+ * VPU CPU subsystem (CPU_SS) registers: LEON RT debug unit, MSS CPU clock and
+ * reset control, timers/watchdog, the IPC FIFO and the doorbells.
+ */
+/* DSU_LEON_RT_BASE and DSU_LEON_RT_DSU_CTRL share the same offset */
+#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE					0x04000000u
+#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL				0x04000000u
+#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG				0x04400010u
+#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG				0x04400014u
+#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG				0x04400020u
+
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET				0x06010004u
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK			BIT_MASK(1)
+
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR				0x06010018u
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK			BIT_MASK(1)
+
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC				0x06010040u
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK		BIT_MASK(0)
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK		BIT_MASK(1)
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK		BIT_MASK(2)
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK		BIT_MASK(3)
+#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK		GENMASK(31, 4)
+
+#define MTL_VPU_CPU_SS_TIM_WATCHDOG					0x0602009cu
+#define MTL_VPU_CPU_SS_TIM_WDOG_EN					0x060200a4u
+#define MTL_VPU_CPU_SS_TIM_SAFE						0x060200a8u
+#define MTL_VPU_CPU_SS_TIM_IPC_FIFO					0x060200f0u
+
+#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG					0x06021008u
+#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)
+
+/* DOORBELL_1 - DOORBELL_0 gives the stride used to address doorbell N */
+#define MTL_VPU_CPU_SS_DOORBELL_0					0x06300000u
+#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)
+
+#define MTL_VPU_CPU_SS_DOORBELL_1					0x06301000u
+
+#endif /* __IVPU_HW_MTL_REG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h
new file mode 100644
index 000000000000..43c2c0c2d050
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_REG_IO_H__
+#define __IVPU_HW_REG_IO_H__
+
+#include <linux/bitfield.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+
+#include "ivpu_drv.h"
+
+/* Sleep between two reads in the REGB_POLL()/REGV_POLL() helpers, in microseconds */
+#define REG_POLL_SLEEP_US 50
+/* NOTE(review): presumably the all-ones value seen on a failed MMIO read - confirm with users */
+#define REG_IO_ERROR      0xffffffff
+
+/*
+ * Register accessors. All of them expect a variable named `vdev` to be in
+ * scope at the call site.
+ *
+ * REGB_* operate on vdev->regb, REGV_* on vdev->regv. The plain variants go
+ * through the ivpu_hw_reg_* helpers below, which trace the access using the
+ * stringified register name and the caller's __func__. The _SILENT variants
+ * call readl() directly and produce no trace.
+ */
+#define REGB_RD32(reg)          ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__)
+#define REGB_RD32_SILENT(reg)   readl(vdev->regb + (reg))
+#define REGB_RD64(reg)          ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__)
+#define REGB_WR32(reg, val)     ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__)
+#define REGB_WR64(reg, val)     ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__)
+
+#define REGV_RD32(reg)          ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__)
+#define REGV_RD32_SILENT(reg)   readl(vdev->regv + (reg))
+#define REGV_RD64(reg)          ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__)
+#define REGV_WR32(reg, val)     ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__)
+#define REGV_WR64(reg, val)     ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__)
+
+/* Indexed 32-bit write: targets the register at reg + index * stride */
+#define REGV_WR32I(reg, stride, index, val) \
+	ivpu_hw_reg_wr32_index(vdev, vdev->regv, (reg), (stride), (index), (val), #reg, __func__)
+
+/*
+ * Bit-field helpers. REG and FLD are pasted together into the name of the
+ * mask macro <REG>_<FLD>_MASK, so both must be literal identifiers.
+ */
+/* The mask of the field itself */
+#define REG_FLD(REG, FLD) \
+	(REG##_##FLD##_MASK)
+/* @num shifted into the field position */
+#define REG_FLD_NUM(REG, FLD, num) \
+	FIELD_PREP(REG##_##FLD##_MASK, num)
+/* The field extracted from @val and shifted down to bit 0 */
+#define REG_GET_FLD(REG, FLD, val) \
+	FIELD_GET(REG##_##FLD##_MASK, val)
+/* @val with every bit of the field cleared */
+#define REG_CLR_FLD(REG, FLD, val) \
+	((val) & ~(REG##_##FLD##_MASK))
+/* @val with every bit of the field set */
+#define REG_SET_FLD(REG, FLD, val) \
+	((val) | (REG##_##FLD##_MASK))
+/* @val with the field replaced by @num */
+#define REG_SET_FLD_NUM(REG, FLD, num, val) \
+	(((val) & ~(REG##_##FLD##_MASK)) | FIELD_PREP(REG##_##FLD##_MASK, num))
+/* True only when all bits of the field are set in @val */
+#define REG_TEST_FLD(REG, FLD, val) \
+	((REG##_##FLD##_MASK) == ((val) & (REG##_##FLD##_MASK)))
+/* True when the field extracted from @val equals @num */
+#define REG_TEST_FLD_NUM(REG, FLD, num, val) \
+	((num) == FIELD_GET(REG##_##FLD##_MASK, val))
+
+/*
+ * Polling helpers built on read_poll_timeout().
+ *
+ * The register is read with the silent accessor into @var until @cond becomes
+ * true or @timeout_us elapses, sleeping REG_POLL_SLEEP_US between reads and
+ * without sleeping before the first read. Each macro evaluates to the result
+ * of read_poll_timeout().
+ */
+#define REGB_POLL(reg, var, cond, timeout_us) \
+	read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg)
+
+#define REGV_POLL(reg, var, cond, timeout_us) \
+	read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg)
+
+/*
+ * Poll until field @fld of @reg equals @val. A local named `var` is declared
+ * inside the statement expression, so @val must not refer to a caller
+ * variable of that name.
+ */
+#define REGB_POLL_FLD(reg, fld, val, timeout_us) \
+({ \
+	u32 var; \
+	REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \
+})
+
+#define REGV_POLL_FLD(reg, fld, val, timeout_us) \
+({ \
+	u32 var; \
+	REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \
+})
+
+/*
+ * Read the 32-bit register at @base + @reg with readl(), trace the access
+ * (@func and @name are used for the trace message only) and return the value.
+ */
+static inline u32
+ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg,
+		 const char *name, const char *func)
+{
+	u32 val = readl(base + reg);
+
+	ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val);
+	return val;
+}
+
+/*
+ * Read the 64-bit register at @base + @reg with readq(), trace the access
+ * (@func and @name are used for the trace message only) and return the value.
+ */
+static inline u64
+ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg,
+		 const char *name, const char *func)
+{
+	u64 val = readq(base + reg);
+
+	ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val);
+	return val;
+}
+
+/*
+ * Trace and then perform a 32-bit writel() of @val to @base + @reg.
+ * @func and @name are used for the trace message only.
+ */
+static inline void
+ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val,
+		 const char *name, const char *func)
+{
+	ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val);
+	writel(val, base + reg);
+}
+
+/*
+ * Trace and then perform a 64-bit writeq() of @val to @base + @reg.
+ * @func and @name are used for the trace message only.
+ */
+static inline void
+ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val,
+		 const char *name, const char *func)
+{
+	ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val);
+	writeq(val, base + reg);
+}
+
+/*
+ * Trace and then perform a 32-bit writel() of @val to element @index of a
+ * register array that starts at @base + @reg with @stride bytes between
+ * consecutive elements. @func and @name are used for the trace message only;
+ * the traced name is "<name>_<index>".
+ */
+static inline void
+ivpu_hw_reg_wr32_index(struct ivpu_device *vdev, void __iomem *base, u32 reg,
+		       u32 stride, u32 index, u32 val, const char *name,
+		       const char *func)
+{
+	reg += index * stride;
+
+	/* index is a u32, so it is printed with %u rather than %d */
+	ivpu_dbg(vdev, REG, "%s WR: %s_%u (0x%08x) <= 0x%08x\n", func, name, index, reg, val);
+	writel(val, base + reg);
+}
+
+#endif /* __IVPU_HW_REG_IO_H__ */
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
new file mode 100644
index 000000000000..3adcfa80fc0e
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -0,0 +1,510 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/genalloc.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_hw_reg_io.h"
+#include "ivpu_ipc.h"
+#include "ivpu_jsm_msg.h"
+#include "ivpu_pm.h"
+
+#define IPC_MAX_RX_MSG	128
+#define IS_KTHREAD()	(get_current()->flags & PF_KTHREAD)
+
+struct ivpu_ipc_tx_buf {
+	struct ivpu_ipc_hdr ipc;
+	struct vpu_jsm_msg jsm;
+};
+
+struct ivpu_ipc_rx_msg {
+	struct list_head link;
+	struct ivpu_ipc_hdr *ipc_hdr;
+	struct vpu_jsm_msg *jsm_msg;
+};
+
+static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
+			      struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
+{
+	ivpu_dbg(vdev, IPC, /* @c tags the direction, e.g. "TX"/"RX" */
+		 "%s: vpu:0x%x (data_addr:0x%08x, data_size:0x%x, channel:0x%x, src_node:0x%x, dst_node:0x%x, status:0x%x)\n",
+		 c, vpu_addr, ipc_hdr->data_addr, ipc_hdr->data_size, ipc_hdr->channel,
+		 ipc_hdr->src_node, ipc_hdr->dst_node, ipc_hdr->status);
+}
+
+static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c,
+			      struct vpu_jsm_msg *jsm_msg, u32 vpu_addr)
+{
+	u32 *payload = (u32 *)&jsm_msg->payload;
+
+	ivpu_dbg(vdev, JSM,
+		 "%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n",
+		 c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result,
+		 payload[0], payload[1], payload[2], payload[3], payload[4]);
+}
+
+static void
+ivpu_ipc_rx_mark_free(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
+		      struct vpu_jsm_msg *jsm_msg)
+{
+	ipc_hdr->status = IVPU_IPC_HDR_FREE;
+	if (jsm_msg)
+		jsm_msg->status = VPU_JSM_MSG_FREE;
+	wmb(); /* Flush WC buffers for message statuses */
+}
+
+static void ivpu_ipc_mem_fini(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	ivpu_bo_free_internal(ipc->mem_rx);
+	ivpu_bo_free_internal(ipc->mem_tx);
+}
+
+static int
+ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		    struct vpu_jsm_msg *req)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_tx_buf *tx_buf;
+	u32 tx_buf_vpu_addr;
+	u32 jsm_vpu_addr;
+
+	tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf));
+	if (!tx_buf_vpu_addr) {
+		ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n",
+			 sizeof(*tx_buf));
+		return -ENOMEM;
+	}
+
+	tx_buf = ivpu_to_cpu_addr(ipc->mem_tx, tx_buf_vpu_addr);
+	if (drm_WARN_ON(&vdev->drm, !tx_buf)) {
+		gen_pool_free(ipc->mm_tx, tx_buf_vpu_addr, sizeof(*tx_buf));
+		return -EIO;
+	}
+
+	jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm);
+
+	if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE)
+		ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n",
+			  tx_buf_vpu_addr);
+
+	if (tx_buf->jsm.status != VPU_JSM_MSG_FREE)
+		ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n",
+			  jsm_vpu_addr);
+
+	memset(tx_buf, 0, sizeof(*tx_buf));
+	tx_buf->ipc.data_addr = jsm_vpu_addr;
+	/* TODO: Set data_size to actual JSM message size, not union of all messages */
+	tx_buf->ipc.data_size = sizeof(*req);
+	tx_buf->ipc.channel = cons->channel;
+	tx_buf->ipc.src_node = 0;
+	tx_buf->ipc.dst_node = 1;
+	tx_buf->ipc.status = IVPU_IPC_HDR_ALLOCATED;
+	tx_buf->jsm.type = req->type;
+	tx_buf->jsm.status = VPU_JSM_MSG_ALLOCATED;
+	tx_buf->jsm.payload = req->payload;
+
+	req->request_id = atomic_inc_return(&ipc->request_id);
+	tx_buf->jsm.request_id = req->request_id;
+	cons->request_id = req->request_id;
+	wmb(); /* Flush WC buffers for IPC, JSM msgs */
+
+	cons->tx_vpu_addr = tx_buf_vpu_addr;
+
+	ivpu_jsm_msg_dump(vdev, "TX", &tx_buf->jsm, jsm_vpu_addr);
+	ivpu_ipc_msg_dump(vdev, "TX", &tx_buf->ipc, tx_buf_vpu_addr);
+
+	return 0;
+}
+
+static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	if (vpu_addr)
+		gen_pool_free(ipc->mm_tx, vpu_addr, sizeof(struct ivpu_ipc_tx_buf));
+}
+
+static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
+{
+	ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
+}
+
+void
+ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	INIT_LIST_HEAD(&cons->link);
+	cons->channel = channel;
+	cons->tx_vpu_addr = 0;
+	cons->request_id = 0;
+	spin_lock_init(&cons->rx_msg_lock);
+	INIT_LIST_HEAD(&cons->rx_msg_list);
+	init_waitqueue_head(&cons->rx_msg_wq);
+
+	spin_lock_irq(&ipc->cons_list_lock);
+	list_add_tail(&cons->link, &ipc->cons_list);
+	spin_unlock_irq(&ipc->cons_list_lock);
+}
+
+void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg, *r;
+
+	spin_lock_irq(&ipc->cons_list_lock);
+	list_del(&cons->link);
+	spin_unlock_irq(&ipc->cons_list_lock);
+
+	spin_lock_irq(&cons->rx_msg_lock);
+	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
+		list_del(&rx_msg->link);
+		ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+		atomic_dec(&ipc->rx_msg_count);
+		kfree(rx_msg);
+	}
+	spin_unlock_irq(&cons->rx_msg_lock);
+
+	ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
+}
+
+static int
+ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	int ret;
+
+	ret = mutex_lock_interruptible(&ipc->lock);
+	if (ret)
+		return ret;
+
+	if (!ipc->on) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	ret = ivpu_ipc_tx_prepare(vdev, cons, req);
+	if (ret)
+		goto unlock;
+
+	ivpu_ipc_tx(vdev, cons->tx_vpu_addr);
+
+unlock:
+	mutex_unlock(&ipc->lock);
+	return ret;
+}
+
+int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		     struct ivpu_ipc_hdr *ipc_buf,
+		     struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg;
+	int wait_ret, ret = 0;
+
+	wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq,
+						    (IS_KTHREAD() && kthread_should_stop()) ||
+						    !list_empty(&cons->rx_msg_list),
+						    msecs_to_jiffies(timeout_ms));
+
+	if (IS_KTHREAD() && kthread_should_stop())
+		return -EINTR;
+
+	if (wait_ret == 0)
+		return -ETIMEDOUT;
+
+	if (wait_ret < 0)
+		return -ERESTARTSYS;
+
+	spin_lock_irq(&cons->rx_msg_lock);
+	rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
+	if (!rx_msg) {
+		spin_unlock_irq(&cons->rx_msg_lock);
+		return -EAGAIN;
+	}
+	list_del(&rx_msg->link);
+	spin_unlock_irq(&cons->rx_msg_lock);
+
+	if (ipc_buf) /* optional copy of the raw IPC header */
+		memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
+	if (rx_msg->jsm_msg) { /* data_size is read from the RX buffer: clamp it as unsigned */
+		u32 size = min_t(u32, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
+
+		if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
+			ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
+			ret = -EBADMSG; /* payload is still copied out below */
+		}
+
+		if (ipc_payload)
+			memcpy(ipc_payload, rx_msg->jsm_msg, size);
+	}
+
+	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+	atomic_dec(&ipc->rx_msg_count);
+	kfree(rx_msg);
+
+	return ret;
+}
+
+static int
+ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+			       enum vpu_ipc_msg_type expected_resp_type,
+			       struct vpu_jsm_msg *resp, u32 channel,
+			       unsigned long timeout_ms)
+{
+	struct ivpu_ipc_consumer cons;
+	int ret;
+
+	ivpu_ipc_consumer_add(vdev, &cons, channel);
+
+	ret = ivpu_ipc_send(vdev, &cons, req);
+	if (ret) {
+		ivpu_warn(vdev, "IPC send failed: %d\n", ret);
+		goto consumer_del;
+	}
+
+	ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms);
+	if (ret) {
+		ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret);
+		goto consumer_del;
+	}
+
+	if (resp->type != expected_resp_type) {
+		ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type);
+		ret = -EBADE;
+	}
+
+consumer_del:
+	ivpu_ipc_consumer_del(vdev, &cons);
+	return ret;
+}
+
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+			  enum vpu_ipc_msg_type expected_resp_type,
+			  struct vpu_jsm_msg *resp, u32 channel,
+			  unsigned long timeout_ms)
+{
+	struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
+	struct vpu_jsm_msg hb_resp;
+	int ret, hb_ret;
+
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
+	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
+					     channel, timeout_ms);
+	if (ret != -ETIMEDOUT)
+		goto rpm_put;
+
+	hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
+						&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
+						vdev->timeout.jsm);
+	if (hb_ret == -ETIMEDOUT) {
+		ivpu_hw_diagnose_failure(vdev);
+		ivpu_pm_schedule_recovery(vdev);
+	}
+
+rpm_put:
+	ivpu_rpm_put(vdev);
+	return ret;
+}
+
+static bool
+ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+			struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+{
+	if (cons->channel != ipc_hdr->channel)
+		return false;
+
+	if (!jsm_msg || jsm_msg->request_id == cons->request_id)
+		return true;
+
+	return false;
+}
+
+static void
+ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		  struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg;
+	unsigned long flags;
+
+	lockdep_assert_held(&ipc->cons_list_lock);
+
+	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
+	if (!rx_msg) {
+		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+		return;
+	}
+
+	atomic_inc(&ipc->rx_msg_count);
+
+	rx_msg->ipc_hdr = ipc_hdr;
+	rx_msg->jsm_msg = jsm_msg;
+
+	spin_lock_irqsave(&cons->rx_msg_lock, flags);
+	list_add_tail(&rx_msg->link, &cons->rx_msg_list);
+	spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
+
+	wake_up(&cons->rx_msg_wq);
+}
+
+int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_consumer *cons;
+	struct ivpu_ipc_hdr *ipc_hdr;
+	struct vpu_jsm_msg *jsm_msg;
+	unsigned long flags;
+	bool dispatched;
+	u32 vpu_addr;
+
+	/*
+	 * The driver must drain every message from the IPC FIFO to clear the IPC interrupt.
+	 * Unless the FIFO is emptied to 0, no further IPC interrupts will be generated.
+	 */
+	while (ivpu_hw_reg_ipc_rx_count_get(vdev)) {
+		vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
+		if (vpu_addr == REG_IO_ERROR) {
+			ivpu_err(vdev, "Failed to read IPC rx addr register\n");
+			return -EIO;
+		}
+
+		ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
+		if (!ipc_hdr) {
+			ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr);
+			continue;
+		}
+		ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr);
+
+		jsm_msg = NULL;
+		if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) {
+			jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr);
+			if (!jsm_msg) {
+				ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr);
+				ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL);
+				continue;
+			}
+			ivpu_jsm_msg_dump(vdev, "RX", jsm_msg, ipc_hdr->data_addr);
+		}
+
+		if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) {
+			ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG);
+			ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+			continue;
+		}
+
+		dispatched = false;
+		spin_lock_irqsave(&ipc->cons_list_lock, flags);
+		list_for_each_entry(cons, &ipc->cons_list, link) {
+			if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
+				ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
+				dispatched = true;
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+
+		if (!dispatched) {
+			ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
+			ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+		}
+	}
+
+	return 0;
+}
+
+int ivpu_ipc_init(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	int ret = -ENOMEM;
+
+	ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
+	if (!ipc->mem_tx)
+		return ret;
+
+	ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
+	if (!ipc->mem_rx)
+		goto err_free_tx;
+
+	ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT),
+					  -1, "TX_IPC_JSM");
+	if (IS_ERR(ipc->mm_tx)) {
+		ret = PTR_ERR(ipc->mm_tx);
+		ivpu_err(vdev, "Failed to create gen pool, %pe\n", ipc->mm_tx);
+		goto err_free_rx;
+	}
+
+	ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1);
+	if (ret) {
+		ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret);
+		goto err_free_rx;
+	}
+
+	INIT_LIST_HEAD(&ipc->cons_list);
+	spin_lock_init(&ipc->cons_list_lock);
+	drmm_mutex_init(&vdev->drm, &ipc->lock);
+
+	ivpu_ipc_reset(vdev);
+	return 0;
+
+err_free_rx:
+	ivpu_bo_free_internal(ipc->mem_rx);
+err_free_tx:
+	ivpu_bo_free_internal(ipc->mem_tx);
+	return ret;
+}
+
+void ivpu_ipc_fini(struct ivpu_device *vdev)
+{
+	ivpu_ipc_mem_fini(vdev);
+}
+
+void ivpu_ipc_enable(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	mutex_lock(&ipc->lock);
+	ipc->on = true;
+	mutex_unlock(&ipc->lock);
+}
+
+void ivpu_ipc_disable(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_consumer *cons, *c;
+	unsigned long flags;
+
+	mutex_lock(&ipc->lock);
+	ipc->on = false;
+	mutex_unlock(&ipc->lock);
+
+	spin_lock_irqsave(&ipc->cons_list_lock, flags);
+	list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
+		wake_up(&cons->rx_msg_wq);
+	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+}
+
+void ivpu_ipc_reset(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	mutex_lock(&ipc->lock);
+
+	memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size);
+	memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size);
+	wmb(); /* Flush WC buffers for TX and RX rings */
+
+	mutex_unlock(&ipc->lock);
+}
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
new file mode 100644
index 000000000000..9838202ecfad
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_IPC_H__
+#define __IVPU_IPC_H__
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+
+#include "vpu_jsm_api.h"
+
+struct ivpu_bo;
+
+/* VPU FW boot notification */
+#define IVPU_IPC_CHAN_BOOT_MSG		0x3ff
+#define IVPU_IPC_BOOT_MSG_DATA_ADDR	0x424f4f54
+
+/* The alignment to be used for IPC Buffers and IPC Data. */
+#define IVPU_IPC_ALIGNMENT	   64
+
+#define IVPU_IPC_HDR_FREE	   0 /* header released, buffer may be reused */
+#define IVPU_IPC_HDR_ALLOCATED	   1 /* header in use; must differ from FREE */
+
+/**
+ * struct ivpu_ipc_hdr - The IPC message header structure, exchanged
+ * with the VPU device firmware.
+ * @data_addr: The VPU address of the payload (JSM message)
+ * @data_size: The size of the payload.
+ * @channel: The channel used.
+ * @src_node: The Node ID of the sender.
+ * @dst_node: The Node ID of the intended receiver.
+ * @status: IPC buffer usage status
+ */
+struct ivpu_ipc_hdr {
+	u32 data_addr;
+	u32 data_size;
+	u16 channel;
+	u8 src_node;
+	u8 dst_node;
+	u8 status;
+} __packed __aligned(IVPU_IPC_ALIGNMENT);
+
+struct ivpu_ipc_consumer {
+	struct list_head link;
+	u32 channel;
+	u32 tx_vpu_addr;
+	u32 request_id;
+
+	spinlock_t rx_msg_lock; /* Protects rx_msg_list */
+	struct list_head rx_msg_list;
+	wait_queue_head_t rx_msg_wq;
+};
+
+struct ivpu_ipc_info {
+	struct gen_pool *mm_tx;
+	struct ivpu_bo *mem_tx;
+	struct ivpu_bo *mem_rx;
+
+	atomic_t rx_msg_count;
+
+	spinlock_t cons_list_lock; /* Protects cons_list */
+	struct list_head cons_list;
+
+	atomic_t request_id;
+	struct mutex lock; /* Lock on status */
+	bool on;
+};
+
+int ivpu_ipc_init(struct ivpu_device *vdev);
+void ivpu_ipc_fini(struct ivpu_device *vdev);
+
+void ivpu_ipc_enable(struct ivpu_device *vdev);
+void ivpu_ipc_disable(struct ivpu_device *vdev);
+void ivpu_ipc_reset(struct ivpu_device *vdev);
+
+int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+
+void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+			   u32 channel);
+void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
+
+int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
+		     unsigned long timeout_ms);
+
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+			  enum vpu_ipc_msg_type expected_resp_type,
+			  struct vpu_jsm_msg *resp, u32 channel,
+			  unsigned long timeout_ms);
+
+#endif /* __IVPU_IPC_H__ */
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
new file mode 100644
index 000000000000..94068aedf97c
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <drm/drm_file.h>
+
+#include <linux/bitfield.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <uapi/drm/ivpu_accel.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_ipc.h"
+#include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
+#include "ivpu_pm.h"
+
+#define CMD_BUF_IDX	     0
+#define JOB_ID_JOB_MASK	     GENMASK(7, 0)
+#define JOB_ID_CONTEXT_MASK  GENMASK(31, 8)
+#define JOB_MAX_BUFFER_COUNT 65535
+
+static unsigned int ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
+static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
+{
+	ivpu_hw_reg_db_set(vdev, cmdq->db_id);
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct vpu_job_queue_header *jobq_header;
+	struct ivpu_cmdq *cmdq;
+
+	cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
+	if (!cmdq)
+		return NULL;
+
+	cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC);
+	if (!cmdq->mem)
+		goto cmdq_free;
+
+	cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev);
+	cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) /
+				  sizeof(struct vpu_job_queue_entry));
+
+	cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr;
+	jobq_header = &cmdq->jobq->header;
+	jobq_header->engine_idx = engine;
+	jobq_header->head = 0;
+	jobq_header->tail = 0;
+	wmb(); /* Flush WC buffer for jobq->header */
+
+	return cmdq;
+
+cmdq_free:
+	kfree(cmdq);
+	return NULL;
+}
+
+static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+	if (!cmdq)
+		return;
+
+	ivpu_bo_free_internal(cmdq->mem);
+	kfree(cmdq);
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+	int ret;
+
+	lockdep_assert_held(&file_priv->lock);
+
+	if (!cmdq) {
+		cmdq = ivpu_cmdq_alloc(file_priv, engine);
+		if (!cmdq)
+			return NULL;
+		file_priv->cmdq[engine] = cmdq;
+	}
+
+	if (cmdq->db_registered)
+		return cmdq;
+
+	ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
+				   cmdq->mem->vpu_addr, cmdq->mem->base.size);
+	if (ret)
+		return NULL;
+
+	cmdq->db_registered = true;
+
+	return cmdq;
+}
+
+static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+
+	lockdep_assert_held(&file_priv->lock);
+
+	if (cmdq) {
+		file_priv->cmdq[engine] = NULL;
+		if (cmdq->db_registered)
+			ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id);
+
+		ivpu_cmdq_free(file_priv, cmdq);
+	}
+}
+
+void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv)
+{
+	int i;
+
+	mutex_lock(&file_priv->lock);
+
+	for (i = 0; i < IVPU_NUM_ENGINES; i++)
+		ivpu_cmdq_release_locked(file_priv, i);
+
+	mutex_unlock(&file_priv->lock);
+}
+
+/*
+ * Mark the doorbell as unregistered and reset job queue pointers.
+ * This function needs to be called when the VPU hardware is restarted
+ * and FW loses job queue state. The next time the job queue is used it
+ * will be registered again.
+ */
+static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+
+	lockdep_assert_held(&file_priv->lock);
+
+	if (cmdq) {
+		cmdq->db_registered = false;
+		cmdq->jobq->header.head = 0;
+		cmdq->jobq->header.tail = 0;
+		wmb(); /* Flush WC buffer for jobq header */
+	}
+}
+
+static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv)
+{
+	int i;
+
+	mutex_lock(&file_priv->lock);
+
+	for (i = 0; i < IVPU_NUM_ENGINES; i++)
+		ivpu_cmdq_reset_locked(file_priv, i);
+
+	mutex_unlock(&file_priv->lock);
+}
+
+void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
+{
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
+		if (!file_priv)
+			continue;
+
+		ivpu_cmdq_reset_all(file_priv);
+
+		ivpu_file_priv_put(&file_priv);
+	}
+}
+
+static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
+{
+	struct ivpu_device *vdev = job->vdev;
+	struct vpu_job_queue_header *header = &cmdq->jobq->header;
+	struct vpu_job_queue_entry *entry;
+	u32 tail = READ_ONCE(header->tail);
+	u32 next_entry = (tail + 1) % cmdq->entry_count;
+
+	/* Check if there is space left in job queue */
+	if (next_entry == header->head) {
+		ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n",
+			 job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail);
+		return -EBUSY;
+	}
+
+	entry = &cmdq->jobq->job[tail];
+	entry->batch_buf_addr = job->cmd_buf_vpu_addr;
+	entry->job_id = job->job_id;
+	entry->flags = 0;
+	wmb(); /* Ensure that tail is updated after filling entry */
+	header->tail = next_entry;
+	wmb(); /* Flush WC buffer for jobq header */
+
+	return 0;
+}
+
+struct ivpu_fence {
+	struct dma_fence base;
+	spinlock_t lock; /* protects base */
+	struct ivpu_device *vdev;
+};
+
+static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct ivpu_fence, base);
+}
+
+static const char *ivpu_fence_get_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct ivpu_fence *ivpu_fence = to_vpu_fence(fence);
+
+	return dev_name(ivpu_fence->vdev->drm.dev);
+}
+
+static const struct dma_fence_ops ivpu_fence_ops = {
+	.get_driver_name = ivpu_fence_get_driver_name,
+	.get_timeline_name = ivpu_fence_get_timeline_name,
+};
+
+static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
+{
+	struct ivpu_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence->vdev = vdev;
+	spin_lock_init(&fence->lock);
+	dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1);
+
+	return &fence->base;
+}
+
+static void job_get(struct ivpu_job *job, struct ivpu_job **link)
+{
+	struct ivpu_device *vdev = job->vdev;
+
+	kref_get(&job->ref);
+	*link = job;
+
+	ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
+}
+
+static void job_release(struct kref *ref)
+{
+	struct ivpu_job *job = container_of(ref, struct ivpu_job, ref);
+	struct ivpu_device *vdev = job->vdev;
+	u32 i;
+
+	for (i = 0; i < job->bo_count; i++)
+		if (job->bos[i])
+			drm_gem_object_put(&job->bos[i]->base);
+
+	dma_fence_put(job->done_fence);
+	ivpu_file_priv_put(&job->file_priv);
+
+	ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id);
+	kfree(job);
+
+	/* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */
+	ivpu_rpm_put(vdev);
+}
+
+static void job_put(struct ivpu_job *job)
+{
+	struct ivpu_device *vdev = job->vdev;
+
+	ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
+	kref_put(&job->ref, job_release);
+}
+
+static struct ivpu_job *
+ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
+{
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ivpu_job *job;
+	size_t buf_size;
+	int ret;
+
+	ret = ivpu_rpm_get(vdev); /* balanced by ivpu_rpm_put() in job_release() */
+	if (ret < 0)
+		return NULL;
+
+	buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); /* job + bos[] */
+	job = kzalloc(buf_size, GFP_KERNEL);
+	if (!job)
+		goto err_rpm_put;
+
+	kref_init(&job->ref);
+
+	job->vdev = vdev;
+	job->engine_idx = engine_idx;
+	job->bo_count = bo_count;
+	job->done_fence = ivpu_fence_create(vdev);
+	if (!job->done_fence) {
+		ivpu_warn_ratelimited(vdev, "Failed to create a fence\n");
+		goto err_free_job;
+	}
+
+	job->file_priv = ivpu_file_priv_get(file_priv);
+	ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d\n",
+		 file_priv->ctx.id, job->engine_idx);
+
+	return job;
+
+err_free_job:
+	kfree(job);
+err_rpm_put:
+	ivpu_rpm_put(vdev);
+	return NULL;
+}
+
+static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+{
+	struct ivpu_job *job;
+
+	job = xa_erase(&vdev->submitted_jobs_xa, job_id);
+	if (!job)
+		return -ENOENT;
+
+	if (job->file_priv->has_mmu_faults)
+		job_status = VPU_JSM_STATUS_ABORTED;
+
+	job->bos[CMD_BUF_IDX]->job_status = job_status;
+	dma_fence_signal(job->done_fence);
+
+	ivpu_dbg(vdev, JOB, "Job complete:  id %3u ctx %2d engine %d status 0x%x\n",
+		 job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+
+	job_put(job);
+	return 0;
+}
+
+static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
+{
+	struct vpu_ipc_msg_payload_job_done *payload;
+	struct vpu_jsm_msg *job_ret_msg = msg;
+	int ret;
+
+	payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
+
+	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+	if (ret)
+		ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
+}
+
+void ivpu_jobs_abort_all(struct ivpu_device *vdev)
+{
+	struct ivpu_job *job;
+	unsigned long id;
+
+	xa_for_each(&vdev->submitted_jobs_xa, id, job)
+		ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED);
+}
+
+static int ivpu_direct_job_submission(struct ivpu_job *job)
+{
+	struct ivpu_file_priv *file_priv = job->file_priv;
+	struct ivpu_device *vdev = job->vdev;
+	struct xa_limit job_id_range;
+	struct ivpu_cmdq *cmdq;
+	int ret;
+
+	mutex_lock(&file_priv->lock);
+
+	cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
+	if (!cmdq) {
+		ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n",
+			  file_priv->ctx.id, job->engine_idx);
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
+	job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
+
+	job_get(job, &job);
+	ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
+	if (ret) {
+		ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret);
+		goto err_job_put;
+	}
+
+	ret = ivpu_cmdq_push_job(cmdq, job);
+	if (ret)
+		goto err_xa_erase;
+
+	ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
+		 job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
+		 job->engine_idx, cmdq->jobq->header.tail);
+
+	if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
+		ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+		cmdq->jobq->header.head = cmdq->jobq->header.tail;
+		wmb(); /* Flush WC buffer for jobq header */
+	} else {
+		ivpu_cmdq_ring_db(vdev, cmdq);
+	}
+
+	mutex_unlock(&file_priv->lock);
+	return 0;
+
+err_xa_erase:
+	xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_job_put:
+	job_put(job);
+err_unlock:
+	mutex_unlock(&file_priv->lock);
+	return ret;
+}
+
+static int
+ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
+				u32 buf_count, u32 commands_offset)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ww_acquire_ctx acquire_ctx;
+	struct ivpu_bo *bo;
+	int ret;
+	u32 i;
+
+	for (i = 0; i < buf_count; i++) {
+		struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);
+
+		if (!obj)
+			return -ENOENT;
+
+		job->bos[i] = to_ivpu_bo(obj);
+
+		ret = ivpu_bo_pin(job->bos[i]);
+		if (ret)
+			return ret;
+	}
+
+	bo = job->bos[CMD_BUF_IDX];
+	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+		ivpu_warn(vdev, "Buffer is already in use\n");
+		return -EBUSY;
+	}
+
+	if (commands_offset >= bo->base.size) {
+		ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset);
+		return -EINVAL;
+	}
+
+	job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
+
+	ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
+					&acquire_ctx);
+	if (ret) {
+		ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < buf_count; i++) {
+		ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+		if (ret) {
+			ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+			goto unlock_reservations;
+		}
+	}
+
+	for (i = 0; i < buf_count; i++)
+		dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
+
+unlock_reservations:
+	drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);
+
+	wmb(); /* Flush write combining buffers */
+
+	return ret;
+}
+
+int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	int ret = 0;
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct drm_ivpu_submit *params = data;
+	struct ivpu_job *job;
+	u32 *buf_handles;
+
+	if (params->engine > DRM_IVPU_ENGINE_COPY)
+		return -EINVAL;
+
+	if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(params->commands_offset, 8))
+		return -EINVAL;
+
+	if (!file_priv->ctx.id)
+		return -EINVAL;
+
+	if (file_priv->has_mmu_faults)
+		return -EBADFD;
+
+	buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL);
+	if (!buf_handles)
+		return -ENOMEM;
+
+	ret = copy_from_user(buf_handles,
+			     (void __user *)params->buffers_ptr,
+			     params->buffer_count * sizeof(u32));
+	if (ret) {
+		ret = -EFAULT;
+		goto free_handles;
+	}
+
+	ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
+		 file_priv->ctx.id, params->buffer_count);
+
+	job = ivpu_create_job(file_priv, params->engine, params->buffer_count);
+	if (!job) {
+		ivpu_err(vdev, "Failed to create job\n");
+		ret = -ENOMEM;
+		goto free_handles;
+	}
+
+	ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
+					      params->commands_offset);
+	if (ret) {
+		ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret);
+		goto job_put;
+	}
+
+	ret = ivpu_direct_job_submission(job);
+	if (ret) {
+		dma_fence_signal(job->done_fence);
+		ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret);
+	}
+
+job_put:
+	job_put(job);
+free_handles:
+	kfree(buf_handles);
+
+	return ret;
+}
+
+static int ivpu_job_done_thread(void *arg)
+{
+	struct ivpu_device *vdev = (struct ivpu_device *)arg;
+	struct ivpu_ipc_consumer cons;
+	struct vpu_jsm_msg jsm_msg;
+	bool jobs_submitted;
+	unsigned int timeout;
+	int ret;
+
+	ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
+
+	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
+
+	while (!kthread_should_stop()) {
+		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); /* sampled before waiting */
+		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
+		if (!ret) {
+			ivpu_job_done_message(vdev, &jsm_msg);
+		} else if (ret == -ETIMEDOUT) {
+			if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
+				ivpu_err(vdev, "TDR detected, timeout %u ms\n", timeout);
+				ivpu_hw_diagnose_failure(vdev);
+				ivpu_pm_schedule_recovery(vdev);
+			}
+		}
+	}
+
+	ivpu_ipc_consumer_del(vdev, &cons);
+
+	ivpu_jobs_abort_all(vdev);
+
+	ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
+	return 0;
+}
+
+int ivpu_job_done_thread_init(struct ivpu_device *vdev)
+{
+	struct task_struct *thread;
+
+	thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
+	if (IS_ERR(thread)) {
+		ivpu_err(vdev, "Failed to start job completion thread\n");
+		return -EIO;
+	}
+
+	get_task_struct(thread);
+	wake_up_process(thread);
+
+	vdev->job_done_thread = thread;
+
+	return 0;
+}
+
+void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
+{
+	kthread_stop(vdev->job_done_thread);
+	put_task_struct(vdev->job_done_thread);
+}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
new file mode 100644
index 000000000000..aa1f0b9479b0
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_JOB_H__
+#define __IVPU_JOB_H__
+
+#include <linux/kref.h>
+#include <linux/idr.h>
+
+#include "ivpu_gem.h"
+
+struct ivpu_device;
+struct ivpu_file_priv;
+
+/**
+ * struct ivpu_cmdq - Object representing device queue used to send jobs.
+ * @jobq:	   Pointer to job queue memory shared with the device
+ * @mem:           Memory allocated for the job queue, shared with device
+ * @entry_count:   Number of job entries in the queue
+ * @db_id:	   Doorbell assigned to this job queue
+ * @db_registered: True if doorbell is registered in device
+ */
+struct ivpu_cmdq {
+	struct vpu_job_queue *jobq;
+	struct ivpu_bo *mem;
+	u32 entry_count;
+	u32 db_id;
+	bool db_registered;
+};
+
+/**
+ * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer.
+ * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW.
+ * This is a unit of execution, and is tracked by the job_id for
+ * any status reporting from VPU FW through IPC JOB RET/DONE message.
+ * @ref:		  Reference count of this job object
+ * @vdev:		  Device this job is associated with
+ * @file_priv:		  The client that submitted this job
+ * @done_fence:		  Fence attached to the job; the submit path signals it when
+ *			  submission to the HW fails
+ * @cmd_buf_vpu_addr:	  Presumably the VPU address of the job's command buffer
+ *			  (TODO confirm against the submit path)
+ * @job_id:		  Job ID for KMD tracking and job status reporting from VPU FW
+ * @engine_idx:		  Presumably the index of the engine the job targets
+ *			  (TODO confirm against the submit path)
+ * @bo_count:		  Presumably the number of entries in @bos (TODO confirm)
+ * @bos:		  Flexible array of buffer object pointers attached to the job
+ */
+struct ivpu_job {
+	struct kref ref;
+	struct ivpu_device *vdev;
+	struct ivpu_file_priv *file_priv;
+	struct dma_fence *done_fence;
+	u64 cmd_buf_vpu_addr;
+	u32 job_id;
+	u32 engine_idx;
+	size_t bo_count;
+	struct ivpu_bo *bos[];
+};
+
+int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
+void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
+
+int ivpu_job_done_thread_init(struct ivpu_device *vdev);
+void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+
+void ivpu_jobs_abort_all(struct ivpu_device *vdev);
+
+#endif /* __IVPU_JOB_H__ */
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
new file mode 100644
index 000000000000..831bfd2b2d39
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include "ivpu_drv.h"
+#include "ivpu_ipc.h"
+#include "ivpu_jsm_msg.h"
+
+/*
+ * Send VPU_JSM_MSG_REGISTER_DB describing doorbell @db_id, the job queue
+ * (@jobq_base, @jobq_size) and context @ctx_id, then wait for
+ * VPU_JSM_MSG_REGISTER_DB_DONE.
+ *
+ * Return: 0 on success, otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
+			 u64 jobq_base, u32 jobq_size)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_REGISTER_DB };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	req.payload.register_db.db_idx = db_id;
+	req.payload.register_db.jobq_base = jobq_base;
+	req.payload.register_db.jobq_size = jobq_size;
+	req.payload.register_db.host_ssid = ctx_id;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err) {
+		ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id);
+		return 0;
+	}
+
+	ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, err);
+	return err;
+}
+
+/*
+ * Send VPU_JSM_MSG_UNREGISTER_DB for doorbell @db_id and wait for
+ * VPU_JSM_MSG_UNREGISTER_DB_DONE. A failure is only logged as a warning.
+ *
+ * Return: 0 on success, otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_UNREGISTER_DB };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	req.payload.unregister_db.db_idx = db_id;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err) {
+		ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id);
+		return 0;
+	}
+
+	ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, err);
+	return err;
+}
+
+/*
+ * Query the heartbeat value of @engine with VPU_JSM_MSG_QUERY_ENGINE_HB and
+ * store the value from the response in @heartbeat.
+ *
+ * Return: 0 on success, -EINVAL when @engine is above VPU_ENGINE_COPY,
+ * otherwise the ivpu_ipc_send_receive() error code. @heartbeat is written
+ * only on success.
+ */
+int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	if (engine > VPU_ENGINE_COPY)
+		return -EINVAL;
+
+	req.payload.query_engine_hb.engine_idx = engine;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err) {
+		*heartbeat = resp.payload.query_engine_hb_done.heartbeat;
+		return 0;
+	}
+
+	ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, err);
+	return err;
+}
+
+/*
+ * Request a reset of @engine with VPU_JSM_MSG_ENGINE_RESET and wait for
+ * VPU_JSM_MSG_ENGINE_RESET_DONE.
+ *
+ * Return: 0 on success, -EINVAL when @engine is above VPU_ENGINE_COPY,
+ * otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_RESET };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	if (engine > VPU_ENGINE_COPY)
+		return -EINVAL;
+
+	req.payload.engine_reset.engine_idx = engine;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, err);
+	return err;
+}
+
+/*
+ * Request preemption of @engine with VPU_JSM_MSG_ENGINE_PREEMPT, passing
+ * @preempt_id in the request, and wait for VPU_JSM_MSG_ENGINE_PREEMPT_DONE.
+ *
+ * Return: 0 on success, -EINVAL when @engine is above VPU_ENGINE_COPY,
+ * otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_PREEMPT };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	if (engine > VPU_ENGINE_COPY)
+		return -EINVAL;
+
+	req.payload.engine_preempt.engine_idx = engine;
+	req.payload.engine_preempt.preempt_id = preempt_id;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, err);
+	return err;
+}
+
+/*
+ * Send a dynamic debug control command string to the VPU with
+ * VPU_JSM_MSG_DYNDBG_CONTROL and wait for VPU_JSM_MSG_DYNDBG_CONTROL_RSP.
+ *
+ * @command: NUL-terminated command; at most VPU_DYNDBG_CMD_MAX_LEN - 1
+ *	     characters are sent, longer commands are truncated
+ * @size:    not used by this function
+ *
+ * Return: 0 on success, otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DYNDBG_CONTROL };
+	struct vpu_jsm_msg resp;
+	int ret;
+
+	/*
+	 * strscpy() always NUL-terminates the destination. Its return value is
+	 * deliberately ignored: truncating an over-long command is accepted.
+	 */
+	strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
+
+	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (ret)
+		ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret);
+
+	return ret;
+}
+
+/*
+ * Query trace capabilities with VPU_JSM_MSG_TRACE_GET_CAPABILITY and copy the
+ * destination and HW component masks from the response into the output
+ * parameters. The outputs are written only on success.
+ *
+ * Return: 0 on success, otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask,
+				  u64 *trace_hw_component_mask)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_GET_CAPABILITY };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err) {
+		*trace_destination_mask = resp.payload.trace_capability.trace_destination_mask;
+		*trace_hw_component_mask = resp.payload.trace_capability.trace_hw_component_mask;
+		return 0;
+	}
+
+	ivpu_warn(vdev, "Failed to get trace capability: %d\n", err);
+	return err;
+}
+
+/*
+ * Send the trace level and the destination / HW component masks to the VPU
+ * with VPU_JSM_MSG_TRACE_SET_CONFIG and wait for
+ * VPU_JSM_MSG_TRACE_SET_CONFIG_RSP.
+ *
+ * Return: 0 on success, otherwise the ivpu_ipc_send_receive() error code.
+ */
+int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
+			      u64 trace_hw_component_mask)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_SET_CONFIG };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	req.payload.trace_config.trace_level = trace_level;
+	req.payload.trace_config.trace_destination_mask = trace_destination_mask;
+	req.payload.trace_config.trace_hw_component_mask = trace_hw_component_mask;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	if (!err)
+		return 0;
+
+	ivpu_warn(vdev, "Failed to set config: %d\n", err);
+	return err;
+}
+
+/*
+ * Send VPU_JSM_MSG_SSID_RELEASE for @host_ssid and wait for
+ * VPU_JSM_MSG_SSID_RELEASE_DONE. Nothing is logged on failure.
+ *
+ * Return: the ivpu_ipc_send_receive() result.
+ */
+int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE };
+	struct vpu_jsm_msg resp;
+	int err;
+
+	req.payload.ssid_release.host_ssid = host_ssid;
+
+	err = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
+				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+	return err;
+}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
new file mode 100644
index 000000000000..ab50d7b017c1
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_JSM_MSG_H__
+#define __IVPU_JSM_MSG_H__
+
+#include "vpu_jsm_api.h"
+
+int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
+			 u64 jobq_base, u32 jobq_size);
+int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id);
+int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat);
+int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine);
+int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id);
+int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size);
+int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask,
+				  u64 *trace_hw_component_mask);
+int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
+			      u64 trace_hw_component_mask);
+int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
+#endif
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
new file mode 100644
index 000000000000..694e978aba66
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/highmem.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_hw_mtl_reg.h"
+#include "ivpu_hw_reg_io.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+#include "ivpu_pm.h"
+
+#define IVPU_MMU_IDR0_REF		0x080f3e0f
+#define IVPU_MMU_IDR0_REF_SIMICS	0x080f3e1f
+#define IVPU_MMU_IDR1_REF		0x0e739d18
+#define IVPU_MMU_IDR3_REF		0x0000003c
+#define IVPU_MMU_IDR5_REF		0x00040070
+#define IVPU_MMU_IDR5_REF_SIMICS	0x00000075
+#define IVPU_MMU_IDR5_REF_FPGA		0x00800075
+
+#define IVPU_MMU_CDTAB_ENT_SIZE		64
+#define IVPU_MMU_CDTAB_ENT_COUNT_LOG2	8 /* 256 entries */
+#define IVPU_MMU_CDTAB_ENT_COUNT	((u32)1 << IVPU_MMU_CDTAB_ENT_COUNT_LOG2)
+
+#define IVPU_MMU_STREAM_ID0		0
+#define IVPU_MMU_STREAM_ID3		3
+
+#define IVPU_MMU_STRTAB_ENT_SIZE	64
+#define IVPU_MMU_STRTAB_ENT_COUNT	4
+#define IVPU_MMU_STRTAB_CFG_LOG2SIZE	2
+#define IVPU_MMU_STRTAB_CFG		IVPU_MMU_STRTAB_CFG_LOG2SIZE
+
+#define IVPU_MMU_Q_COUNT_LOG2		4 /* 16 entries */
+#define IVPU_MMU_Q_COUNT		((u32)1 << IVPU_MMU_Q_COUNT_LOG2)
+#define IVPU_MMU_Q_WRAP_BIT		(IVPU_MMU_Q_COUNT << 1)
+#define IVPU_MMU_Q_WRAP_MASK		(IVPU_MMU_Q_WRAP_BIT - 1)
+#define IVPU_MMU_Q_IDX_MASK		(IVPU_MMU_Q_COUNT - 1)
+#define IVPU_MMU_Q_IDX(val)		((val) & IVPU_MMU_Q_IDX_MASK)
+
+#define IVPU_MMU_CMDQ_CMD_SIZE		16
+#define IVPU_MMU_CMDQ_SIZE		(IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE)
+
+#define IVPU_MMU_EVTQ_CMD_SIZE		32
+#define IVPU_MMU_EVTQ_SIZE		(IVPU_MMU_Q_COUNT * IVPU_MMU_EVTQ_CMD_SIZE)
+
+#define IVPU_MMU_CMD_OPCODE		GENMASK(7, 0)
+
+/* Bit fields of the first 64-bit word of the CMD_SYNC command */
+#define IVPU_MMU_CMD_SYNC_0_CS		GENMASK(13, 12)
+#define IVPU_MMU_CMD_SYNC_0_MSH		GENMASK(23, 22)
+#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR	GENMASK(27, 24)
+#define IVPU_MMU_CMD_SYNC_0_MSI_DATA	GENMASK(63, 32)
+
+#define IVPU_MMU_CMD_CFGI_0_SSEC	BIT(10)
+#define IVPU_MMU_CMD_CFGI_0_SSV		BIT(11)
+#define IVPU_MMU_CMD_CFGI_0_SSID	GENMASK(31, 12)
+#define IVPU_MMU_CMD_CFGI_0_SID		GENMASK(63, 32)
+#define IVPU_MMU_CMD_CFGI_1_RANGE	GENMASK(4, 0)
+
+#define IVPU_MMU_CMD_TLBI_0_ASID	GENMASK(63, 48)
+#define IVPU_MMU_CMD_TLBI_0_VMID	GENMASK(47, 32)
+
+#define CMD_PREFETCH_CFG		0x1
+#define CMD_CFGI_STE			0x3
+#define CMD_CFGI_ALL			0x4
+#define CMD_CFGI_CD			0x5
+#define CMD_CFGI_CD_ALL			0x6
+#define CMD_TLBI_NH_ASID		0x11
+#define CMD_TLBI_EL2_ALL		0x20
+#define CMD_TLBI_NSNH_ALL		0x30
+#define CMD_SYNC			0x46
+
+#define IVPU_MMU_EVT_F_UUT		0x01
+#define IVPU_MMU_EVT_C_BAD_STREAMID	0x02
+#define IVPU_MMU_EVT_F_STE_FETCH	0x03
+#define IVPU_MMU_EVT_C_BAD_STE		0x04
+#define IVPU_MMU_EVT_F_BAD_ATS_TREQ	0x05
+#define IVPU_MMU_EVT_F_STREAM_DISABLED	0x06
+#define IVPU_MMU_EVT_F_TRANSL_FORBIDDEN	0x07
+#define IVPU_MMU_EVT_C_BAD_SUBSTREAMID	0x08
+#define IVPU_MMU_EVT_F_CD_FETCH		0x09
+#define IVPU_MMU_EVT_C_BAD_CD		0x0a
+#define IVPU_MMU_EVT_F_WALK_EABT	0x0b
+#define IVPU_MMU_EVT_F_TRANSLATION	0x10
+#define IVPU_MMU_EVT_F_ADDR_SIZE	0x11
+#define IVPU_MMU_EVT_F_ACCESS		0x12
+#define IVPU_MMU_EVT_F_PERMISSION	0x13
+#define IVPU_MMU_EVT_F_TLB_CONFLICT	0x20
+#define IVPU_MMU_EVT_F_CFG_CONFLICT	0x21
+#define IVPU_MMU_EVT_E_PAGE_REQUEST	0x24
+#define IVPU_MMU_EVT_F_VMS_FETCH	0x25
+
+#define IVPU_MMU_EVT_OP_MASK		GENMASK_ULL(7, 0)
+#define IVPU_MMU_EVT_SSID_MASK		GENMASK_ULL(31, 12)
+
+#define IVPU_MMU_Q_BASE_RWA		BIT(62)
+#define IVPU_MMU_Q_BASE_ADDR_MASK	GENMASK_ULL(51, 5)
+#define IVPU_MMU_STRTAB_BASE_RA		BIT(62)
+#define IVPU_MMU_STRTAB_BASE_ADDR_MASK	GENMASK_ULL(51, 6)
+
+#define IVPU_MMU_IRQ_EVTQ_EN		BIT(2)
+#define IVPU_MMU_IRQ_GERROR_EN		BIT(0)
+
+#define IVPU_MMU_CR0_ATSCHK		BIT(4)
+#define IVPU_MMU_CR0_CMDQEN		BIT(3)
+#define IVPU_MMU_CR0_EVTQEN		BIT(2)
+#define IVPU_MMU_CR0_PRIQEN		BIT(1)
+#define IVPU_MMU_CR0_SMMUEN		BIT(0)
+
+#define IVPU_MMU_CR1_TABLE_SH		GENMASK(11, 10)
+#define IVPU_MMU_CR1_TABLE_OC		GENMASK(9, 8)
+#define IVPU_MMU_CR1_TABLE_IC		GENMASK(7, 6)
+#define IVPU_MMU_CR1_QUEUE_SH		GENMASK(5, 4)
+#define IVPU_MMU_CR1_QUEUE_OC		GENMASK(3, 2)
+#define IVPU_MMU_CR1_QUEUE_IC		GENMASK(1, 0)
+#define IVPU_MMU_CACHE_NC		0
+#define IVPU_MMU_CACHE_WB		1
+#define IVPU_MMU_CACHE_WT		2
+#define IVPU_MMU_SH_NSH			0
+#define IVPU_MMU_SH_OSH			2
+#define IVPU_MMU_SH_ISH			3
+
+#define IVPU_MMU_CMDQ_OP		GENMASK_ULL(7, 0)
+
+#define IVPU_MMU_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
+#define IVPU_MMU_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
+#define IVPU_MMU_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
+#define IVPU_MMU_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
+#define IVPU_MMU_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
+#define IVPU_MMU_CD_0_TCR_EPD0		BIT_ULL(14)
+#define IVPU_MMU_CD_0_TCR_EPD1		BIT_ULL(30)
+#define IVPU_MMU_CD_0_ENDI		BIT(15)
+#define IVPU_MMU_CD_0_V			BIT(31)
+#define IVPU_MMU_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
+#define IVPU_MMU_CD_0_TCR_TBI0		BIT_ULL(38)
+#define IVPU_MMU_CD_0_AA64		BIT(41)
+#define IVPU_MMU_CD_0_S			BIT(44)
+#define IVPU_MMU_CD_0_R			BIT(45)
+#define IVPU_MMU_CD_0_A			BIT(46)
+#define IVPU_MMU_CD_0_ASET		BIT(47)
+#define IVPU_MMU_CD_0_ASID		GENMASK_ULL(63, 48)
+
+#define IVPU_MMU_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
+
+#define IVPU_MMU_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
+#define IVPU_MMU_STE_0_S1FMT		GENMASK_ULL(5, 4)
+#define IVPU_MMU_STE_0_S1FMT_LINEAR	0
+#define IVPU_MMU_STE_DWORDS		8
+#define IVPU_MMU_STE_0_CFG_S1_TRANS	5
+#define IVPU_MMU_STE_0_CFG		GENMASK_ULL(3, 1)
+#define IVPU_MMU_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
+#define IVPU_MMU_STE_0_V			BIT(0)
+
+#define IVPU_MMU_STE_1_STRW_NSEL1	0ul
+#define IVPU_MMU_STE_1_CONT		GENMASK_ULL(16, 13)
+#define IVPU_MMU_STE_1_STRW		GENMASK_ULL(31, 30)
+#define IVPU_MMU_STE_1_PRIVCFG		GENMASK_ULL(49, 48)
+#define IVPU_MMU_STE_1_PRIVCFG_UNPRIV	2ul
+#define IVPU_MMU_STE_1_INSTCFG		GENMASK_ULL(51, 50)
+#define IVPU_MMU_STE_1_INSTCFG_DATA	2ul
+#define IVPU_MMU_STE_1_MEV		BIT(19)
+#define IVPU_MMU_STE_1_S1STALLD		BIT(27)
+#define IVPU_MMU_STE_1_S1C_CACHE_NC	0ul
+#define IVPU_MMU_STE_1_S1C_CACHE_WBRA	1ul
+#define IVPU_MMU_STE_1_S1C_CACHE_WT	2ul
+#define IVPU_MMU_STE_1_S1C_CACHE_WB	3ul
+#define IVPU_MMU_STE_1_S1CIR		GENMASK_ULL(3, 2)
+#define IVPU_MMU_STE_1_S1COR		GENMASK_ULL(5, 4)
+#define IVPU_MMU_STE_1_S1CSH		GENMASK_ULL(7, 6)
+#define IVPU_MMU_STE_1_S1DSS		GENMASK_ULL(1, 0)
+#define IVPU_MMU_STE_1_S1DSS_TERMINATE	0x0
+
+#define IVPU_MMU_REG_TIMEOUT_US		(10 * USEC_PER_MSEC)
+#define IVPU_MMU_QUEUE_TIMEOUT_US	(100 * USEC_PER_MSEC)
+
+#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ)) | \
+				  (REG_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT)) | \
+				  (REG_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT)) | \
+				  (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \
+				  (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \
+				  (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \
+				  (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT)))
+
+/*
+ * Translate an MMU event queue opcode (IVPU_MMU_EVT_*) into a human readable
+ * description for logging.
+ *
+ * @cmd: event opcode taken from the event record
+ *
+ * Return: pointer to a constant string; "Unknown event" for opcodes that are
+ * not listed.
+ */
+static const char *ivpu_mmu_event_to_str(u32 cmd)
+{
+	switch (cmd) {
+	case IVPU_MMU_EVT_F_UUT:
+		return "Unsupported Upstream Transaction";
+	case IVPU_MMU_EVT_C_BAD_STREAMID:
+		return "Transaction StreamID out of range";
+	case IVPU_MMU_EVT_F_STE_FETCH:
+		return "Fetch of STE caused external abort";
+	case IVPU_MMU_EVT_C_BAD_STE:
+		return "Used STE invalid";
+	case IVPU_MMU_EVT_F_BAD_ATS_TREQ:
+		return "Address Request disallowed for a StreamID";
+	case IVPU_MMU_EVT_F_STREAM_DISABLED:
+		return "Transaction marks non-substream disabled";
+	case IVPU_MMU_EVT_F_TRANSL_FORBIDDEN:
+		return "MMU bypass is disallowed for this StreamID";
+	case IVPU_MMU_EVT_C_BAD_SUBSTREAMID:
+		return "Invalid SubstreamID";
+	case IVPU_MMU_EVT_F_CD_FETCH:
+		return "Fetch of CD caused external abort";
+	case IVPU_MMU_EVT_C_BAD_CD:
+		return "Fetched CD invalid";
+	case IVPU_MMU_EVT_F_WALK_EABT:
+		return "An external abort occurred fetching a TLB";
+	case IVPU_MMU_EVT_F_TRANSLATION:
+		return "Translation fault";
+	case IVPU_MMU_EVT_F_ADDR_SIZE:
+		return "Output address caused address size fault";
+	case IVPU_MMU_EVT_F_ACCESS:
+		return "Access flag fault";
+	case IVPU_MMU_EVT_F_PERMISSION:
+		return "Permission fault occurred on page access";
+	case IVPU_MMU_EVT_F_TLB_CONFLICT:
+		return "A TLB conflict";
+	case IVPU_MMU_EVT_F_CFG_CONFLICT:
+		return "A configuration cache conflict";
+	case IVPU_MMU_EVT_E_PAGE_REQUEST:
+		return "Page request hint from a client device";
+	case IVPU_MMU_EVT_F_VMS_FETCH:
+		return "Fetch of VMS caused external abort";
+	default:
+		return "Unknown event";
+	}
+}
+
+/*
+ * Compare the MMU IDR0, IDR1, IDR3 and IDR5 registers with the reference
+ * values expected for the current platform (silicon, Simics or FPGA).
+ * A mismatch is only reported through a debug message.
+ */
+static void ivpu_mmu_config_check(struct ivpu_device *vdev)
+{
+	u32 expected;
+	u32 idr;
+
+	expected = ivpu_is_simics(vdev) ? IVPU_MMU_IDR0_REF_SIMICS : IVPU_MMU_IDR0_REF;
+	idr = REGV_RD32(MTL_VPU_HOST_MMU_IDR0);
+	if (idr != expected)
+		ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", idr, expected);
+
+	idr = REGV_RD32(MTL_VPU_HOST_MMU_IDR1);
+	if (idr != IVPU_MMU_IDR1_REF)
+		ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", idr, IVPU_MMU_IDR1_REF);
+
+	idr = REGV_RD32(MTL_VPU_HOST_MMU_IDR3);
+	if (idr != IVPU_MMU_IDR3_REF)
+		ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", idr, IVPU_MMU_IDR3_REF);
+
+	/* IDR5 has a separate reference value for each platform type */
+	if (ivpu_is_simics(vdev))
+		expected = IVPU_MMU_IDR5_REF_SIMICS;
+	else
+		expected = ivpu_is_fpga(vdev) ? IVPU_MMU_IDR5_REF_FPGA : IVPU_MMU_IDR5_REF;
+
+	idr = REGV_RD32(MTL_VPU_HOST_MMU_IDR5);
+	if (idr != expected)
+		ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", idr, expected);
+}
+
+/*
+ * Allocate device-managed coherent memory for the context descriptor table
+ * (IVPU_MMU_CDTAB_ENT_COUNT entries of IVPU_MMU_CDTAB_ENT_SIZE bytes).
+ *
+ * Return: 0 on success, -ENOMEM when the allocation fails.
+ */
+static int ivpu_mmu_cdtab_alloc(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_cdtab *table = &vdev->mmu->cdtab;
+	size_t table_size = IVPU_MMU_CDTAB_ENT_COUNT * IVPU_MMU_CDTAB_ENT_SIZE;
+
+	table->base = dmam_alloc_coherent(vdev->drm.dev, table_size, &table->dma, GFP_KERNEL);
+	if (!table->base)
+		return -ENOMEM;
+
+	ivpu_dbg(vdev, MMU, "CDTAB alloc: dma=%pad size=%zu\n", &table->dma, table_size);
+
+	return 0;
+}
+
+/*
+ * Allocate device-managed coherent memory for the stream table and
+ * precompute the values later written to the STRTAB_BASE and
+ * STRTAB_BASE_CFG registers.
+ *
+ * Return: 0 on success, -ENOMEM when the allocation fails.
+ */
+static int ivpu_mmu_strtab_alloc(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_strtab *table = &vdev->mmu->strtab;
+	size_t table_size = IVPU_MMU_STRTAB_ENT_COUNT * IVPU_MMU_STRTAB_ENT_SIZE;
+
+	table->base = dmam_alloc_coherent(vdev->drm.dev, table_size, &table->dma, GFP_KERNEL);
+	if (!table->base)
+		return -ENOMEM;
+
+	table->base_cfg = IVPU_MMU_STRTAB_CFG;
+	/* Register value: RA flag combined with the masked table address */
+	table->dma_q = IVPU_MMU_STRTAB_BASE_RA |
+		       (table->dma & IVPU_MMU_STRTAB_BASE_ADDR_MASK);
+
+	ivpu_dbg(vdev, MMU, "STRTAB alloc: dma=%pad dma_q=%pad size=%zu\n",
+		 &table->dma, &table->dma_q, table_size);
+
+	return 0;
+}
+
+/*
+ * Allocate device-managed coherent memory for the MMU command queue and
+ * precompute the value later written to the CMDQ_BASE register.
+ *
+ * Return: 0 on success, -ENOMEM when the allocation fails.
+ */
+static int ivpu_mmu_cmdq_alloc(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+
+	cmdq->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_CMDQ_SIZE, &cmdq->dma,
+					 GFP_KERNEL);
+	if (!cmdq->base)
+		return -ENOMEM;
+
+	/* Register value: RWA flag, masked queue address and log2 of the entry count */
+	cmdq->dma_q = IVPU_MMU_Q_BASE_RWA |
+		      (cmdq->dma & IVPU_MMU_Q_BASE_ADDR_MASK) |
+		      IVPU_MMU_Q_COUNT_LOG2;
+
+	ivpu_dbg(vdev, MMU, "CMDQ alloc: dma=%pad dma_q=%pad size=%u\n",
+		 &cmdq->dma, &cmdq->dma_q, IVPU_MMU_CMDQ_SIZE);
+
+	return 0;
+}
+
+/*
+ * Allocate device-managed coherent memory for the MMU event queue and
+ * precompute the value later written to the EVTQ_BASE register.
+ *
+ * Return: 0 on success, -ENOMEM when the allocation fails.
+ */
+static int ivpu_mmu_evtq_alloc(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_queue *evtq = &vdev->mmu->evtq;
+
+	evtq->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_EVTQ_SIZE, &evtq->dma,
+					 GFP_KERNEL);
+	if (!evtq->base)
+		return -ENOMEM;
+
+	/* Register value: RWA flag, masked queue address and log2 of the entry count */
+	evtq->dma_q = IVPU_MMU_Q_BASE_RWA |
+		      (evtq->dma & IVPU_MMU_Q_BASE_ADDR_MASK) |
+		      IVPU_MMU_Q_COUNT_LOG2;
+
+	ivpu_dbg(vdev, MMU, "EVTQ alloc: dma=%pad dma_q=%pad size=%u\n",
+		 &evtq->dma, &evtq->dma_q, IVPU_MMU_EVTQ_SIZE);
+
+	return 0;
+}
+
+/*
+ * Allocate all MMU data structures in order: context descriptor table,
+ * stream table, command queue and event queue. Stops at the first failure.
+ *
+ * Return: 0 on success, otherwise the error of the allocation that failed.
+ */
+static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev)
+{
+	int err;
+
+	err = ivpu_mmu_cdtab_alloc(vdev);
+	if (err) {
+		ivpu_err(vdev, "Failed to allocate cdtab: %d\n", err);
+		return err;
+	}
+
+	err = ivpu_mmu_strtab_alloc(vdev);
+	if (err) {
+		ivpu_err(vdev, "Failed to allocate strtab: %d\n", err);
+		return err;
+	}
+
+	err = ivpu_mmu_cmdq_alloc(vdev);
+	if (err) {
+		ivpu_err(vdev, "Failed to allocate cmdq: %d\n", err);
+		return err;
+	}
+
+	err = ivpu_mmu_evtq_alloc(vdev);
+	if (err) {
+		ivpu_err(vdev, "Failed to allocate evtq: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Write @val to MMU register @reg and poll the register located 4 bytes
+ * after it until it reads back @val or IVPU_MMU_REG_TIMEOUT_US elapses.
+ *
+ * Return: 0 on success, otherwise the REGV_POLL() error (also logged).
+ */
+static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val)
+{
+	u32 reg_ack = reg + 4; /* ACK register is 4B after base register */
+	u32 val_ack;
+	int ret;
+
+	REGV_WR32(reg, val);
+
+	/* val_ack receives each value read from the ACK register while polling */
+	ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US);
+	if (ret)
+		ivpu_err(vdev, "Failed to write register 0x%x\n", reg);
+
+	return ret;
+}
+
+/*
+ * Program MMU interrupt control: first write 0 to IRQ_CTRL, then enable the
+ * event queue and global error interrupts. Each write waits for its ACK.
+ *
+ * Return: 0 on success, otherwise the ivpu_mmu_reg_write() error code.
+ */
+static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev)
+{
+	int err;
+
+	err = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, 0);
+	if (!err)
+		err = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL,
+					 IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN);
+
+	return err;
+}
+
+/*
+ * Poll the CMDQ_CONS register into cmdq->cons until it equals cmdq->prod or
+ * IVPU_MMU_QUEUE_TIMEOUT_US elapses.
+ *
+ * Return: the REGV_POLL() result (0 when the consumer index caught up).
+ */
+static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+
+	return REGV_POLL(MTL_VPU_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons),
+			 IVPU_MMU_QUEUE_TIMEOUT_US);
+}
+
+/*
+ * Place one 16-byte command (@data0, @data1) at the software producer index
+ * of the MMU command queue and advance that index. The CMDQ_PROD register is
+ * not written here; ivpu_mmu_cmdq_sync() does that.
+ *
+ * @name: command name, used only in log messages
+ *
+ * Return: 0 on success, -EBUSY when the queue has no free slot.
+ */
+static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
+{
+	struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
+	u64 *queue_buffer = q->base;
+	/* Index in u64 units: each command occupies IVPU_MMU_CMDQ_CMD_SIZE / 8 words */
+	int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
+
+	if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) {
+		ivpu_err(vdev, "Failed to write MMU CMD %s\n", name);
+		return -EBUSY;
+	}
+
+	queue_buffer[idx] = data0;
+	queue_buffer[idx + 1] = data1;
+	/* The producer value keeps the wrap bit above the index bits */
+	q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
+
+	ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1);
+
+	return 0;
+}
+
+/*
+ * Queue a CMD_SYNC command, flush the command queue memory from the CPU
+ * cache, publish the producer index to the CMDQ_PROD register and wait
+ * until the consumer index reaches it.
+ *
+ * Return: 0 on success, the ivpu_mmu_cmdq_cmd_write() error when the queue
+ * is full, or the poll error when the wait times out.
+ */
+static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
+	u64 val;
+	int ret;
+
+	val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) |
+	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) |
+	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) |
+	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf);
+
+	ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0);
+	if (ret)
+		return ret;
+
+	/* Flush queued commands to memory before the new producer index is written */
+	clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE);
+	REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, q->prod);
+
+	ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
+	if (ret)
+		ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Queue a CMD_CFGI_ALL command with the range field set to 0x1f.
+ *
+ * Return: the ivpu_mmu_cmdq_cmd_write() result.
+ */
+static int ivpu_mmu_cmdq_write_cfgi_all(struct ivpu_device *vdev)
+{
+	u64 opcode_word = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_CFGI_ALL);
+	u64 range_word = FIELD_PREP(IVPU_MMU_CMD_CFGI_1_RANGE, 0x1f);
+
+	return ivpu_mmu_cmdq_cmd_write(vdev, "CFGI_ALL", opcode_word, range_word);
+}
+
+/*
+ * Queue a CMD_TLBI_NH_ASID command with @ssid placed in the ASID field.
+ *
+ * Return: the ivpu_mmu_cmdq_cmd_write() result.
+ */
+static int ivpu_mmu_cmdq_write_tlbi_nh_asid(struct ivpu_device *vdev, u16 ssid)
+{
+	u64 cmd;
+
+	cmd = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NH_ASID);
+	cmd |= FIELD_PREP(IVPU_MMU_CMD_TLBI_0_ASID, ssid);
+
+	return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NH_ASID", cmd, 0);
+}
+
+/*
+ * Queue a CMD_TLBI_NSNH_ALL command.
+ *
+ * Return: the ivpu_mmu_cmdq_cmd_write() result.
+ */
+static int ivpu_mmu_cmdq_write_tlbi_nsnh_all(struct ivpu_device *vdev)
+{
+	u64 cmd = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NSNH_ALL);
+
+	return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NSNH_ALL", cmd, 0);
+}
+
+/*
+ * Bring the MMU from a disabled state to fully enabled.
+ *
+ * Clears both queues, writes 0 to CR0, programs CR1 and the stream table and
+ * command queue base registers, then sets the CR0 bits one step at a time:
+ * command queue, event queue, ATSCHK and finally SMMUEN. Between enabling
+ * the command queue and the event queue, CFGI_ALL, TLBI_NSNH_ALL and SYNC
+ * commands are issued. Interrupts are configured before SMMUEN is set.
+ *
+ * Return: 0 on success, otherwise the error of the first step that failed.
+ */
+static int ivpu_mmu_reset(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_info *mmu = vdev->mmu;
+	u32 val;
+	int ret;
+
+	/* Zero the queue memory and restart the software indices from 0 */
+	memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE);
+	clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE);
+	mmu->cmdq.prod = 0;
+	mmu->cmdq.cons = 0;
+
+	memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE);
+	clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE);
+	mmu->evtq.prod = 0;
+	mmu->evtq.cons = 0;
+
+	/* Clear every CR0 enable bit before reprogramming */
+	ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, 0);
+	if (ret)
+		return ret;
+
+	/* CR1: ISH shareability and WB cacheability for table and queue accesses */
+	val = FIELD_PREP(IVPU_MMU_CR1_TABLE_SH, IVPU_MMU_SH_ISH) |
+	      FIELD_PREP(IVPU_MMU_CR1_TABLE_OC, IVPU_MMU_CACHE_WB) |
+	      FIELD_PREP(IVPU_MMU_CR1_TABLE_IC, IVPU_MMU_CACHE_WB) |
+	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) |
+	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) |
+	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB);
+	REGV_WR32(MTL_VPU_HOST_MMU_CR1, val);
+
+	REGV_WR64(MTL_VPU_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q);
+	REGV_WR32(MTL_VPU_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg);
+
+	REGV_WR64(MTL_VPU_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q);
+	REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, 0);
+	REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_CONS, 0);
+
+	/* From here on val accumulates the CR0 bits enabled so far */
+	val = IVPU_MMU_CR0_CMDQEN;
+	ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
+	if (ret)
+		return ret;
+
+	ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
+	if (ret)
+		return ret;
+
+	ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev);
+	if (ret)
+		return ret;
+
+	ret = ivpu_mmu_cmdq_sync(vdev);
+	if (ret)
+		return ret;
+
+	REGV_WR64(MTL_VPU_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q);
+	REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC, 0);
+	REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, 0);
+
+	val |= IVPU_MMU_CR0_EVTQEN;
+	ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
+	if (ret)
+		return ret;
+
+	val |= IVPU_MMU_CR0_ATSCHK;
+	ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
+	if (ret)
+		return ret;
+
+	ret = ivpu_mmu_irqs_setup(vdev);
+	if (ret)
+		return ret;
+
+	val |= IVPU_MMU_CR0_SMMUEN;
+	return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
+}
+
+/*
+ * Fill the stream table entry of stream @sid so that it points at the
+ * context descriptor table, then flush the entry from the CPU cache.
+ * Only the first two 64-bit words of the entry are written.
+ */
+static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid)
+{
+	struct ivpu_mmu_info *mmu = vdev->mmu;
+	struct ivpu_mmu_strtab *strtab = &mmu->strtab;
+	struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
+	u64 *entry = strtab->base + (sid * IVPU_MMU_STRTAB_ENT_SIZE);
+	u64 str[2];
+
+	str[0] = FIELD_PREP(IVPU_MMU_STE_0_CFG, IVPU_MMU_STE_0_CFG_S1_TRANS) |
+		 FIELD_PREP(IVPU_MMU_STE_0_S1CDMAX, IVPU_MMU_CDTAB_ENT_COUNT_LOG2) |
+		 FIELD_PREP(IVPU_MMU_STE_0_S1FMT, IVPU_MMU_STE_0_S1FMT_LINEAR) |
+		 IVPU_MMU_STE_0_V |
+		 (cdtab->dma & IVPU_MMU_STE_0_S1CTXPTR_MASK);
+
+	str[1] = FIELD_PREP(IVPU_MMU_STE_1_S1DSS, IVPU_MMU_STE_1_S1DSS_TERMINATE) |
+		 FIELD_PREP(IVPU_MMU_STE_1_S1CIR, IVPU_MMU_STE_1_S1C_CACHE_NC) |
+		 FIELD_PREP(IVPU_MMU_STE_1_S1COR, IVPU_MMU_STE_1_S1C_CACHE_NC) |
+		 FIELD_PREP(IVPU_MMU_STE_1_S1CSH, IVPU_MMU_SH_NSH) |
+		 FIELD_PREP(IVPU_MMU_STE_1_PRIVCFG, IVPU_MMU_STE_1_PRIVCFG_UNPRIV) |
+		 FIELD_PREP(IVPU_MMU_STE_1_INSTCFG, IVPU_MMU_STE_1_INSTCFG_DATA) |
+		 FIELD_PREP(IVPU_MMU_STE_1_STRW, IVPU_MMU_STE_1_STRW_NSEL1) |
+		 FIELD_PREP(IVPU_MMU_STE_1_CONT, IVPU_MMU_STRTAB_CFG_LOG2SIZE) |
+		 IVPU_MMU_STE_1_MEV |
+		 IVPU_MMU_STE_1_S1STALLD;
+
+	/* Word 0 carries the valid bit (IVPU_MMU_STE_0_V), so it is stored last */
+	WRITE_ONCE(entry[1], str[1]);
+	WRITE_ONCE(entry[0], str[0]);
+
+	clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE);
+
+	/* NOTE(review): the message labels the stream ID as "SSID" - confirm intent */
+	ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]);
+}
+
+/*
+ * Link the context descriptor table into the stream table entries of
+ * stream IDs 0 and 3.
+ *
+ * Return: always 0
+ */
+static int ivpu_mmu_strtab_init(struct ivpu_device *vdev)
+{
+	static const u32 stream_ids[] = { IVPU_MMU_STREAM_ID0, IVPU_MMU_STREAM_ID3 };
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(stream_ids); i++)
+		ivpu_mmu_strtab_link_cd(vdev, stream_ids[i]);
+
+	return 0;
+}
+
+/*
+ * Issue a TLBI_NH_ASID command for @ssid followed by SYNC, under mmu->lock.
+ * Nothing is sent while the MMU is marked off.
+ *
+ * Return: 0 on success or when the MMU is off, the
+ * mutex_lock_interruptible() error when interrupted, otherwise the command
+ * queue error.
+ */
+int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid)
+{
+	struct ivpu_mmu_info *mmu = vdev->mmu;
+	int err;
+
+	err = mutex_lock_interruptible(&mmu->lock);
+	if (err)
+		return err;
+
+	if (mmu->on) {
+		err = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid);
+		if (!err)
+			err = ivpu_mmu_cmdq_sync(vdev);
+	}
+
+	mutex_unlock(&mmu->lock);
+	return err;
+}
+
+/*
+ * Write or clear the context descriptor of @ssid in the CD table and, when
+ * the MMU is on, issue CFGI_ALL followed by SYNC.
+ *
+ * @ssid:   index of the descriptor; must be below IVPU_MMU_CDTAB_ENT_COUNT
+ * @cd_dma: DMA address of the page table root, or 0 to clear the entry
+ *
+ * Return: 0 on success or when the MMU is off, -EINVAL for an out-of-range
+ * @ssid, the mutex_lock_interruptible() error when interrupted, otherwise
+ * the command queue error.
+ */
+static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
+{
+	struct ivpu_mmu_info *mmu = vdev->mmu;
+	struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
+	u64 *entry;
+	u64 cd[4];
+	int ret;
+
+	/* The table holds IVPU_MMU_CDTAB_ENT_COUNT entries, indexed from 0 */
+	if (ssid >= IVPU_MMU_CDTAB_ENT_COUNT)
+		return -EINVAL;
+
+	entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
+
+	if (cd_dma != 0) {
+		cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) |
+			FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
+			FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
+			FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
+			FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
+			FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) |
+			FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
+			IVPU_MMU_CD_0_TCR_EPD1 |
+			IVPU_MMU_CD_0_AA64 |
+			IVPU_MMU_CD_0_R |
+			IVPU_MMU_CD_0_ASET |
+			IVPU_MMU_CD_0_V;
+		cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
+		cd[2] = 0;
+		cd[3] = 0x0000000000007444;
+
+		/* For global context generate memory fault on VPU */
+		if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
+			cd[0] |= IVPU_MMU_CD_0_A;
+	} else {
+		memset(cd, 0, sizeof(cd));
+	}
+
+	/* Word 0 carries the valid bit (IVPU_MMU_CD_0_V), so it is stored last */
+	WRITE_ONCE(entry[1], cd[1]);
+	WRITE_ONCE(entry[2], cd[2]);
+	WRITE_ONCE(entry[3], cd[3]);
+	WRITE_ONCE(entry[0], cd[0]);
+
+	clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
+
+	ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
+		 cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
+
+	ret = mutex_lock_interruptible(&mmu->lock);
+	if (ret)
+		return ret;
+
+	/* While the MMU is off only the table in memory is updated */
+	if (!mmu->on) {
+		ret = 0;
+		goto unlock;
+	}
+
+	ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
+	if (ret)
+		goto unlock;
+
+	ret = ivpu_mmu_cmdq_sync(vdev);
+unlock:
+	mutex_unlock(&mmu->lock);
+	return ret;
+}
+
+/*
+ * Install the global context page table (vdev->gctx) as context descriptor 0.
+ *
+ * Return: 0 on success, otherwise the ivpu_mmu_cd_add() error (also logged).
+ */
+static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev)
+{
+	int err = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma);
+
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed to add global CD entry: %d\n", err);
+	return err;
+}
+
+/*
+ * Install (or, with @cd_dma == 0, clear) the context descriptor of a user
+ * context. SSID 0 is rejected here.
+ *
+ * Return: 0 on success, -EINVAL for SSID 0, otherwise the ivpu_mmu_cd_add()
+ * error (also logged).
+ */
+static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma)
+{
+	int err;
+
+	if (!ssid) {
+		ivpu_err(vdev, "Invalid SSID: %u\n", ssid);
+		return -EINVAL;
+	}
+
+	err = ivpu_mmu_cd_add(vdev, ssid, cd_dma);
+	if (!err)
+		return 0;
+
+	ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, err);
+	return err;
+}
+
+/*
+ * One-time MMU initialization: set up the lock, check the ID registers,
+ * allocate the MMU data structures, fill the stream table, install the
+ * global context descriptor and enable the MMU.
+ *
+ * Return: 0 on success, otherwise the error of the first step that failed.
+ */
+int ivpu_mmu_init(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_info *mmu = vdev->mmu;
+	int ret;
+
+	ivpu_dbg(vdev, MMU, "Init..\n");
+
+	/* Registering the managed release action can fail, so check the result */
+	ret = drmm_mutex_init(&vdev->drm, &mmu->lock);
+	if (ret)
+		return ret;
+
+	ivpu_mmu_config_check(vdev);
+
+	ret = ivpu_mmu_structs_alloc(vdev);
+	if (ret)
+		return ret;
+
+	ret = ivpu_mmu_strtab_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_mmu_cd_add_gbl(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize cdtab: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_mmu_enable(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to enable MMU: %d\n", ret);
+		return ret;
+	}
+
+	ivpu_dbg(vdev, MMU, "Init done\n");
+
+	return 0;
+}
+
+/*
+ * Mark the MMU as on, reset and reprogram it, then issue CFGI_ALL,
+ * TLBI_NSNH_ALL and SYNC. Everything runs under mmu->lock; on any failure
+ * the MMU is marked off again.
+ *
+ * Return: 0 on success, otherwise the error of the first step that failed.
+ */
+int ivpu_mmu_enable(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_info *mmu = vdev->mmu;
+	int err;
+
+	mutex_lock(&mmu->lock);
+
+	mmu->on = true;
+
+	err = ivpu_mmu_reset(vdev);
+	if (err) {
+		ivpu_err(vdev, "Failed to reset MMU: %d\n", err);
+		goto unlock;
+	}
+
+	err = ivpu_mmu_cmdq_write_cfgi_all(vdev);
+	if (err)
+		goto unlock;
+
+	err = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev);
+	if (err)
+		goto unlock;
+
+	err = ivpu_mmu_cmdq_sync(vdev);
+
+unlock:
+	if (err)
+		mmu->on = false;
+	mutex_unlock(&mmu->lock);
+	return err;
+}
+
+/*
+ * Mark the MMU as off under mmu->lock. No hardware register is touched here;
+ * the flag only stops later command queue submissions.
+ */
+void ivpu_mmu_disable(struct ivpu_device *vdev)
+{
+	struct mutex *lock = &vdev->mmu->lock;
+
+	mutex_lock(lock);
+	vdev->mmu->on = false;
+	mutex_unlock(lock);
+}
+
+/*
+ * Log one MMU event record: opcode with its description, SSID, SID, raw
+ * words 2 and 3, and the two 64-bit addresses built from words 4-5 and 6-7.
+ *
+ * @event: pointer to the 32-bit words of the event record
+ */
+static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event)
+{
+	u32 word0 = event[0];
+	u32 opcode = FIELD_GET(IVPU_MMU_EVT_OP_MASK, word0);
+	u32 ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, word0);
+	u32 sid = event[1];
+	u64 in_addr = ((u64)event[5] << 32) | event[4];
+	u64 fetch_addr = ((u64)event[7] << 32) | event[6];
+
+	ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n",
+		 opcode, ivpu_mmu_event_to_str(opcode), ssid, sid, event[2], event[3],
+		 in_addr, fetch_addr);
+}
+
+/*
+ * Fetch the next pending record from the MMU event queue.
+ *
+ * Reads the producer index from the EVTQ_PROD_SEC register; when the queue
+ * is not empty, flushes the record at the consumer index from the CPU cache,
+ * advances the consumer index and writes it to the EVTQ_CONS_SEC register.
+ *
+ * Return: pointer to the record inside the queue buffer, or NULL when the
+ * queue is empty.
+ */
+static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
+{
+	struct ivpu_mmu_queue *evtq = &vdev->mmu->evtq;
+	u32 idx = IVPU_MMU_Q_IDX(evtq->cons);
+	u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
+
+	evtq->prod = REGV_RD32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC);
+	if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
+		return NULL;
+
+	/* presumably drops a stale cached copy before the caller reads the record */
+	clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);
+
+	/* The consumer value keeps the wrap bit above the index bits */
+	evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
+	REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, evtq->cons);
+
+	return evt;
+}
+
+/*
+ * Drain the MMU event queue. Every event is logged; an event for the global
+ * context SSID requests device recovery once the queue is empty, while an
+ * event for any other SSID marks that user context invalid.
+ */
+void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
+{
+	bool global_ctx_fault = false;
+	u32 *evt;
+
+	ivpu_dbg(vdev, IRQ, "MMU event queue\n");
+
+	for (evt = ivpu_mmu_get_event(vdev); evt; evt = ivpu_mmu_get_event(vdev)) {
+		u32 ssid;
+
+		ivpu_mmu_dump_event(vdev, evt);
+
+		ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, evt[0]);
+		if (ssid != IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+			ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+			continue;
+		}
+
+		global_ctx_fault = true;
+	}
+
+	if (global_ctx_fault)
+		ivpu_pm_schedule_recovery(vdev);
+}
+
+/*
+ * Handle an MMU global error interrupt: log every error bit that differs
+ * between the GERROR and GERRORN registers and then write the GERROR value
+ * to GERRORN. Returns early, without that write, when none of the bits in
+ * IVPU_MMU_GERROR_ERR_MASK differ.
+ */
+void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
+{
+	u32 gerror_val, gerrorn_val, active;
+
+	ivpu_dbg(vdev, IRQ, "MMU error\n");
+
+	gerror_val = REGV_RD32(MTL_VPU_HOST_MMU_GERROR);
+	gerrorn_val = REGV_RD32(MTL_VPU_HOST_MMU_GERRORN);
+
+	/* A bit set in active means GERROR and GERRORN disagree on that error */
+	active = gerror_val ^ gerrorn_val;
+	if (!(active & IVPU_MMU_GERROR_ERR_MASK))
+		return;
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT, active))
+		ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT, active))
+		ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT, active))
+		ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT, active))
+		ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT, active))
+		ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT, active))
+		ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n");
+
+	if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ, active))
+		ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n");
+
+	/* presumably acknowledges the reported errors by making GERRORN match GERROR */
+	REGV_WR32(MTL_VPU_HOST_MMU_GERRORN, gerror_val);
+}
+
+/*
+ * Install @pgtable as the page table of user context @ssid.
+ *
+ * Return: the ivpu_mmu_cd_add_user() result.
+ */
+int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
+{
+	dma_addr_t pgd_dma = pgtable->pgd_dma;
+
+	return ivpu_mmu_cd_add_user(vdev, ssid, pgd_dma);
+}
+
+/*
+ * Clear the context descriptor of user context @ssid. The result of the
+ * underlying call is not reported to the caller.
+ */
+void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid)
+{
+	const dma_addr_t no_pgtable = 0; /* 0 will clear CD entry */
+
+	ivpu_mmu_cd_add_user(vdev, ssid, no_pgtable);
+}
diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h
new file mode 100644
index 000000000000..cb551126806b
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_mmu.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_MMU_H__
+#define __IVPU_MMU_H__
+
+struct ivpu_device;
+
+struct ivpu_mmu_cdtab {
+	void *base; /* NOTE(review): presumably the CPU address of the table - confirm */
+	dma_addr_t dma; /* DMA address; presumably of the same table - confirm */
+};
+
+struct ivpu_mmu_strtab {
+	void *base; /* NOTE(review): presumably the CPU address of the table - confirm */
+	dma_addr_t dma; /* DMA address; presumably of the same table - confirm */
+	u64 dma_q; /* NOTE(review): looks like a register-ready encoding of @dma - confirm */
+	u32 base_cfg; /* NOTE(review): looks like a base config register value - confirm */
+};
+
+struct ivpu_mmu_queue {
+	void *base; /* NOTE(review): presumably the CPU address of the queue memory - confirm */
+	dma_addr_t dma; /* DMA address; presumably of the same memory - confirm */
+	u64 dma_q; /* NOTE(review): looks like a register-ready encoding of @dma - confirm */
+	u32 prod; /* NOTE(review): presumably the producer index - confirm */
+	u32 cons; /* NOTE(review): presumably the consumer index - confirm */
+};
+
+struct ivpu_mmu_info {
+	struct mutex lock; /* Protects cdtab, strtab, cmdq, on (evtq is not in this list) */
+	struct ivpu_mmu_cdtab cdtab;
+	struct ivpu_mmu_strtab strtab;
+	struct ivpu_mmu_queue cmdq;
+	struct ivpu_mmu_queue evtq;
+	bool on; /* NOTE(review): presumably true while the MMU is enabled - confirm */
+};
+
+int ivpu_mmu_init(struct ivpu_device *vdev);
+void ivpu_mmu_disable(struct ivpu_device *vdev);
+int ivpu_mmu_enable(struct ivpu_device *vdev);
+int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
+void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid);
+int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
+
+void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
+void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev);
+
+#endif /* __IVPU_MMU_H__ */
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
new file mode 100644
index 000000000000..8ce9b12ac356
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/highmem.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+
+#define IVPU_MMU_PGD_INDEX_MASK          GENMASK(38, 30)
+#define IVPU_MMU_PMD_INDEX_MASK          GENMASK(29, 21)
+#define IVPU_MMU_PTE_INDEX_MASK          GENMASK(20, 12)
+#define IVPU_MMU_ENTRY_FLAGS_MASK        GENMASK(11, 0)
+#define IVPU_MMU_ENTRY_FLAG_NG           BIT(11)
+#define IVPU_MMU_ENTRY_FLAG_AF           BIT(10)
+#define IVPU_MMU_ENTRY_FLAG_USER         BIT(6)
+#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
+#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE    BIT(1)
+#define IVPU_MMU_ENTRY_FLAG_VALID        BIT(0)
+
#define IVPU_MMU_PAGE_SIZE    SZ_4K
#define IVPU_MMU_PTE_MAP_SIZE ((u64)IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE) /* 2 MB */
#define IVPU_MMU_PMD_MAP_SIZE ((u64)IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE) /* 1 GB */
#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64)) /* 4 KB per table */
+
+#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
+#define IVPU_MMU_ENTRY_VALID   (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
+#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
+#define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
+				IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
+
+static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+	dma_addr_t dma;
+	u64 *table;
+
+	/* Allocate the top-level (PGD) table; @pgtable is only touched on success */
+	table = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &dma, GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	pgtable->pgd_dma = dma;
+	pgtable->pgd = table;
+	return 0;
+}
+
+static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+	int i, j;
+
+	for (i = 0; i < IVPU_MMU_PGTABLE_ENTRIES; ++i) {
+		u64 **pte_tables = pgtable->pgd_cpu_entries[i];
+		u64 *pmd = pgtable->pgd_entries[i];
+
+		if (!pte_tables)
+			continue;
+		/* Free each PTE table; its DMA address is the PMD entry without flag bits */
+		for (j = 0; j < IVPU_MMU_PGTABLE_ENTRIES; ++j) {
+			if (!pte_tables[j])
+				continue;
+			dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pte_tables[j],
+				    pmd[j] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+		}
+		/* Then the CPU-side pointer array and the PMD table itself */
+		kfree(pte_tables);
+		dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd,
+			    pgtable->pgd[i] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+	}
+
+	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
+		    pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+}
+
+static u64*
+ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
+{
+	u64 **pte_tables;
+	dma_addr_t pmd_dma;
+	u64 *pmd;
+
+	pmd = pgtable->pgd_entries[pgd_index];
+	if (pmd)
+		return pmd;
+
+	pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+	if (!pmd)
+		return NULL;
+
+	pte_tables = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+	if (!pte_tables) {
+		dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
+		return NULL;
+	}
+
+	/* Publish the new PMD: CPU pointer, PTE pointer array and the PGD entry */
+	pgtable->pgd_entries[pgd_index] = pmd;
+	pgtable->pgd_cpu_entries[pgd_index] = pte_tables;
+	pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
+
+	return pmd;
+}
+
+static u64*
+ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
+		    int pgd_index, int pmd_index)
+{
+	u64 **slot = &pgtable->pgd_cpu_entries[pgd_index][pmd_index];
+	dma_addr_t pte_dma;
+	u64 *pte = *slot;
+
+	if (pte)
+		return pte;
+
+	pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+	if (!pte)
+		return NULL;
+	/* Record the CPU pointer and the PMD entry (DMA address | valid flags) */
+	*slot = pte;
+	pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
+	return pte;
+}
+
+static int
+ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			  u64 vpu_addr, dma_addr_t dma_addr, int prot)
+{
+	struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
+	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+	u64 *pte;
+
+	/* Make sure the second level (PMD) table for this address exists */
+	if (!ivpu_mmu_ensure_pmd(vdev, pgtable, pgd_index))
+		return -ENOMEM;
+
+	/* Likewise for the third level (PTE) table */
+	pte = ivpu_mmu_ensure_pte(vdev, pgtable, pgd_index, pmd_index);
+	if (!pte)
+		return -ENOMEM;
+
+	/* The leaf entry is the DMA address OR-ed with the protection flags */
+	pte[pte_index] = dma_addr | prot;
+	return 0;
+}
+
+static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
+{
+	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+	/* Invalidate the PTE: dummy physical address with every flag bit (incl. VALID) clear */
+	ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
+}
+
+static void
+ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
+{
+	u64 end_addr = vpu_addr + size;
+	u64 *pgd = ctx->pgtable.pgd;
+
+	/* Align down to a PTE table boundary (2 MB, one PMD entry) so the loop steps per table */
+	vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
+
+	while (vpu_addr < end_addr) {
+		int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+		u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
+		u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
+
+		while (vpu_addr < end_addr && vpu_addr < pmd_end) {
+			int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+			u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
+
+			clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
+			vpu_addr += IVPU_MMU_PTE_MAP_SIZE; /* advance to the next PTE table */
+		}
+		clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE); /* PMD of this PGD entry */
+	}
+	clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE); /* top-level table, flushed last */
+}
+
+/* Map @size bytes page by page; a trailing partial page is mapped as a whole page */
+static int
+ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
+{
+	size_t off;
+	int ret;
+
+	/* Step by offset: counting an unaligned @size down would wrap past zero */
+	for (off = 0; off < size; off += IVPU_MMU_PAGE_SIZE) {
+		ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr + off, dma_addr + off, prot);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
+{
+	size_t off;
+
+	/* Step by offset so an unaligned @size cannot wrap the counter past zero */
+	for (off = 0; off < size; off += IVPU_MMU_PAGE_SIZE)
+		ivpu_mmu_context_unmap_page(ctx, vpu_addr + off);
+}
+
+int
+ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			 u64 vpu_addr, struct sg_table *sgt,  bool llc_coherent)
+{
+	struct scatterlist *sg;
+	int prot;
+	int ret;
+	u64 i;
+
+	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
+		return -EINVAL;
+	/*
+	 * VPU is only 32 bit, but its DMA engine is 38 bit.
+	 * Ranges < 2 GB are reserved for VPU internal registers.
+	 * Limit the start address to 8 GB (only @vpu_addr is checked, not the end of the range).
+	 */
+	if (vpu_addr < SZ_2G || vpu_addr > SZ_8G)
+		return -EINVAL;
+
+	prot = IVPU_MMU_ENTRY_MAPPED;
+	if (llc_coherent)
+		prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
+
+	mutex_lock(&ctx->lock);
+	/* Map and flush one DMA segment at a time, advancing @vpu_addr by the segment size */
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		u64 dma_addr = sg_dma_address(sg) - sg->offset;
+		size_t size = sg_dma_len(sg) + sg->offset;
+
+		ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
+		if (ret) {
+			ivpu_err(vdev, "Failed to map context pages\n");
+			mutex_unlock(&ctx->lock);
+			return ret; /* NOTE(review): pages mapped so far are not unmapped here */
+		}
+		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
+		vpu_addr += size;
+	}
+
+	mutex_unlock(&ctx->lock);
+	/* The TLB is invalidated after ctx->lock is dropped; a failure is returned to the caller */
+	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
+	if (ret)
+		ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+	return ret;
+}
+
+void
+ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			   u64 vpu_addr, struct sg_table *sgt)
+{
+	struct scatterlist *sg;
+	int ret;
+	u64 i;
+
+	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) /* warn only; the unmap still proceeds */
+		ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
+
+	mutex_lock(&ctx->lock);
+	/* Invalidate and flush the range of each DMA segment, advancing @vpu_addr as we go */
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		size_t size = sg_dma_len(sg) + sg->offset;
+
+		ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
+		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
+		vpu_addr += size;
+	}
+
+	mutex_unlock(&ctx->lock);
+	/* The TLB is invalidated after ctx->lock is dropped; a failure is only logged */
+	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
+	if (ret)
+		ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+}
+
+int
+ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
+				    const struct ivpu_addr_range *range,
+				    u64 size, struct drm_mm_node *node)
+{
+	lockdep_assert_held(&ctx->lock);
+	/* Page-aligned node of @size within range->start..range->end, DRM_MM_INSERT_BEST mode */
+	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
+					  0, range->start, range->end, DRM_MM_INSERT_BEST);
+}
+
+void
+ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
+{
+	lockdep_assert_held(&ctx->lock); /* the caller must already hold ctx->lock */
+
+	drm_mm_remove_node(node);
+}
+
+static int
+ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
+{
+	u64 start, end;
+	int ret;
+
+	mutex_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->bo_list);
+
+	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
+	if (ret)
+		return ret;
+
+	/* Context id 0 spans the global ranges, every other id the user ranges */
+	if (context_id) {
+		start = vdev->hw->ranges.user_low.start;
+		end = vdev->hw->ranges.user_high.end;
+	} else {
+		start = vdev->hw->ranges.global_low.start;
+		end = vdev->hw->ranges.global_high.end;
+	}
+
+	ctx->id = context_id;
+	drm_mm_init(&ctx->mm, start, end - start);
+	return 0;
+}
+
+static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+{
+	drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
+	/* Teardown order: the lock, the page tables, then the address range allocator */
+	mutex_destroy(&ctx->lock);
+	ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
+	drm_mm_takedown(&ctx->mm);
+}
+
+int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
+{
+	return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
+}
+
+void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
+{
+	ivpu_mmu_context_fini(vdev, &vdev->gctx); /* void callee: there is nothing to return */
+}
+
+void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
+{
+	struct ivpu_file_priv *owner;
+
+	/* Look up the file context registered for @ssid and flag it, all under the xarray lock */
+	xa_lock(&vdev->context_xa);
+	owner = xa_load(&vdev->context_xa, ssid);
+	if (owner)
+		owner->has_mmu_faults = true;
+
+	xa_unlock(&vdev->context_xa);
+}
+
+/*
+ * Initialize @ctx for a non-zero @ctx_id and hand its page table to the MMU.
+ */
+int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
+{
+	int ret;
+
+	drm_WARN_ON(&vdev->drm, !ctx_id);
+
+	ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
+	if (!ret)
+		return 0;
+
+	/* Setting the page table failed: undo the context initialization */
+	ivpu_err(vdev, "Failed to set page table: %d\n", ret);
+	ivpu_mmu_context_fini(vdev, ctx);
+	return ret;
+}
+
+void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+{
+	drm_WARN_ON(&vdev->drm, !ctx->id);
+	/* Clear the context's CD entry first, then free its page tables */
+	ivpu_mmu_clear_pgtable(vdev, ctx->id);
+	ivpu_mmu_context_fini(vdev, ctx);
+}
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
new file mode 100644
index 000000000000..ddf11b95023a
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_MMU_CONTEXT_H__
+#define __IVPU_MMU_CONTEXT_H__
+
+#include <drm/drm_mm.h>
+
+struct ivpu_device;
+struct ivpu_file_priv;
+struct ivpu_addr_range;
+
+#define IVPU_MMU_PGTABLE_ENTRIES	512
+
+struct ivpu_mmu_pgtable {
+	u64             **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES]; /* per PGD slot: CPU ptrs to PTE tables */
+	u64		*pgd_entries[IVPU_MMU_PGTABLE_ENTRIES]; /* per PGD slot: CPU ptr to the PMD table */
+	u64		*pgd; /* top-level table; entries hold PMD DMA address | flags */
+	dma_addr_t	pgd_dma; /* DMA address of @pgd */
+};
+
+struct ivpu_mmu_context {
+	struct mutex lock; /* protects: mm, pgtable, bo_list */
+	struct drm_mm mm; /* allocator for the context's VPU address range */
+	struct ivpu_mmu_pgtable pgtable;
+	struct list_head bo_list;
+	u32 id; /* context id; also passed as the SSID when invalidating the TLB */
+};
+
+int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
+void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
+
+int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
+void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
+void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
+
+int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
+					const struct ivpu_addr_range *range,
+					u64 size, struct drm_mm_node *node);
+void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx,
+					 struct drm_mm_node *node);
+
+int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			     u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
+void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+				u64 vpu_addr, struct sg_table *sgt);
+
+#endif /* __IVPU_MMU_CONTEXT_H__ */
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
new file mode 100644
index 000000000000..553bcbd787b3
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/reboot.h>
+
+#include "vpu_boot_api.h"
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_fw.h"
+#include "ivpu_ipc.h"
+#include "ivpu_job.h"
+#include "ivpu_mmu.h"
+#include "ivpu_pm.h"
+
+static bool ivpu_disable_recovery;
+module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
+MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
+
+#define PM_RESCHEDULE_LIMIT     5
+
+static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+
+	ivpu_cmdq_reset_all_contexts(vdev);
+	ivpu_ipc_reset(vdev);
+	ivpu_fw_load(vdev);
+	fw->entry_point = fw->cold_boot_entry_point; /* select the cold boot entry point */
+}
+
+static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	struct vpu_boot_params *bp = fw->mem->kvaddr;
+
+	if (!bp->save_restore_ret_address) { /* no restore entry point recorded: cold boot */
+		ivpu_pm_prepare_cold_boot(vdev);
+		return;
+	}
+	/* Warm boot: enter the FW at the restore entry point stored in the boot params */
+	ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx\n", bp->save_restore_ret_address);
+	fw->entry_point = bp->save_restore_ret_address;
+}
+
+static int ivpu_suspend(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ret = ivpu_shutdown(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
+		return ret;
+	}
+
+	return ret;
+}
+
+static int ivpu_resume(struct ivpu_device *vdev)
+{
+	int ret;
+
+retry: /* re-entered after a failed non-cold boot, once cold boot has been prepared */
+	ret = ivpu_hw_power_up(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_mmu_enable(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
+		ivpu_hw_power_down(vdev);
+		return ret;
+	}
+
+	ret = ivpu_boot(vdev);
+	if (ret) {
+		ivpu_mmu_disable(vdev); /* undo the two steps above in reverse order */
+		ivpu_hw_power_down(vdev);
+		if (!ivpu_fw_is_cold_boot(vdev)) {
+			ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret);
+			ivpu_pm_prepare_cold_boot(vdev);
+			goto retry;
+		} else {
+			ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
+		}
+	}
+
+	return ret;
+}
+
+static void ivpu_pm_recovery_work(struct work_struct *work)
+{
+	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
+	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
+	struct ivpu_device *vdev = pm->vdev;
+	int ret;
+
+	ret = pci_reset_function(to_pci_dev(vdev->drm.dev));
+	if (ret)
+		ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
+	/* The uevent is emitted whether or not the reset succeeded */
+	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
+}
+
+void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
+{
+	struct ivpu_pm_info *pm = vdev->pm;
+
+	if (ivpu_disable_recovery) {
+		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
+		return;
+	}
+
+	if (ivpu_is_fpga(vdev)) {
+		ivpu_err(vdev, "Recovery not available on FPGA\n");
+		return;
+	}
+
+	/* Only the caller that flips in_reset from 0 to 1 queues the recovery work */
+	if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
+		ivpu_hw_irq_disable(vdev);
+		queue_work(system_long_wq, &pm->recovery_work);
+	}
+}
+
+int ivpu_pm_suspend_cb(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct ivpu_device *vdev = to_ivpu_device(drm);
+	int ret;
+
+	ivpu_dbg(vdev, PM, "Suspend..\n");
+	/* A failed shutdown is retried via -EBUSY until the retry counter runs out */
+	ret = ivpu_suspend(vdev);
+	if (ret && vdev->pm->suspend_reschedule_counter) {
+		ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
+			 vdev->pm->suspend_reschedule_counter);
+		pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
+		vdev->pm->suspend_reschedule_counter--;
+		return -EBUSY;
+	} else if (!vdev->pm->suspend_reschedule_counter) {
+		ivpu_warn(vdev, "Failed to enter idle, force suspend\n");
+		ivpu_pm_prepare_cold_boot(vdev);
+	} else {
+		ivpu_pm_prepare_warm_boot(vdev);
+	}
+	/* NOTE(review): force path tests only the counter; a non-zero ret is still returned */
+	vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
+
+	pci_save_state(to_pci_dev(dev));
+	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
+
+	ivpu_dbg(vdev, PM, "Suspend done.\n");
+
+	return ret;
+}
+
+int ivpu_pm_resume_cb(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct ivpu_device *vdev = to_ivpu_device(drm);
+	int ret;
+
+	ivpu_dbg(vdev, PM, "Resume..\n");
+	/* Mirror of the suspend path: back to D0, then restore the saved PCI state */
+	pci_set_power_state(to_pci_dev(dev), PCI_D0);
+	pci_restore_state(to_pci_dev(dev));
+
+	ret = ivpu_resume(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to resume: %d\n", ret);
+
+	ivpu_dbg(vdev, PM, "Resume done.\n"); /* printed on the failure path as well */
+
+	return ret;
+}
+
+int ivpu_pm_runtime_suspend_cb(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct ivpu_device *vdev = to_ivpu_device(drm);
+	int ret;
+
+	ivpu_dbg(vdev, PM, "Runtime suspend..\n");
+	/* HW not idle yet: reschedule the suspend and bail out while retries remain */
+	if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
+		ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
+			 vdev->pm->suspend_reschedule_counter);
+		pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
+		vdev->pm->suspend_reschedule_counter--;
+		return -EAGAIN;
+	}
+
+	ret = ivpu_suspend(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
+	/* An exhausted retry counter means the suspend was forced: prepare a cold boot */
+	if (!vdev->pm->suspend_reschedule_counter) {
+		ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
+		ivpu_pm_prepare_cold_boot(vdev);
+	} else {
+		ivpu_pm_prepare_warm_boot(vdev);
+	}
+
+	vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
+
+	ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
+
+	return 0; /* a failed ivpu_suspend() is only logged, never returned */
+}
+
+int ivpu_pm_runtime_resume_cb(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct ivpu_device *vdev = to_ivpu_device(drm);
+	int ret;
+
+	ivpu_dbg(vdev, PM, "Runtime resume..\n");
+	/* Unlike ivpu_pm_resume_cb(), no PCI power state handling is done here */
+	ret = ivpu_resume(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
+
+	ivpu_dbg(vdev, PM, "Runtime resume done.\n"); /* printed on the failure path as well */
+
+	return ret;
+}
+
+int ivpu_rpm_get(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ivpu_dbg(vdev, RPM, "rpm_get count %d\n", atomic_read(&vdev->drm.dev->power.usage_count));
+
+	ret = pm_runtime_resume_and_get(vdev->drm.dev);
+	if (!drm_WARN_ON(&vdev->drm, ret < 0)) /* on success, restart the suspend retry budget */
+		vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
+
+	return ret;
+}
+
+void ivpu_rpm_put(struct ivpu_device *vdev)
+{
+	ivpu_dbg(vdev, RPM, "rpm_put count %d\n", atomic_read(&vdev->drm.dev->power.usage_count));
+	/* Refresh the last-busy timestamp before dropping the reference */
+	pm_runtime_mark_last_busy(vdev->drm.dev);
+	pm_runtime_put_autosuspend(vdev->drm.dev);
+}
+
+void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
+{
+	struct ivpu_device *vdev = pci_get_drvdata(pdev);
+
+	pm_runtime_get_sync(vdev->drm.dev); /* reference dropped in ivpu_pm_reset_done_cb() */
+
+	ivpu_dbg(vdev, PM, "Pre-reset..\n");
+	atomic_set(&vdev->pm->in_reset, 1); /* cleared again in ivpu_pm_reset_done_cb() */
+	ivpu_shutdown(vdev); /* return value is not checked */
+	ivpu_pm_prepare_cold_boot(vdev);
+	ivpu_jobs_abort_all(vdev);
+	ivpu_dbg(vdev, PM, "Pre-reset done.\n");
+}
+
+void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
+{
+	struct ivpu_device *vdev = pci_get_drvdata(pdev);
+	int ret;
+
+	ivpu_dbg(vdev, PM, "Post-reset..\n");
+	ret = ivpu_resume(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
+	atomic_set(&vdev->pm->in_reset, 0); /* cleared even when the resume failed */
+	ivpu_dbg(vdev, PM, "Post-reset done.\n");
+	/* Drop the reference taken by pm_runtime_get_sync() in ivpu_pm_reset_prepare_cb() */
+	pm_runtime_put_autosuspend(vdev->drm.dev);
+}
+
+int ivpu_pm_init(struct ivpu_device *vdev)
+{
+	struct device *dev = vdev->drm.dev;
+	struct ivpu_pm_info *pm = vdev->pm;
+
+	pm->vdev = vdev;
+	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
+
+	atomic_set(&pm->in_reset, 0);
+	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
+
+	pm_runtime_use_autosuspend(dev);
+	/* Delay in ms; the runtime PM core treats a negative delay as "never autosuspend" */
+	if (ivpu_disable_recovery)
+		pm_runtime_set_autosuspend_delay(dev, -1);
+	else if (ivpu_is_silicon(vdev))
+		pm_runtime_set_autosuspend_delay(dev, 100);
+	else
+		pm_runtime_set_autosuspend_delay(dev, 60000);
+
+	return 0; /* always succeeds */
+}
+
+void ivpu_pm_enable(struct ivpu_device *vdev)
+{
+	struct device *dev = vdev->drm.dev;
+
+	pm_runtime_set_active(dev);
+	pm_runtime_allow(dev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev); /* drops one usage count reference */
+
+	ivpu_dbg(vdev, RPM, "Enable RPM count %d\n", atomic_read(&dev->power.usage_count));
+}
+
+void ivpu_pm_disable(struct ivpu_device *vdev)
+{
+	struct device *dev = vdev->drm.dev;
+
+	ivpu_dbg(vdev, RPM, "Disable RPM count %d\n", atomic_read(&dev->power.usage_count));
+	/* Reverse of ivpu_pm_enable(): take a reference without resuming, then forbid RPM */
+	pm_runtime_get_noresume(vdev->drm.dev);
+	pm_runtime_forbid(vdev->drm.dev);
+}
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
new file mode 100644
index 000000000000..dc1b3758e13f
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_PM_H__
+#define __IVPU_PM_H__
+
+#include <linux/types.h>
+
+struct ivpu_device;
+
+struct ivpu_pm_info {
+	struct ivpu_device *vdev; /* back pointer, used by the recovery work */
+	struct work_struct recovery_work; /* runs ivpu_pm_recovery_work() */
+	atomic_t in_reset; /* 1 from recovery scheduling / reset prepare until reset done */
+	bool is_warmboot; /* NOTE(review): not referenced anywhere in ivpu_pm.c */
+	u32 suspend_reschedule_counter; /* suspend retries left; reset to PM_RESCHEDULE_LIMIT */
+};
+
+int ivpu_pm_init(struct ivpu_device *vdev);
+void ivpu_pm_enable(struct ivpu_device *vdev);
+void ivpu_pm_disable(struct ivpu_device *vdev);
+
+int ivpu_pm_suspend_cb(struct device *dev);
+int ivpu_pm_resume_cb(struct device *dev);
+int ivpu_pm_runtime_suspend_cb(struct device *dev);
+int ivpu_pm_runtime_resume_cb(struct device *dev);
+
+void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev);
+void ivpu_pm_reset_done_cb(struct pci_dev *pdev);
+
+int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
+void ivpu_rpm_put(struct ivpu_device *vdev);
+
+void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+
+#endif /* __IVPU_PM_H__ */
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
new file mode 100644
index 000000000000..6b71be92ba65
--- /dev/null
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -0,0 +1,349 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef VPU_BOOT_API_H
+#define VPU_BOOT_API_H
+
+/*
+ * =========== FW API version information beginning ================
+ *  The below values will be used to construct the version info this way:
+ *  fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
+ *  VPU_BOOT_API_VER_MINOR;
+ *  VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ */
+
+/*
+ * Major version changes that break backward compatibility.
+ * Major version must start from 1 and can only be incremented.
+ */
+#define VPU_BOOT_API_VER_MAJOR 3
+
+/*
+ * Minor version changes when API backward compatibility is preserved.
+ * Resets to 0 if Major version is incremented.
+ */
+#define VPU_BOOT_API_VER_MINOR 12
+
+/*
+ * API header changed (field names, documentation, formatting) but API itself has not been changed
+ */
+#define VPU_BOOT_API_VER_PATCH 2
+
+/*
+ * Index in the API version table
+ * Must be unique for each API
+ */
+#define VPU_BOOT_API_VER_INDEX 0
+/* ------------ FW API version information end ---------------------*/
+
+#pragma pack(push, 1)
+
+/*
+ * Firmware image header format
+ */
+#define VPU_FW_HEADER_SIZE    4096
+#define VPU_FW_HEADER_VERSION 0x1
+#define VPU_FW_VERSION_SIZE   32
+#define VPU_FW_API_VER_NUM    16
+
+struct vpu_firmware_header {
+	u32 header_version;
+	u32 image_format;
+	u64 image_load_address;
+	u32 image_size;
+	u64 entry_point;
+	u8 vpu_version[VPU_FW_VERSION_SIZE];
+	u32 compression_type;
+	u64 firmware_version_load_address;
+	u32 firmware_version_size;
+	u64 boot_params_load_address;
+	u32 api_version[VPU_FW_API_VER_NUM];
+	/* Size of memory required for firmware execution */
+	u32 runtime_size;
+	u32 shave_nn_fw_size;
+};
+
+/*
+ * Firmware boot parameters format
+ */
+
+#define VPU_BOOT_PLL_COUNT     3
+#define VPU_BOOT_PLL_OUT_COUNT 4
+
+/** Values for boot_type field */
+#define VPU_BOOT_TYPE_COLDBOOT 0
+#define VPU_BOOT_TYPE_WARMBOOT 1
+
+/** Value for magic field */
+#define VPU_BOOT_PARAMS_MAGIC 0x10000
+
+/** VPU scheduling mode. By default, OS scheduling is used. */
+#define VPU_SCHEDULING_MODE_OS 0
+#define VPU_SCHEDULING_MODE_HW 1
+
+enum VPU_BOOT_L2_CACHE_CFG_TYPE {
+	VPU_BOOT_L2_CACHE_CFG_UPA = 0,
+	VPU_BOOT_L2_CACHE_CFG_NN = 1,
+	VPU_BOOT_L2_CACHE_CFG_NUM = 2 /* count of the types above; sizes cache_defaults[] */
+};
+
+/**
+ * Logging destinations.
+ *
+ * Logging output can be directed to different logging destinations. This enum
+ * defines the list of logging destinations supported by the VPU firmware (NOTE:
+ * a specific VPU FW binary may support only a subset of such output
+ * destinations, depending on the target platform and compile options).
+ */
+enum vpu_trace_destination { /* single-bit values, OR-ed into trace_destination_mask */
+	VPU_TRACE_DESTINATION_PIPEPRINT = 0x1,
+	VPU_TRACE_DESTINATION_VERBOSE_TRACING = 0x2,
+	VPU_TRACE_DESTINATION_NORTH_PEAK = 0x4,
+};
+
+/*
+ * Processor bit shifts (for loggable HW components).
+ */
+#define VPU_TRACE_PROC_BIT_ARM	     0
+#define VPU_TRACE_PROC_BIT_LRT	     1
+#define VPU_TRACE_PROC_BIT_LNN	     2
+#define VPU_TRACE_PROC_BIT_SHV_0     3
+#define VPU_TRACE_PROC_BIT_SHV_1     4
+#define VPU_TRACE_PROC_BIT_SHV_2     5
+#define VPU_TRACE_PROC_BIT_SHV_3     6
+#define VPU_TRACE_PROC_BIT_SHV_4     7
+#define VPU_TRACE_PROC_BIT_SHV_5     8
+#define VPU_TRACE_PROC_BIT_SHV_6     9
+#define VPU_TRACE_PROC_BIT_SHV_7     10
+#define VPU_TRACE_PROC_BIT_SHV_8     11
+#define VPU_TRACE_PROC_BIT_SHV_9     12
+#define VPU_TRACE_PROC_BIT_SHV_10    13
+#define VPU_TRACE_PROC_BIT_SHV_11    14
+#define VPU_TRACE_PROC_BIT_SHV_12    15
+#define VPU_TRACE_PROC_BIT_SHV_13    16
+#define VPU_TRACE_PROC_BIT_SHV_14    17
+#define VPU_TRACE_PROC_BIT_SHV_15    18
+#define VPU_TRACE_PROC_BIT_ACT_SHV_0 19
+#define VPU_TRACE_PROC_BIT_ACT_SHV_1 20
+#define VPU_TRACE_PROC_BIT_ACT_SHV_2 21
+#define VPU_TRACE_PROC_BIT_ACT_SHV_3 22
+#define VPU_TRACE_PROC_NO_OF_HW_DEVS 23
+
+/* KMB HW component IDs are sequential, so define first and last IDs. */
+#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT
+#define VPU_TRACE_PROC_BIT_KMB_LAST  VPU_TRACE_PROC_BIT_SHV_15
+
+struct vpu_boot_l2_cache_config {
+	u8 use;
+	u8 cfg;
+};
+
+struct vpu_warm_boot_section {
+	u32 src;
+	u32 dst;
+	u32 size;
+	u32 core_id;
+	u32 is_clear_op;
+};
+
+struct vpu_boot_params {
+	u32 magic;
+	u32 vpu_id;
+	u32 vpu_count;
+	u32 pad0[5]; /* pads the header fields to offset 0x20 (layout is pack(1)) */
+	/* Clock frequencies: 0x20 - 0xFF */
+	u32 frequency;
+	u32 pll[VPU_BOOT_PLL_COUNT][VPU_BOOT_PLL_OUT_COUNT];
+	u32 perf_clk_frequency;
+	u32 pad1[42]; /* pads the section to offset 0x100 */
+	/* Memory regions: 0x100 - 0x1FF */
+	u64 ipc_header_area_start;
+	u32 ipc_header_area_size;
+	u64 shared_region_base;
+	u32 shared_region_size;
+	u64 ipc_payload_area_start;
+	u32 ipc_payload_area_size;
+	u64 global_aliased_pio_base;
+	u32 global_aliased_pio_size;
+	u32 autoconfig;
+	struct vpu_boot_l2_cache_config cache_defaults[VPU_BOOT_L2_CACHE_CFG_NUM];
+	u64 global_memory_allocator_base;
+	u32 global_memory_allocator_size;
+	/**
+	 * ShaveNN FW section VPU base address
+	 * On VPU2.7 HW this address must be within 2GB range starting from L2C_PAGE_TABLE base
+	 */
+	u64 shave_nn_fw_base;
+	u64 save_restore_ret_address; /* stores the address of FW's restore entry point */
+	u32 pad2[43]; /* pads the section to offset 0x200 */
+	/* IRQ re-direct numbers: 0x200 - 0x2FF */
+	s32 watchdog_irq_mss;
+	s32 watchdog_irq_nce;
+	/* ARM -> VPU doorbell interrupt. ARM is notifying VPU of async command or compute job. */
+	u32 host_to_vpu_irq;
+	/* VPU -> ARM job done interrupt. VPU is notifying ARM of compute job completion. */
+	u32 job_done_irq;
+	/* VPU -> ARM IRQ line to use to request MMU update. */
+	u32 mmu_update_request_irq;
+	/* ARM -> VPU IRQ line to use to notify of MMU update completion. */
+	u32 mmu_update_done_irq;
+	/* ARM -> VPU IRQ line to use to request power level change. */
+	u32 set_power_level_irq;
+	/* VPU -> ARM IRQ line to use to notify of power level change completion. */
+	u32 set_power_level_done_irq;
+	/* VPU -> ARM IRQ line to use to notify of VPU idle state change */
+	u32 set_vpu_idle_update_irq;
+	/* VPU -> ARM IRQ line to use to request counter reset. */
+	u32 metric_query_event_irq;
+	/* ARM -> VPU IRQ line to use to notify of counter reset completion. */
+	u32 metric_query_event_done_irq;
+	/* VPU -> ARM IRQ line to use to notify of preemption completion. */
+	u32 preemption_done_irq;
+	/* Padding up to offset 0x300. */
+	u32 pad3[52];
+	/* Silicon information: 0x300 - 0x3FF */
+	u32 host_version_id;
+	u32 si_stepping;
+	u64 device_id;
+	u64 feature_exclusion;
+	u64 sku;
+	/** PLL ratio for minimum clock frequency */
+	u32 min_freq_pll_ratio;
+	/** PLL ratio for maximum clock frequency */
+	u32 max_freq_pll_ratio;
+	/**
+	 * Initial log level threshold (messages with log level severity less than
+	 * the threshold will not be logged); applies to every enabled logging
+	 * destination and loggable HW component. See 'mvLog_t' enum for acceptable
+	 * values.
+	 */
+	u32 default_trace_level;
+	u32 boot_type;
+	u64 punit_telemetry_sram_base;
+	u64 punit_telemetry_sram_size;
+	u32 vpu_telemetry_enable;
+	u64 crit_tracing_buff_addr;
+	u32 crit_tracing_buff_size;
+	u64 verbose_tracing_buff_addr;
+	u32 verbose_tracing_buff_size;
+	u64 verbose_tracing_sw_component_mask; /* TO BE REMOVED */
+	/**
+	 * Mask of destinations to which logging messages are delivered; bitwise OR
+	 * of values defined in vpu_trace_destination enum.
+	 */
+	u32 trace_destination_mask;
+	/**
+	 * Mask of hardware components for which logging is enabled; bitwise OR of
+	 * bits defined by the VPU_TRACE_PROC_BIT_* macros.
+	 */
+	u64 trace_hw_component_mask;
+	/** Mask of trace message formats supported by the driver */
+	u64 tracing_buff_message_format_mask;
+	u64 trace_reserved_1[2];
+	/**
+	 * Period at which the VPU reads the temp sensor values into MMIO, on
+	 * platforms where that is necessary (in ms). 0 to disable reads.
+	 */
+	u32 temp_sensor_period_ms;
+	/** PLL ratio for efficient clock frequency */
+	u32 pn_freq_pll_ratio;
+	u32 pad4[28]; /* pads the section to offset 0x400 */
+	/* Warm boot information: 0x400 - 0x43F */
+	u32 warm_boot_sections_count;
+	u32 warm_boot_start_address_reference;
+	u32 warm_boot_section_info_address_offset;
+	u32 pad5[13]; /* pads the section to offset 0x440 */
+	/* Power States transitions timestamps: 0x440 - 0x46F */
+	struct {
+		/* VPU_IDLE -> VPU_ACTIVE transition initiated timestamp */
+		u64 vpu_active_state_requested;
+		/* VPU_IDLE -> VPU_ACTIVE transition completed timestamp */
+		u64 vpu_active_state_achieved;
+		/* VPU_ACTIVE -> VPU_IDLE transition initiated timestamp */
+		u64 vpu_idle_state_requested;
+		/* VPU_ACTIVE -> VPU_IDLE transition completed timestamp */
+		u64 vpu_idle_state_achieved;
+		/* VPU_IDLE -> VPU_STANDBY transition initiated timestamp */
+		u64 vpu_standby_state_requested;
+		/* VPU_IDLE -> VPU_STANDBY transition completed timestamp */
+		u64 vpu_standby_state_achieved;
+	} power_states_timestamps;
+	/* VPU scheduling mode. Values defined by VPU_SCHEDULING_MODE_* macros. */
+	u32 vpu_scheduling_mode;
+	/* Present call period in milliseconds. */
+	u32 vpu_focus_present_timer_ms;
+	/* Unused/reserved: 0x478 - 0xFFF (brings the packed struct to 0x1000 bytes) */
+	u32 pad6[738];
+};
+
+/*
+ * Magic numbers set between host and vpu to detect corruption of tracing init
+ */
+
+#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE)
+
+/* Tracing buffer message format definitions */
+#define VPU_TRACING_FORMAT_STRING 0
+#define VPU_TRACING_FORMAT_MIPI	  2
+/*
+ * Header of the tracing buffer.
+ * The below defined header will be stored at the beginning of
+ * each allocated tracing buffer, followed by a series of 256b
+ * of ASCII trace message entries.
+ */
+struct vpu_tracing_buffer_header {
+	/**
+	 * Magic number set by host to detect corruption
+	 * @see VPU_TRACING_BUFFER_CANARY
+	 */
+	u32 host_canary_start;
+	/* offset from start of buffer for trace entries */
+	u32 read_index;
+	u32 pad_to_cache_line_size_0[14];
+	/* End of first cache line (64 bytes) */
+
+	/**
+	 * Magic number set by host to detect corruption
+	 * @see VPU_TRACING_BUFFER_CANARY
+	 */
+	u32 vpu_canary_start;
+	/* offset from start of buffer from write start */
+	u32 write_index;
+	/* counter for buffer wrapping */
+	u32 wrap_count;
+	/* legacy field - do not use */
+	u32 reserved_0;
+	/**
+	 * Size of the log buffer including this header (@header_size) and the space
+	 * reserved for all messages. If @alignment is greater than 0, @size
+	 * must be a multiple of @alignment.
+	 */
+	u32 size;
+	/* Header version */
+	u16 header_version;
+	/* Header size */
+	u16 header_size;
+	/*
+	 * Format of the messages in the trace buffer
+	 * 0 - null terminated string
+	 * 1 - size + null terminated string
+	 * 2 - MIPI-SysT encoding
+	 */
+	u32 format;
+	/*
+	 * Message alignment
+	 * 0 - messages are placed one after another
+	 * n - every message starts at an offset that is a multiple of n
+	 */
+	u32 alignment; /* 64, 128, 256 */
+	/* Name of the logging entity, i.e "LRT", "LNN", "SHV0", etc */
+	char name[16];
+	u32 pad_to_cache_line_size_1[4];
+	/* End of second cache line (64 bytes) */
+};
+
+#pragma pack(pop)
+
+#endif
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
new file mode 100644
index 000000000000..2949ec8365bd
--- /dev/null
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -0,0 +1,1008 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+/**
+ * @file
+ * @brief JSM shared definitions
+ *
+ * @ingroup Jsm
+ * @brief JSM shared definitions
+ * @{
+ */
+#ifndef VPU_JSM_API_H
+#define VPU_JSM_API_H
+
+/*
+ * Major version changes that break backward compatibility
+ */
+#define VPU_JSM_API_VER_MAJOR 3
+
+/*
+ * Minor version changes when API backward compatibility is preserved.
+ */
+#define VPU_JSM_API_VER_MINOR 0
+
+/*
+ * API header changed (field names, documentation, formatting) but API itself has not been changed
+ */
+#define VPU_JSM_API_VER_PATCH 1
+
+/*
+ * Index in the API version table
+ */
+#define VPU_JSM_API_VER_INDEX 4
+
+/*
+ * Number of Priority Bands for Hardware Scheduling
+ * Bands: RealTime, Focus, Normal, Idle
+ */
+#define VPU_HWS_NUM_PRIORITY_BANDS 4
+
+/* Max number of impacted contexts that can be dealt with the engine reset command */
+#define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3
+
+/** Pack the API structures for now, once alignment issues are fixed this can be removed */
+#pragma pack(push, 1)
+
+/*
+ * Engine indexes.
+ */
+#define VPU_ENGINE_COMPUTE 0
+#define VPU_ENGINE_COPY	   1
+#define VPU_ENGINE_NB	   2
+
+/*
+ * VPU status values.
+ */
+#define VPU_JSM_STATUS_SUCCESS				 0x0U
+#define VPU_JSM_STATUS_PARSING_ERR			 0x1U
+#define VPU_JSM_STATUS_PROCESSING_ERR			 0x2U
+#define VPU_JSM_STATUS_PREEMPTED			 0x3U
+#define VPU_JSM_STATUS_ABORTED				 0x4U
+#define VPU_JSM_STATUS_USER_CTX_VIOL_ERR		 0x5U
+#define VPU_JSM_STATUS_GLOBAL_CTX_VIOL_ERR		 0x6U
+#define VPU_JSM_STATUS_MVNCI_WRONG_INPUT_FORMAT		 0x7U
+#define VPU_JSM_STATUS_MVNCI_UNSUPPORTED_NETWORK_ELEMENT 0x8U
+#define VPU_JSM_STATUS_MVNCI_INVALID_HANDLE		 0x9U
+#define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES		 0xAU
+#define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED		 0xBU
+#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR		 0xCU
+/* Job status returned when the job was preempted mid-inference */
+#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE		 0xDU
+
+/*
+ * Host <-> VPU IPC channels.
+ * ASYNC commands use a high priority channel, other messages use low-priority ones.
+ */
+#define VPU_IPC_CHAN_ASYNC_CMD 0
+#define VPU_IPC_CHAN_GEN_CMD   10
+#define VPU_IPC_CHAN_JOB_RET   11
+
+/*
+ * Job flags bit masks.
+ */
+#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
+
+/*
+ * Sizes of the reserved areas in jobs, in bytes.
+ */
+#define VPU_JOB_RESERVED_BYTES	     16
+/*
+ * Sizes of the reserved areas in job queues, in bytes.
+ */
+#define VPU_JOB_QUEUE_RESERVED_BYTES 52
+
+/*
+ * Max length (including trailing NULL char) of trace entity name (e.g., the
+ * name of a logging destination or a loggable HW component).
+ */
+#define VPU_TRACE_ENTITY_NAME_MAX_LEN 32
+
+/*
+ * Max length (including trailing NULL char) of a dyndbg command.
+ *
+ * NOTE: 96 is used so that the size of 'struct vpu_ipc_msg' in the JSM API is
+ * 128 bytes (multiple of 64 bytes, the cache line size).
+ */
+#define VPU_DYNDBG_CMD_MAX_LEN 96
+
+/*
+ * Job format.
+ */
+struct vpu_job_queue_entry {
+	u64 batch_buf_addr; /**< Address of VPU commands batch buffer */
+	u32 job_id;	  /**< Job ID */
+	u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
+	u64 root_page_table_addr; /**< Address of root page table to use for this job */
+	u64 root_page_table_update_counter; /**< Page tables update events counter */
+	u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */
+	u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */
+	u8 reserved_0[VPU_JOB_RESERVED_BYTES];
+};
+
+/*
+ * Job queue control registers.
+ */
+struct vpu_job_queue_header {
+	u32 engine_idx;
+	u32 head;
+	u32 tail;
+	u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES];
+};
+
+/*
+ * Job queue format.
+ */
+struct vpu_job_queue {
+	struct vpu_job_queue_header header;
+	struct vpu_job_queue_entry job[];
+};
+
+/**
+ * Logging entity types.
+ *
+ * This enum defines the different types of entities involved in logging.
+ */
+enum vpu_trace_entity_type {
+	/** Logging destination (entity where logs can be stored / printed). */
+	VPU_TRACE_ENTITY_TYPE_DESTINATION = 1,
+	/** Loggable HW component (HW entity that can be logged). */
+	VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2,
+};
+
+/*
+ * Host <-> VPU IPC messages types.
+ */
+enum vpu_ipc_msg_type {
+	VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
+	/* IPC Host -> Device, Async commands */
+	VPU_JSM_MSG_ASYNC_CMD = 0x1100,
+	VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
+	VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
+	VPU_JSM_MSG_REGISTER_DB = 0x1102,
+	VPU_JSM_MSG_UNREGISTER_DB = 0x1103,
+	VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104,
+	VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105,
+	VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106,
+	VPU_JSM_MSG_SET_POWER_LEVEL = 0x1107,
+	/* @deprecated */
+	VPU_JSM_MSG_METRIC_STREAMER_OPEN = 0x1108,
+	/* @deprecated */
+	VPU_JSM_MSG_METRIC_STREAMER_CLOSE = 0x1109,
+	/** Configure logging (used to modify configuration passed in boot params). */
+	VPU_JSM_MSG_TRACE_SET_CONFIG = 0x110a,
+	/** Return current logging configuration. */
+	VPU_JSM_MSG_TRACE_GET_CONFIG = 0x110b,
+	/**
+	 * Get masks of destinations and HW components supported by the firmware
+	 * (may vary between HW generations and FW compile
+	 * time configurations)
+	 */
+	VPU_JSM_MSG_TRACE_GET_CAPABILITY = 0x110c,
+	/** Get the name of a destination or HW component. */
+	VPU_JSM_MSG_TRACE_GET_NAME = 0x110d,
+	/**
+	 * Release resources associated with the host SSID. All jobs that belong to the
+	 * host_ssid are aborted and removed from internal scheduling queues. All doorbells
+	 * assigned to the host_ssid are unregistered and any internal FW resources
+	 * belonging to the host_ssid are released.
+	 */
+	VPU_JSM_MSG_SSID_RELEASE = 0x110e,
+	/**
+	 * Start collecting metric data.
+	 * @see vpu_jsm_metric_streamer_start
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_START = 0x110f,
+	/**
+	 * Stop collecting metric data. This command will return success if it is called
+	 * for a metric stream that has already been stopped or was never started.
+	 * @see vpu_jsm_metric_streamer_stop
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_STOP = 0x1110,
+	/**
+	 * Update current and next buffer for metric data collection. This command can
+	 * also be used to request information about the number of collected samples
+	 * and the amount of data written to the buffer.
+	 * @see vpu_jsm_metric_streamer_update
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_UPDATE = 0x1111,
+	/**
+	 * Request description of selected metric groups and metric counters within
+	 * each group. The VPU will write the description of groups and counters to
+	 * the buffer specified in the command structure.
+	 * @see vpu_jsm_metric_streamer_start
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112,
+	/** Control command: Priority band setup */
+	VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113,
+	/** Control command: Create command queue */
+	VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114,
+	/** Control command: Destroy command queue */
+	VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115,
+	/** Control command: Set context scheduling properties */
+	VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116,
+	/*
+	 * Register a doorbell to notify VPU of new work. The doorbell may later be
+	 * deallocated or reassigned to another context.
+	 */
+	VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
+	/* IPC Host -> Device, General commands */
+	VPU_JSM_MSG_GENERAL_CMD = 0x1200,
+	VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
+	/**
+	 * Control dyndbg behavior by executing a dyndbg command; equivalent to
+	 * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
+	 */
+	VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
+	/* IPC Device -> Host, Job completion */
+	VPU_JSM_MSG_JOB_DONE = 0x2100,
+	/* IPC Device -> Host, Async command completion */
+	VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200,
+	VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
+	VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201,
+	VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202,
+	VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203,
+	VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204,
+	VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205,
+	VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206,
+	VPU_JSM_MSG_SET_POWER_LEVEL_DONE = 0x2207,
+	/* @deprecated */
+	VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE = 0x2208,
+	/* @deprecated */
+	VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE = 0x2209,
+	/** Response to VPU_JSM_MSG_TRACE_SET_CONFIG. */
+	VPU_JSM_MSG_TRACE_SET_CONFIG_RSP = 0x220a,
+	/** Response to VPU_JSM_MSG_TRACE_GET_CONFIG. */
+	VPU_JSM_MSG_TRACE_GET_CONFIG_RSP = 0x220b,
+	/** Response to VPU_JSM_MSG_TRACE_GET_CAPABILITY. */
+	VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c,
+	/** Response to VPU_JSM_MSG_TRACE_GET_NAME. */
+	VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d,
+	/** Response to VPU_JSM_MSG_SSID_RELEASE. */
+	VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e,
+	/**
+	 * Response to VPU_JSM_MSG_METRIC_STREAMER_START.
+	 * VPU will return an error result if metric collection cannot be started,
+	 * e.g. when the specified metric mask is invalid.
+	 * @see vpu_jsm_metric_streamer_done
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_START_DONE = 0x220f,
+	/**
+	 * Response to VPU_JSM_MSG_METRIC_STREAMER_STOP.
+	 * Returns information about collected metric data.
+	 * @see vpu_jsm_metric_streamer_done
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE = 0x2210,
+	/**
+	 * Response to VPU_JSM_MSG_METRIC_STREAMER_UPDATE.
+	 * Returns information about collected metric data.
+	 * @see vpu_jsm_metric_streamer_done
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE = 0x2211,
+	/**
+	 * Response to VPU_JSM_MSG_METRIC_STREAMER_INFO.
+	 * Returns a description of the metric groups and metric counters.
+	 * @see vpu_jsm_metric_streamer_done
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE = 0x2212,
+	/**
+	 * Asynchronous event sent from the VPU to the host either when the current
+	 * metric buffer is full or when the VPU has collected a multiple of
+	 * @notify_sample_count samples as indicated through the start command
+	 * (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected
+	 * metric data.
+	 * @see vpu_jsm_metric_streamer_done
+	 */
+	VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213,
+	/** Response to control command: Priority band setup */
+	VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214,
+	/** Response to control command: Create command queue */
+	VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215,
+	/** Response to control command: Destroy command queue */
+	VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
+	/** Response to control command: Set context scheduling properties */
+	VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
+	/* IPC Device -> Host, General command completion */
+	VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
+	VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
+	/** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */
+	VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301,
+};
+
+enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
+
+/*
+ * Host <-> LRT IPC message payload definitions
+ */
+struct vpu_ipc_msg_payload_engine_reset {
+	/* Engine to be reset. */
+	u32 engine_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+struct vpu_ipc_msg_payload_engine_preempt {
+	/* Engine to be preempted. */
+	u32 engine_idx;
+	/* ID of the preemption request. */
+	u32 preempt_id;
+};
+
+/*
+ * @brief Register doorbell command structure.
+ * This structure supports doorbell registration for only OS scheduling.
+ * @see VPU_JSM_MSG_REGISTER_DB
+ */
+struct vpu_ipc_msg_payload_register_db {
+	/* Index of the doorbell to register. */
+	u32 db_idx;
+	/* Reserved */
+	u32 reserved_0;
+	/* Virtual address in Global GTT pointing to the start of job queue. */
+	u64 jobq_base;
+	/* Size of the job queue in bytes. */
+	u32 jobq_size;
+	/* Host sub-stream ID for the context assigned to the doorbell. */
+	u32 host_ssid;
+};
+
+/**
+ * @brief Unregister doorbell command structure.
+ * Request structure to unregister a doorbell for both HW and OS scheduling.
+ * @see VPU_JSM_MSG_UNREGISTER_DB
+ */
+struct vpu_ipc_msg_payload_unregister_db {
+	/* Index of the doorbell to unregister. */
+	u32 db_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+struct vpu_ipc_msg_payload_query_engine_hb {
+	/* Engine to return heartbeat value. */
+	u32 engine_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+struct vpu_ipc_msg_payload_power_level {
+	/**
+	 * Requested power level. The power level value is in the
+	 * range [0, power_level_count-1] where power_level_count
+	 * is the number of available power levels as returned by
+	 * the get power level count command. A power level of 0
+	 * corresponds to the maximum possible power level, while
+	 * power_level_count-1 corresponds to the minimum possible
+	 * power level. Values outside of this range are not
+	 * considered to be valid.
+	 */
+	u32 power_level;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+struct vpu_ipc_msg_payload_ssid_release {
+	/* Host sub-stream ID for the context to be released. */
+	u32 host_ssid;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+/**
+ * @brief Metric streamer start command structure.
+ * This structure is also used with VPU_JSM_MSG_METRIC_STREAMER_INFO to request metric
+ * groups and metric counters description from the firmware.
+ * @see VPU_JSM_MSG_METRIC_STREAMER_START
+ * @see VPU_JSM_MSG_METRIC_STREAMER_INFO
+ */
+struct vpu_jsm_metric_streamer_start {
+	/**
+	 * Bitmask to select the desired metric groups.
+	 * A metric group can belong only to one metric streamer instance at a time.
+	 * Since each metric streamer instance has a unique set of metric groups, it
+	 * can also identify a metric streamer instance if more than one instance was
+	 * started. If the VPU device does not support multiple metric streamer instances,
+	 * then VPU_JSM_MSG_METRIC_STREAMER_START will return an error even if the second
+	 * instance has different groups to the first.
+	 */
+	u64 metric_group_mask;
+	/** Sampling rate in nanoseconds. */
+	u64 sampling_rate;
+	/**
+	 * If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message
+	 * after every @notify_sample_count samples are collected or dropped by the VPU.
+	 * If set to UINT_MAX the VPU will only generate a notification when the metric
+	 * buffer is full. If set to 0 the VPU will never generate a notification.
+	 */
+	u32 notify_sample_count;
+	u32 reserved_0;
+	/**
+	 * Address and size of the buffer where the VPU will write metric data. The
+	 * VPU writes all counters from enabled metric groups one after another. If
+	 * there is no space left to write data at the next sample period the VPU
+	 * will switch to the next buffer (@see next_buffer_addr) and will optionally
+	 * send a notification to the host driver if @notify_sample_count is non-zero.
+	 * If @next_buffer_addr is NULL the VPU will stop collecting metric data.
+	 */
+	u64 buffer_addr;
+	u64 buffer_size;
+	/**
+	 * Address and size of the next buffer to write metric data to after the initial
+	 * buffer is full. If the address is NULL the VPU will stop collecting metric
+	 * data.
+	 */
+	u64 next_buffer_addr;
+	u64 next_buffer_size;
+};
+
+/**
+ * @brief Metric streamer stop command structure.
+ * @see VPU_JSM_MSG_METRIC_STREAMER_STOP
+ */
+struct vpu_jsm_metric_streamer_stop {
+	/** Bitmask to select the desired metric groups. */
+	u64 metric_group_mask;
+};
+
+/**
+ * Provide VPU FW with buffers to write metric data.
+ * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE
+ */
+struct vpu_jsm_metric_streamer_update {
+	/** Metric group mask that identifies metric streamer instance. */
+	u64 metric_group_mask;
+	/**
+	 * Address and size of the buffer where the VPU will write metric data. If
+	 * the buffer address is 0 or same as the currently used buffer the VPU will
+	 * continue writing metric data to the current buffer. In this case the
+	 * buffer size is ignored and the size of the current buffer is unchanged.
+	 * If the address is non-zero and differs from the current buffer address the
+	 * VPU will immediately switch data collection to the new buffer.
+	 */
+	u64 buffer_addr;
+	u64 buffer_size;
+	/**
+	 * Address and size of the next buffer to write metric data after the initial
+	 * buffer is full. If the address is NULL the VPU will stop collecting metric
+	 * data but will continue to record dropped samples.
+	 *
+	 * Note that there is a hazard possible if both buffer_addr and the next_buffer_addr
+	 * are non-zero in same update request. It is the host's responsibility to ensure
+	 * that both addresses make sense even if the VPU just switched to writing samples
+	 * from the current to the next buffer.
+	 */
+	u64 next_buffer_addr;
+	u64 next_buffer_size;
+};
+
+struct vpu_ipc_msg_payload_blob_deinit {
+	/* 64-bit unique ID for the blob to be de-initialized. */
+	u64 blob_id;
+};
+
+struct vpu_ipc_msg_payload_job_done {
+	/* Engine to which the job was submitted. */
+	u32 engine_idx;
+	/* Index of the doorbell to which the job was submitted */
+	u32 db_idx;
+	/* ID of the completed job */
+	u32 job_id;
+	/* Status of the completed job */
+	u32 job_status;
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+};
+
+struct vpu_jsm_engine_reset_context {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+	/* Flags: 0: cause of hang; 1: collateral damage of reset */
+	u64 flags;
+};
+
+struct vpu_ipc_msg_payload_engine_reset_done {
+	/* Engine ordinal */
+	u32 engine_idx;
+	/* Number of impacted contexts */
+	u32 num_impacted_contexts;
+	/* Array of impacted command queue ids and their flags */
+	struct vpu_jsm_engine_reset_context
+		impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS];
+};
+
+struct vpu_ipc_msg_payload_engine_preempt_done {
+	/* Engine preempted. */
+	u32 engine_idx;
+	/* ID of the preemption request. */
+	u32 preempt_id;
+};
+
+/**
+ * Response structure for register doorbell command for both OS
+ * and HW scheduling.
+ * @see VPU_JSM_MSG_REGISTER_DB
+ * @see VPU_JSM_MSG_HWS_REGISTER_DB
+ */
+struct vpu_ipc_msg_payload_register_db_done {
+	/* Index of the registered doorbell. */
+	u32 db_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+/**
+ * Response structure for unregister doorbell command for both OS
+ * and HW scheduling.
+ * @see VPU_JSM_MSG_UNREGISTER_DB
+ */
+struct vpu_ipc_msg_payload_unregister_db_done {
+	/* Index of the unregistered doorbell. */
+	u32 db_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+struct vpu_ipc_msg_payload_query_engine_hb_done {
+	/* Engine returning heartbeat value. */
+	u32 engine_idx;
+	/* Reserved */
+	u32 reserved_0;
+	/* Heartbeat value. */
+	u64 heartbeat;
+};
+
+struct vpu_ipc_msg_payload_get_power_level_count_done {
+	/**
+	 * Number of supported power levels. The maximum possible
+	 * value of power_level_count is 16 but this may vary across
+	 * implementations.
+	 */
+	u32 power_level_count;
+	/* Reserved */
+	u32 reserved_0;
+	/**
+	 * Power consumption limit for each supported power level in
+	 * [0-100%] range relative to power level 0.
+	 */
+	u8 power_limit[16];
+};
+
+struct vpu_ipc_msg_payload_blob_deinit_done {
+	/* 64-bit unique ID for the blob de-initialized. */
+	u64 blob_id;
+};
+
+/* HWS priority band setup request / response */
+struct vpu_ipc_msg_payload_hws_priority_band_setup {
+	/*
+	 * Grace period in 100ns units when preempting another priority band for
+	 * this priority band
+	 */
+	u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+	/*
+	 * Default quantum in 100ns units for scheduling across processes
+	 * within a priority band
+	 */
+	u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+	/*
+	 * Default grace period in 100ns units for processes that preempt each
+	 * other within a priority band
+	 */
+	u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+	/*
+	 * For normal priority band, specifies the target VPU percentage
+	 * in situations when it's starved by the focus band.
+	 */
+	u32 normal_band_percentage;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+/* HWS create command queue request */
+struct vpu_ipc_msg_payload_hws_create_cmdq {
+	/* Process id */
+	u64 process_id;
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved;
+	/* Command queue id */
+	u64 cmdq_id;
+	/* Command queue base */
+	u64 cmdq_base;
+	/* Command queue size */
+	u32 cmdq_size;
+	/* Reserved */
+	u32 reserved_0;
+};
+
+/* HWS create command queue response */
+struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
+	/* Process id */
+	u64 process_id;
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved;
+	/* Command queue id */
+	u64 cmdq_id;
+};
+
+/* HWS destroy command queue request / response */
+struct vpu_ipc_msg_payload_hws_destroy_cmdq {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved;
+	/* Command queue id */
+	u64 cmdq_id;
+};
+
+/* HWS set context scheduling properties request / response */
+struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+	/* Priority band to assign to work of this context */
+	u32 priority_band;
+	/* Inside realtime band assigns a further priority */
+	u32 realtime_priority_level;
+	/* Priority relative to other contexts in the same process */
+	u32 in_process_priority;
+	/* Zero padding / Reserved */
+	u32 reserved_1;
+	/* Context quantum relative to other contexts of same priority in the same process */
+	u64 context_quantum;
+	/* Grace period when preempting context of the same priority within the same process */
+	u64 grace_period_same_priority;
+	/* Grace period when preempting context of a lower priority within the same process */
+	u64 grace_period_lower_priority;
+};
+
+/*
+ * @brief Register doorbell command structure.
+ * This structure supports doorbell registration for both HW and OS scheduling.
+ * Note: Queue base and size are added here so that the same structure can be used for
+ * OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored
+ * and cmdq_base and cmdq_size will be used. For HW scheduling, cmdq_base and cmdq_size will be
+ * ignored and cmdq_id is used.
+ * @see VPU_JSM_MSG_HWS_REGISTER_DB
+ */
+struct vpu_jsm_hws_register_db {
+	/* Index of the doorbell to register. */
+	u32 db_id;
+	/* Host sub-stream ID for the context assigned to the doorbell. */
+	u32 host_ssid;
+	/* ID of the command queue associated with the doorbell. */
+	u64 cmdq_id;
+	/* Virtual address pointing to the start of command queue. */
+	u64 cmdq_base;
+	/* Size of the command queue in bytes. */
+	u64 cmdq_size;
+};
+
+/**
+ * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and
+ * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages.
+ *
+ * The payload is interpreted differently depending on the type of message:
+ *
+ * - For VPU_JSM_MSG_TRACE_SET_CONFIG, the payload specifies the desired
+ *   logging configuration to be set.
+ *
+ * - For VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, the payload reports the logging
+ *   configuration that was set after a VPU_JSM_MSG_TRACE_SET_CONFIG request.
+ *   The host can compare this payload with the one it sent in the
+ *   VPU_JSM_MSG_TRACE_SET_CONFIG request to check whether or not the
+ *   configuration was set as desired.
+ *
+ * - For VPU_JSM_MSG_TRACE_GET_CONFIG_RSP, the payload reports the current
+ *   logging configuration.
+ */
+struct vpu_ipc_msg_payload_trace_config {
+	/**
+	 * Logging level (currently set or to be set); see 'mvLog_t' enum for
+	 * acceptable values. The specified logging level applies to all
+	 * destinations and HW components
+	 */
+	u32 trace_level;
+	/**
+	 * Bitmask of logging destinations (currently enabled or to be enabled);
+	 * bitwise OR of values defined in logging_destination enum.
+	 */
+	u32 trace_destination_mask;
+	/**
+	 * Bitmask of loggable HW components (currently enabled or to be enabled);
+	 * bitwise OR of values defined in loggable_hw_component enum.
+	 */
+	u64 trace_hw_component_mask;
+	u64 reserved_0; /**< Reserved for future extensions. */
+};
+
+/**
+ * Payload for VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP messages.
+ */
+struct vpu_ipc_msg_payload_trace_capability_rsp {
+	u32 trace_destination_mask; /**< Bitmask of supported logging destinations. */
+	u32 reserved_0;
+	u64 trace_hw_component_mask; /**< Bitmask of supported loggable HW components. */
+	u64 reserved_1; /**< Reserved for future extensions. */
+};
+
+/**
+ * Payload for VPU_JSM_MSG_TRACE_GET_NAME requests.
+ */
+struct vpu_ipc_msg_payload_trace_get_name {
+	/**
+	 * The type of the entity to query the name for; see logging_entity_type
+	 * (presumably enum vpu_trace_entity_type - TODO confirm) for possible values.
+	 */
+	u32 entity_type;
+	u32 reserved_0;
+	/**
+	 * The ID of the entity to query the name for; possible values depend on the
+	 * entity type.
+	 */
+	u64 entity_id;
+};
+
+/**
+ * Payload for VPU_JSM_MSG_TRACE_GET_NAME_RSP responses.
+ */
+struct vpu_ipc_msg_payload_trace_get_name_rsp {
+	/**
+	 * The type of the entity whose name was queried; see logging_entity_type
+	 * (presumably enum vpu_trace_entity_type - TODO confirm) for possible values.
+	 */
+	u32 entity_type;
+	u32 reserved_0;
+	/**
+	 * The ID of the entity whose name was queried; possible values depend on
+	 * the entity type.
+	 */
+	u64 entity_id;
+	/** Reserved for future extensions. */
+	u64 reserved_1;
+	/** The name of the entity (array sized by VPU_TRACE_ENTITY_NAME_MAX_LEN, terminator included). */
+	char entity_name[VPU_TRACE_ENTITY_NAME_MAX_LEN];
+};
+
+/**
+ * Data sent from the VPU to the host in all metric streamer response messages
+ * and in asynchronous notification.
+ * @see VPU_JSM_MSG_METRIC_STREAMER_START_DONE
+ * @see VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE
+ * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE
+ * @see VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE
+ * @see VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION
+ */
+struct vpu_jsm_metric_streamer_done {
+	/** Metric group mask that identifies metric streamer instance. */
+	u64 metric_group_mask;
+	/**
+	 * Size in bytes of single sample - total size of all enabled counters.
+	 * Some VPU implementations may align sample_size to more than 8 bytes.
+	 */
+	u32 sample_size;
+	u32 reserved_0;
+	/**
+	 * Number of samples collected since the metric streamer was started.
+	 * This will be 0 if the metric streamer was not started.
+	 */
+	u32 samples_collected;
+	/**
+	 * Number of samples dropped since the metric streamer was started. This
+	 * is incremented every time the metric streamer is not able to write
+	 * collected samples because the current buffer is full and there is no
+	 * next buffer to switch to.
+	 */
+	u32 samples_dropped;
+	/** Address of the buffer that contains the latest metric data. */
+	u64 buffer_addr;
+	/**
+	 * Number of bytes written into the metric data buffer. In response to the
+	 * VPU_JSM_MSG_METRIC_STREAMER_INFO request this field contains the size of
+	 * all group and counter descriptors. The size is updated even if the buffer
+	 * in the request was NULL or too small to hold descriptors of all counters
+	 */
+	u64 bytes_written;
+};
+
+/**
+ * Metric group description placed in the metric buffer after successful completion
+ * of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more
+ * @vpu_jsm_metric_counter_descriptor records.
+ * @see VPU_JSM_MSG_METRIC_STREAMER_INFO
+ */
+struct vpu_jsm_metric_group_descriptor {
+	/**
+	 * Offset to the next metric group (8-byte aligned). 0 means this is the last
+	 * descriptor. metric_info_size (not a field here; presumably this offset - TODO
+	 * confirm) must be >= sizeof(struct vpu_jsm_metric_group_descriptor) +
+	 * name_string_size + description_string_size and must be 8-byte aligned.
+	 */
+	u32 next_metric_group_info_offset;
+	/**
+	 * Offset to the first metric counter description record (8-byte aligned).
+	 * @see vpu_jsm_metric_counter_descriptor
+	 */
+	u32 next_metric_counter_info_offset;
+	/** Index of the group. This corresponds to bit index in metric_group_mask. */
+	u32 group_id;
+	/** Number of counters in the metric group. */
+	u32 num_counters;
+	/** Data size for all counters, must be a multiple of 8 bytes. */
+	u32 metric_group_data_size;
+	/**
+	 * Metric group domain number. Cannot use multiple, simultaneous metric groups
+	 * from the same domain.
+	 */
+	u32 domain;
+	/**
+	 * Counter name string size (NOTE(review): presumably the group name - confirm).
+	 * The string must include a null termination character. The FW may use a fixed
+	 * size name or send a different name for each counter. With fixed size strings,
+	 * all characters from the end of the name to the end of the array must be zeroed.
+	 */
+	u32 name_string_size;
+	/** Description string size (worded for counters; TODO confirm), @see name_string_size */
+	u32 description_string_size;
+	u64 reserved_0;
+	/**
+	 * Right after this structure, the VPU writes name and description of
+	 * the metric group.
+	 */
+};
+
+/**
+ * Metric counter description, placed in the buffer after vpu_jsm_metric_group_descriptor.
+ * @see VPU_JSM_MSG_METRIC_STREAMER_INFO
+ */
+struct vpu_jsm_metric_counter_descriptor {
+	/**
+	 * Offset to the next counter in a group (8-byte aligned). If this offset is
+	 * 0 this is the last counter in the group.
+	 */
+	u32 next_metric_counter_info_offset;
+	/**
+	 * Offset to the counter data from the start of samples in this metric group.
+	 * Note that metric_data_offset % metric_data_size must be 0.
+	 */
+	u32 metric_data_offset;
+	/** Size of the metric counter data in bytes. */
+	u32 metric_data_size;
+	/** Metric tier, see Level Zero API for definitions. */
+	u32 tier;
+	/** Metric type, see set_metric_type_t for definitions. */
+	u32 metric_type;
+	/** Metric value type, see set_value_type_t for definitions. */
+	u32 metric_value_type;
+	/**
+	 * Counter name string size. The string must include a null termination character.
+	 * The FW may use a fixed size name or send a different name for each counter.
+	 * If the VPU uses fixed size strings, all characters from the end of the name
+	 * to the end of the fixed size character array must be zeroed.
+	 */
+	u32 name_string_size;
+	/** Counter description string size, @see name_string_size */
+	u32 description_string_size;
+	/** Counter component name string size, @see name_string_size */
+	u32 component_string_size;
+	/** Counter units string size, @see name_string_size */
+	u32 units_string_size;
+	u64 reserved_0;
+	/**
+	 * Right after this structure, the VPU writes name, description,
+	 * component and unit strings.
+	 */
+};
+
+/**
+ * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests.
+ *
+ * VPU_JSM_MSG_DYNDBG_CONTROL requests are used to control the VPU FW Dynamic
+ * Debug feature, which allows developers to selectively enable / disable
+ * MVLOG_DEBUG messages. This is equivalent to the Dynamic Debug functionality
+ * provided by Linux
+ * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html).
+ *
+ * The host can control Dynamic Debug behavior by sending dyndbg commands, which
+ * have the same syntax as Linux dyndbg commands.
+ *
+ * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host
+ * still has to set the logging level to MVLOG_DEBUG, using the
+ * VPU_JSM_MSG_TRACE_SET_CONFIG command.
+ *
+ * The host can see the current dynamic debug configuration by executing a
+ * special 'show' command. The dyndbg configuration will be printed to the
+ * configured logging destination using MVLOG_INFO logging level.
+ */
+struct vpu_ipc_msg_payload_dyndbg_control {
+	/**
+	 * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated
+	 * string.
+	 */
+	char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
+};
+
+/*
+ * Union of all message payload layouts; struct vpu_jsm_msg embeds it as 'payload'.
+ */
+union vpu_ipc_msg_payload {
+	struct vpu_ipc_msg_payload_engine_reset engine_reset;
+	struct vpu_ipc_msg_payload_engine_preempt engine_preempt;
+	struct vpu_ipc_msg_payload_register_db register_db;
+	struct vpu_ipc_msg_payload_unregister_db unregister_db;
+	struct vpu_ipc_msg_payload_query_engine_hb query_engine_hb;
+	struct vpu_ipc_msg_payload_power_level power_level;
+	struct vpu_jsm_metric_streamer_start metric_streamer_start;
+	struct vpu_jsm_metric_streamer_stop metric_streamer_stop;
+	struct vpu_jsm_metric_streamer_update metric_streamer_update;
+	struct vpu_ipc_msg_payload_blob_deinit blob_deinit;
+	struct vpu_ipc_msg_payload_ssid_release ssid_release;
+	struct vpu_jsm_hws_register_db hws_register_db;
+	struct vpu_ipc_msg_payload_job_done job_done;
+	struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done;
+	struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done;
+	struct vpu_ipc_msg_payload_register_db_done register_db_done;
+	struct vpu_ipc_msg_payload_unregister_db_done unregister_db_done;
+	struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done;
+	struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done;
+	struct vpu_jsm_metric_streamer_done metric_streamer_done;
+	struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done;
+	struct vpu_ipc_msg_payload_trace_config trace_config;
+	struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability;
+	struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
+	struct vpu_ipc_msg_payload_trace_get_name_rsp trace_get_name_rsp;
+	struct vpu_ipc_msg_payload_dyndbg_control dyndbg_control;
+	struct vpu_ipc_msg_payload_hws_priority_band_setup hws_priority_band_setup;
+	struct vpu_ipc_msg_payload_hws_create_cmdq hws_create_cmdq;
+	struct vpu_ipc_msg_payload_hws_create_cmdq_rsp hws_create_cmdq_rsp;
+	struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq;
+	struct vpu_ipc_msg_payload_hws_set_context_sched_properties
+		hws_set_context_sched_properties;
+};
+
+/*
+ * Host <-> LRT IPC message: fixed header fields followed by a type-specific payload.
+ *
+ * NOTE: All instances of this object must be aligned on a 64B boundary
+ * to allow proper handling of VPU cache operations.
+ */
+struct vpu_jsm_msg {
+	/* Reserved. */
+	u64 reserved_0;
+	/* Message type, see vpu_ipc_msg_type enum. */
+	u32 type;
+	/* Buffer status, see vpu_ipc_msg_status enum. */
+	u32 status;
+	/*
+	 * Request ID, provided by the host in a request message and passed
+	 * back by the VPU in the matching response message.
+	 */
+	u32 request_id;
+	/* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
+	u32 result;
+	u64 reserved_1; /* Reserved. */
+	/* Message payload; layout depends on 'type', see union vpu_ipc_msg_payload. */
+	union vpu_ipc_msg_payload payload;
+};
+
+#pragma pack(pop)
+
+#endif
+
+///@}