45 files changed, 1637 insertions, 500 deletions
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index 5119bccd1917..ad8ac6e315b6 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,3 +1,2 @@
-- Add import and export BO support
 - Add debugfs support
 - Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 00d215ac866e..2cff5419bd2f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -361,7 +361,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
 	aie2_hwctx_restart(xdna, hwctx);
 	mutex_unlock(&xdna->dev_lock);
 
-	return DRM_GPU_SCHED_STAT_NOMINAL;
+	return DRM_GPU_SCHED_STAT_RESET;
 }
 
 static const struct drm_sched_backend_ops sched_ops = {
@@ -566,7 +566,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 			.size = MAX_CHAIN_CMDBUF_SIZE,
 		};
 
-		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
 		if (IS_ERR(abo)) {
 			ret = PTR_ERR(abo);
 			goto free_cmd_bufs;
@@ -758,27 +758,42 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
 static int aie2_populate_range(struct amdxdna_gem_obj *abo)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
-	struct mm_struct *mm = abo->mem.notifier.mm;
-	struct hmm_range range = { 0 };
+	struct amdxdna_umap *mapp;
 	unsigned long timeout;
+	struct mm_struct *mm;
+	bool found;
 	int ret;
 
-	XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
-		       abo->mem.userptr, abo->mem.size);
-	range.notifier = &abo->mem.notifier;
-	range.start = abo->mem.userptr;
-	range.end = abo->mem.userptr + abo->mem.size;
-	range.hmm_pfns = abo->mem.pfns;
-	range.default_flags = HMM_PFN_REQ_FAULT;
+	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+	found = false;
+	down_write(&xdna->notifier_lock);
+	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+		if (mapp->invalid) {
+			found = true;
+			break;
+		}
+	}
 
-	if (!mmget_not_zero(mm))
+	if (!found) {
+		abo->mem.map_invalid = false;
+		up_write(&xdna->notifier_lock);
+		return 0;
+	}
+	kref_get(&mapp->refcnt);
+	up_write(&xdna->notifier_lock);
+
+	XDNA_DBG(xdna, "populate memory range %lx %lx",
+		 mapp->vma->vm_start, mapp->vma->vm_end);
+	mm = mapp->notifier.mm;
+	if (!mmget_not_zero(mm)) {
+		amdxdna_umap_put(mapp);
 		return -EFAULT;
+	}
 
-	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-again:
-	range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
 	mmap_read_lock(mm);
-	ret = hmm_range_fault(&range);
+	ret = hmm_range_fault(&mapp->range);
 	mmap_read_unlock(mm);
 	if (ret) {
 		if (time_after(jiffies, timeout)) {
@@ -786,21 +801,27 @@ again:
 			goto put_mm;
 		}
 
-		if (ret == -EBUSY)
+		if (ret == -EBUSY) {
+			amdxdna_umap_put(mapp);
 			goto again;
+		}
 
 		goto put_mm;
 	}
 
-	down_read(&xdna->notifier_lock);
-	if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
-		up_read(&xdna->notifier_lock);
+	down_write(&xdna->notifier_lock);
+	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
+		up_write(&xdna->notifier_lock);
+		amdxdna_umap_put(mapp);
 		goto again;
 	}
-	abo->mem.map_invalid = false;
-	up_read(&xdna->notifier_lock);
+	mapp->invalid = false;
+	up_write(&xdna->notifier_lock);
+	amdxdna_umap_put(mapp);
+	goto again;
 
 put_mm:
+	amdxdna_umap_put(mapp);
 	mmput(mm);
 	return ret;
 }
@@ -827,7 +848,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 		goto up_sem;
 	}
 
-	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
+				 hwctx->client->filp->client_id);
 	if (ret) {
 		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
 		goto free_chain;
@@ -908,10 +930,6 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
 	struct drm_gem_object *gobj = to_gobj(abo);
 	long ret;
 
-	down_write(&xdna->notifier_lock);
-	abo->mem.map_invalid = true;
-	mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
-	up_write(&xdna->notifier_lock);
 	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
 				    true, MAX_SCHEDULE_TIMEOUT);
 	if (!ret || ret == -ERESTARTSYS)
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index bf4219e32cc1..82412eec9a4b 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
 	if (!payload)
 		return -EINVAL;
 
-	if (!slot_cf_has_space(offset, payload_len))
+	if (!slot_has_space(*buf, offset, payload_len))
 		return -ENOSPC;
 
 	buf->cu_idx = cu_idx;
@@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
 	if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
 		return -EINVAL;
 
-	if (!slot_dpu_has_space(offset, arg_sz))
+	if (!slot_has_space(*buf, offset, arg_sz))
 		return -ENOSPC;
 
 	buf->inst_buf_addr = sn->buffer;
@@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
 	memcpy(buf->args, sn->prop_args, arg_sz);
 
 	/* Accurate buf size to hint firmware to do necessary copy */
-	*size += sizeof(*buf) + arg_sz;
+	*size = sizeof(*buf) + arg_sz;
 	return 0;
 }
 
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 4e02e744b470..6df9065b13f6 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -319,18 +319,16 @@ struct async_event_msg_resp {
 } __packed;
 
 #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_cf_has_space(offset, payload_size) \
-	(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
-	 offsetof(struct cmd_chain_slot_execbuf_cf, args[0]))
+#define slot_has_space(slot, offset, payload_size)		\
+	(MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) +	\
+	 sizeof(typeof(slot)))
+
 struct cmd_chain_slot_execbuf_cf {
 	__u32 cu_idx;
 	__u32 arg_cnt;
 	__u32 args[] __counted_by(arg_cnt);
 };
 
-#define slot_dpu_has_space(offset, payload_size) \
-	(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
-	 offsetof(struct cmd_chain_slot_dpu, args[0]))
 struct cmd_chain_slot_dpu {
 	__u64 inst_buf_addr;
 	__u32 inst_size;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 5a058e565b01..c6cf7068d23c 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -512,12 +512,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
 		goto release_fw;
 	}
 
-	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
-	if (ret) {
-		XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
-		goto free_irq;
-	}
-
 	psp_conf.fw_size = fw->size;
 	psp_conf.fw_buf = fw->data;
 	for (i = 0; i < PSP_MAX_REGS; i++)
@@ -526,14 +520,14 @@ static int aie2_init(struct amdxdna_dev *xdna)
 	if (!ndev->psp_hdl) {
 		XDNA_ERR(xdna, "failed to create psp");
 		ret = -ENOMEM;
-		goto disable_sva;
+		goto free_irq;
 	}
 	xdna->dev_handle = ndev;
 
 	ret = aie2_hw_start(xdna);
 	if (ret) {
 		XDNA_ERR(xdna, "start npu failed, ret %d", ret);
-		goto disable_sva;
+		goto free_irq;
 	}
 
 	ret = aie2_mgmt_fw_query(ndev);
@@ -584,8 +578,6 @@ async_event_free:
 	aie2_error_async_events_free(ndev);
 stop_hw:
 	aie2_hw_stop(xdna);
-disable_sva:
-	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
 free_irq:
 	pci_free_irq_vectors(pdev);
 release_fw:
@@ -601,7 +593,6 @@ static void aie2_fini(struct amdxdna_dev *xdna)
 
 	aie2_hw_stop(xdna);
 	aie2_error_async_events_free(ndev);
-	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
 	pci_free_irq_vectors(pdev);
 }
 
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
index dc3a072ce3b6..f28a060a8810 100644
--- a/drivers/accel/amdxdna/aie2_psp.c
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *
 	psp->ddev = ddev;
 	memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
 
-	psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
-	psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL);
+	psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
+	psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
 	if (!psp->fw_buffer) {
 		drm_err(ddev, "no memory for fw buffer");
 		return NULL;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 43442b9e273b..be073224bd69 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -496,11 +496,11 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
 				      struct amdxdna_drm_exec_cmd *args)
 {
 	struct amdxdna_dev *xdna = client->xdna;
-	u32 *arg_bo_hdls;
+	u32 *arg_bo_hdls = NULL;
 	u32 cmd_bo_hdl;
 	int ret;
 
-	if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
+	if (args->arg_count > MAX_ARG_COUNT) {
 		XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
 		return -EINVAL;
 	}
@@ -512,14 +512,16 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
 	}
 
 	cmd_bo_hdl = (u32)args->cmd_handles;
-	arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
-	if (!arg_bo_hdls)
-		return -ENOMEM;
-	ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
-			     args->arg_count * sizeof(u32));
-	if (ret) {
-		ret = -EFAULT;
-		goto free_cmd_bo_hdls;
+	if (args->arg_count) {
+		arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+		if (!arg_bo_hdls)
+			return -ENOMEM;
+		ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+				     args->arg_count * sizeof(u32));
+		if (ret) {
+			ret = -EFAULT;
+			goto free_cmd_bo_hdls;
+		}
 	}
 
 	ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 606433d73236..0f85a0105178 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -9,7 +9,10 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/gpu_scheduler.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direct.h>
 #include <linux/iosys-map.h>
+#include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 
 #include "amdxdna_ctx.h"
@@ -18,94 +21,116 @@
 
 #define XDNA_MAX_CMD_BO_SIZE	SZ_32K
 
+MODULE_IMPORT_NS("DMA_BUF");
+
 static int
-amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
+amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 {
 	struct amdxdna_client *client = abo->client;
 	struct amdxdna_dev *xdna = client->xdna;
 	struct amdxdna_mem *mem = &abo->mem;
+	struct amdxdna_gem_obj *heap;
 	u64 offset;
 	u32 align;
 	int ret;
 
+	mutex_lock(&client->mm_lock);
+
+	heap = client->dev_heap;
+	if (!heap) {
+		ret = -EINVAL;
+		goto unlock_out;
+	}
+
+	if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
+		XDNA_ERR(xdna, "Invalid dev heap userptr");
+		ret = -EINVAL;
+		goto unlock_out;
+	}
+
+	if (mem->size == 0 || mem->size > heap->mem.size) {
+		XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx",
+			 mem->size, heap->mem.size);
+		ret = -EINVAL;
+		goto unlock_out;
+	}
+
 	align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
-	ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node,
+	ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node,
 					 mem->size, align,
 					 0, DRM_MM_INSERT_BEST);
 	if (ret) {
 		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
-		return ret;
+		goto unlock_out;
 	}
 
 	mem->dev_addr = abo->mm_node.start;
-	offset = mem->dev_addr - abo->dev_heap->mem.dev_addr;
-	mem->userptr = abo->dev_heap->mem.userptr + offset;
-	mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT];
-	mem->nr_pages = mem->size >> PAGE_SHIFT;
-
-	if (use_vmap) {
-		mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL);
-		if (!mem->kva) {
-			XDNA_ERR(xdna, "Failed to vmap");
-			drm_mm_remove_node(&abo->mm_node);
-			return -EFAULT;
-		}
-	}
+	offset = mem->dev_addr - heap->mem.dev_addr;
+	mem->userptr = heap->mem.userptr + offset;
+	mem->kva = heap->mem.kva + offset;
 
-	return 0;
+	drm_gem_object_get(to_gobj(heap));
+
+unlock_out:
+	mutex_unlock(&client->mm_lock);
+
+	return ret;
 }
 
-static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
+static void
+amdxdna_gem_destroy_obj(struct amdxdna_gem_obj *abo)
 {
-	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
-	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
-	struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
+	mutex_destroy(&abo->lock);
+	kfree(abo);
+}
 
-	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
-	if (abo->pinned)
-		amdxdna_gem_unpin(abo);
+static void
+amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_gem_obj *heap;
 
-	if (abo->type == AMDXDNA_BO_DEV) {
-		mutex_lock(&abo->client->mm_lock);
-		drm_mm_remove_node(&abo->mm_node);
-		mutex_unlock(&abo->client->mm_lock);
+	mutex_lock(&abo->client->mm_lock);
 
-		vunmap(abo->mem.kva);
-		drm_gem_object_put(to_gobj(abo->dev_heap));
-		drm_gem_object_release(gobj);
-		mutex_destroy(&abo->lock);
-		kfree(abo);
-		return;
-	}
+	drm_mm_remove_node(&abo->mm_node);
 
-	if (abo->type == AMDXDNA_BO_DEV_HEAP)
-		drm_mm_takedown(&abo->mm);
+	heap = abo->client->dev_heap;
+	drm_gem_object_put(to_gobj(heap));
 
-	drm_gem_vunmap_unlocked(gobj, &map);
-	mutex_destroy(&abo->lock);
-	drm_gem_shmem_free(&abo->base);
+	mutex_unlock(&abo->client->mm_lock);
 }
 
-static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
-	.free = amdxdna_gem_obj_free,
-};
-
 static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
 				   const struct mmu_notifier_range *range,
 				   unsigned long cur_seq)
 {
-	struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj,
-						   mem.notifier);
-	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier);
+	struct amdxdna_gem_obj *abo = mapp->abo;
+	struct amdxdna_dev *xdna;
 
-	XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
-		 abo->mem.userptr, abo->mem.size, abo->type);
+	xdna = to_xdna_dev(to_gobj(abo)->dev);
+	XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d",
+		 mapp->vma->vm_start, mapp->vma->vm_end, abo->type);
 
 	if (!mmu_notifier_range_blockable(range))
 		return false;
 
+	down_write(&xdna->notifier_lock);
+	abo->mem.map_invalid = true;
+	mapp->invalid = true;
+	mmu_interval_set_seq(&mapp->notifier, cur_seq);
+	up_write(&xdna->notifier_lock);
+
 	xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
 
+	if (range->event == MMU_NOTIFY_UNMAP) {
+		down_write(&xdna->notifier_lock);
+		if (!mapp->unmapped) {
+			queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+			mapp->unmapped = true;
+		}
+		up_write(&xdna->notifier_lock);
+	}
+
 	return true;
 }
 
@@ -113,102 +138,311 @@ static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
 	.invalidate = amdxdna_hmm_invalidate,
 };
 
-static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo)
+static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo,
+				   struct vm_area_struct *vma)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	struct amdxdna_umap *mapp;
+
+	down_read(&xdna->notifier_lock);
+	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+		if (!vma || mapp->vma == vma) {
+			if (!mapp->unmapped) {
+				queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+				mapp->unmapped = true;
+			}
+			if (vma)
+				break;
+		}
+	}
+	up_read(&xdna->notifier_lock);
+}
 
-	if (!xdna->dev_info->ops->hmm_invalidate)
-		return;
+static void amdxdna_umap_release(struct kref *ref)
+{
+	struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt);
+	struct vm_area_struct *vma = mapp->vma;
+	struct amdxdna_dev *xdna;
+
+	mmu_interval_notifier_remove(&mapp->notifier);
+	if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping)
+		mapping_clear_unevictable(vma->vm_file->f_mapping);
+
+	xdna = to_xdna_dev(to_gobj(mapp->abo)->dev);
+	down_write(&xdna->notifier_lock);
+	list_del(&mapp->node);
+	up_write(&xdna->notifier_lock);
 
-	mmu_interval_notifier_remove(&abo->mem.notifier);
-	kvfree(abo->mem.pfns);
-	abo->mem.pfns = NULL;
+	kvfree(mapp->range.hmm_pfns);
+	kfree(mapp);
 }
 
-static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr,
-				size_t len)
+void amdxdna_umap_put(struct amdxdna_umap *mapp)
+{
+	kref_put(&mapp->refcnt, amdxdna_umap_release);
+}
+
+static void amdxdna_hmm_unreg_work(struct work_struct *work)
+{
+	struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap,
+						 hmm_unreg_work);
+
+	amdxdna_umap_put(mapp);
+}
+
+static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
+				struct vm_area_struct *vma)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	unsigned long len = vma->vm_end - vma->vm_start;
+	unsigned long addr = vma->vm_start;
+	struct amdxdna_umap *mapp;
 	u32 nr_pages;
 	int ret;
 
 	if (!xdna->dev_info->ops->hmm_invalidate)
 		return 0;
 
-	if (abo->mem.pfns)
-		return -EEXIST;
+	mapp = kzalloc(sizeof(*mapp), GFP_KERNEL);
+	if (!mapp)
+		return -ENOMEM;
 
 	nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
-	abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns),
-				 GFP_KERNEL);
-	if (!abo->mem.pfns)
-		return -ENOMEM;
+	mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns),
+					GFP_KERNEL);
+	if (!mapp->range.hmm_pfns) {
+		ret = -ENOMEM;
+		goto free_map;
+	}
 
-	ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier,
+	ret = mmu_interval_notifier_insert_locked(&mapp->notifier,
 						  current->mm,
 						  addr,
 						  len,
 						  &amdxdna_hmm_ops);
 	if (ret) {
 		XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
-		kvfree(abo->mem.pfns);
+		goto free_pfns;
 	}
-	abo->mem.userptr = addr;
 
+	mapp->range.notifier = &mapp->notifier;
+	mapp->range.start = vma->vm_start;
+	mapp->range.end = vma->vm_end;
+	mapp->range.default_flags = HMM_PFN_REQ_FAULT;
+	mapp->vma = vma;
+	mapp->abo = abo;
+	kref_init(&mapp->refcnt);
+
+	if (abo->mem.userptr == AMDXDNA_INVALID_ADDR)
+		abo->mem.userptr = addr;
+	INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work);
+	if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping)
+		mapping_set_unevictable(vma->vm_file->f_mapping);
+
+	down_write(&xdna->notifier_lock);
+	list_add_tail(&mapp->node, &abo->mem.umap_list);
+	up_write(&xdna->notifier_lock);
+
+	return 0;
+
+free_pfns:
+	kvfree(mapp->range.hmm_pfns);
+free_map:
+	kfree(mapp);
 	return ret;
 }
 
+static void amdxdna_gem_dev_obj_free(struct drm_gem_object *gobj)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+	if (abo->pinned)
+		amdxdna_gem_unpin(abo);
+
+	amdxdna_gem_heap_free(abo);
+	drm_gem_object_release(gobj);
+	amdxdna_gem_destroy_obj(abo);
+}
+
+static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo,
+				struct vm_area_struct *vma)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	unsigned long num_pages = vma_pages(vma);
+	unsigned long offset = 0;
+	int ret;
+
+	if (!is_import_bo(abo)) {
+		ret = drm_gem_shmem_mmap(&abo->base, vma);
+		if (ret) {
+			XDNA_ERR(xdna, "Failed shmem mmap %d", ret);
+			return ret;
+		}
+
+		/* The buffer is based on memory pages. Fix the flag. */
+		vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+		ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+				      &num_pages);
+		if (ret) {
+			XDNA_ERR(xdna, "Failed insert pages %d", ret);
+			vma->vm_ops->close(vma);
+			return ret;
+		}
+
+		return 0;
+	}
+
+	vma->vm_private_data = NULL;
+	vma->vm_ops = NULL;
+	ret = dma_buf_mmap(to_gobj(abo)->dma_buf, vma, 0);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret);
+		return ret;
+	}
+
+	do {
+		vm_fault_t fault_ret;
+
+		fault_ret = handle_mm_fault(vma, vma->vm_start + offset,
+					    FAULT_FLAG_WRITE, NULL);
+		if (fault_ret & VM_FAULT_ERROR) {
+			vma->vm_ops->close(vma);
+			XDNA_ERR(xdna, "Fault in page failed");
+			return -EFAULT;
+		}
+
+		offset += PAGE_SIZE;
+	} while (--num_pages);
+
+	/* Drop the reference drm_gem_mmap_obj() acquired.*/
+	drm_gem_object_put(to_gobj(abo));
+
+	return 0;
+}
+
 static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
 				struct vm_area_struct *vma)
 {
+	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
 	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
-	unsigned long num_pages;
 	int ret;
 
-	ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size);
+	ret = amdxdna_hmm_register(abo, vma);
 	if (ret)
 		return ret;
 
+	ret = amdxdna_insert_pages(abo, vma);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed insert pages, ret %d", ret);
+		goto hmm_unreg;
+	}
+
+	XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx",
+		 drm_vma_node_offset_addr(&gobj->vma_node), abo->type,
+		 vma->vm_start, gobj->size);
+	return 0;
+
+hmm_unreg:
+	amdxdna_hmm_unregister(abo, vma);
+	return ret;
+}
+
+static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+	struct drm_gem_object *gobj = dma_buf->priv;
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+	unsigned long num_pages = vma_pages(vma);
+	int ret;
+
+	vma->vm_ops = &drm_gem_shmem_vm_ops;
+	vma->vm_private_data = gobj;
+
+	drm_gem_object_get(gobj);
 	ret = drm_gem_shmem_mmap(&abo->base, vma);
 	if (ret)
-		goto hmm_unreg;
+		goto put_obj;
 
-	num_pages = gobj->size >> PAGE_SHIFT;
-	/* Try to insert the pages */
+	/* The buffer is based on memory pages. Fix the flag. */
 	vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
-	ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages);
+	ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+			      &num_pages);
 	if (ret)
-		XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret);
+		goto close_vma;
 
 	return 0;
 
-hmm_unreg:
-	amdxdna_hmm_unregister(abo);
+close_vma:
+	vma->vm_ops->close(vma);
+put_obj:
+	drm_gem_object_put(gobj);
 	return ret;
 }
 
-static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf)
+static const struct dma_buf_ops amdxdna_dmabuf_ops = {
+	.attach = drm_gem_map_attach,
+	.detach = drm_gem_map_detach,
+	.map_dma_buf = drm_gem_map_dma_buf,
+	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.release = drm_gem_dmabuf_release,
+	.mmap = amdxdna_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
+static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
 {
-	return drm_gem_shmem_vm_ops.fault(vmf);
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.ops = &amdxdna_dmabuf_ops;
+	exp_info.size = gobj->size;
+	exp_info.flags = flags;
+	exp_info.priv = gobj;
+	exp_info.resv = gobj->resv;
+
+	return drm_gem_dmabuf_export(gobj->dev, &exp_info);
 }
 
-static void amdxdna_gem_vm_open(struct vm_area_struct *vma)
+static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
 {
-	drm_gem_shmem_vm_ops.open(vma);
+	dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL);
+	dma_buf_detach(abo->dma_buf, abo->attach);
+	dma_buf_put(abo->dma_buf);
+	drm_gem_object_release(to_gobj(abo));
+	kfree(abo);
 }
 
-static void amdxdna_gem_vm_close(struct vm_area_struct *vma)
+static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 {
-	struct drm_gem_object *gobj = vma->vm_private_data;
+	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
+
+	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+
+	amdxdna_hmm_unregister(abo, NULL);
+	flush_workqueue(xdna->notifier_wq);
+
+	if (abo->pinned)
+		amdxdna_gem_unpin(abo);
 
-	amdxdna_hmm_unregister(to_xdna_obj(gobj));
-	drm_gem_shmem_vm_ops.close(vma);
+	if (abo->type == AMDXDNA_BO_DEV_HEAP)
+		drm_mm_takedown(&abo->mm);
+
+	drm_gem_vunmap(gobj, &map);
+	mutex_destroy(&abo->lock);
+
+	if (is_import_bo(abo)) {
+		amdxdna_imported_obj_free(abo);
+		return;
+	}
+
+	drm_gem_shmem_free(&abo->base);
 }
 
-static const struct vm_operations_struct amdxdna_gem_vm_ops = {
-	.fault = amdxdna_gem_vm_fault,
-	.open = amdxdna_gem_vm_open,
-	.close = amdxdna_gem_vm_close,
+static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
+	.free = amdxdna_gem_dev_obj_free,
 };
 
 static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
@@ -220,7 +454,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
 	.vmap = drm_gem_shmem_object_vmap,
 	.vunmap = drm_gem_shmem_object_vunmap,
 	.mmap = amdxdna_gem_obj_mmap,
-	.vm_ops = &amdxdna_gem_vm_ops,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+	.export = amdxdna_gem_prime_export,
 };
 
 static struct amdxdna_gem_obj *
@@ -239,6 +474,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
 	abo->mem.userptr = AMDXDNA_INVALID_ADDR;
 	abo->mem.dev_addr = AMDXDNA_INVALID_ADDR;
 	abo->mem.size = size;
+	INIT_LIST_HEAD(&abo->mem.umap_list);
 
 	return abo;
 }
@@ -258,6 +494,51 @@ amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
 	return to_gobj(abo);
 }
 
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attach;
+	struct amdxdna_gem_obj *abo;
+	struct drm_gem_object *gobj;
+	struct sg_table *sgt;
+	int ret;
+
+	get_dma_buf(dma_buf);
+
+	attach = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attach)) {
+		ret = PTR_ERR(attach);
+		goto put_buf;
+	}
+
+	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto fail_detach;
+	}
+
+	gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+	if (IS_ERR(gobj)) {
+		ret = PTR_ERR(gobj);
+		goto fail_unmap;
+	}
+
+	abo = to_xdna_obj(gobj);
+	abo->attach = attach;
+	abo->dma_buf = dma_buf;
+
+	return gobj;
+
+fail_unmap:
+	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
+fail_detach:
+	dma_buf_detach(dma_buf, attach);
+put_buf:
+	dma_buf_put(dma_buf);
+
+	return ERR_PTR(ret);
+}
+
 static struct amdxdna_gem_obj *
 amdxdna_drm_alloc_shmem(struct drm_device *dev,
 			struct amdxdna_drm_create_bo *args,
@@ -286,6 +567,7 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
 			    struct drm_file *filp)
 {
 	struct amdxdna_client *client = filp->driver_priv;
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
 	struct amdxdna_dev *xdna = to_xdna_dev(dev);
 	struct drm_gem_shmem_object *shmem;
 	struct amdxdna_gem_obj *abo;
@@ -312,18 +594,26 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
 
 	shmem->map_wc = false;
 	abo = to_xdna_obj(&shmem->base);
-
 	abo->type = AMDXDNA_BO_DEV_HEAP;
 	abo->client = client;
 	abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
 	drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
 
+	ret = drm_gem_vmap(to_gobj(abo), &map);
+	if (ret) {
+		XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret);
+		goto release_obj;
+	}
+	abo->mem.kva = map.vaddr;
+
 	client->dev_heap = abo;
 	drm_gem_object_get(to_gobj(abo));
 	mutex_unlock(&client->mm_lock);
 
 	return abo;
 
+release_obj:
+	drm_gem_object_put(to_gobj(abo));
 mm_unlock:
 	mutex_unlock(&client->mm_lock);
 	return ERR_PTR(ret);
@@ -332,58 +622,32 @@ mm_unlock:
 struct amdxdna_gem_obj *
 amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
 			 struct amdxdna_drm_create_bo *args,
-			 struct drm_file *filp, bool use_vmap)
+			 struct drm_file *filp)
 {
 	struct amdxdna_client *client = filp->driver_priv;
 	struct amdxdna_dev *xdna = to_xdna_dev(dev);
 	size_t aligned_sz = PAGE_ALIGN(args->size);
-	struct amdxdna_gem_obj *abo, *heap;
+	struct amdxdna_gem_obj *abo;
 	int ret;
 
-	mutex_lock(&client->mm_lock);
-	heap = client->dev_heap;
-	if (!heap) {
-		ret = -EINVAL;
-		goto mm_unlock;
-	}
-
-	if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
-		XDNA_ERR(xdna, "Invalid dev heap userptr");
-		ret = -EINVAL;
-		goto mm_unlock;
-	}
-
-	if (args->size > heap->mem.size) {
-		XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx",
-			 args->size, heap->mem.size);
-		ret = -EINVAL;
-		goto mm_unlock;
-	}
-
 	abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz);
-	if (IS_ERR(abo)) {
-		ret = PTR_ERR(abo);
-		goto mm_unlock;
-	}
+	if (IS_ERR(abo))
+		return abo;
+
 	to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs;
 	abo->type = AMDXDNA_BO_DEV;
 	abo->client = client;
-	abo->dev_heap = heap;
-	ret = amdxdna_gem_insert_node_locked(abo, use_vmap);
+
+	ret = amdxdna_gem_heap_alloc(abo);
 	if (ret) {
 		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
-		goto mm_unlock;
+		amdxdna_gem_destroy_obj(abo);
+		return ERR_PTR(ret);
 	}
 
-	drm_gem_object_get(to_gobj(heap));
 	drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz);
 
-	mutex_unlock(&client->mm_lock);
 	return abo;
-
-mm_unlock:
-	mutex_unlock(&client->mm_lock);
-	return ERR_PTR(ret);
 }
 
 static struct amdxdna_gem_obj *
@@ -391,10 +655,10 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
 			  struct amdxdna_drm_create_bo *args,
 			  struct drm_file *filp)
 {
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
 	struct amdxdna_dev *xdna = to_xdna_dev(dev);
 	struct drm_gem_shmem_object *shmem;
 	struct amdxdna_gem_obj *abo;
-	struct iosys_map map;
 	int ret;
 
 	if (args->size > XDNA_MAX_CMD_BO_SIZE) {
@@ -417,7 +681,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
 	abo->type = AMDXDNA_BO_CMD;
 	abo->client = filp->driver_priv;
 
-	ret = drm_gem_vmap_unlocked(to_gobj(abo), &map);
+	ret = drm_gem_vmap(to_gobj(abo), &map);
 	if (ret) {
 		XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
 		goto release_obj;
@@ -451,7 +715,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
 		abo = amdxdna_drm_create_dev_heap(dev, args, filp);
 		break;
 	case AMDXDNA_BO_DEV:
-		abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false);
+		abo = amdxdna_drm_alloc_dev_bo(dev, args, filp);
 		break;
 	case AMDXDNA_BO_CMD:
 		abo = amdxdna_drm_create_cmd_bo(dev, args, filp);
@@ -483,17 +747,13 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
 	int ret;
 
-	switch (abo->type) {
-	case AMDXDNA_BO_SHMEM:
-	case AMDXDNA_BO_DEV_HEAP:
-		ret = drm_gem_shmem_pin(&abo->base);
-		break;
-	case AMDXDNA_BO_DEV:
-		ret = drm_gem_shmem_pin(&abo->dev_heap->base);
-		break;
-	default:
-		ret = -EOPNOTSUPP;
-	}
+	if (abo->type == AMDXDNA_BO_DEV)
+		abo = abo->client->dev_heap;
+
+	if (is_import_bo(abo))
+		return 0;
+
+	ret = drm_gem_shmem_pin(&abo->base);
 
 	XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret);
 	return ret;
@@ -503,9 +763,6 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
 {
 	int ret;
 
-	if (abo->type == AMDXDNA_BO_DEV)
-		abo = abo->dev_heap;
-
 	mutex_lock(&abo->lock);
 	ret = amdxdna_gem_pin_nolock(abo);
 	mutex_unlock(&abo->lock);
@@ -516,7 +773,10 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
 void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
 {
 	if (abo->type == AMDXDNA_BO_DEV)
-		abo = abo->dev_heap;
+		abo = abo->client->dev_heap;
+
+	if (is_import_bo(abo))
+		return;
 
 	mutex_lock(&abo->lock);
 	drm_gem_shmem_unpin(&abo->base);
@@ -606,10 +866,14 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
 		goto put_obj;
 	}
 
-	if (abo->type == AMDXDNA_BO_DEV)
-		drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
-	else
+	if (is_import_bo(abo))
+		drm_clflush_sg(abo->base.sgt);
+	else if (abo->mem.kva)
+		drm_clflush_virt_range(abo->mem.kva + args->offset, args->size);
+	else if (abo->base.pages)
 		drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
+	else
+		drm_WARN(&xdna->ddev, 1, "Can not get flush memory");
 
 	amdxdna_gem_unpin(abo);
 
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index 8ccc0375dd9d..ae29db94a9d3 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -6,6 +6,20 @@
 #ifndef _AMDXDNA_GEM_H_
 #define _AMDXDNA_GEM_H_
 
+#include <linux/hmm.h>
+
+struct amdxdna_umap {
+	struct vm_area_struct		*vma;
+	struct mmu_interval_notifier	notifier;
+	struct hmm_range		range;
+	struct work_struct		hmm_unreg_work;
+	struct amdxdna_gem_obj		*abo;
+	struct list_head		node;
+	struct kref			refcnt;
+	bool				invalid;
+	bool				unmapped;
+};
+
 struct amdxdna_mem {
 	u64				userptr;
 	void				*kva;
@@ -13,8 +27,7 @@ struct amdxdna_mem {
 	size_t				size;
 	struct page			**pages;
 	u32				nr_pages;
-	struct mmu_interval_notifier	notifier;
-	unsigned long			*pfns;
+	struct list_head		umap_list;
 	bool				map_invalid;
 };
 
@@ -28,12 +41,14 @@ struct amdxdna_gem_obj {
 
 	/* Below members is uninitialized when needed */
 	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
-	struct amdxdna_gem_obj		*dev_heap; /* For AMDXDNA_BO_DEV */
 	struct drm_mm_node		mm_node; /* For AMDXDNA_BO_DEV */
 	u32				assigned_hwctx;
+	struct dma_buf			*dma_buf;
+	struct dma_buf_attachment	*attach;
 };
 
 #define to_gobj(obj)    (&(obj)->base.base)
+#define is_import_bo(obj) ((obj)->attach)
 
 static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
 {
@@ -47,12 +62,16 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
 	drm_gem_object_put(to_gobj(abo));
 }
 
+void amdxdna_umap_put(struct amdxdna_umap *mapp);
+
 struct drm_gem_object *
 amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
 struct amdxdna_gem_obj *
 amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
 			 struct amdxdna_drm_create_bo *args,
-			 struct drm_file *filp, bool use_vmap);
+			 struct drm_file *filp);
 
 int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
 int amdxdna_gem_pin(struct amdxdna_gem_obj *abo);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index f5b8497cf5ad..f2bf1d374cc7 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -226,6 +226,7 @@ const struct drm_driver amdxdna_drm_drv = {
 	.num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
 
 	.gem_create_object = amdxdna_gem_create_object_cb,
+	.gem_prime_import = amdxdna_gem_prime_import,
 };
 
 static const struct amdxdna_dev_info *
@@ -266,12 +267,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		fs_reclaim_release(GFP_KERNEL);
 	}
 
+	xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0);
+	if (!xdna->notifier_wq)
+		return -ENOMEM;
+
 	mutex_lock(&xdna->dev_lock);
 	ret = xdna->dev_info->ops->init(xdna);
 	mutex_unlock(&xdna->dev_lock);
 	if (ret) {
 		XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
-		return ret;
+		goto destroy_notifier_wq;
 	}
 
 	ret = amdxdna_sysfs_init(xdna);
@@ -301,6 +306,8 @@ failed_dev_fini:
 	mutex_lock(&xdna->dev_lock);
 	xdna->dev_info->ops->fini(xdna);
 	mutex_unlock(&xdna->dev_lock);
+destroy_notifier_wq:
+	destroy_workqueue(xdna->notifier_wq);
 	return ret;
 }
 
@@ -310,6 +317,8 @@ static void amdxdna_remove(struct pci_dev *pdev)
 	struct device *dev = &pdev->dev;
 	struct amdxdna_client *client;
 
+	destroy_workqueue(xdna->notifier_wq);
+
 	pm_runtime_get_noresume(dev);
 	pm_runtime_forbid(dev);
 
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 37848a8d8031..ab79600911aa 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,6 +6,7 @@
 #ifndef _AMDXDNA_PCI_DRV_H_
 #define _AMDXDNA_PCI_DRV_H_
 
+#include <linux/workqueue.h>
 #include <linux/xarray.h>
 
 #define XDNA_INFO(xdna, fmt, args...)	drm_info(&(xdna)->ddev, fmt, ##args)
@@ -98,6 +99,7 @@ struct amdxdna_dev {
 	struct list_head		client_list;
 	struct amdxdna_fw_ver		fw_ver;
 	struct rw_semaphore		notifier_lock; /* for mmu notifier*/
+	struct workqueue_struct		*notifier_wq;
 };
 
 /*
diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index aa826033b0ce..ca3357acd127 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -20,8 +20,6 @@
 
 DEFINE_XARRAY_ALLOC(accel_minors_xa);
 
-static struct dentry *accel_debugfs_root;
-
 static const struct device_type accel_sysfs_device_minor = {
 	.name = "accel_minor"
 };
@@ -74,17 +72,6 @@ static const struct drm_info_list accel_debugfs_list[] = {
 #define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
 
 /**
- * accel_debugfs_init() - Initialize debugfs for device
- * @dev: Pointer to the device instance.
- *
- * This function creates a root directory for the device in debugfs.
- */
-void accel_debugfs_init(struct drm_device *dev)
-{
-	drm_debugfs_dev_init(dev, accel_debugfs_root);
-}
-
-/**
  * accel_debugfs_register() - Register debugfs for device
  * @dev: Pointer to the device instance.
  *
@@ -194,7 +181,6 @@ static const struct file_operations accel_stub_fops = {
 void accel_core_exit(void)
 {
 	unregister_chrdev(ACCEL_MAJOR, "accel");
-	debugfs_remove(accel_debugfs_root);
 	accel_sysfs_destroy();
 	WARN_ON(!xa_empty(&accel_minors_xa));
 }
@@ -209,8 +195,6 @@ int __init accel_core_init(void)
 		goto error;
 	}
 
-	accel_debugfs_root = debugfs_create_dir("accel", NULL);
-
 	ret = register_chrdev(ACCEL_MAJOR, "accel", &accel_stub_fops);
 	if (ret < 0)
 		DRM_ERROR("Cannot register ACCEL major: %d\n", ret);
diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig
index be85336107f9..1919fbb169c7 100644
--- a/drivers/accel/habanalabs/Kconfig
+++ b/drivers/accel/habanalabs/Kconfig
@@ -6,7 +6,7 @@
 config DRM_ACCEL_HABANALABS
 	tristate "HabanaLabs AI accelerators"
 	depends on DRM_ACCEL
-	depends on X86_64
+	depends on X86 && X86_64
 	depends on PCI && HAS_IOMEM
 	select GENERIC_ALLOCATOR
 	select HWMON
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 68eebed3b050..80fa08bf57bd 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1066,28 +1066,11 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
 	return (device_id == hdev->pdev->device);
 }
 
-static void stringify_time_of_last_heartbeat(struct hl_device *hdev, char *time_str, size_t size,
-						bool is_pq_hb)
-{
-	time64_t seconds = is_pq_hb ? hdev->heartbeat_debug_info.last_pq_heartbeat_ts
-					: hdev->heartbeat_debug_info.last_eq_heartbeat_ts;
-	struct tm tm;
-
-	if (!seconds)
-		return;
-
-	time64_to_tm(seconds, 0, &tm);
-
-	snprintf(time_str, size, "%ld-%02d-%02d %02d:%02d:%02d (UTC)",
-		tm.tm_year + 1900, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
-}
-
 static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
 {
 	struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info;
 	u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	char pq_time_str[64] = "N/A", eq_time_str[64] = "N/A";
 
 	if (!prop->cpucp_info.eq_health_check_supported)
 		return true;
@@ -1095,17 +1078,15 @@ static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
 	if (!hdev->eq_heartbeat_received) {
 		dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
 
-		stringify_time_of_last_heartbeat(hdev, pq_time_str, sizeof(pq_time_str), true);
-		stringify_time_of_last_heartbeat(hdev, eq_time_str, sizeof(eq_time_str), false);
 		dev_err(hdev->dev,
-			"EQ: {CI %u, HB counter %u, last HB time: %s}, PQ: {PI: %u, CI: %u (%u), last HB time: %s}\n",
+			"EQ: {CI %u, HB counter %u, last HB time: %ptTs}, PQ: {PI: %u, CI: %u (%u), last HB time: %ptTs}\n",
 			hdev->event_queue.ci,
 			heartbeat_debug_info->heartbeat_event_counter,
-			eq_time_str,
+			&hdev->heartbeat_debug_info.last_eq_heartbeat_ts,
 			hdev->kernel_queues[cpu_q_id].pi,
 			atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
 			atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask,
-			pq_time_str);
+			&hdev->heartbeat_debug_info.last_pq_heartbeat_ts);
 
 		hl_eq_dump(hdev, &hdev->event_queue);
 
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8729a0c57d78..dc80ca921d90 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,8 +17,6 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 
-#include <asm/msr.h>
-
 /* make sure there is space for all the signed info */
 static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
 
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 9d58efa2ff38..82f66520ec18 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -446,7 +446,7 @@ static DEVICE_ATTR_RO(parent_device);
 static const struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
 	.size = PAGE_SIZE,
-	.read_new = eeprom_read_handler
+	.read = eeprom_read_handler
 };
 
 static struct attribute *hl_dev_attrs[] = {
@@ -479,7 +479,7 @@ static const struct bin_attribute *const hl_dev_bin_attrs[] = {
 
 static struct attribute_group hl_dev_attr_group = {
 	.attrs = hl_dev_attrs,
-	.bin_attrs_new = hl_dev_bin_attrs,
+	.bin_attrs = hl_dev_bin_attrs,
 };
 
 static struct attribute_group hl_dev_clks_attr_group;
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 0825851656a2..cd24ccd20ba6 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -332,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
 		return -EINVAL;
 
 	ret = ivpu_rpm_get(vdev);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	ivpu_pm_trigger_recovery(vdev, "debugfs");
@@ -383,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent)
 		return -EINVAL;
 
 	ret = ivpu_rpm_get(vdev);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	if (active_percent)
@@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t
 	if (ret < 0)
 		return ret;
 
-	buf[size] = '\0';
+	buf[ret] = '\0';
 	ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
 		     &process_quantum);
 	if (ret != 4)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 4fa73189502e..3d6d52492536 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #include <linux/firmware.h>
@@ -164,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 		args->value = vdev->platform;
 		break;
 	case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
-		args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
+		args->value = ivpu_hw_dpu_max_freq_get(vdev);
 		break;
 	case DRM_IVPU_PARAM_NUM_CONTEXTS:
 		args->value = ivpu_get_context_count(vdev);
@@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev)
 {
 	int ret;
 
+	drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter));
+	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+
 	/* Update boot params located at first 4KB of FW memory */
 	ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
 
@@ -421,9 +424,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev)
 {
 	ivpu_hw_irq_disable(vdev);
 	disable_irq(vdev->irq);
-	cancel_work_sync(&vdev->irq_ipc_work);
-	cancel_work_sync(&vdev->irq_dct_work);
-	cancel_work_sync(&vdev->context_abort_work);
+	flush_work(&vdev->irq_ipc_work);
+	flush_work(&vdev->irq_dct_work);
+	flush_work(&vdev->context_abort_work);
 	ivpu_ipc_disable(vdev);
 	ivpu_mmu_disable(vdev);
 }
@@ -573,6 +576,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
 	vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
 	atomic64_set(&vdev->unique_id_counter, 0);
+	atomic_set(&vdev->job_timeout_counter, 0);
 	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
 	xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
@@ -700,6 +704,7 @@ static struct pci_device_id ivpu_pci_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_WCL) },
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 92753effb1c9..62ab1c654e63 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_DRV_H__
@@ -26,6 +26,7 @@
 #define PCI_DEVICE_ID_ARL	0xad1d
 #define PCI_DEVICE_ID_LNL	0x643e
 #define PCI_DEVICE_ID_PTL_P	0xb03e
+#define PCI_DEVICE_ID_WCL	0xfd3e
 
 #define IVPU_HW_IP_37XX 37
 #define IVPU_HW_IP_40XX 40
@@ -154,6 +155,7 @@ struct ivpu_device {
 	struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
 	struct xarray submitted_jobs_xa;
 	struct ivpu_ipc_consumer job_done_consumer;
+	atomic_t job_timeout_counter;
 
 	atomic64_t unique_id_counter;
 
@@ -164,6 +166,7 @@ struct ivpu_device {
 		int boot;
 		int jsm;
 		int tdr;
+		int inference;
 		int autosuspend;
 		int d0i3_entry_msg;
 		int state_dump_msg;
@@ -206,10 +209,11 @@ extern bool ivpu_force_snoop;
 #define IVPU_TEST_MODE_D0I3_MSG_ENABLE    BIT(5)
 #define IVPU_TEST_MODE_MIP_DISABLE        BIT(6)
 #define IVPU_TEST_MODE_DISABLE_TIMEOUTS   BIT(8)
-#define IVPU_TEST_MODE_TURBO		  BIT(9)
-#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10)
-#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE  BIT(11)
-#define IVPU_TEST_MODE_D0I2_DISABLE       BIT(12)
+#define IVPU_TEST_MODE_TURBO_ENABLE       BIT(9)
+#define IVPU_TEST_MODE_TURBO_DISABLE      BIT(10)
+#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11)
+#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE  BIT(12)
+#define IVPU_TEST_MODE_D0I2_DISABLE       BIT(13)
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
@@ -239,6 +243,7 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
 	case PCI_DEVICE_ID_LNL:
 		return IVPU_HW_IP_40XX;
 	case PCI_DEVICE_ID_PTL_P:
+	case PCI_DEVICE_ID_WCL:
 		return IVPU_HW_IP_50XX;
 	default:
 		dump_stack();
@@ -255,6 +260,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
 		return IVPU_HW_BTRS_MTL;
 	case PCI_DEVICE_ID_LNL:
 	case PCI_DEVICE_ID_PTL_P:
+	case PCI_DEVICE_ID_WCL:
 		return IVPU_HW_BTRS_LNL;
 	default:
 		dump_stack();
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 7a1bb92d8c81..9db741695401 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #include <linux/firmware.h>
@@ -55,18 +55,18 @@ static struct {
 	int gen;
 	const char *name;
 } fw_names[] = {
-	{ IVPU_HW_IP_37XX, "vpu_37xx.bin" },
+	{ IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v1.bin" },
 	{ IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
-	{ IVPU_HW_IP_40XX, "vpu_40xx.bin" },
+	{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v1.bin" },
 	{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
-	{ IVPU_HW_IP_50XX, "vpu_50xx.bin" },
+	{ IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" },
 	{ IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
 };
 
 /* Production fw_names from the table above */
-MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin");
-MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin");
-MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin");
 
 static int ivpu_fw_request(struct ivpu_device *vdev)
 {
@@ -233,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	fw->dvfs_mode = 0;
 
 	fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
-	fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
-	fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
 	ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
 
+	if (fw_hdr->preemption_buffer_1_max_size)
+		fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
+	else
+		fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+
+	if (fw_hdr->preemption_buffer_2_max_size)
+		fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
+	else
+		fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+	ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n",
+		 fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size);
+
 	if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
 								 fw_hdr->ro_section_size,
 								 fw_hdr->image_load_address,
@@ -534,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->d0i3_entry_vpu_ts);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
 		 boot_params->system_time_us);
-	ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n",
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n",
 		 boot_params->power_profile);
 }
 
@@ -566,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 
 	boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
 	boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
-	boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
 
 	/*
 	 * This param is a debug firmware feature.  It switches default clock
@@ -637,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->d0i3_residency_time_us = 0;
 	boot_params->d0i3_entry_vpu_ts = 0;
 	if (IVPU_WA(disable_d0i2))
-		boot_params->power_profile = 1;
+		boot_params->power_profile |= BIT(1);
 
 	boot_params->system_time_us = ktime_to_us(ktime_get_real());
 	wmb(); /* Flush WC buffers after writing bootparams */
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 1d0b2bd9d65c..9a3935be1c05 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -39,6 +39,7 @@ struct ivpu_fw_info {
 	u64 read_only_addr;
 	u32 read_only_size;
 	u32 sched_mode;
+	u64 last_heartbeat;
 };
 
 int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 8741c73b92ce..59cfcf3eaded 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -28,9 +28,19 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
 {
 	ivpu_dbg(vdev, BO,
 		 "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
-		 action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
+		 action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx_id,
 		 (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
-		 (bool)bo->base.base.import_attach);
+		 (bool)drm_gem_is_imported(&bo->base.base));
+}
+
+static inline int ivpu_bo_lock(struct ivpu_bo *bo)
+{
+	return dma_resv_lock(bo->base.base.resv, NULL);
+}
+
+static inline void ivpu_bo_unlock(struct ivpu_bo *bo)
+{
+	dma_resv_unlock(bo->base.base.resv);
 }
 
 /*
@@ -43,22 +53,22 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
 int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
 {
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+	struct sg_table *sgt;
 	int ret = 0;
 
-	mutex_lock(&bo->lock);
-
 	ivpu_dbg_bo(vdev, bo, "pin");
-	drm_WARN_ON(&vdev->drm, !bo->ctx);
 
-	if (!bo->mmu_mapped) {
-		struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+	if (IS_ERR(sgt)) {
+		ret = PTR_ERR(sgt);
+		ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+		return ret;
+	}
 
-		if (IS_ERR(sgt)) {
-			ret = PTR_ERR(sgt);
-			ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
-			goto unlock;
-		}
+	ivpu_bo_lock(bo);
 
+	if (!bo->mmu_mapped) {
+		drm_WARN_ON(&vdev->drm, !bo->ctx);
 		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
 					       ivpu_bo_is_snooped(bo));
 		if (ret) {
@@ -69,7 +79,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
 	}
 
 unlock:
-	mutex_unlock(&bo->lock);
+	ivpu_bo_unlock(bo);
 
 	return ret;
 }
@@ -84,7 +94,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 	if (!drm_dev_enter(&vdev->drm, &idx))
 		return -ENODEV;
 
-	mutex_lock(&bo->lock);
+	ivpu_bo_lock(bo);
 
 	ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
 	if (!ret) {
@@ -94,9 +104,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 		ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
 	}
 
-	ivpu_dbg_bo(vdev, bo, "alloc");
-
-	mutex_unlock(&bo->lock);
+	ivpu_bo_unlock(bo);
 
 	drm_dev_exit(idx);
 
@@ -107,7 +115,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
 {
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 
-	lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount));
+	lockdep_assert(dma_resv_held(bo->base.base.resv) || !kref_read(&bo->base.base.refcount));
 
 	if (bo->mmu_mapped) {
 		drm_WARN_ON(&vdev->drm, !bo->ctx);
@@ -122,17 +130,15 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
 		bo->ctx = NULL;
 	}
 
-	if (bo->base.base.import_attach)
+	if (drm_gem_is_imported(&bo->base.base))
 		return;
 
-	dma_resv_lock(bo->base.base.resv, NULL);
 	if (bo->base.sgt) {
 		dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
 		sg_free_table(bo->base.sgt);
 		kfree(bo->base.sgt);
 		bo->base.sgt = NULL;
 	}
-	dma_resv_unlock(bo->base.base.resv);
 }
 
 void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
@@ -144,12 +150,12 @@ void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m
 
 	mutex_lock(&vdev->bo_list_lock);
 	list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
-		mutex_lock(&bo->lock);
+		ivpu_bo_lock(bo);
 		if (bo->ctx == ctx) {
 			ivpu_dbg_bo(vdev, bo, "unbind");
 			ivpu_bo_unbind_locked(bo);
 		}
-		mutex_unlock(&bo->lock);
+		ivpu_bo_unlock(bo);
 	}
 	mutex_unlock(&vdev->bo_list_lock);
 }
@@ -169,7 +175,6 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
 	bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
 
 	INIT_LIST_HEAD(&bo->bo_list_node);
-	mutex_init(&bo->lock);
 
 	return &bo->base.base;
 }
@@ -215,7 +220,7 @@ fail_detach:
 	return ERR_PTR(ret);
 }
 
-static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
+static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, u32 ctx_id)
 {
 	struct drm_gem_shmem_object *shmem;
 	struct ivpu_bo *bo;
@@ -233,6 +238,7 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
 		return ERR_CAST(shmem);
 
 	bo = to_ivpu_bo(&shmem->base);
+	bo->ctx_id = ctx_id;
 	bo->base.map_wc = flags & DRM_IVPU_BO_WC;
 	bo->flags = flags;
 
@@ -240,6 +246,8 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla
 	list_add_tail(&bo->bo_list_node, &vdev->bo_list);
 	mutex_unlock(&vdev->bo_list_lock);
 
+	ivpu_dbg_bo(vdev, bo, "alloc");
+
 	return bo;
 }
 
@@ -277,12 +285,16 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
 	list_del(&bo->bo_list_node);
 	mutex_unlock(&vdev->bo_list_lock);
 
-	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+	drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) &&
+		    !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+	drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0);
+	drm_WARN_ON(&vdev->drm, bo->base.vaddr);
 
 	ivpu_bo_unbind_locked(bo);
-	mutex_destroy(&bo->lock);
+	drm_WARN_ON(&vdev->drm, bo->mmu_mapped);
+	drm_WARN_ON(&vdev->drm, bo->ctx);
 
-	drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+	drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
 	drm_gem_shmem_free(&bo->base);
 }
 
@@ -314,7 +326,7 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 	if (size == 0)
 		return -EINVAL;
 
-	bo = ivpu_bo_alloc(vdev, size, args->flags);
+	bo = ivpu_bo_alloc(vdev, size, args->flags, file_priv->ctx.id);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
 			 bo, file_priv->ctx.id, args->size, args->flags);
@@ -322,7 +334,10 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 	}
 
 	ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
-	if (!ret)
+	if (ret)
+		ivpu_err(vdev, "Failed to create handle for BO: %pe (ctx %u size %llu flags 0x%x)",
+			 bo, file_priv->ctx.id, args->size, args->flags);
+	else
 		args->vpu_addr = bo->vpu_addr;
 
 	drm_gem_object_put(&bo->base.base);
@@ -345,7 +360,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end));
 	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
 
-	bo = ivpu_bo_alloc(vdev, size, flags);
+	bo = ivpu_bo_alloc(vdev, size, flags, IVPU_GLOBAL_CONTEXT_MMU_SSID);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
 			 bo, range->start, size, flags);
@@ -361,9 +376,9 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 		goto err_put;
 
 	if (flags & DRM_IVPU_BO_MAPPABLE) {
-		dma_resv_lock(bo->base.base.resv, NULL);
-		ret = drm_gem_shmem_vmap(&bo->base, &map);
-		dma_resv_unlock(bo->base.base.resv);
+		ivpu_bo_lock(bo);
+		ret = drm_gem_shmem_vmap_locked(&bo->base, &map);
+		ivpu_bo_unlock(bo);
 
 		if (ret)
 			goto err_put;
@@ -386,9 +401,9 @@ void ivpu_bo_free(struct ivpu_bo *bo)
 	struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
 
 	if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
-		dma_resv_lock(bo->base.base.resv, NULL);
-		drm_gem_shmem_vunmap(&bo->base, &map);
-		dma_resv_unlock(bo->base.base.resv);
+		ivpu_bo_lock(bo);
+		drm_gem_shmem_vunmap_locked(&bo->base, &map);
+		ivpu_bo_unlock(bo);
 	}
 
 	drm_gem_object_put(&bo->base.base);
@@ -407,12 +422,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	bo = to_ivpu_bo(obj);
 
-	mutex_lock(&bo->lock);
+	ivpu_bo_lock(bo);
 	args->flags = bo->flags;
 	args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
 	args->vpu_addr = bo->vpu_addr;
 	args->size = obj->size;
-	mutex_unlock(&bo->lock);
+	ivpu_bo_unlock(bo);
 
 	drm_gem_object_put(obj);
 	return ret;
@@ -449,10 +464,10 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 {
-	mutex_lock(&bo->lock);
+	ivpu_bo_lock(bo);
 
 	drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
-		   bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size,
+		   bo, bo->ctx_id, bo->vpu_addr, bo->base.base.size,
 		   bo->flags, kref_read(&bo->base.base.refcount));
 
 	if (bo->base.pages)
@@ -461,12 +476,12 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 	if (bo->mmu_mapped)
 		drm_printf(p, " mmu_mapped");
 
-	if (bo->base.base.import_attach)
+	if (drm_gem_is_imported(&bo->base.base))
 		drm_printf(p, " imported");
 
 	drm_printf(p, "\n");
 
-	mutex_unlock(&bo->lock);
+	ivpu_bo_unlock(bo);
 }
 
 void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index a222a9ec9d61..aa8ff14f7aae 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -17,10 +17,10 @@ struct ivpu_bo {
 	struct list_head bo_list_node;
 	struct drm_mm_node mm_node;
 
-	struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
 	u64 vpu_addr;
 	u32 flags;
 	u32 job_status; /* Valid only for command buffer */
+	u32 ctx_id;
 	bool mmu_mapped;
 };
 
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index ec9a3629da3a..08dcc31b56f4 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -94,12 +94,14 @@ static void timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.boot = -1;
 		vdev->timeout.jsm = -1;
 		vdev->timeout.tdr = -1;
+		vdev->timeout.inference = -1;
 		vdev->timeout.autosuspend = -1;
 		vdev->timeout.d0i3_entry_msg = -1;
 	} else if (ivpu_is_fpga(vdev)) {
 		vdev->timeout.boot = 50;
 		vdev->timeout.jsm = 15000;
 		vdev->timeout.tdr = 30000;
+		vdev->timeout.inference = 900000;
 		vdev->timeout.autosuspend = -1;
 		vdev->timeout.d0i3_entry_msg = 500;
 		vdev->timeout.state_dump_msg = 10000;
@@ -107,6 +109,7 @@ static void timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.boot = 50;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 10000;
+		vdev->timeout.inference = 300000;
 		vdev->timeout.autosuspend = 100;
 		vdev->timeout.d0i3_entry_msg = 100;
 		vdev->timeout.state_dump_msg = 10;
@@ -114,12 +117,13 @@ static void timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.boot = 1000;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 2000;
+		vdev->timeout.inference = 60000;
 		if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
 			vdev->timeout.autosuspend = 10;
 		else
 			vdev->timeout.autosuspend = 100;
 		vdev->timeout.d0i3_entry_msg = 5;
-		vdev->timeout.state_dump_msg = 10;
+		vdev->timeout.state_dump_msg = 100;
 	}
 }
 
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 16435f2756d0..d79668fe1609 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_HW_H__
@@ -82,19 +82,19 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
 	return range->end - range->start;
 }
 
-static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
 {
-	return ivpu_hw_btrs_ratio_to_freq(vdev, ratio);
+	return ivpu_hw_btrs_dpu_max_freq_get(vdev);
 }
 
-static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
+static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev)
 {
-	ivpu_hw_ip_irq_clear(vdev);
+	return ivpu_hw_btrs_dpu_freq_get(vdev);
 }
 
-static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev)
+static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
 {
-	return ivpu_hw_btrs_pll_freq_get(vdev);
+	ivpu_hw_ip_irq_clear(vdev);
 }
 
 static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index 56c56012b980..b236c7234daa 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
+#include <linux/units.h>
+
 #include "ivpu_drv.h"
 #include "ivpu_hw.h"
 #include "ivpu_hw_btrs.h"
@@ -28,17 +30,13 @@
 
 #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
 
-#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3)
-#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3)
-#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF   WP_CONFIG(0, 0)
 
 #define PLL_CDYN_DEFAULT               0x80
 #define PLL_EPP_DEFAULT                0x80
 #define PLL_CONFIG_DEFAULT             0x0
-#define PLL_SIMULATION_FREQ            10000000
-#define PLL_REF_CLK_FREQ               50000000
+#define PLL_REF_CLK_FREQ               50000000ull
+#define PLL_RATIO_TO_FREQ(x)           ((x) * PLL_REF_CLK_FREQ)
+
 #define PLL_TIMEOUT_US		       (1500 * USEC_PER_MSEC)
 #define IDLE_TIMEOUT_US		       (5 * USEC_PER_MSEC)
 #define TIMEOUT_US                     (150 * USEC_PER_MSEC)
@@ -62,6 +60,8 @@
 #define DCT_ENABLE                     0x1
 #define DCT_DISABLE                    0x0
 
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
+
 int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
 {
 	REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
@@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev)
 
 	hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
 	hw->sku = BTRS_MTL_TILE_SKU_BOTH;
-	hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
+	hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
 
 	return 0;
 }
@@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
 
 	prepare_wp_request(vdev, &wp, enable);
 
-	ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
-		 PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn);
+	ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
+		 pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
 
 	ret = wp_request_send(vdev, &wp);
 	if (ret) {
@@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
 		return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
 }
 
+static u32 pll_config_get_mtl(struct ivpu_device *vdev)
+{
+	return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
+}
+
+static u32 pll_config_get_lnl(struct ivpu_device *vdev)
+{
+	return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
+}
+
+static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
+{
+	return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
+}
+
+static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
+{
+	return PLL_RATIO_TO_FREQ(ratio) / 2;
+}
+
+static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
+{
+	if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+		return pll_ratio_to_dpu_freq_mtl(ratio);
+	else
+		return pll_ratio_to_dpu_freq_lnl(ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
+{
+	return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
+}
+
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev)
+{
+	if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+		return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev));
+	else
+		return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev));
+}
+
 /* Handler for IRQs from Buttress core (irqB) */
 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
 {
@@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
 	if (!status)
 		return false;
 
-	if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status))
-		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
-			 REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL));
+	if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+		u32 pll = pll_config_get_mtl(vdev);
+
+		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+			 pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
+	}
 
 	if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
 		ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
@@ -633,8 +677,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
 		queue_work(system_wq, &vdev->irq_dct_work);
 	}
 
-	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status))
-		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ));
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
+		u32 pll = pll_config_get_lnl(vdev);
+
+		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
+			 pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
+	}
 
 	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
 		ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
@@ -717,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti
 	REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
 }
 
-static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config)
-{
-	u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
-	u32 cpu_clock;
-
-	if ((config & 0xff) == MTL_PLL_RATIO_4_3)
-		cpu_clock = pll_clock * 2 / 4;
-	else
-		cpu_clock = pll_clock * 2 / 5;
-
-	return cpu_clock;
-}
-
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
-{
-	struct ivpu_hw_info *hw = vdev->hw;
-
-	if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
-		return pll_ratio_to_freq_mtl(ratio, hw->config);
-	else
-		return PLL_RATIO_TO_FREQ(ratio);
-}
-
-static u32 pll_freq_get_mtl(struct ivpu_device *vdev)
-{
-	u32 pll_curr_ratio;
-
-	pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
-	pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK;
-
-	if (!ivpu_is_silicon(vdev))
-		return PLL_SIMULATION_FREQ;
-
-	return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config);
-}
-
-static u32 pll_freq_get_lnl(struct ivpu_device *vdev)
-{
-	u32 pll_curr_ratio;
-
-	pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
-	pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK;
-
-	return PLL_RATIO_TO_FREQ(pll_curr_ratio);
-}
-
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev)
-{
-	if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
-		return pll_freq_get_mtl(vdev);
-	else
-		return pll_freq_get_lnl(vdev);
-}
-
 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
 {
 	if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
index 1fd71b4d4ab0..d2d82651976d 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.h
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_HW_BTRS_H__
@@ -13,9 +13,8 @@
 
 #define PLL_PROFILING_FREQ_DEFAULT   38400000
 #define PLL_PROFILING_FREQ_HIGH      400000000
-#define PLL_RATIO_TO_FREQ(x)         ((x) * PLL_REF_CLK_FREQ)
 
-#define DCT_DEFAULT_ACTIVE_PERCENT 15u
+#define DCT_DEFAULT_ACTIVE_PERCENT 30u
 #define DCT_PERIOD_US		   35300u
 
 int ivpu_hw_btrs_info_init(struct ivpu_device *vdev);
@@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
 void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
 void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
 void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
+u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
 bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
 int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
 void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
-u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev);
-u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio);
 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
 u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
 u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 823f6a57dc54..2bf9882ab52e 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -683,6 +683,7 @@ static void pwr_island_delay_set(struct ivpu_device *vdev)
 		return;
 
 	switch (ivpu_device_id(vdev)) {
+	case PCI_DEVICE_ID_WCL:
 	case PCI_DEVICE_ID_PTL_P:
 		post = high ? 18 : 0;
 		post1 = 0;
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 0e096fd9b95d..39f83225c181 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
 	struct ivpu_ipc_consumer cons;
 	int ret;
 
-	drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+	drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) &&
+		    pm_runtime_enabled(vdev->drm.dev));
 
 	ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
 
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 004059e4f1e8..060f1fc031d3 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #include <drm/drm_file.h>
@@ -100,6 +100,43 @@ err_free_cmdq:
 	return NULL;
 }
 
+/**
+ * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
+ * @cmdq: Pointer to the command queue structure.
+ *
+ * Returns the number of entries that can fit in the command queue memory.
+ */
+static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
+{
+	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
+
+	return size / sizeof(struct vpu_job_queue_entry);
+}
+
+/**
+ * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
+ * @vdev: Pointer to the ivpu device structure.
+ * @flags: Input flags to determine the command queue flags.
+ *
+ * Returns the calculated command queue flags, considering both the input flags
+ * and the current test mode settings.
+ */
+static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
+{
+	u32 cmdq_flags = 0;
+
+	if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
+		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+	/* Test mode can override the TURBO flag coming from the application */
+	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
+		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
+		cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+	return cmdq_flags;
+}
+
 static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
 {
 	ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
@@ -107,8 +144,7 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c
 	kfree(cmdq);
 }
 
-static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority,
-					  bool is_legacy)
+static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_cmdq *cmdq = NULL;
@@ -121,10 +157,6 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
 		ivpu_err(vdev, "Failed to allocate command queue\n");
 		return NULL;
 	}
-
-	cmdq->priority = priority;
-	cmdq->is_legacy = is_legacy;
-
 	ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
 			      &file_priv->cmdq_id_next, GFP_KERNEL);
 	if (ret < 0) {
@@ -132,7 +164,15 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
 		goto err_free_cmdq;
 	}
 
-	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id);
+	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
+	cmdq->priority = priority;
+
+	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
+	cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
+	cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);
+
+	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
+		 cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
 	return cmdq;
 
 err_free_cmdq:
@@ -188,27 +228,14 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
 	return ret;
 }
 
-static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
+static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
 {
-	jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
 	jobq->header.head = 0;
 	jobq->header.tail = 0;
 
-	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
-		ivpu_dbg(vdev, JOB, "Turbo mode enabled");
-		jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
-	}
-
 	wmb(); /* Flush WC buffer for jobq->header */
 }
 
-static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
-{
-	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
-
-	return size / sizeof(struct vpu_job_queue_entry);
-}
-
 static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
@@ -219,10 +246,7 @@ static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq
 	if (cmdq->db_id)
 		return 0;
 
-	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
-	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
-
-	ivpu_cmdq_jobq_init(vdev, cmdq->jobq);
+	ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);
 
 	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
 		ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
@@ -247,6 +271,10 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm
 	if (!cmdq->db_id)
 		return 0;
 
+	ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
+	if (!ret)
+		ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);
+
 	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
 		ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
 		if (!ret)
@@ -254,10 +282,6 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm
 				 cmdq->id, file_priv->ctx.id);
 	}
 
-	ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
-	if (!ret)
-		ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);
-
 	xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
 	cmdq->db_id = 0;
 
@@ -291,9 +315,10 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_pr
 			break;
 
 	if (!cmdq) {
-		cmdq = ivpu_cmdq_create(file_priv, priority, true);
+		cmdq = ivpu_cmdq_create(file_priv, priority, 0);
 		if (!cmdq)
 			return NULL;
+		cmdq->is_legacy = true;
 	}
 
 	return cmdq;
@@ -470,8 +495,8 @@ static void ivpu_job_destroy(struct ivpu_job *job)
 	struct ivpu_device *vdev = job->vdev;
 	u32 i;
 
-	ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
-		 job->job_id, job->file_priv->ctx.id, job->engine_idx);
+	ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
+		 job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);
 
 	for (i = 0; i < job->bo_count; i++)
 		if (job->bos[i])
@@ -564,8 +589,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
 	dma_fence_signal(job->done_fence);
 
 	trace_job("done", job);
-	ivpu_dbg(vdev, JOB, "Job complete:  id %3u ctx %2d engine %d status 0x%x\n",
-		 job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+	ivpu_dbg(vdev, JOB, "Job complete:  id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
+		 job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status);
 
 	ivpu_job_destroy(job);
 	ivpu_stop_job_timeout_detection(vdev);
@@ -664,8 +689,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
 	}
 
 	trace_job("submit", job);
-	ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n",
-		 job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority,
+	ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
+		 job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
 		 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
 
 	mutex_unlock(&file_priv->lock);
@@ -681,8 +706,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
 err_erase_xa:
 	xa_erase(&vdev->submitted_jobs_xa, job->job_id);
 err_unlock:
-	mutex_unlock(&vdev->submitted_jobs_lock);
 	mutex_unlock(&file_priv->lock);
+	mutex_unlock(&vdev->submitted_jobs_lock);
 	ivpu_rpm_put(vdev);
 	return ret;
 }
@@ -777,7 +802,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv,
 		goto err_free_handles;
 	}
 
-	ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count);
+	ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
+		 file_priv->ctx.id, cmdq_id, buffer_count);
 
 	job = ivpu_job_create(file_priv, engine, buffer_count);
 	if (!job) {
@@ -873,23 +899,31 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *
 int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
 	struct drm_ivpu_cmdq_create *args = data;
 	struct ivpu_cmdq *cmdq;
+	int ret;
 
-	if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
+	if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
 		return -ENODEV;
 
 	if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->lock);
 
-	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false);
+	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
 	if (cmdq)
 		args->cmdq_id = cmdq->id;
 
 	mutex_unlock(&file_priv->lock);
 
+	ivpu_rpm_put(vdev);
+
 	return cmdq ? 0 : -ENOMEM;
 }
 
@@ -899,28 +933,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file
 	struct ivpu_device *vdev = file_priv->vdev;
 	struct drm_ivpu_cmdq_destroy *args = data;
 	struct ivpu_cmdq *cmdq;
-	u32 cmdq_id;
+	u32 cmdq_id = 0;
 	int ret;
 
 	if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ))
 		return -ENODEV;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->lock);
 
 	cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
 	if (!cmdq || cmdq->is_legacy) {
 		ret = -ENOENT;
-		goto err_unlock;
+	} else {
+		cmdq_id = cmdq->id;
+		ivpu_cmdq_destroy(file_priv, cmdq);
+		ret = 0;
 	}
 
-	cmdq_id = cmdq->id;
-	ivpu_cmdq_destroy(file_priv, cmdq);
 	mutex_unlock(&file_priv->lock);
-	ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
-	return 0;
 
-err_unlock:
-	mutex_unlock(&file_priv->lock);
+	/* Abort any pending jobs only if cmdq was destroyed */
+	if (!ret)
+		ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
+
+	ivpu_rpm_put(vdev);
+
 	return ret;
 }
 
@@ -970,7 +1011,8 @@ void ivpu_context_abort_work_fn(struct work_struct *work)
 		return;
 
 	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
-		ivpu_jsm_reset_engine(vdev, 0);
+		if (ivpu_jsm_reset_engine(vdev, 0))
+			return;
 
 	mutex_lock(&vdev->context_list_lock);
 	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
@@ -993,7 +1035,8 @@ void ivpu_context_abort_work_fn(struct work_struct *work)
 	if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
 		goto runtime_put;
 
-	ivpu_jsm_hws_resume_engine(vdev, 0);
+	if (ivpu_jsm_hws_resume_engine(vdev, 0))
+		return;
 	/*
 	 * In hardware scheduling mode NPU already has stopped processing jobs
 	 * and won't send us any further notifications, thus we have to free job related resources
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 219ab8afefab..0256b2dfefc1 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -7,6 +7,7 @@
 #include "ivpu_hw.h"
 #include "ivpu_ipc.h"
 #include "ivpu_jsm_msg.h"
+#include "ivpu_pm.h"
 #include "vpu_jsm_api.h"
 
 const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
@@ -163,8 +164,10 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
 
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
-	if (ret)
+	if (ret) {
 		ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret);
+		ivpu_pm_trigger_recovery(vdev, "Engine reset failed");
+	}
 
 	return ret;
 }
@@ -354,8 +357,10 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine)
 
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
-	if (ret)
+	if (ret) {
 		ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret);
+		ivpu_pm_trigger_recovery(vdev, "Engine resume failed");
+	}
 
 	return ret;
 }
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index ffe7b10f8a76..2a043baf10ca 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -4,6 +4,7 @@
  */
 
 #include <drm/drm_file.h>
+#include <linux/pm_runtime.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_gem.h"
@@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
 	    args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->ms_lock);
 
 	if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
@@ -96,6 +101,8 @@ err_free_ms:
 	kfree(ms);
 unlock:
 	mutex_unlock(&file_priv->ms_lock);
+
+	ivpu_rpm_put(vdev);
 	return ret;
 }
 
@@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
 	if (!args->metric_group_mask)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->ms_lock);
 
 	ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
 unlock:
 	mutex_unlock(&file_priv->ms_lock);
 
+	ivpu_rpm_put(vdev);
 	return ret;
 }
 
@@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
 	struct drm_ivpu_metric_streamer_stop *args = data;
+	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_ms_instance *ms;
+	int ret;
 
 	if (!args->metric_group_mask)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->ms_lock);
 
 	ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	mutex_unlock(&file_priv->ms_lock);
 
+	ivpu_rpm_put(vdev);
 	return ms ? 0 : -EINVAL;
 }
 
@@ -281,6 +300,9 @@ unlock:
 void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
 {
 	struct ivpu_ms_instance *ms, *tmp;
+	struct ivpu_device *vdev = file_priv->vdev;
+
+	pm_runtime_get_sync(vdev->drm.dev);
 
 	mutex_lock(&file_priv->ms_lock);
 
@@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
 		free_instance(file_priv, ms);
 
 	mutex_unlock(&file_priv->ms_lock);
+
+	pm_runtime_put_autosuspend(vdev->drm.dev);
 }
 
 void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index b5891e91f7ab..eacda1dbe840 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -33,6 +33,10 @@ static unsigned long ivpu_tdr_timeout_ms;
 module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
 MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
 
+static unsigned long ivpu_inference_timeout_ms;
+module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default");
+
 #define PM_RESCHEDULE_LIMIT     5
 
 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
@@ -44,6 +48,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
 	ivpu_fw_log_reset(vdev);
 	ivpu_fw_load(vdev);
 	fw->entry_point = fw->cold_boot_entry_point;
+	fw->last_heartbeat = 0;
 }
 
 static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
@@ -189,7 +194,30 @@ static void ivpu_job_timeout_work(struct work_struct *work)
 {
 	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
 	struct ivpu_device *vdev = pm->vdev;
+	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+	unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? ivpu_inference_timeout_ms :
+					     vdev->timeout.inference;
+	u64 inference_max_retries;
+	u64 heartbeat;
+
+	if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
+		ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n");
+		goto recovery;
+	}
+
+	inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms);
+	if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) {
+		ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n",
+			 inference_max_retries);
+		goto recovery;
+	}
 
+	vdev->fw->last_heartbeat = heartbeat;
+	ivpu_start_job_timeout_detection(vdev);
+	return;
+
+recovery:
+	atomic_set(&vdev->job_timeout_counter, 0);
 	ivpu_pm_trigger_recovery(vdev, "TDR");
 }
 
@@ -204,6 +232,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
 void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
 {
 	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+	atomic_set(&vdev->job_timeout_counter, 0);
 }
 
 int ivpu_pm_suspend_cb(struct device *dev)
@@ -428,16 +457,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
 	active_us = (DCT_PERIOD_US * active_percent) / 100;
 	inactive_us = DCT_PERIOD_US - active_us;
 
+	vdev->pm->dct_active_percent = active_percent;
+
+	ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n",
+		 active_percent, active_us, inactive_us);
+
 	ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
 	if (ret) {
 		ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
 		return ret;
 	}
 
-	vdev->pm->dct_active_percent = active_percent;
-
-	ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n",
-		 active_percent, active_us, inactive_us);
 	return 0;
 }
 
@@ -445,15 +475,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev)
 {
 	int ret;
 
+	vdev->pm->dct_active_percent = 0;
+
+	ivpu_dbg(vdev, PM, "DCT requested to be disabled\n");
+
 	ret = ivpu_jsm_dct_disable(vdev);
 	if (ret) {
 		ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
 		return ret;
 	}
 
-	vdev->pm->dct_active_percent = 0;
-
-	ivpu_dbg(vdev, PM, "DCT disabled\n");
 	return 0;
 }
 
@@ -466,7 +497,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work)
 	if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
 		return;
 
-	if (vdev->pm->dct_active_percent)
+	if (enable)
 		ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
 	else
 		ret = ivpu_pm_dct_disable(vdev);
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
index 97102feaf8dd..268ab7744a8b 100644
--- a/drivers/accel/ivpu/ivpu_sysfs.c
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
  */
 
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/units.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_gem.h"
@@ -90,10 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 
 static DEVICE_ATTR_RO(sched_mode);
 
+/**
+ * DOC: npu_max_frequency
+ *
+ * The npu_max_frequency shows maximum frequency in MHz of the NPU's data
+ * processing unit
+ */
+static ssize_t
+npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct ivpu_device *vdev = to_ivpu_device(drm);
+	u32 freq = ivpu_hw_dpu_max_freq_get(vdev);
+
+	return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_max_frequency_mhz);
+
+/**
+ * DOC: npu_current_frequency_mhz
+ *
+ * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's
+ * data processing unit
+ */
+static ssize_t
+npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct ivpu_device *vdev = to_ivpu_device(drm);
+	u32 freq = 0;
+
+	/* Read frequency only if device is active, otherwise frequency is 0 */
+	if (pm_runtime_get_if_active(vdev->drm.dev) > 0) {
+		freq = ivpu_hw_dpu_freq_get(vdev);
+
+		pm_runtime_put_autosuspend(vdev->drm.dev);
+	}
+
+	return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
+}
+
+static DEVICE_ATTR_RO(npu_current_frequency_mhz);
+
 static struct attribute *ivpu_dev_attrs[] = {
 	&dev_attr_npu_busy_time_us.attr,
 	&dev_attr_npu_memory_utilization.attr,
 	&dev_attr_sched_mode.attr,
+	&dev_attr_npu_max_frequency_mhz.attr,
+	&dev_attr_npu_current_frequency_mhz.attr,
 	NULL,
 };
 
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 908e68ea1c39..218468bbbcad 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -26,7 +26,7 @@
  * Minor version changes when API backward compatibility is preserved.
  * Resets to 0 if Major version is incremented.
  */
-#define VPU_BOOT_API_VER_MINOR 26
+#define VPU_BOOT_API_VER_MINOR 28
 
 /*
  * API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -76,8 +76,15 @@ struct vpu_firmware_header {
 	 * submission queue size and device capabilities.
 	 */
 	u32 preemption_buffer_2_size;
+	/*
+	 * Maximum preemption buffer size that the FW can use: no need for the host
+	 * driver to allocate more space than that specified by these fields.
+	 * A value of 0 means no declared limit.
+	 */
+	u32 preemption_buffer_1_max_size;
+	u32 preemption_buffer_2_max_size;
 	/* Space reserved for future preemption-related fields. */
-	u32 preemption_reserved[6];
+	u32 preemption_reserved[4];
 	/* FW image read only section start address, 4KB aligned */
 	u64 ro_section_start_address;
 	/* FW image read only section size, 4KB aligned */
@@ -134,7 +141,7 @@ enum vpu_trace_destination {
 /*
  * Processor bit shifts (for loggable HW components).
  */
-#define VPU_TRACE_PROC_BIT_ARM	     0
+#define VPU_TRACE_PROC_BIT_RESERVED  0
 #define VPU_TRACE_PROC_BIT_LRT	     1
 #define VPU_TRACE_PROC_BIT_LNN	     2
 #define VPU_TRACE_PROC_BIT_SHV_0     3
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 7215c144158c..4b6b2b3d2583 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,7 +22,7 @@
 /*
  * Minor version changes when API backward compatibility is preserved.
  */
-#define VPU_JSM_API_VER_MINOR 25
+#define VPU_JSM_API_VER_MINOR 29
 
 /*
  * API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -53,8 +53,7 @@
  * Engine indexes.
  */
 #define VPU_ENGINE_COMPUTE 0
-#define VPU_ENGINE_COPY	   1
-#define VPU_ENGINE_NB	   2
+#define VPU_ENGINE_NB	   1
 
 /*
  * VPU status values.
@@ -126,11 +125,13 @@ enum {
 	 * When set, indicates that job queue uses native fences (as inline commands
 	 * in job queue). Such queues may also use legacy fences (as commands in batch buffers).
 	 * When cleared, indicates the job queue only uses legacy fences.
-	 * NOTE: For queues using native fences, VPU expects that all jobs in the queue
-	 * are immediately followed by an inline command object. This object is expected
-	 * to be a fence signal command in most cases, but can also be a NOP in case the host
-	 * does not need per-job fence signalling. Other inline commands objects can be
-	 * inserted between "job and inline command" pairs.
+	 * NOTES:
+	 *   1. For queues using native fences, VPU expects that all jobs in the queue
+	 *      are immediately followed by an inline command object. This object is expected
+	 *      to be a fence signal command in most cases, but can also be a NOP in case the host
+	 *      does not need per-job fence signalling. Other inline commands objects can be
+	 *      inserted between "job and inline command" pairs.
+	 *  2. Native fence queues are only supported on VPU 40xx onwards.
 	 */
 	VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
 
@@ -275,6 +276,8 @@ struct vpu_inline_cmd {
 			u64 value;
 			/* User VA of the log buffer in which to add log entry on completion. */
 			u64 log_buffer_va;
+			/* NPU private data. */
+			u64 npu_private_data;
 		} fence;
 		/* Other commands do not have a payload. */
 		/* Payload definition for future inline commands can be inserted here. */
@@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update {
 	/** Metric group mask that identifies metric streamer instance. */
 	u64 metric_group_mask;
 	/**
-	 * Address and size of the buffer where the VPU will write metric data. If
-	 * the buffer address is 0 or same as the currently used buffer the VPU will
-	 * continue writing metric data to the current buffer. In this case the
-	 * buffer size is ignored and the size of the current buffer is unchanged.
-	 * If the address is non-zero and differs from the current buffer address the
-	 * VPU will immediately switch data collection to the new buffer.
+	 * Address and size of the buffer where the VPU will write metric data.
+	 * This member dictates how the update operation should perform:
+	 * 1. client needs information about the number of collected samples and the
+	 *   amount of data written to the current buffer
+	 * 2. client wants to switch to a new buffer
+	 *
+	 * Case 1. is identified by the buffer address being 0 or the same as the
+	 * currently used buffer address. In this case the buffer size is ignored and
+	 * the size of the current buffer is unchanged. The VPU will return an update
+	 * in the vpu_jsm_metric_streamer_done structure. The internal writing position
+	 * into the buffer is not changed.
+	 *
+	 * Case 2. is identified by the address being non-zero and differs from the
+	 * current buffer address. The VPU will immediately switch data collection to
+	 * the new buffer. Then the VPU will return an update in the
+	 * vpu_jsm_metric_streamer_done structure.
 	 */
 	u64 buffer_addr;
 	u64 buffer_size;
@@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
 	/*
 	 * Default quantum in 100ns units for scheduling across processes
 	 * within a priority band
+	 * Minimum value supported by NPU is 1ms (10000 in 100ns units).
 	 */
 	u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
 	/*
@@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
 	 * in situations when it's starved by the focus band.
 	 */
 	u32 normal_band_percentage;
-	/* Reserved */
-	u32 reserved_0;
+	/*
+	 * TDR timeout value in milliseconds. Default value of 0 meaning no timeout.
+	 */
+	u32 tdr_timeout;
 };
 
 /*
@@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
 	s32 in_process_priority;
 	/* Zero padding / Reserved */
 	u32 reserved_1;
-	/* Context quantum relative to other contexts of same priority in the same process */
+	/*
+	 * Context quantum relative to other contexts of same priority in the same process
+	 * Minimum value supported by NPU is 1ms (10000 in 100ns units).
+	 */
 	u64 context_quantum;
 	/* Grace period when preempting context of the same priority within the same process */
 	u64 grace_period_same_priority;
diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig
index a9f866230058..5e405a19c157 100644
--- a/drivers/accel/qaic/Kconfig
+++ b/drivers/accel/qaic/Kconfig
@@ -8,7 +8,6 @@ config DRM_ACCEL_QAIC
 	depends on DRM_ACCEL
 	depends on PCI && HAS_IOMEM
 	depends on MHI_BUS
-	depends on MMU
 	select CRC32
 	help
 	  Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 35e883515629..1106b876f737 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -10,6 +10,7 @@ qaic-y := \
 	qaic_control.o \
 	qaic_data.o \
 	qaic_drv.o \
+	qaic_ras.o \
 	qaic_timesync.o \
 	sahara.o
 
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 0dbb8e32e4b9..c31081e42cee 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -167,6 +167,14 @@ struct qaic_device {
 	struct workqueue_struct *bootlog_wq;
 	/* Synchronizes access of pages in MHI bootlog device */
 	struct mutex            bootlog_mutex;
+	/* MHI RAS channel device */
+	struct mhi_device	*ras_ch;
+	/* Correctable error count */
+	unsigned int		ce_count;
+	/* Un-correctable error count */
+	unsigned int		ue_count;
+	/* Un-correctable non-fatal error count */
+	unsigned int		ue_nf_count;
 };
 
 struct qaic_drm_device {
@@ -213,8 +221,6 @@ struct qaic_bo {
 	bool			sliced;
 	/* Request ID of this BO if it is queued for execution */
 	u16			req_id;
-	/* Handle assigned to this BO */
-	u32			handle;
 	/* Wait on this for completion of DMA transfer of this BO */
 	struct completion	xfer_done;
 	/*
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 43aba57b48f0..797289e9d780 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -609,7 +609,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc
 	struct scatterlist *sg;
 	int ret = 0;
 
-	if (obj->import_attach)
+	if (drm_gem_is_imported(obj))
 		return -EINVAL;
 
 	for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
@@ -630,7 +630,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
 {
 	struct qaic_bo *bo = to_qaic_bo(obj);
 
-	if (obj->import_attach) {
+	if (drm_gem_is_imported(obj)) {
 		/* DMABUF/PRIME Path */
 		drm_prime_gem_destroy(obj, NULL);
 	} else {
@@ -731,7 +731,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 	if (ret)
 		goto free_bo;
 
-	bo->handle = args->handle;
 	drm_gem_object_put(obj);
 	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
 	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
@@ -870,7 +869,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 {
 	int ret;
 
-	if (bo->base.import_attach)
+	if (drm_gem_is_imported(&bo->base))
 		ret = qaic_prepare_import_bo(bo, hdr);
 	else
 		ret = qaic_prepare_export_bo(qdev, bo, hdr);
@@ -894,7 +893,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b
 
 static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
 {
-	if (bo->base.import_attach)
+	if (drm_gem_is_imported(&bo->base))
 		qaic_unprepare_import_bo(bo);
 	else
 		qaic_unprepare_export_bo(qdev, bo);
diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c
index ba0cf2f94732..a991b8198dc4 100644
--- a/drivers/accel/qaic/qaic_debugfs.c
+++ b/drivers/accel/qaic/qaic_debugfs.c
@@ -240,7 +240,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d
 mhi_unprepare:
 	mhi_unprepare_from_transfer(mhi_dev);
 destroy_workqueue:
-	flush_workqueue(qdev->bootlog_wq);
 	destroy_workqueue(qdev->bootlog_wq);
 out:
 	return ret;
@@ -253,7 +252,6 @@ static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev)
 	qdev = dev_get_drvdata(&mhi_dev->dev);
 
 	mhi_unprepare_from_transfer(qdev->bootlog_ch);
-	flush_workqueue(qdev->bootlog_wq);
 	destroy_workqueue(qdev->bootlog_wq);
 	qdev->bootlog_ch = NULL;
 }
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 3b415e2c9431..e31bcb0ecfc9 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -29,6 +29,7 @@
 #include "mhi_controller.h"
 #include "qaic.h"
 #include "qaic_debugfs.h"
+#include "qaic_ras.h"
 #include "qaic_timesync.h"
 #include "sahara.h"
 
@@ -695,6 +696,10 @@ static int __init qaic_init(void)
 	if (ret)
 		pr_debug("qaic: qaic_bootlog_register failed %d\n", ret);
 
+	ret = qaic_ras_register();
+	if (ret)
+		pr_debug("qaic: qaic_ras_register failed %d\n", ret);
+
 	return 0;
 
 free_mhi:
@@ -722,6 +727,7 @@ static void __exit qaic_exit(void)
 	 * reinitializing the link_up state after the cleanup is done.
 	 */
 	link_up = true;
+	qaic_ras_unregister();
 	qaic_bootlog_unregister();
 	qaic_timesync_deinit();
 	sahara_unregister();
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c
new file mode 100644
index 000000000000..914ffc4a9970
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ras.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
+
+#include <asm/byteorder.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+
+#include "qaic.h"
+#include "qaic_ras.h"
+
+#define MAGIC		0x55AA
+#define VERSION		0x2
+#define HDR_SZ		12
+#define NUM_TEMP_LVL	3
+#define POWER_BREAK	BIT(0)
+
+enum msg_type {
+	MSG_PUSH, /* async push from device */
+	MSG_REQ,  /* sync request to device */
+	MSG_RESP, /* sync response from device */
+};
+
+enum err_type {
+	CE,	/* correctable error */
+	UE,	/* uncorrectable error */
+	UE_NF,	/* uncorrectable error that is non-fatal, expect a disruption */
+	ERR_TYPE_MAX,
+};
+
+static const char * const err_type_str[] = {
+	[CE]    = "Correctable",
+	[UE]    = "Uncorrectable",
+	[UE_NF] = "Uncorrectable Non-Fatal",
+};
+
+static const char * const err_class_str[] = {
+	[CE]    = "Warning",
+	[UE]    = "Fatal",
+	[UE_NF] = "Warning",
+};
+
+enum err_source {
+	SOC_MEM,
+	PCIE,
+	DDR,
+	SYS_BUS1,
+	SYS_BUS2,
+	NSP_MEM,
+	TSENS,
+};
+
+static const char * const err_src_str[TSENS + 1] = {
+	[SOC_MEM]	= "SoC Memory",
+	[PCIE]		= "PCIE",
+	[DDR]		= "DDR",
+	[SYS_BUS1]	= "System Bus source 1",
+	[SYS_BUS2]	= "System Bus source 2",
+	[NSP_MEM]	= "NSP Memory",
+	[TSENS]		= "Temperature Sensors",
+};
+
+struct ras_data {
+	/* header start */
+	/* Magic number to validate the message */
+	u16 magic;
+	/* RAS version number */
+	u16 ver;
+	u32 seq_num;
+	/* RAS message type */
+	u8  type;
+	u8  id;
+	/* Size of RAS message without the header in byte */
+	u16 len;
+	/* header end */
+	s32 result;
+	/*
+	 * Error source
+	 * 0 : SoC Memory
+	 * 1 : PCIE
+	 * 2 : DDR
+	 * 3 : System Bus source 1
+	 * 4 : System Bus source 2
+	 * 5 : NSP Memory
+	 * 6 : Temperature Sensors
+	 */
+	u32 source;
+	/*
+	 * Stores the error type, there are three types of error in RAS
+	 * 0 : correctable error (CE)
+	 * 1 : uncorrectable error (UE)
+	 * 2 : uncorrectable error that is non-fatal (UE_NF)
+	 */
+	u32 err_type;
+	u32 err_threshold;
+	u32 ce_count;
+	u32 ue_count;
+	u32 intr_num;
+	/* Data specific to error source */
+	u8  syndrome[64];
+} __packed;
+
+struct soc_mem_syndrome {
+	u64 error_address[8];
+} __packed;
+
+struct nsp_mem_syndrome {
+	u32 error_address[8];
+	u8 nsp_id;
+} __packed;
+
+struct ddr_syndrome {
+	u32 count;
+	u32 irq_status;
+	u32 data_31_0[2];
+	u32 data_63_32[2];
+	u32 data_95_64[2];
+	u32 data_127_96[2];
+	u32 addr_lsb;
+	u16 addr_msb;
+	u16 parity_bits;
+	u16 instance;
+	u16 err_type;
+} __packed;
+
+struct tsens_syndrome {
+	u32 threshold_type;
+	s32 temp;
+} __packed;
+
+struct sysbus1_syndrome {
+	u32 slave;
+	u32 err_type;
+	u16 addr[8];
+	u8  instance;
+} __packed;
+
+struct sysbus2_syndrome {
+	u32 lsb3;
+	u32 msb3;
+	u32 lsb2;
+	u32 msb2;
+	u32 ext_id;
+	u16 path;
+	u16 op_type;
+	u16 len;
+	u16 redirect;
+	u8  valid;
+	u8  word_error;
+	u8  non_secure;
+	u8  opc;
+	u8  error_code;
+	u8  trans_type;
+	u8  addr_space;
+	u8  instance;
+} __packed;
+
+struct pcie_syndrome {
+	/* CE info */
+	u32 bad_tlp;
+	u32 bad_dllp;
+	u32 replay_rollover;
+	u32 replay_timeout;
+	u32 rx_err;
+	u32 internal_ce_count;
+	/* UE_NF info */
+	u32 fc_timeout;
+	u32 poison_tlp;
+	u32 ecrc_err;
+	u32 unsupported_req;
+	u32 completer_abort;
+	u32 completion_timeout;
+	/* UE info */
+	u32 addr;
+	u8  index;
+	/*
+	 * Flag to indicate specific event of PCIe
+	 * BIT(0): Power break (low power)
+	 * BIT(1) to BIT(7): Reserved
+	 */
+	u8 flag;
+} __packed;
+
+static const char * const threshold_type_str[NUM_TEMP_LVL] = {
+	[0] = "lower",
+	[1] = "upper",
+	[2] = "critical",
+};
+
+static void ras_msg_to_cpu(struct ras_data *msg)
+{
+	struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
+	struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
+	struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
+	struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
+	struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
+	struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
+	struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
+	int i;
+
+	le16_to_cpus(&msg->magic);
+	le16_to_cpus(&msg->ver);
+	le32_to_cpus(&msg->seq_num);
+	le16_to_cpus(&msg->len);
+	le32_to_cpus(&msg->result);
+	le32_to_cpus(&msg->source);
+	le32_to_cpus(&msg->err_type);
+	le32_to_cpus(&msg->err_threshold);
+	le32_to_cpus(&msg->ce_count);
+	le32_to_cpus(&msg->ue_count);
+	le32_to_cpus(&msg->intr_num);
+
+	switch (msg->source) {
+	case SOC_MEM:
+		for (i = 0; i < 8; i++)
+			le64_to_cpus(&soc_syndrome->error_address[i]);
+		break;
+	case PCIE:
+		le32_to_cpus(&pcie_syndrome->bad_tlp);
+		le32_to_cpus(&pcie_syndrome->bad_dllp);
+		le32_to_cpus(&pcie_syndrome->replay_rollover);
+		le32_to_cpus(&pcie_syndrome->replay_timeout);
+		le32_to_cpus(&pcie_syndrome->rx_err);
+		le32_to_cpus(&pcie_syndrome->internal_ce_count);
+		le32_to_cpus(&pcie_syndrome->fc_timeout);
+		le32_to_cpus(&pcie_syndrome->poison_tlp);
+		le32_to_cpus(&pcie_syndrome->ecrc_err);
+		le32_to_cpus(&pcie_syndrome->unsupported_req);
+		le32_to_cpus(&pcie_syndrome->completer_abort);
+		le32_to_cpus(&pcie_syndrome->completion_timeout);
+		le32_to_cpus(&pcie_syndrome->addr);
+		break;
+	case DDR:
+		le16_to_cpus(&ddr_syndrome->instance);
+		le16_to_cpus(&ddr_syndrome->err_type);
+		le32_to_cpus(&ddr_syndrome->count);
+		le32_to_cpus(&ddr_syndrome->irq_status);
+		le32_to_cpus(&ddr_syndrome->data_31_0[0]);
+		le32_to_cpus(&ddr_syndrome->data_31_0[1]);
+		le32_to_cpus(&ddr_syndrome->data_63_32[0]);
+		le32_to_cpus(&ddr_syndrome->data_63_32[1]);
+		le32_to_cpus(&ddr_syndrome->data_95_64[0]);
+		le32_to_cpus(&ddr_syndrome->data_95_64[1]);
+		le32_to_cpus(&ddr_syndrome->data_127_96[0]);
+		le32_to_cpus(&ddr_syndrome->data_127_96[1]);
+		le16_to_cpus(&ddr_syndrome->parity_bits);
+		le16_to_cpus(&ddr_syndrome->addr_msb);
+		le32_to_cpus(&ddr_syndrome->addr_lsb);
+		break;
+	case SYS_BUS1:
+		le32_to_cpus(&sysbus1_syndrome->slave);
+		le32_to_cpus(&sysbus1_syndrome->err_type);
+		for (i = 0; i < 8; i++)
+			le16_to_cpus(&sysbus1_syndrome->addr[i]);
+		break;
+	case SYS_BUS2:
+		le16_to_cpus(&sysbus2_syndrome->op_type);
+		le16_to_cpus(&sysbus2_syndrome->len);
+		le16_to_cpus(&sysbus2_syndrome->redirect);
+		le16_to_cpus(&sysbus2_syndrome->path);
+		le32_to_cpus(&sysbus2_syndrome->ext_id);
+		le32_to_cpus(&sysbus2_syndrome->lsb2);
+		le32_to_cpus(&sysbus2_syndrome->msb2);
+		le32_to_cpus(&sysbus2_syndrome->lsb3);
+		le32_to_cpus(&sysbus2_syndrome->msb3);
+		break;
+	case NSP_MEM:
+		for (i = 0; i < 8; i++)
+			le32_to_cpus(&nsp_syndrome->error_address[i]);
+		break;
+	case TSENS:
+		le32_to_cpus(&tsens_syndrome->threshold_type);
+		le32_to_cpus(&tsens_syndrome->temp);
+		break;
+	}
+}
+
+static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg)
+{
+	struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
+	struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
+	struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
+	struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
+	struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
+	struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
+	struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
+	char *class;
+	char *level;
+
+	if (msg->magic != MAGIC) {
+		pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic);
+		return;
+	}
+
+	if (!msg->ver || msg->ver > VERSION) {
+		pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver);
+		return;
+	}
+
+	if (msg->type != MSG_PUSH) {
+		pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n");
+		return;
+	}
+
+	if (msg->len != sizeof(*msg) - HDR_SZ) {
+		pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len);
+		return;
+	}
+
+	if (msg->err_type >= ERR_TYPE_MAX) {
+		pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type);
+		return;
+	}
+
+	if (msg->err_type == UE)
+		level = KERN_ERR;
+	else
+		level = KERN_WARNING;
+
+	switch (msg->source) {
+	case SOC_MEM:
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n",
+			   err_class_str[msg->err_type],
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold,
+			   soc_syndrome->error_address[0],
+			   soc_syndrome->error_address[1],
+			   soc_syndrome->error_address[2],
+			   soc_syndrome->error_address[3],
+			   soc_syndrome->error_address[4],
+			   soc_syndrome->error_address[5],
+			   soc_syndrome->error_address[6],
+			   soc_syndrome->error_address[7]);
+		break;
+	case PCIE:
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n",
+			   err_class_str[msg->err_type],
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold);
+
+		switch (msg->err_type) {
+		case CE:
+			/*
+			 * Modeled after AER prints. This continues the dev_printk() from a few
+			 * lines up. We reduce duplication of code, but also avoid re-printing the
+			 * PCI device info so that the end result looks uniform to the log user.
+			 */
+			printk(KERN_WARNING pr_fmt("Syndrome:\n    Bad TLP count %d\n    Bad DLLP count %d\n    Replay Rollover count %d\n    Replay Timeout count %d\n    Recv Error count %d\n    Internal CE count %d\n"),
+			       pcie_syndrome->bad_tlp,
+			       pcie_syndrome->bad_dllp,
+			       pcie_syndrome->replay_rollover,
+			       pcie_syndrome->replay_timeout,
+			       pcie_syndrome->rx_err,
+			       pcie_syndrome->internal_ce_count);
+			if (msg->ver > 0x1)
+				pr_warn("    Power break %s\n",
+					pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF");
+			break;
+		case UE:
+			printk(KERN_ERR pr_fmt("Syndrome:\n    Index %d\n    Address 0x%x\n"),
+			       pcie_syndrome->index, pcie_syndrome->addr);
+			break;
+		case UE_NF:
+			printk(KERN_WARNING pr_fmt("Syndrome:\n    FC timeout count %d\n    Poisoned TLP count %d\n    ECRC error count %d\n    Unsupported request count %d\n    Completer abort count %d\n    Completion timeout count %d\n"),
+			       pcie_syndrome->fc_timeout,
+			       pcie_syndrome->poison_tlp,
+			       pcie_syndrome->ecrc_err,
+			       pcie_syndrome->unsupported_req,
+			       pcie_syndrome->completer_abort,
+			       pcie_syndrome->completion_timeout);
+			break;
+		default:
+			break;
+		}
+		break;
+	case DDR:
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    Instance %d\n    Count %d\n    Data 31_0 0x%x 0x%x\n    Data 63_32 0x%x 0x%x\n    Data 95_64 0x%x 0x%x\n    Data 127_96 0x%x 0x%x\n    Parity bits 0x%x\n    Address msb 0x%x\n    Address lsb 0x%x\n",
+			   err_class_str[msg->err_type],
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold,
+			   ddr_syndrome->instance,
+			   ddr_syndrome->count,
+			   ddr_syndrome->data_31_0[1],
+			   ddr_syndrome->data_31_0[0],
+			   ddr_syndrome->data_63_32[1],
+			   ddr_syndrome->data_63_32[0],
+			   ddr_syndrome->data_95_64[1],
+			   ddr_syndrome->data_95_64[0],
+			   ddr_syndrome->data_127_96[1],
+			   ddr_syndrome->data_127_96[0],
+			   ddr_syndrome->parity_bits,
+			   ddr_syndrome->addr_msb,
+			   ddr_syndrome->addr_lsb);
+		break;
+	case SYS_BUS1:
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    instance %d\n    %s\n    err_type %d\n    address0 0x%x\n    address1 0x%x\n    address2 0x%x\n    address3 0x%x\n    address4 0x%x\n    address5 0x%x\n    address6 0x%x\n    address7 0x%x\n",
+			   err_class_str[msg->err_type],
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold,
+			   sysbus1_syndrome->instance,
+			   sysbus1_syndrome->slave ? "Slave" : "Master",
+			   sysbus1_syndrome->err_type,
+			   sysbus1_syndrome->addr[0],
+			   sysbus1_syndrome->addr[1],
+			   sysbus1_syndrome->addr[2],
+			   sysbus1_syndrome->addr[3],
+			   sysbus1_syndrome->addr[4],
+			   sysbus1_syndrome->addr[5],
+			   sysbus1_syndrome->addr[6],
+			   sysbus1_syndrome->addr[7]);
+		break;
+	case SYS_BUS2:
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    instance %d\n    valid %d\n    word error %d\n    non-secure %d\n    opc %d\n    error code %d\n    transaction type %d\n    address space %d\n    operation type %d\n    len %d\n    redirect %d\n    path %d\n    ext_id %d\n    lsb2 %d\n    msb2 %d\n    lsb3 %d\n    msb3 %d\n",
+			   err_class_str[msg->err_type],
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold,
+			   sysbus2_syndrome->instance,
+			   sysbus2_syndrome->valid,
+			   sysbus2_syndrome->word_error,
+			   sysbus2_syndrome->non_secure,
+			   sysbus2_syndrome->opc,
+			   sysbus2_syndrome->error_code,
+			   sysbus2_syndrome->trans_type,
+			   sysbus2_syndrome->addr_space,
+			   sysbus2_syndrome->op_type,
+			   sysbus2_syndrome->len,
+			   sysbus2_syndrome->redirect,
+			   sysbus2_syndrome->path,
+			   sysbus2_syndrome->ext_id,
+			   sysbus2_syndrome->lsb2,
+			   sysbus2_syndrome->msb2,
+			   sysbus2_syndrome->lsb3,
+			   sysbus2_syndrome->msb3);
+		break;
+	case NSP_MEM:
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    NSP ID %d\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n",
+			   err_class_str[msg->err_type],
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold,
+			   nsp_syndrome->nsp_id,
+			   nsp_syndrome->error_address[0],
+			   nsp_syndrome->error_address[1],
+			   nsp_syndrome->error_address[2],
+			   nsp_syndrome->error_address[3],
+			   nsp_syndrome->error_address[4],
+			   nsp_syndrome->error_address[5],
+			   nsp_syndrome->error_address[6],
+			   nsp_syndrome->error_address[7]);
+		break;
+	case TSENS:
+		if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) {
+			pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n",
+				 tsens_syndrome->threshold_type);
+			break;
+		}
+
+		if (msg->err_type)
+			class = "Fatal";
+		else if (tsens_syndrome->threshold_type)
+			class = "Critical";
+		else
+			class = "Warning";
+
+		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    %s threshold\n    %d deg C\n",
+			   class,
+			   err_type_str[msg->err_type],
+			   "error from",
+			   err_src_str[msg->source],
+			   msg->err_threshold,
+			   threshold_type_str[tsens_syndrome->threshold_type],
+			   tsens_syndrome->temp);
+		break;
+	}
+
+	/* Uncorrectable errors are fatal */
+	if (msg->err_type == UE)
+		mhi_soc_reset(qdev->mhi_cntrl);
+
+	switch (msg->err_type) {
+	case CE:
+		if (qdev->ce_count != UINT_MAX)
+			qdev->ce_count++;
+		break;
+	case UE:
+		if (qdev->ce_count != UINT_MAX)
+			qdev->ue_count++;
+		break;
+	case UE_NF:
+		if (qdev->ce_count != UINT_MAX)
+			qdev->ue_nf_count++;
+		break;
+	default:
+		/* not possible */
+		break;
+	}
+}
+
+static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count);
+}
+
+static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count);
+}
+
+static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count);
+}
+
+static DEVICE_ATTR_RO(ce_count);
+static DEVICE_ATTR_RO(ue_count);
+static DEVICE_ATTR_RO(ue_nonfatal_count);
+
+static struct attribute *ras_attrs[] = {
+	&dev_attr_ce_count.attr,
+	&dev_attr_ue_count.attr,
+	&dev_attr_ue_nonfatal_count.attr,
+	NULL,
+};
+
+static struct attribute_group ras_group = {
+	.attrs = ras_attrs,
+};
+
+static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+	struct ras_data *resp;
+	int ret;
+
+	ret = mhi_prepare_for_transfer(mhi_dev);
+	if (ret)
+		return ret;
+
+	resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+	if (!resp) {
+		mhi_unprepare_from_transfer(mhi_dev);
+		return -ENOMEM;
+	}
+
+	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT);
+	if (ret) {
+		kfree(resp);
+		mhi_unprepare_from_transfer(mhi_dev);
+		return ret;
+	}
+
+	ret = device_add_group(&qdev->pdev->dev, &ras_group);
+	if (ret) {
+		mhi_unprepare_from_transfer(mhi_dev);
+		pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret);
+		return ret;
+	}
+
+	dev_set_drvdata(&mhi_dev->dev, qdev);
+	qdev->ras_ch = mhi_dev;
+
+	return ret;
+}
+
+static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev)
+{
+	struct qaic_device *qdev;
+
+	qdev = dev_get_drvdata(&mhi_dev->dev);
+	qdev->ras_ch = NULL;
+	device_remove_group(&qdev->pdev->dev, &ras_group);
+	mhi_unprepare_from_transfer(mhi_dev);
+}
+
+static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {}
+
+static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+	struct ras_data *msg = mhi_result->buf_addr;
+	int ret;
+
+	if (mhi_result->transaction_status) {
+		kfree(msg);
+		return;
+	}
+
+	ras_msg_to_cpu(msg);
+	decode_ras_msg(qdev, msg);
+
+	ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT);
+	if (ret) {
+		dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret);
+		kfree(msg);
+	}
+}
+
+static const struct mhi_device_id qaic_ras_mhi_match_table[] = {
+	{ .chan = "QAIC_STATUS", },
+	{},
+};
+
+static struct mhi_driver qaic_ras_mhi_driver = {
+	.id_table = qaic_ras_mhi_match_table,
+	.remove = qaic_ras_mhi_remove,
+	.probe = qaic_ras_mhi_probe,
+	.ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb,
+	.dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb,
+	.driver = {
+		.name = "qaic_ras",
+	},
+};
+
+int qaic_ras_register(void)
+{
+	return mhi_driver_register(&qaic_ras_mhi_driver);
+}
+
+void qaic_ras_unregister(void)
+{
+	mhi_driver_unregister(&qaic_ras_mhi_driver);
+}
diff --git a/drivers/accel/qaic/qaic_ras.h b/drivers/accel/qaic/qaic_ras.h
new file mode 100644
index 000000000000..d44a4eeeb060
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ras.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */
+
+#ifndef __QAIC_RAS_H__
+#define __QAIC_RAS_H__
+
+int qaic_ras_register(void);
+void qaic_ras_unregister(void);
+
+#endif /* __QAIC_RAS_H__ */
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
index 2473c66309d4..3fac540f8e03 100644
--- a/drivers/accel/qaic/qaic_timesync.c
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -129,7 +129,7 @@ static void qaic_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_resu
 
 static void qaic_timesync_timer(struct timer_list *t)
 {
-	struct mqts_dev *mqtsdev = from_timer(mqtsdev, t, timer);
+	struct mqts_dev *mqtsdev = timer_container_of(mqtsdev, t, timer);
 	struct qts_host_time_sync_msg_data *sync_msg;
 	u64 device_qtimer_us;
 	u64 device_qtimer;
@@ -221,7 +221,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev)
 {
 	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
 
-	del_timer_sync(&mqtsdev->timer);
+	timer_delete_sync(&mqtsdev->timer);
 	mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
 	kfree(mqtsdev->sync_msg);
 	kfree(mqtsdev);