Diffstat (limited to 'drivers/accel/habanalabs/common/command_submission.c')
-rw-r--r--  drivers/accel/habanalabs/common/command_submission.c | 593
1 file changed, 373 insertions(+), 220 deletions(-)
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 8270db0a72a2..dee487724918 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -14,10 +14,10 @@
 #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
         HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
         HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
-        HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
+        HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
 
-#define MAX_TS_ITER_NUM 10
+#define MAX_TS_ITER_NUM 100
 
 /**
  * enum hl_cs_wait_status - cs wait status
@@ -31,6 +31,24 @@ enum hl_cs_wait_status {
     CS_WAIT_STATUS_GONE
 };
 
+/*
+ * Data used while handling wait/timestamp nodes.
+ * The purpose of this struct is to store the needed data for both operations
+ * in one variable instead of passing large number of arguments to functions.
+ */
+struct wait_interrupt_data {
+    struct hl_user_interrupt *interrupt;
+    struct hl_mmap_mem_buf *buf;
+    struct hl_mem_mgr *mmg;
+    struct hl_cb *cq_cb;
+    u64 ts_handle;
+    u64 ts_offset;
+    u64 cq_handle;
+    u64 cq_offset;
+    u64 target_value;
+    u64 intr_timeout_us;
+};
+
 static void job_wq_completion(struct work_struct *work);
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
                 enum hl_cs_wait_status *status, s64 *timestamp);
@@ -280,14 +298,8 @@ bool cs_needs_timeout(struct hl_cs *cs)
 
 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 {
-    /*
-     * Patched CB is created for external queues jobs, and for H/W queues
-     * jobs if the user CB was allocated by driver and MMU is disabled.
-     */
-    return (job->queue_type == QUEUE_TYPE_EXT ||
-            (job->queue_type == QUEUE_TYPE_HW &&
-                job->is_kernel_allocated_cb &&
-                !hdev->mmu_enable));
+    /* Patched CB is created for external queues jobs */
+    return (job->queue_type == QUEUE_TYPE_EXT);
 }
 
 /*
@@ -363,14 +375,13 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
         }
     }
 
-    /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
-     * enabled, the user CB isn't released in cs_parser() and thus should be
+    /* For H/W queue jobs, if a user CB was allocated by driver,
+     * the user CB isn't released in cs_parser() and thus should be
      * released here. This is also true for INT queues jobs which were
      * allocated by driver.
      */
-    if ((job->is_kernel_allocated_cb &&
-            ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
-                job->queue_type == QUEUE_TYPE_INT))) {
+    if (job->is_kernel_allocated_cb &&
+            (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
         atomic_dec(&job->user_cb->cs_cnt);
         hl_cb_put(job->user_cb);
     }
@@ -657,7 +668,7 @@ static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
     /*
      * we get refcount upon reservation of signals or signal/wait cs for the
      * hw_sob object, and need to put it when the first staged cs
-     * (which cotains the encaps signals) or cs signal/wait is completed.
+     * (which contains the encaps signals) or cs signal/wait is completed.
      */
     if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
             (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
@@ -804,12 +815,14 @@ out:
 
 static void cs_timedout(struct work_struct *work)
 {
+    struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);
+    bool skip_reset_on_timeout, device_reset = false;
     struct hl_device *hdev;
     u64 event_mask = 0x0;
+    uint timeout_sec;
     int rc;
-    struct hl_cs *cs = container_of(work, struct hl_cs,
-                        work_tdr.work);
-    bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false;
+
+    skip_reset_on_timeout = cs->skip_reset_on_timeout;
 
     rc = cs_get_unless_zero(cs);
     if (!rc)
@@ -840,29 +853,31 @@ static void cs_timedout(struct work_struct *work)
         event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
     }
 
+    timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
+
     switch (cs->type) {
     case CS_TYPE_SIGNAL:
         dev_err(hdev->dev,
-            "Signal command submission %llu has not finished in time!\n",
-            cs->sequence);
+            "Signal command submission %llu has not finished in %u seconds!\n",
+            cs->sequence, timeout_sec);
         break;
 
     case CS_TYPE_WAIT:
         dev_err(hdev->dev,
-            "Wait command submission %llu has not finished in time!\n",
-            cs->sequence);
+            "Wait command submission %llu has not finished in %u seconds!\n",
+            cs->sequence, timeout_sec);
         break;
 
     case CS_TYPE_COLLECTIVE_WAIT:
         dev_err(hdev->dev,
-            "Collective Wait command submission %llu has not finished in time!\n",
-            cs->sequence);
+            "Collective Wait command submission %llu has not finished in %u seconds!\n",
+            cs->sequence, timeout_sec);
         break;
 
     default:
         dev_err(hdev->dev,
-            "Command submission %llu has not finished in time!\n",
-            cs->sequence);
+            "Command submission %llu has not finished in %u seconds!\n",
+            cs->sequence, timeout_sec);
         break;
     }
 
@@ -1085,17 +1100,19 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
     unsigned long flags;
 
     spin_lock_irqsave(&interrupt->wait_list_lock, flags);
-    list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
-        if (pend->ts_reg_info.buf) {
-            list_del(&pend->wait_list_node);
-            hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
-            hl_cb_put(pend->ts_reg_info.cq_cb);
-        } else {
-            pend->fence.error = -EIO;
-            complete_all(&pend->fence.completion);
-        }
+    list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
+        pend->fence.error = -EIO;
+        complete_all(&pend->fence.completion);
     }
     spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+    spin_lock_irqsave(&interrupt->ts_list_lock, flags);
+    list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
+        list_del(&pend->list_node);
+        hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+        hl_cb_put(pend->ts_reg_info.cq_cb);
+    }
+    spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -1140,11 +1157,10 @@
     spin_unlock(&hdev->cs_mirror_lock);
 }
 
-void hl_abort_waitings_for_completion(struct hl_device *hdev)
+void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
 {
     force_complete_cs(hdev);
     force_complete_multi_cs(hdev);
-    hl_release_pending_user_interrupts(hdev);
 }
 
 static void job_wq_completion(struct work_struct *work)
@@ -1168,6 +1184,22 @@ static void cs_completion(struct work_struct *work)
         hl_complete_job(hdev, job);
 }
 
+u32 hl_get_active_cs_num(struct hl_device *hdev)
+{
+    u32 active_cs_num = 0;
+    struct hl_cs *cs;
+
+    spin_lock(&hdev->cs_mirror_lock);
+
+    list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
+        if (!cs->completed)
+            active_cs_num++;
+
+    spin_unlock(&hdev->cs_mirror_lock);
+
+    return active_cs_num;
+}
+
 static int validate_queue_index(struct hl_device *hdev,
                 struct hl_cs_chunk *chunk,
                 enum hl_queue_type *queue_type,
@@ -1304,6 +1336,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
         return CS_UNRESERVE_SIGNALS;
     else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
         return CS_TYPE_ENGINE_CORE;
+    else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
+        return CS_TYPE_ENGINES;
     else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
         return CS_TYPE_FLUSH_PCI_HBW_WRITES;
     else
@@ -1326,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
         return -EINVAL;
     }
 
-    if (!hl_device_operational(hdev, &status)) {
+    if (!hl_device_operational(hdev, &status))
         return -EBUSY;
-    }
 
     if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
             !hdev->supports_staged_submission) {
@@ -1717,16 +1750,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
         /* Need to wait for restore completion before execution phase */
         if (num_chunks) {
             enum hl_cs_wait_status status;
-wait_again:
+
             ret = _hl_cs_wait_ioctl(hdev, ctx,
                     jiffies_to_usecs(hdev->timeout_jiffies),
                     *cs_seq, &status, NULL);
             if (ret) {
-                if (ret == -ERESTARTSYS) {
-                    usleep_range(100, 200);
-                    goto wait_again;
-                }
-
                 dev_err(hdev->dev,
                     "Restore CS for context %d failed to complete %d\n",
                     ctx->asid, ret);
@@ -1931,8 +1959,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
     else
         cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
 
-    cb = hl_cb_kernel_create(hdev, cb_size,
-            q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
+    cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
     if (!cb) {
         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
         atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -2135,7 +2162,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
             hdev->asic_funcs->hw_queues_unlock(hdev);
             rc = -EINVAL;
-            goto out;
+            goto out_unlock;
         }
 
         /*
@@ -2150,15 +2177,21 @@
         hdev->asic_funcs->hw_queues_unlock(hdev);
 
         /* Release the id and free allocated memory of the handle */
         idr_remove(&mgr->handles, handle_id);
+
+        /* unlock before calling ctx_put, where we might sleep */
+        spin_unlock(&mgr->lock);
         hl_ctx_put(encaps_sig_hdl->ctx);
         kfree(encaps_sig_hdl);
+        goto out;
     } else {
         rc = -EINVAL;
         dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
     }
-out:
+
+out_unlock:
     spin_unlock(&mgr->lock);
+out:
     return rc;
 }
 
@@ -2429,10 +2462,13 @@ out:
 static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
         u32 num_engine_cores, u32 core_command)
 {
-    int rc;
     struct hl_device *hdev = hpriv->hdev;
     void __user *engine_cores_arr;
     u32 *cores;
+    int rc;
+
+    if (!hdev->asic_prop.supports_engine_modes)
+        return -EPERM;
 
     if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
         dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
@@ -2461,6 +2497,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
     return rc;
 }
 
+static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
+        u32 num_engines, enum hl_engine_command command)
+{
+    struct hl_device *hdev = hpriv->hdev;
+    u32 *engines, max_num_of_engines;
+    void __user *engines_arr;
+    int rc;
+
+    if (!hdev->asic_prop.supports_engine_modes)
+        return -EPERM;
+
+    if (command >= HL_ENGINE_COMMAND_MAX) {
+        dev_err(hdev->dev, "Engine command is invalid\n");
+        return -EINVAL;
+    }
+
+    max_num_of_engines = hdev->asic_prop.max_num_of_engines;
+    if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
+        max_num_of_engines = hdev->asic_prop.num_engine_cores;
+
+    if (!num_engines || num_engines > max_num_of_engines) {
+        dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
+        return -EINVAL;
+    }
+
+    engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
+    engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
+    if (!engines)
+        return -ENOMEM;
+
+    if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
+        dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
+        kfree(engines);
+        return -EFAULT;
+    }
+
+    rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
+    kfree(engines);
+
+    return rc;
+}
+
 static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
 {
     struct hl_device *hdev = hpriv->hdev;
@@ -2476,8 +2554,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
     return 0;
 }
 
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+    struct hl_fpriv *hpriv = file_priv->driver_priv;
     union hl_cs_args *args = data;
     enum hl_cs_type cs_type = 0;
     u64 cs_seq = ULONG_MAX;
@@ -2507,7 +2586,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
         cs_seq = args->in.seq;
 
         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
-                ? msecs_to_jiffies(args->in.timeout * 1000)
+                ? secs_to_jiffies(args->in.timeout)
                 : hpriv->hdev->timeout_jiffies;
 
     switch (cs_type) {
@@ -2532,6 +2611,10 @@
         rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
                 args->in.num_engine_cores, args->in.core_command);
         break;
+    case CS_TYPE_ENGINES:
+        rc = cs_ioctl_engines(hpriv, args->in.engines,
+                args->in.num_engines, args->in.engine_command);
+        break;
     case CS_TYPE_FLUSH_PCI_HBW_WRITES:
         rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
         break;
@@ -3130,162 +3213,228 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
     return 0;
 }
 
-static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
-                    struct hl_cb *cq_cb,
-                    u64 ts_offset, u64 cq_offset, u64 target_value,
-                    spinlock_t *wait_list_lock,
-                    struct hl_user_pending_interrupt **pend)
+static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
+                    struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
+{
+    record->ts_reg_info.cq_cb = cq_cb;
+    record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
+    record->cq_target_value = target_value;
+}
+
+static int validate_and_get_ts_record(struct device *dev,
+                    struct hl_ts_buff *ts_buff, u64 ts_offset,
+                    struct hl_user_pending_interrupt **req_event_record)
 {
-    struct hl_ts_buff *ts_buff = buf->private;
-    struct hl_user_pending_interrupt *requested_offset_record =
-            (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
-            ts_offset;
-    struct hl_user_pending_interrupt *cb_last =
-            (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+    struct hl_user_pending_interrupt *ts_cb_last;
+
+    *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+            ts_offset;
+    ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
             (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
-    unsigned long flags, iter_counter = 0;
-    u64 current_cq_counter;
 
     /* Validate ts_offset not exceeding last max */
-    if (requested_offset_record >= cb_last) {
-        dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
-                (u64)(uintptr_t)cb_last);
+    if (*req_event_record >= ts_cb_last) {
+        dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
+                ts_offset, (u64)(uintptr_t)ts_cb_last);
         return -EINVAL;
     }
 
-start_over:
-    spin_lock_irqsave(wait_list_lock, flags);
+    return 0;
+}
 
-    /* Unregister only if we didn't reach the target value
-     * since in this case there will be no handling in irq context
-     * and then it's safe to delete the node out of the interrupt list
-     * then re-use it on other interrupt
-     */
-    if (requested_offset_record->ts_reg_info.in_use) {
-        current_cq_counter = *requested_offset_record->cq_kernel_addr;
-        if (current_cq_counter < requested_offset_record->cq_target_value) {
-            list_del(&requested_offset_record->wait_list_node);
-            spin_unlock_irqrestore(wait_list_lock, flags);
+static void unregister_timestamp_node(struct hl_device *hdev,
+            struct hl_user_pending_interrupt *record, bool need_lock)
+{
+    struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
+    bool ts_rec_found = false;
+    unsigned long flags;
 
-            hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
-            hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
+    if (need_lock)
+        spin_lock_irqsave(&interrupt->ts_list_lock, flags);
 
-            dev_dbg(buf->mmg->dev,
-                "ts node removed from interrupt list now can re-use\n");
-        } else {
-            dev_dbg(buf->mmg->dev,
-                "ts node in middle of irq handling\n");
-
-            /* irq handling in the middle give it time to finish */
-            spin_unlock_irqrestore(wait_list_lock, flags);
-            usleep_range(1, 10);
-            if (++iter_counter == MAX_TS_ITER_NUM) {
-                dev_err(buf->mmg->dev,
-                    "handling registration interrupt took too long!!\n");
-                return -EINVAL;
-            }
+    if (record->ts_reg_info.in_use) {
+        record->ts_reg_info.in_use = false;
+        list_del(&record->list_node);
+        ts_rec_found = true;
+    }
+
+    if (need_lock)
+        spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
+
+    /* Put refcounts that were taken when we registered the event */
+    if (ts_rec_found) {
+        hl_mmap_mem_buf_put(record->ts_reg_info.buf);
+        hl_cb_put(record->ts_reg_info.cq_cb);
+    }
+}
+
+static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
+                    struct wait_interrupt_data *data, unsigned long *flags,
+                    struct hl_user_pending_interrupt **pend)
+{
+    struct hl_user_pending_interrupt *req_offset_record;
+    struct hl_ts_buff *ts_buff = data->buf->private;
+    bool need_lock = false;
+    int rc;
+
+    rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
+                        &req_offset_record);
+    if (rc)
+        return rc;
+
+    /* In case the node already registered, need to unregister first then re-use */
+    if (req_offset_record->ts_reg_info.in_use) {
+        /*
+         * Since interrupt here can be different than the one the node currently registered
+         * on, and we don't want to lock two lists while we're doing unregister, so
+         * unlock the new interrupt wait list here and acquire the lock again after you done
+         */
+        if (data->interrupt->interrupt_id !=
+                req_offset_record->ts_reg_info.interrupt->interrupt_id) {
 
-            goto start_over;
+            need_lock = true;
+            spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
         }
-    } else {
-        /* Fill up the new registration node info */
-        requested_offset_record->ts_reg_info.buf = buf;
-        requested_offset_record->ts_reg_info.cq_cb = cq_cb;
-        requested_offset_record->ts_reg_info.timestamp_kernel_addr =
-                (u64 *) ts_buff->user_buff_address + ts_offset;
-        requested_offset_record->cq_kernel_addr =
-                (u64 *) cq_cb->kernel_address + cq_offset;
-        requested_offset_record->cq_target_value = target_value;
 
-        spin_unlock_irqrestore(wait_list_lock, flags);
+        unregister_timestamp_node(hdev, req_offset_record, need_lock);
+
+        if (need_lock)
+            spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
     }
 
-    *pend = requested_offset_record;
+    /* Fill up the new registration node info and add it to the list */
+    req_offset_record->ts_reg_info.in_use = true;
+    req_offset_record->ts_reg_info.buf = data->buf;
+    req_offset_record->ts_reg_info.timestamp_kernel_addr =
+            (u64 *) ts_buff->user_buff_address + data->ts_offset;
+    req_offset_record->ts_reg_info.interrupt = data->interrupt;
+    set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
+            data->target_value);
 
-    dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
-        requested_offset_record);
-    return 0;
+    *pend = req_offset_record;
+
+    return rc;
+}
+
+static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+                struct wait_interrupt_data *data,
+                u32 *status, u64 *timestamp)
+{
+    struct hl_user_pending_interrupt *pend;
+    unsigned long flags;
+    int rc = 0;
+
+    hl_ctx_get(ctx);
+
+    data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+    if (!data->cq_cb) {
+        rc = -EINVAL;
+        goto put_ctx;
+    }
+
+    /* Validate the cq offset */
+    if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+            ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
+        rc = -EINVAL;
+        goto put_cq_cb;
+    }
+
+    data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
+    if (!data->buf) {
+        rc = -EINVAL;
+        goto put_cq_cb;
+    }
+
+    spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
+
+    /* get ts buffer record */
+    rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
+    if (rc) {
+        spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+        goto put_ts_buff;
+    }
+
+    /* We check for completion value as interrupt could have been received
+     * before we add the timestamp node to the ts list.
+     */
+    if (*pend->cq_kernel_addr >= data->target_value) {
+        spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+        pend->ts_reg_info.in_use = 0;
+        *status = HL_WAIT_CS_STATUS_COMPLETED;
+        *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
+
+        goto put_ts_buff;
+    }
+
+    list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
+    spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+    rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
+
+    hl_ctx_put(ctx);
+
+    return rc;
+
+put_ts_buff:
+    hl_mmap_mem_buf_put(data->buf);
+put_cq_cb:
+    hl_cb_put(data->cq_cb);
+put_ctx:
+    hl_ctx_put(ctx);
+
+    return rc;
 }
 
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-                struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
-                u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
-                u64 target_value, struct hl_user_interrupt *interrupt,
-                bool register_ts_record, u64 ts_handle, u64 ts_offset,
+                struct wait_interrupt_data *data,
                 u32 *status, u64 *timestamp)
 {
     struct hl_user_pending_interrupt *pend;
-    struct hl_mmap_mem_buf *buf;
-    struct hl_cb *cq_cb;
     unsigned long timeout, flags;
     long completion_rc;
     int rc = 0;
 
-    timeout = hl_usecs64_to_jiffies(timeout_us);
+    timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
 
     hl_ctx_get(ctx);
 
-    cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
-    if (!cq_cb) {
+    data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+    if (!data->cq_cb) {
         rc = -EINVAL;
         goto put_ctx;
     }
 
     /* Validate the cq offset */
-    if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
-            ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
+    if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+            ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
         rc = -EINVAL;
         goto put_cq_cb;
     }
 
-    if (register_ts_record) {
-        dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
-                    interrupt->interrupt_id, ts_offset, cq_counters_offset);
-        buf = hl_mmap_mem_buf_get(mmg, ts_handle);
-        if (!buf) {
-            rc = -EINVAL;
-            goto put_cq_cb;
-        }
-
-        /* get ts buffer record */
-        rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
-                        cq_counters_offset, target_value,
-                        &interrupt->wait_list_lock, &pend);
-        if (rc)
-            goto put_ts_buff;
-    } else {
-        pend = kzalloc(sizeof(*pend), GFP_KERNEL);
-        if (!pend) {
-            rc = -ENOMEM;
-            goto put_cq_cb;
-        }
-        hl_fence_init(&pend->fence, ULONG_MAX);
-        pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
-        pend->cq_target_value = target_value;
+    pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+    if (!pend) {
+        rc = -ENOMEM;
+        goto put_cq_cb;
     }
 
-    spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+    hl_fence_init(&pend->fence, ULONG_MAX);
+    pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
+    pend->cq_target_value = data->target_value;
+    spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+
     /* We check for completion value as interrupt could have been received
-     * before we added the node to the wait list
+     * before we add the wait node to the wait list.
      */
-    if (*pend->cq_kernel_addr >= target_value) {
-        if (register_ts_record)
-            pend->ts_reg_info.in_use = 0;
-        spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+    if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
+        spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
-        *status = HL_WAIT_CS_STATUS_COMPLETED;
+        if (*pend->cq_kernel_addr >= data->target_value)
+            *status = HL_WAIT_CS_STATUS_COMPLETED;
+        else
+            *status = HL_WAIT_CS_STATUS_BUSY;
 
-        if (register_ts_record) {
-            *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
-            goto put_ts_buff;
-        } else {
-            pend->fence.timestamp = ktime_get();
-            goto set_timestamp;
-        }
-    } else if (!timeout_us) {
-        spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
-        *status = HL_WAIT_CS_STATUS_BUSY;
         pend->fence.timestamp = ktime_get();
         goto set_timestamp;
     }
@@ -3295,55 +3444,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
      * Note that we cannot have sorted list by target value,
     * in order to shorten the list pass loop, since
     * same list could have nodes for different cq counter handle.
-     * Note:
-     * Mark ts buff offset as in use here in the spinlock protection area
-     * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
-     * before adding the node to the list. this scenario might happen when
-     * multiple threads are racing on same offset and one thread could
-     * set the ts buff in ts_buff_get_kernel_ts_record then the other thread
-     * takes over and get to ts_buff_get_kernel_ts_record and then we will try
-     * to re-use the same ts buff offset, and will try to delete a non existing
-     * node from the list.
     */
-    if (register_ts_record)
-        pend->ts_reg_info.in_use = 1;
-
-    list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-    spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
-
-    if (register_ts_record) {
-        rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
-        goto ts_registration_exit;
-    }
+    list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
+    spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
     /* Wait for interrupt handler to signal completion */
     completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
                                 timeout);
     if (completion_rc > 0) {
-        *status = HL_WAIT_CS_STATUS_COMPLETED;
+        if (pend->fence.error == -EIO) {
+            dev_err_ratelimited(hdev->dev,
+                "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+                pend->fence.error);
+            rc = -EIO;
+            *status = HL_WAIT_CS_STATUS_ABORTED;
+        } else {
+            *status = HL_WAIT_CS_STATUS_COMPLETED;
+        }
     } else {
         if (completion_rc == -ERESTARTSYS) {
             dev_err_ratelimited(hdev->dev,
                 "user process got signal while waiting for interrupt ID %d\n",
-                interrupt->interrupt_id);
+                data->interrupt->interrupt_id);
             rc = -EINTR;
             *status = HL_WAIT_CS_STATUS_ABORTED;
         } else {
-            if (pend->fence.error == -EIO) {
-                dev_err_ratelimited(hdev->dev,
-                    "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
-                    pend->fence.error);
-                rc = -EIO;
-                *status = HL_WAIT_CS_STATUS_ABORTED;
-            } else {
-                /* The wait has timed-out. We don't know anything beyond that
-                 * because the workload wasn't submitted through the driver.
-                 * Therefore, from driver's perspective, the workload is still
-                 * executing.
-                 */
-                rc = 0;
-                *status = HL_WAIT_CS_STATUS_BUSY;
-            }
+            /* The wait has timed-out. We don't know anything beyond that
+             * because the workload was not submitted through the driver.
+             * Therefore, from driver's perspective, the workload is still
+             * executing.
+             */
+            rc = 0;
+            *status = HL_WAIT_CS_STATUS_BUSY;
         }
     }
 
@@ -3353,23 +3485,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
     * for ts record, the node will be deleted in the irq handler after
     * we reach the target value.
     */
-    spin_lock_irqsave(&interrupt->wait_list_lock, flags);
-    list_del(&pend->wait_list_node);
-    spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+    spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+    list_del(&pend->list_node);
+    spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 set_timestamp:
     *timestamp = ktime_to_ns(pend->fence.timestamp);
     kfree(pend);
-    hl_cb_put(cq_cb);
-ts_registration_exit:
+    hl_cb_put(data->cq_cb);
     hl_ctx_put(ctx);
 
     return rc;
 
-put_ts_buff:
-    hl_mmap_mem_buf_put(buf);
 put_cq_cb:
-    hl_cb_put(cq_cb);
+    hl_cb_put(data->cq_cb);
 put_ctx:
     hl_ctx_put(ctx);
 
@@ -3404,7 +3533,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
     * handler to monitor
     */
     spin_lock_irqsave(&interrupt->wait_list_lock, flags);
-    list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+    list_add_tail(&pend->list_node, &interrupt->wait_list_head);
     spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
     /* We check for completion value as interrupt could have been received
@@ -3481,7 +3610,7 @@ wait_again:
 
 remove_pending_user_interrupt:
     spin_lock_irqsave(&interrupt->wait_list_lock, flags);
-    list_del(&pend->wait_list_node);
+    list_del(&pend->list_node);
     spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
     *timestamp = ktime_to_ns(pend->fence.timestamp);
@@ -3540,19 +3669,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
         return -EINVAL;
     }
 
-    if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-        rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
-                args->in.interrupt_timeout_us, args->in.cq_counters_handle,
-                args->in.cq_counters_offset,
-                args->in.target, interrupt,
-                !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
-                args->in.timestamp_handle, args->in.timestamp_offset,
-                &status, &timestamp);
-    else
+    if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
+        struct wait_interrupt_data wait_intr_data = {0};
+
+        wait_intr_data.interrupt = interrupt;
+        wait_intr_data.mmg = &hpriv->mem_mgr;
+        wait_intr_data.cq_handle = args->in.cq_counters_handle;
+        wait_intr_data.cq_offset = args->in.cq_counters_offset;
+        wait_intr_data.ts_handle = args->in.timestamp_handle;
+        wait_intr_data.ts_offset = args->in.timestamp_offset;
+        wait_intr_data.target_value = args->in.target;
+        wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
+
+        if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
+            /*
+             * Allow only one registration at a time. this is needed in order to prevent
+             * issues while handling the flow of re-use of the same offset.
+             * Since the registration flow is protected only by the interrupt lock,
+             * re-use flow might request to move ts node to another interrupt list,
+             * and in such case we're not protected.
+             */
+            mutex_lock(&hpriv->ctx->ts_reg_lock);
+
+            rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+                        &status, &timestamp);
+
+            mutex_unlock(&hpriv->ctx->ts_reg_lock);
+        } else
+            rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+                        &status, &timestamp);
+    } else {
         rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
                 args->in.interrupt_timeout_us, args->in.addr,
                 args->in.target, interrupt, &status,
                 &timestamp);
+    }
+
     if (rc)
         return rc;
 
@@ -3567,8 +3719,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
     return 0;
 }
 
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+    struct hl_fpriv *hpriv = file_priv->driver_priv;
     struct hl_device *hdev = hpriv->hdev;
     union hl_wait_cs_args *args = data;
     u32 flags = args->in.flags;
