summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/common/context.c8
-rw-r--r--drivers/misc/habanalabs/common/device.c56
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h7
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c9
4 files changed, 50 insertions, 30 deletions
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 49e6f1172d18..c6360e33bce8 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -283,11 +283,9 @@ struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev)
/* There can only be a single user which has opened the compute device, so exit
* immediately once we find him
*/
- if (!hpriv->is_control) {
- ctx = hpriv->ctx;
- hl_ctx_get(hdev, ctx);
- break;
- }
+ ctx = hpriv->ctx;
+ hl_ctx_get(hdev, ctx);
+ break;
}
mutex_unlock(&hdev->fpriv_list_lock);
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index bea05a59425f..f1f482c5cdcb 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -169,9 +169,9 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
goto out;
}
- mutex_lock(&hdev->fpriv_list_lock);
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
list_del(&hpriv->dev_node);
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
put_pid(hpriv->taskpid);
@@ -449,7 +449,9 @@ static int device_early_init(struct hl_device *hdev)
INIT_LIST_HEAD(&hdev->cs_mirror_list);
spin_lock_init(&hdev->cs_mirror_lock);
INIT_LIST_HEAD(&hdev->fpriv_list);
+ INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
mutex_init(&hdev->fpriv_list_lock);
+ mutex_init(&hdev->fpriv_ctrl_list_lock);
atomic_set(&hdev->in_reset, 0);
mutex_init(&hdev->clk_throttling.lock);
@@ -491,6 +493,7 @@ static void device_early_fini(struct hl_device *hdev)
mutex_destroy(&hdev->send_cpu_message_lock);
mutex_destroy(&hdev->fpriv_list_lock);
+ mutex_destroy(&hdev->fpriv_ctrl_list_lock);
mutex_destroy(&hdev->clk_throttling.lock);
@@ -678,6 +681,8 @@ static void take_release_locks(struct hl_device *hdev)
/* Flush anyone that is inside device open */
mutex_lock(&hdev->fpriv_list_lock);
mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
@@ -789,17 +794,21 @@ disable_device:
return rc;
}
-static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
+static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
{
- struct hl_fpriv *hpriv;
struct task_struct *task = NULL;
+ struct list_head *fd_list;
+ struct hl_fpriv *hpriv;
+ struct mutex *fd_lock;
u32 pending_cnt;
+ fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+ fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
/* Giving time for user to close FD, and for processes that are inside
* hl_device_open to finish
*/
- if (!list_empty(&hdev->fpriv_list))
+ if (!list_empty(fd_list))
ssleep(1);
if (timeout) {
@@ -815,12 +824,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
}
}
- mutex_lock(&hdev->fpriv_list_lock);
+ mutex_lock(fd_lock);
/* This section must be protected because we are dereferencing
* pointers that are freed if the process exits
*/
- list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
+ list_for_each_entry(hpriv, fd_list, dev_node) {
task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
if (task) {
dev_info(hdev->dev, "Killing user process pid=%d\n",
@@ -832,12 +841,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
} else {
dev_warn(hdev->dev,
"Can't get task struct for PID so giving up on killing process\n");
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(fd_lock);
return -ETIME;
}
}
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(fd_lock);
/*
* We killed the open users, but that doesn't mean they are closed.
@@ -849,7 +858,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
*/
wait_for_processes:
- while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
+ while ((!list_empty(fd_list)) && (pending_cnt)) {
dev_dbg(hdev->dev,
"Waiting for all unmap operations to finish before hard reset\n");
@@ -859,7 +868,7 @@ wait_for_processes:
}
/* All processes exited successfully */
- if (list_empty(&hdev->fpriv_list))
+ if (list_empty(fd_list))
return 0;
/* Give up waiting for processes to exit */
@@ -871,14 +880,19 @@ wait_for_processes:
return -EBUSY;
}
-static void device_disable_open_processes(struct hl_device *hdev)
+static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
+ struct list_head *fd_list;
struct hl_fpriv *hpriv;
+ struct mutex *fd_lock;
- mutex_lock(&hdev->fpriv_list_lock);
- list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
+ fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+ fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
+
+ mutex_lock(fd_lock);
+ list_for_each_entry(hpriv, fd_list, dev_node)
hpriv->hdev = NULL;
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(fd_lock);
}
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
@@ -1057,7 +1071,7 @@ kill_processes:
* process can't really exit until all its CSs are done, which
* is what we do in cs rollback
*/
- rc = device_kill_open_processes(hdev, 0);
+ rc = device_kill_open_processes(hdev, 0, false);
if (rc == -EBUSY) {
if (hdev->device_fini_pending) {
@@ -1629,10 +1643,16 @@ void hl_device_fini(struct hl_device *hdev)
"Waiting for all processes to exit (timeout of %u seconds)",
HL_PENDING_RESET_LONG_SEC);
- rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
+ rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
if (rc) {
dev_crit(hdev->dev, "Failed to kill all open processes\n");
- device_disable_open_processes(hdev);
+ device_disable_open_processes(hdev, false);
+ }
+
+ rc = device_kill_open_processes(hdev, 0, true);
+ if (rc) {
+ dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
+ device_disable_open_processes(hdev, true);
}
hl_cb_pool_fini(hdev);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 362eee3f028c..015aa1ee8ce0 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1824,7 +1824,6 @@ struct hl_debug_params {
* @dev_node: node in the device list of file private data
* @refcount: number of related contexts.
* @restore_phase_mutex: lock for context switch and restore phase.
- * @is_control: true for control device, false otherwise
*/
struct hl_fpriv {
struct hl_device *hdev;
@@ -1837,7 +1836,6 @@ struct hl_fpriv {
struct list_head dev_node;
struct kref refcount;
struct mutex restore_phase_mutex;
- u8 is_control;
};
@@ -2502,7 +2500,10 @@ struct last_error_session_info {
* @internal_cb_va_base: internal cb pool mmu virtual address base
* @fpriv_list: list of file private data structures. Each structure is created
* when a user opens the device
+ * @fpriv_ctrl_list: list of file private data structures. Each structure is created
+ * when a user opens the control device
* @fpriv_list_lock: protects the fpriv_list
+ * @fpriv_ctrl_list_lock: protects the fpriv_ctrl_list
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
@@ -2655,7 +2656,9 @@ struct hl_device {
u64 internal_cb_va_base;
struct list_head fpriv_list;
+ struct list_head fpriv_ctrl_list;
struct mutex fpriv_list_lock;
+ struct mutex fpriv_ctrl_list_lock;
struct hl_cs_counters_atomic aggregated_cs_counters;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index d59201f93de9..aa4e07b1f839 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -220,12 +220,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
- hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
- mutex_lock(&hdev->fpriv_list_lock);
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
if (!hl_device_operational(hdev, NULL)) {
dev_err_ratelimited(hdev->dev_ctrl,
@@ -235,13 +234,13 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
goto out_err;
}
- list_add(&hpriv->dev_node, &hdev->fpriv_list);
- mutex_unlock(&hdev->fpriv_list_lock);
+ list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
return 0;
out_err:
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
filp->private_data = NULL;
put_pid(hpriv->taskpid);