summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c104
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h2
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c56
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c34
-rw-r--r--drivers/misc/habanalabs/goya/goya.c4
5 files changed, 141 insertions, 59 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index ba0c854b2ed4..458cdf2ddab5 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -52,6 +52,24 @@ void hl_sob_reset_error(struct kref *ref)
hw_sob->q_idx, hw_sob->sob_id);
}
+static void hw_sob_put(struct hl_hw_sob *hw_sob)
+{
+ if (hw_sob)
+ kref_put(&hw_sob->kref, hl_sob_reset);
+}
+
+static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
+{
+ if (hw_sob)
+ kref_put(&hw_sob->kref, hl_sob_reset_error);
+}
+
+static void hw_sob_get(struct hl_hw_sob *hw_sob)
+{
+ if (hw_sob)
+ kref_get(&hw_sob->kref);
+}
+
/**
* hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
* @sob_base: sob base id
@@ -122,31 +140,7 @@ static void hl_fence_release(struct kref *kref)
container_of(kref, struct hl_fence, refcount);
struct hl_cs_compl *hl_cs_cmpl =
container_of(fence, struct hl_cs_compl, base_fence);
- struct hl_device *hdev = hl_cs_cmpl->hdev;
- /* EBUSY means the CS was never submitted and hence we don't have
- * an attached hw_sob object that we should handle here
- */
- if (fence->error == -EBUSY)
- goto free;
-
- if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
- (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
- (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
-
- dev_dbg(hdev->dev,
- "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
- hl_cs_cmpl->cs_seq,
- hl_cs_cmpl->type,
- hl_cs_cmpl->hw_sob->sob_id,
- hl_cs_cmpl->sob_val);
-
- queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
-
- return;
- }
-
-free:
kfree(hl_cs_cmpl);
}
@@ -567,11 +561,46 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
}
}
+static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
+ struct hl_cs *cs,
+ struct hl_cs_compl *hl_cs_cmpl)
+{
+ /* Skip this handler if the cs wasn't submitted, to avoid putting
+ * the hw_sob twice, since this case already handled at this point,
+ * also skip if the hw_sob pointer wasn't set.
+ */
+ if (!hl_cs_cmpl->hw_sob || !cs->submitted)
+ return;
+
+ spin_lock(&hl_cs_cmpl->lock);
+
+ if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
+ (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
+ (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
+ dev_dbg(hdev->dev,
+ "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
+ hl_cs_cmpl->cs_seq,
+ hl_cs_cmpl->type,
+ hl_cs_cmpl->hw_sob->sob_id,
+ hl_cs_cmpl->sob_val);
+
+ hw_sob_put(hl_cs_cmpl->hw_sob);
+
+ if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+ hdev->asic_funcs->reset_sob_group(hdev,
+ hl_cs_cmpl->sob_group);
+ }
+
+ spin_unlock(&hl_cs_cmpl->lock);
+}
+
static void cs_do_release(struct kref *ref)
{
struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
struct hl_device *hdev = cs->ctx->hdev;
struct hl_cs_job *job, *tmp;
+ struct hl_cs_compl *hl_cs_cmpl =
+ container_of(cs->fence, struct hl_cs_compl, base_fence);
cs->completed = true;
@@ -587,8 +616,9 @@ static void cs_do_release(struct kref *ref)
complete_job(hdev, job);
if (!cs->submitted) {
- /* In case the wait for signal CS was submitted, the put occurs
- * in init_signal_wait_cs() or collective_wait_init_cs()
+ /*
+ * In case the wait for signal CS was submitted, the fence put
+ * occurs in init_signal_wait_cs() or collective_wait_init_cs()
* right before hanging on the PQ.
*/
if (cs->type == CS_TYPE_WAIT ||
@@ -661,6 +691,9 @@ out:
cs->fence->timestamp = ktime_get();
complete_all(&cs->fence->completion);
complete_multi_cs(hdev, cs);
+
+ cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
+
hl_fence_put(cs->fence);
kfree(cs->jobs_in_queue_cnt);
@@ -1630,7 +1663,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
- kref_get(&sob->kref);
+ hw_sob_get(sob);
/* check for wraparound */
if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
@@ -1640,7 +1673,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
* just incremented the refcount right before calling this
* function.
*/
- kref_put(&sob->kref, hl_sob_reset_error);
+ hw_sob_put_err(sob);
/*
* check the other sob value, if it still in use then fail
@@ -1797,6 +1830,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_fence *sig_fence = NULL;
struct hl_ctx *ctx = hpriv->ctx;
enum hl_queue_type q_type;
+ bool is_wait_cs = false;
struct hl_cs *cs;
u64 signal_seq;
int rc;
@@ -1849,6 +1883,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
}
if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
+ is_wait_cs = true;
+
rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
if (rc)
goto free_cs_chunk_array;
@@ -1894,9 +1930,9 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
if (rc) {
- if (cs_type == CS_TYPE_WAIT ||
- cs_type == CS_TYPE_COLLECTIVE_WAIT)
+ if (is_wait_cs)
hl_fence_put(sig_fence);
+
goto free_cs_chunk_array;
}
@@ -1928,7 +1964,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
rc = hl_hw_queue_schedule_cs(cs);
if (rc) {
- if (rc != -EAGAIN)
+ /* In case wait cs failed here, it means the signal cs
+ * already completed. we want to free all it's related objects
+ * but we don't want to fail the ioctl.
+ */
+ if (is_wait_cs)
+ rc = 0;
+ else if (rc != -EAGAIN)
dev_err(hdev->dev,
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
ctx->asid, cs->sequence, rc);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6d5154434637..bf327cb7ddd6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1244,7 +1244,7 @@ struct hl_asic_funcs {
void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
void (*set_dma_mask_from_fw)(struct hl_device *hdev);
u64 (*get_device_time)(struct hl_device *hdev);
- void (*collective_wait_init_cs)(struct hl_cs *cs);
+ int (*collective_wait_init_cs)(struct hl_cs *cs);
int (*collective_wait_create_jobs)(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index f05a0dbd0990..2494bd6e9358 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -416,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev,
cs_cmpl->sob_val = prop->next_sob_val;
dev_dbg(hdev->dev,
- "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
- cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+ "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d, seq: %llu\n",
+ cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
+ cs_cmpl->cs_seq);
/* we set an EB since we must make sure all oeprations are done
* when sending the signal
@@ -430,7 +431,7 @@ static int init_signal_cs(struct hl_device *hdev,
return rc;
}
-static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
struct hl_cs_compl *signal_cs_cmpl;
@@ -449,10 +450,33 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+ /* check again if the signal cs already completed.
+ * if yes then don't send any wait cs since the hw_sob
+ * could be in reset already. if signal is not completed
+ * then get refcount to hw_sob to prevent resetting the sob
+ * while wait cs is not submitted.
+ * note that this check is protected by two locks,
+ * hw queue lock and completion object lock,
+ * and the same completion object lock also protects
+ * the hw_sob reset handler function.
+ * The hw_queue lock prevent out of sync of hw_sob
+ * refcount value, changed by signal/wait flows.
+ */
+ spin_lock(&signal_cs_cmpl->lock);
+
+ if (completion_done(&cs->signal_fence->completion)) {
+ spin_unlock(&signal_cs_cmpl->lock);
+ return -EINVAL;
+ }
+
+ kref_get(&cs_cmpl->hw_sob->kref);
+
+ spin_unlock(&signal_cs_cmpl->lock);
+
dev_dbg(hdev->dev,
- "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+ "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
- prop->base_mon_id, q_idx);
+ prop->base_mon_id, q_idx, cs->sequence);
wait_prop.data = (void *) job->patched_cb;
wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
@@ -461,17 +485,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
wait_prop.mon_id = prop->base_mon_id;
wait_prop.q_idx = q_idx;
wait_prop.size = 0;
+
hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
- kref_get(&cs_cmpl->hw_sob->kref);
- /*
- * Must put the signal fence after the SOB refcnt increment so
- * the SOB refcnt won't turn 0 and reset the SOB before the
- * wait CS was submitted.
- */
mb();
hl_fence_put(cs->signal_fence);
cs->signal_fence = NULL;
+
+ return 0;
}
/*
@@ -496,7 +517,7 @@ static int init_signal_wait_cs(struct hl_cs *cs)
if (cs->type & CS_TYPE_SIGNAL)
rc = init_signal_cs(hdev, job, cs_cmpl);
else if (cs->type & CS_TYPE_WAIT)
- init_wait_cs(hdev, cs, job, cs_cmpl);
+ rc = init_wait_cs(hdev, cs, job, cs_cmpl);
return rc;
}
@@ -571,12 +592,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
rc = init_signal_wait_cs(cs);
- if (rc) {
- dev_err(hdev->dev, "Failed to submit signal cs\n");
+ if (rc)
goto unroll_cq_resv;
- }
- } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
- hdev->asic_funcs->collective_wait_init_cs(cs);
+ } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
+ rc = hdev->asic_funcs->collective_wait_init_cs(cs);
+ if (rc)
+ goto unroll_cq_resv;
+ }
spin_lock(&hdev->cs_mirror_lock);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index a4b33b0b17d4..5b7a5692cd21 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1239,7 +1239,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
prop->collective_sob_id, cb_size, false);
}
-static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
+static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
struct hl_cs_compl *signal_cs_cmpl =
container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
@@ -1261,6 +1261,29 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+ /* check again if the signal cs already completed.
+ * if yes then don't send any wait cs since the hw_sob
+ * could be in reset already. if signal is not completed
+ * then get refcount to hw_sob to prevent resetting the sob
+ * while wait cs is not submitted.
+ * note that this check is protected by two locks,
+ * hw queue lock and completion object lock,
+ * and the same completion object lock also protects
+ * the hw_sob reset handler function.
+ * The hw_queue lock prevent out of sync of hw_sob
+ * refcount value, changed by signal/wait flows.
+ */
+ spin_lock(&signal_cs_cmpl->lock);
+
+ if (completion_done(&cs->signal_fence->completion)) {
+ spin_unlock(&signal_cs_cmpl->lock);
+ return -EINVAL;
+ }
+ /* Increment kref since all slave queues are now waiting on it */
+ kref_get(&cs_cmpl->hw_sob->kref);
+
+ spin_unlock(&signal_cs_cmpl->lock);
+
/* Calculate the stream from collective master queue (1st job) */
job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
stream = job->hw_queue_id % 4;
@@ -1304,16 +1327,11 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
cprop->curr_sob_group_idx[stream], stream);
}
- /* Increment kref since all slave queues are now waiting on it */
- kref_get(&cs_cmpl->hw_sob->kref);
- /*
- * Must put the signal fence after the SOB refcnt increment so
- * the SOB refcnt won't turn 0 and reset the SOB before the
- * wait CS was submitted.
- */
mb();
hl_fence_put(cs->signal_fence);
cs->signal_fence = NULL;
+
+ return 0;
}
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index f6251d8663b2..dd218a4bb62e 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5480,9 +5480,9 @@ u64 goya_get_device_time(struct hl_device *hdev)
return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
-static void goya_collective_wait_init_cs(struct hl_cs *cs)
+static int goya_collective_wait_init_cs(struct hl_cs *cs)
{
-
+ return 0;
}
static int goya_collective_wait_create_jobs(struct hl_device *hdev,