summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r--drivers/misc/habanalabs/common/Makefile10
-rw-r--r--drivers/misc/habanalabs/common/asid.c6
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c8
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c473
-rw-r--r--drivers/misc/habanalabs/common/context.c33
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c43
-rw-r--r--drivers/misc/habanalabs/common/device.c46
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c157
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h109
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c25
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c51
-rw-r--r--drivers/misc/habanalabs/common/memory.c621
-rw-r--r--drivers/misc/habanalabs/common/mmu/Makefile2
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu.c (renamed from drivers/misc/habanalabs/common/mmu.c)124
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu_v1.c (renamed from drivers/misc/habanalabs/common/mmu_v1.c)4
-rw-r--r--drivers/misc/habanalabs/common/pci/Makefile2
-rw-r--r--drivers/misc/habanalabs/common/pci/pci.c (renamed from drivers/misc/habanalabs/common/pci.c)47
17 files changed, 1306 insertions, 455 deletions
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
index eccd8c7dc62d..5d8b48288cf4 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -1,7 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
+
+include $(src)/common/mmu/Makefile
+habanalabs-y += $(HL_COMMON_MMU_FILES)
+
+include $(src)/common/pci/Makefile
+habanalabs-y += $(HL_COMMON_PCI_FILES)
+
HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/asid.o common/habanalabs_ioctl.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
- common/command_submission.o common/mmu.o common/mmu_v1.o \
- common/firmware_if.o common/pci.o
+ common/command_submission.o common/firmware_if.o
diff --git a/drivers/misc/habanalabs/common/asid.c b/drivers/misc/habanalabs/common/asid.c
index a2fdf31cf27c..ede04c032b6e 100644
--- a/drivers/misc/habanalabs/common/asid.c
+++ b/drivers/misc/habanalabs/common/asid.c
@@ -50,8 +50,10 @@ unsigned long hl_asid_alloc(struct hl_device *hdev)
void hl_asid_free(struct hl_device *hdev, unsigned long asid)
{
- if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
- "Invalid ASID %lu", asid))
+ if (asid == HL_KERNEL_ASID_ID || asid >= hdev->asic_prop.max_asid) {
+ dev_crit(hdev->dev, "Invalid ASID %lu", asid);
return;
+ }
+
clear_bit(asid, hdev->asid_bitmap);
}
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 6f6a904ab6ca..d9adb9a5e4d8 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -635,10 +635,12 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
cb_handle >>= PAGE_SHIFT;
cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
- /* hl_cb_get should never fail here so use kernel WARN */
- WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
- if (!cb)
+ /* hl_cb_get should never fail here */
+ if (!cb) {
+ dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
+ (u32) cb_handle);
goto destroy_cb;
+ }
return cb;
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b2b3d2b0f808..7bd4a03b3429 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -48,8 +48,8 @@ void hl_sob_reset_error(struct kref *ref)
struct hl_device *hdev = hw_sob->hdev;
dev_crit(hdev->dev,
- "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
- hw_sob->q_idx, hw_sob->sob_id);
+ "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
+ hw_sob->q_idx, hw_sob->sob_id);
}
/**
@@ -149,9 +149,10 @@ void hl_fence_get(struct hl_fence *fence)
kref_get(&fence->refcount);
}
-static void hl_fence_init(struct hl_fence *fence)
+static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{
kref_init(&fence->refcount);
+ fence->cs_sequence = sequence;
fence->error = 0;
fence->timestamp = ktime_set(0, 0);
init_completion(&fence->completion);
@@ -184,6 +185,28 @@ static void cs_job_put(struct hl_cs_job *job)
kref_put(&job->refcount, cs_job_do_release);
}
+bool cs_needs_completion(struct hl_cs *cs)
+{
+ /* In case this is a staged CS, only the last CS in sequence should
+ * get a completion, any non staged CS will always get a completion
+ */
+ if (cs->staged_cs && !cs->staged_last)
+ return false;
+
+ return true;
+}
+
+bool cs_needs_timeout(struct hl_cs *cs)
+{
+ /* In case this is a staged CS, only the first CS in sequence should
+ * get a timeout, any non staged CS will always get a timeout
+ */
+ if (cs->staged_cs && !cs->staged_first)
+ return false;
+
+ return true;
+}
+
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
/*
@@ -225,6 +248,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.queue_type = job->queue_type;
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
job->patched_cb = NULL;
+ parser.completion = cs_needs_completion(job->cs);
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
@@ -290,13 +314,153 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
hl_debugfs_remove_job(hdev, job);
- if (job->queue_type == QUEUE_TYPE_EXT ||
- job->queue_type == QUEUE_TYPE_HW)
+ /* We decrement reference only for a CS that gets completion
+ * because the reference was incremented only for this kind of CS
+ * right before it was scheduled.
+ *
+ * In staged submission, only the last CS marked as 'staged_last'
+ * gets completion, hence its release function will be called from here.
+ * As for all the rest CS's in the staged submission which do not get
+ * completion, their CS reference will be decremented by the
+ * 'staged_last' CS during the CS release flow.
+ * All relevant PQ CI counters will be incremented during the CS release
+ * flow by calling 'hl_hw_queue_update_ci'.
+ */
+ if (cs_needs_completion(cs) &&
+ (job->queue_type == QUEUE_TYPE_EXT ||
+ job->queue_type == QUEUE_TYPE_HW))
cs_put(cs);
cs_job_put(job);
}
+/*
+ * hl_staged_cs_find_first - locate the first CS in this staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs_seq: staged submission sequence number
+ *
+ * @note: This function must be called under 'hdev->cs_mirror_lock'
+ *
+ * Find and return a CS pointer with the given sequence
+ */
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
+{
+ struct hl_cs *cs;
+
+ list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
+ if (cs->staged_cs && cs->staged_first &&
+ cs->sequence == cs_seq)
+ return cs;
+
+ return NULL;
+}
+
+/*
+ * is_staged_cs_last_exists - returns true if the last CS in sequence exists
+ *
+ * @hdev: pointer to device structure
+ * @cs: staged submission member
+ *
+ */
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
+{
+ struct hl_cs *last_entry;
+
+ last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
+ staged_cs_node);
+
+ if (last_entry->staged_last)
+ return true;
+
+ return false;
+}
+
+/*
+ * staged_cs_get - get CS reference if this CS is a part of a staged CS
+ *
+ * @hdev: pointer to device structure
+ * @cs: current CS
+ * @cs_seq: staged submission sequence number
+ *
+ * Increment CS reference for every CS in this staged submission except for
+ * the CS which get completion.
+ */
+static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
+{
+ /* Only the last CS in this staged submission will get a completion.
+ * We must increment the reference for all other CS's in this
+ * staged submission.
+ * Once we get a completion we will release the whole staged submission.
+ */
+ if (!cs->staged_last)
+ cs_get(cs);
+}
+
+/*
+ * staged_cs_put - put a CS in case it is part of staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs: CS to put
+ *
+ * This function decrements a CS reference (for a non completion CS)
+ */
+static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
+{
+ /* We release all CS's in a staged submission except the last
+ * CS which we have never incremented its reference.
+ */
+ if (!cs_needs_completion(cs))
+ cs_put(cs);
+}
+
+static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
+{
+ bool next_entry_found = false;
+ struct hl_cs *next;
+
+ if (!cs_needs_timeout(cs))
+ return;
+
+ spin_lock(&hdev->cs_mirror_lock);
+
+ /* We need to handle tdr only once for the complete staged submission.
+ * Hence, we choose the CS that reaches this function first which is
+ * the CS marked as 'staged_last'.
+ */
+ if (cs->staged_cs && cs->staged_last)
+ cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+
+ spin_unlock(&hdev->cs_mirror_lock);
+
+ /* Don't cancel TDR in case this CS was timedout because we might be
+ * running from the TDR context
+ */
+ if (cs && (cs->timedout ||
+ hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+ return;
+
+ if (cs && cs->tdr_active)
+ cancel_delayed_work_sync(&cs->work_tdr);
+
+ spin_lock(&hdev->cs_mirror_lock);
+
+ /* queue TDR for next CS */
+ list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
+ if (cs_needs_timeout(next)) {
+ next_entry_found = true;
+ break;
+ }
+
+ if (next_entry_found && !next->tdr_active) {
+ next->tdr_active = true;
+ schedule_delayed_work(&next->work_tdr,
+ hdev->timeout_jiffies);
+ }
+
+ spin_unlock(&hdev->cs_mirror_lock);
+}
+
static void cs_do_release(struct kref *ref)
{
struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
@@ -346,36 +510,37 @@ static void cs_do_release(struct kref *ref)
hdev->asic_funcs->hw_queues_unlock(hdev);
- /* Need to update CI for internal queues */
- hl_int_hw_queue_update_ci(cs);
+ /* Need to update CI for all queue jobs that does not get completion */
+ hl_hw_queue_update_ci(cs);
/* remove CS from CS mirror list */
spin_lock(&hdev->cs_mirror_lock);
list_del_init(&cs->mirror_node);
spin_unlock(&hdev->cs_mirror_lock);
- /* Don't cancel TDR in case this CS was timedout because we might be
- * running from the TDR context
- */
- if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
- struct hl_cs *next;
-
- if (cs->tdr_active)
- cancel_delayed_work_sync(&cs->work_tdr);
+ cs_handle_tdr(hdev, cs);
- spin_lock(&hdev->cs_mirror_lock);
-
- /* queue TDR for next CS */
- next = list_first_entry_or_null(&hdev->cs_mirror_list,
- struct hl_cs, mirror_node);
+ if (cs->staged_cs) {
+ /* the completion CS decrements reference for the entire
+ * staged submission
+ */
+ if (cs->staged_last) {
+ struct hl_cs *staged_cs, *tmp;
- if (next && !next->tdr_active) {
- next->tdr_active = true;
- schedule_delayed_work(&next->work_tdr,
- hdev->timeout_jiffies);
+ list_for_each_entry_safe(staged_cs, tmp,
+ &cs->staged_cs_node, staged_cs_node)
+ staged_cs_put(hdev, staged_cs);
}
- spin_unlock(&hdev->cs_mirror_lock);
+ /* A staged CS will be a member in the list only after it
+ * was submitted. We used 'cs_mirror_lock' when inserting
+ * it to list so we will use it again when removing it
+ */
+ if (cs->submitted) {
+ spin_lock(&hdev->cs_mirror_lock);
+ list_del(&cs->staged_cs_node);
+ spin_unlock(&hdev->cs_mirror_lock);
+ }
}
out:
@@ -461,7 +626,8 @@ static void cs_timedout(struct work_struct *work)
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
- enum hl_cs_type cs_type, struct hl_cs **cs_new)
+ enum hl_cs_type cs_type, u64 user_sequence,
+ struct hl_cs **cs_new)
{
struct hl_cs_counters_atomic *cntr;
struct hl_fence *other = NULL;
@@ -478,6 +644,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
return -ENOMEM;
}
+ /* increment refcnt for context */
+ hl_ctx_get(hdev, ctx);
+
cs->ctx = ctx;
cs->submitted = false;
cs->completed = false;
@@ -507,6 +676,18 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
(hdev->asic_prop.max_pending_cs - 1)];
if (other && !completion_done(&other->completion)) {
+ /* If the following statement is true, it means we have reached
+ * a point in which only part of the staged submission was
+ * submitted and we don't have enough room in the 'cs_pending'
+ * array for the rest of the submission.
+ * This causes a deadlock because this CS will never be
+ * completed as it depends on future CS's for completion.
+ */
+ if (other->cs_sequence == user_sequence)
+ dev_crit_ratelimited(hdev->dev,
+ "Staged CS %llu deadlock due to lack of resources",
+ user_sequence);
+
dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
@@ -525,7 +706,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
}
/* init hl_fence */
- hl_fence_init(&cs_cmpl->base_fence);
+ hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
cs->sequence = cs_cmpl->cs_seq;
@@ -549,6 +730,7 @@ free_fence:
kfree(cs_cmpl);
free_cs:
kfree(cs);
+ hl_ctx_put(ctx);
return rc;
}
@@ -556,6 +738,8 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
struct hl_cs_job *job, *tmp;
+ staged_cs_put(hdev, cs);
+
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
complete_job(hdev, job);
}
@@ -565,7 +749,9 @@ void hl_cs_rollback_all(struct hl_device *hdev)
int i;
struct hl_cs *cs, *tmp;
- /* flush all completions */
+ /* flush all completions before iterating over the CS mirror list in
+ * order to avoid a race with the release functions
+ */
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
flush_workqueue(hdev->cq_wq[i]);
@@ -574,12 +760,24 @@ void hl_cs_rollback_all(struct hl_device *hdev)
cs_get(cs);
cs->aborted = true;
dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
- cs->ctx->asid, cs->sequence);
+ cs->ctx->asid, cs->sequence);
cs_rollback(hdev, cs);
cs_put(cs);
}
}
+void hl_pending_cb_list_flush(struct hl_ctx *ctx)
+{
+ struct hl_pending_cb *pending_cb, *tmp;
+
+ list_for_each_entry_safe(pending_cb, tmp,
+ &ctx->pending_cb_list, cb_node) {
+ list_del(&pending_cb->cb_node);
+ hl_cb_put(pending_cb->cb);
+ kfree(pending_cb);
+ }
+}
+
static void job_wq_completion(struct work_struct *work)
{
struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -734,6 +932,12 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
return -EBUSY;
}
+ if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+ !hdev->supports_staged_submission) {
+ dev_err(hdev->dev, "staged submission not supported");
+ return -EPERM;
+ }
+
cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
@@ -805,10 +1009,38 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
return 0;
}
+static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
+ u64 sequence, u32 flags)
+{
+ if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
+ return 0;
+
+ cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
+ cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
+
+ if (cs->staged_first) {
+ /* Staged CS sequence is the first CS sequence */
+ INIT_LIST_HEAD(&cs->staged_cs_node);
+ cs->staged_sequence = cs->sequence;
+ } else {
+ /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
+ * under the cs_mirror_lock
+ */
+ cs->staged_sequence = sequence;
+ }
+
+ /* Increment CS reference if needed */
+ staged_cs_get(hdev, cs);
+
+ cs->staged_cs = true;
+
+ return 0;
+}
+
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
- u32 num_chunks, u64 *cs_seq, bool timestamp)
+ u32 num_chunks, u64 *cs_seq, u32 flags)
{
- bool int_queues_only = true;
+ bool staged_mid, int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
struct hl_cs_chunk *cs_chunk_array;
struct hl_cs_counters_atomic *cntr;
@@ -816,9 +1048,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
+ u64 user_sequence;
int rc, i;
cntr = &hdev->aggregated_cs_counters;
+ user_sequence = *cs_seq;
*cs_seq = ULLONG_MAX;
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
@@ -826,20 +1060,26 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
if (rc)
goto out;
- /* increment refcnt for context */
- hl_ctx_get(hdev, hpriv->ctx);
+ if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+ !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+ staged_mid = true;
+ else
+ staged_mid = false;
- rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
- if (rc) {
- hl_ctx_put(hpriv->ctx);
+ rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
+ staged_mid ? user_sequence : ULLONG_MAX, &cs);
+ if (rc)
goto free_cs_chunk_array;
- }
- cs->timestamp = !!timestamp;
+ cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
*cs_seq = cs->sequence;
hl_debugfs_add_cs(cs);
+ rc = cs_staged_submission(hdev, cs, user_sequence, flags);
+ if (rc)
+ goto free_cs_object;
+
/* Validate ALL the CS chunks before submitting the CS */
for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
@@ -899,8 +1139,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
* Only increment for JOB on external or H/W queues, because
* only for those JOBs we get completion
*/
- if (job->queue_type == QUEUE_TYPE_EXT ||
- job->queue_type == QUEUE_TYPE_HW)
+ if (cs_needs_completion(cs) &&
+ (job->queue_type == QUEUE_TYPE_EXT ||
+ job->queue_type == QUEUE_TYPE_HW))
cs_get(cs);
hl_debugfs_add_job(hdev, job);
@@ -916,11 +1157,14 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
}
}
- if (int_queues_only) {
+ /* We allow a CS with any queue type combination as long as it does
+ * not get a completion
+ */
+ if (int_queues_only && cs_needs_completion(cs)) {
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
- "Reject CS %d.%llu because only internal queues jobs are present\n",
+ "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
cs->ctx->asid, cs->sequence);
rc = -EINVAL;
goto free_cs_object;
@@ -954,6 +1198,129 @@ out:
return rc;
}
+static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
+ struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
+{
+ struct hw_queue_properties *hw_queue_prop;
+ struct hl_cs_counters_atomic *cntr;
+ struct hl_cs_job *job;
+
+ hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
+ cntr = &hdev->aggregated_cs_counters;
+
+ job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
+ if (!job) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
+ dev_err(hdev->dev, "Failed to allocate a new job\n");
+ return -ENOMEM;
+ }
+
+ job->id = 0;
+ job->cs = cs;
+ job->user_cb = cb;
+ atomic_inc(&job->user_cb->cs_cnt);
+ job->user_cb_size = size;
+ job->hw_queue_id = hw_queue_id;
+ job->patched_cb = job->user_cb;
+ job->job_cb_size = job->user_cb_size;
+
+ /* increment refcount as for external queues we get completion */
+ cs_get(cs);
+
+ cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+ list_add_tail(&job->cs_node, &cs->job_list);
+
+ hl_debugfs_add_job(hdev, job);
+
+ return 0;
+}
+
+static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
+ struct hl_pending_cb *pending_cb, *tmp;
+ struct list_head local_cb_list;
+ struct hl_cs *cs;
+ struct hl_cb *cb;
+ u32 hw_queue_id;
+ u32 cb_size;
+ int process_list, rc = 0;
+
+ if (list_empty(&ctx->pending_cb_list))
+ return 0;
+
+ process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
+
+ /* Only a single thread is allowed to process the list */
+ if (!process_list)
+ return 0;
+
+ if (list_empty(&ctx->pending_cb_list))
+ goto free_pending_cb_token;
+
+ /* move all list elements to a local list */
+ INIT_LIST_HEAD(&local_cb_list);
+ spin_lock(&ctx->pending_cb_lock);
+ list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
+ cb_node)
+ list_move_tail(&pending_cb->cb_node, &local_cb_list);
+ spin_unlock(&ctx->pending_cb_lock);
+
+ rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
+ if (rc)
+ goto add_list_elements;
+
+ hl_debugfs_add_cs(cs);
+
+ /* Iterate through pending cb list, create jobs and add to CS */
+ list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
+ cb = pending_cb->cb;
+ cb_size = pending_cb->cb_size;
+ hw_queue_id = pending_cb->hw_queue_id;
+
+ rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
+ hw_queue_id);
+ if (rc)
+ goto free_cs_object;
+ }
+
+ rc = hl_hw_queue_schedule_cs(cs);
+ if (rc) {
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to submit CS %d.%llu (%d)\n",
+ ctx->asid, cs->sequence, rc);
+ goto free_cs_object;
+ }
+
+ /* pending cb was scheduled successfully */
+ list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
+ list_del(&pending_cb->cb_node);
+ kfree(pending_cb);
+ }
+
+ cs_put(cs);
+
+ goto free_pending_cb_token;
+
+free_cs_object:
+ cs_rollback(hdev, cs);
+ cs_put(cs);
+add_list_elements:
+ spin_lock(&ctx->pending_cb_lock);
+ list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
+ cb_node)
+ list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
+ spin_unlock(&ctx->pending_cb_lock);
+free_pending_cb_token:
+ atomic_set(&ctx->thread_pending_cb_token, 1);
+
+ return rc;
+}
+
static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
u64 *cs_seq)
{
@@ -1003,7 +1370,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
rc = 0;
} else {
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
- cs_seq, false);
+ cs_seq, 0);
}
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1275,15 +1642,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
}
}
- /* increment refcnt for context */
- hl_ctx_get(hdev, ctx);
-
- rc = allocate_cs(hdev, ctx, cs_type, &cs);
+ rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
if (rc) {
if (cs_type == CS_TYPE_WAIT ||
cs_type == CS_TYPE_COLLECTIVE_WAIT)
hl_fence_put(sig_fence);
- hl_ctx_put(ctx);
goto free_cs_chunk_array;
}
@@ -1346,7 +1709,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
enum hl_cs_type cs_type;
u64 cs_seq = ULONG_MAX;
void __user *chunks;
- u32 num_chunks;
+ u32 num_chunks, flags;
int rc;
rc = hl_cs_sanity_checks(hpriv, args);
@@ -1357,10 +1720,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
if (rc)
goto out;
+ rc = hl_submit_pending_cb(hpriv);
+ if (rc)
+ goto out;
+
cs_type = hl_cs_get_cs_type(args->in.cs_flags &
~HL_CS_FLAGS_FORCE_RESTORE);
chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
num_chunks = args->in.num_chunks_execute;
+ flags = args->in.cs_flags;
+
+ /* In case this is a staged CS, user should supply the CS sequence */
+ if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+ !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+ cs_seq = args->in.seq;
switch (cs_type) {
case CS_TYPE_SIGNAL:
@@ -1371,7 +1744,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
break;
default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
- args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+ args->in.cs_flags);
break;
}
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index f65e6559149b..cda871afb8f4 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -12,9 +12,14 @@
static void hl_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- u64 idle_mask = 0;
+ u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
int i;
+ /* Release all allocated pending cb's, those cb's were never
+ * scheduled so it is safe to release them here
+ */
+ hl_pending_cb_list_flush(ctx);
+
/*
* If we arrived here, there are no jobs waiting for this context
* on its queues so we can safely remove it.
@@ -50,12 +55,15 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
- &idle_mask, NULL)))
+ idle_mask,
+ HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
dev_notice(hdev->dev,
- "device not idle after user context is closed (0x%llx)\n",
- idle_mask);
+ "device not idle after user context is closed (0x%llx, 0x%llx)\n",
+ idle_mask[0], idle_mask[1]);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
+ hdev->asic_funcs->ctx_fini(ctx);
+ hl_vm_ctx_fini(ctx);
hl_mmu_ctx_fini(ctx);
}
}
@@ -140,8 +148,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
kref_init(&ctx->refcount);
ctx->cs_sequence = 1;
+ INIT_LIST_HEAD(&ctx->pending_cb_list);
+ spin_lock_init(&ctx->pending_cb_lock);
spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_ctx_switch_token, 1);
+ atomic_set(&ctx->thread_pending_cb_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
sizeof(struct hl_fence *),
@@ -151,11 +162,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
- rc = hl_mmu_ctx_init(ctx);
+ rc = hl_vm_ctx_init(ctx);
if (rc) {
- dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+ dev_err(hdev->dev, "Failed to init mem ctx module\n");
+ rc = -ENOMEM;
goto err_free_cs_pending;
}
+
+ rc = hdev->asic_funcs->ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "ctx_init failed\n");
+ goto err_vm_ctx_fini;
+ }
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {
@@ -194,7 +212,8 @@ err_cb_va_pool_fini:
err_vm_ctx_fini:
hl_vm_ctx_fini(ctx);
err_asid_free:
- hl_asid_free(hdev, ctx->asid);
+ if (ctx->asid != HL_KERNEL_ASID_ID)
+ hl_asid_free(hdev, ctx->asid);
err_free_cs_pending:
kfree(ctx->cs_pending);
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index cef716643979..df847a6d19f4 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -310,8 +310,8 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
struct hl_ctx *ctx;
- struct hl_mmu_hop_info hops_info;
- u64 virt_addr = dev_entry->mmu_addr;
+ struct hl_mmu_hop_info hops_info = {0};
+ u64 virt_addr = dev_entry->mmu_addr, phys_addr;
int i;
if (!hdev->mmu_enable)
@@ -333,8 +333,19 @@ static int mmu_show(struct seq_file *s, void *data)
return 0;
}
- seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
- dev_entry->mmu_asid, dev_entry->mmu_addr);
+ phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+
+ if (hops_info.scrambled_vaddr &&
+ (dev_entry->mmu_addr != hops_info.scrambled_vaddr))
+ seq_printf(s,
+ "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
+ dev_entry->mmu_asid, dev_entry->mmu_addr,
+ hops_info.scrambled_vaddr,
+ hops_info.unscrambled_paddr, phys_addr);
+ else
+ seq_printf(s,
+ "asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
+ dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
for (i = 0 ; i < hops_info.used_hops ; i++) {
seq_printf(s, "hop%d_addr: 0x%llx\n",
@@ -403,7 +414,7 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
- hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+ hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
return 0;
}
@@ -865,6 +876,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
return count;
}
+static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+
+ hdev->asic_funcs->ack_protection_bits_errors(hdev);
+
+ return 0;
+}
+
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
@@ -922,6 +944,11 @@ static const struct file_operations hl_stop_on_err_fops = {
.write = hl_stop_on_err_write
};
+static const struct file_operations hl_security_violations_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_security_violations_read
+};
+
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
@@ -1071,6 +1098,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_stop_on_err_fops);
+ debugfs_create_file("dump_security_violations",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_security_violations_fops);
+
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
ent = debugfs_create_file(hl_debugfs_list[i].name,
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 69d04eca767f..15fcb5c31c4b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -142,6 +142,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
case HL_MMAP_TYPE_CB:
return hl_cb_mmap(hpriv, vma);
+
+ case HL_MMAP_TYPE_BLOCK:
+ return hl_hw_block_mmap(hpriv, vma);
}
return -EINVAL;
@@ -373,7 +376,6 @@ static int device_early_init(struct hl_device *hdev)
mutex_init(&hdev->send_cpu_message_lock);
mutex_init(&hdev->debug_lock);
- mutex_init(&hdev->mmu_cache_lock);
INIT_LIST_HEAD(&hdev->cs_mirror_list);
spin_lock_init(&hdev->cs_mirror_lock);
INIT_LIST_HEAD(&hdev->fpriv_list);
@@ -414,7 +416,6 @@ static void device_early_fini(struct hl_device *hdev)
{
int i;
- mutex_destroy(&hdev->mmu_cache_lock);
mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);
@@ -1158,12 +1159,20 @@ kill_processes:
atomic_set(&hdev->in_reset, 0);
hdev->needs_reset = false;
- if (hard_reset)
+ dev_notice(hdev->dev, "Successfully finished resetting the device\n");
+
+ if (hard_reset) {
hdev->hard_reset_cnt++;
- else
- hdev->soft_reset_cnt++;
- dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+ /* After reset is done, we are ready to receive events from
+ * the F/W. We can't do it before because we will ignore events
+ * and if those events are fatal, we won't know about it and
+ * the device will be operational although it shouldn't be
+ */
+ hdev->asic_funcs->enable_events_from_fw(hdev);
+ } else {
+ hdev->soft_reset_cnt++;
+ }
return 0;
@@ -1314,11 +1323,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->compute_ctx = NULL;
+ hl_debugfs_add_device(hdev);
+
+ /* debugfs nodes are created in hl_ctx_init so it must be called after
+ * hl_debugfs_add_device.
+ */
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel context\n");
kfree(hdev->kernel_ctx);
- goto mmu_fini;
+ goto remove_device_from_debugfs;
}
rc = hl_cb_pool_init(hdev);
@@ -1327,8 +1341,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto release_ctx;
}
- hl_debugfs_add_device(hdev);
-
/*
* From this point, in case of an error, add char devices and create
* sysfs nodes as part of the error flow, to allow debugging.
@@ -1411,12 +1423,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->init_done = true;
+ /* After initialization is done, we are ready to receive events from
+ * the F/W. We can't do it before because we will ignore events and if
+ * those events are fatal, we won't know about it and the device will
+ * be operational although it shouldn't be
+ */
+ hdev->asic_funcs->enable_events_from_fw(hdev);
+
return 0;
release_ctx:
if (hl_ctx_put(hdev->kernel_ctx) != 1)
dev_err(hdev->dev,
"kernel ctx is still alive on initialization failure\n");
+remove_device_from_debugfs:
+ hl_debugfs_remove_device(hdev);
mmu_fini:
hl_mmu_fini(hdev);
eq_fini:
@@ -1482,7 +1503,8 @@ void hl_device_fini(struct hl_device *hdev)
usleep_range(50, 200);
rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
if (ktime_compare(ktime_get(), timeout) > 0) {
- WARN(1, "Failed to remove device because reset function did not finish\n");
+ dev_crit(hdev->dev,
+ "Failed to remove device because reset function did not finish\n");
return;
}
}
@@ -1515,8 +1537,6 @@ void hl_device_fini(struct hl_device *hdev)
device_late_fini(hdev);
- hl_debugfs_remove_device(hdev);
-
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
@@ -1548,6 +1568,8 @@ void hl_device_fini(struct hl_device *hdev)
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
dev_err(hdev->dev, "kernel ctx is still alive\n");
+ hl_debugfs_remove_device(hdev);
+
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c9a12980218a..09706c571e95 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u16 len, u32 timeout, u64 *result)
{
+ struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
- u32 tmp;
+ u32 tmp, expected_ack_val;
int rc = 0;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@@ -115,14 +116,23 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}
+ /* set fence to a non valid value */
+ pkt->fence = UINT_MAX;
+
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
if (rc) {
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
goto out;
}
+ if (hdev->asic_prop.fw_app_security_map &
+ CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
+ expected_ack_val = queue->pi;
+ else
+ expected_ack_val = CPUCP_PACKET_FENCE_VAL;
+
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
- (tmp == CPUCP_PACKET_FENCE_VAL), 1000,
+ (tmp == expected_ack_val), 1000,
timeout, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
@@ -279,8 +289,74 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
return rc;
}
+static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
+ u32 cpu_security_boot_status_reg)
+{
+ u32 err_val, security_val;
+
+ /* Some of the firmware status codes are deprecated in newer f/w
+ * versions. In those versions, the errors are reported
+ * in different registers. Therefore, we need to check those
+ * registers and print the exact errors. Moreover, there
+ * may be multiple errors, so we need to report on each error
+ * separately. Some of the error codes might indicate a state
+ * that is not an error per-se, but it is an error in production
+ * environment
+ */
+ err_val = RREG32(boot_err0_reg);
+ if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+ return 0;
+
+ if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - DRAM initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+ if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - Thermal Sensor initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ dev_warn(hdev->dev,
+ "Device boot warning - Skipped DRAM initialization\n");
+
+ if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
+ if (hdev->bmc_enable)
+ dev_warn(hdev->dev,
+ "Device boot error - Skipped waiting for BMC\n");
+ else
+ err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ dev_err(hdev->dev,
+ "Device boot error - Serdes data from BMC not available\n");
+ if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - NIC F/W initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+ dev_warn(hdev->dev,
+ "Device boot warning - security not ready\n");
+ if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+ dev_err(hdev->dev, "Device boot error - security failure\n");
+ if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+ dev_err(hdev->dev, "Device boot error - eFuse failure\n");
+ if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
+ dev_err(hdev->dev, "Device boot error - PLL failure\n");
+
+ security_val = RREG32(cpu_security_boot_status_reg);
+ if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
+ dev_dbg(hdev->dev, "Device security status %#x\n",
+ security_val);
+
+ if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+ return -EIO;
+
+ return 0;
+}
+
int hl_fw_cpucp_info_get(struct hl_device *hdev,
- u32 cpu_security_boot_status_reg)
+ u32 cpu_security_boot_status_reg,
+ u32 boot_err0_reg)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct cpucp_packet pkt = {};
@@ -314,6 +390,12 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
goto out;
}
+ rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+ if (rc) {
+ dev_err(hdev->dev, "Errors in device boot\n");
+ goto out;
+ }
+
memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
sizeof(prop->cpucp_info));
@@ -483,58 +565,6 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
return rc;
}
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
- u32 cpu_security_boot_status_reg)
-{
- u32 err_val, security_val;
-
- /* Some of the firmware status codes are deprecated in newer f/w
- * versions. In those versions, the errors are reported
- * in different registers. Therefore, we need to check those
- * registers and print the exact errors. Moreover, there
- * may be multiple errors, so we need to report on each error
- * separately. Some of the error codes might indicate a state
- * that is not an error per-se, but it is an error in production
- * environment
- */
- err_val = RREG32(boot_err0_reg);
- if (!(err_val & CPU_BOOT_ERR0_ENABLED))
- return;
-
- if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
- dev_err(hdev->dev,
- "Device boot error - DRAM initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
- dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
- if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
- dev_err(hdev->dev,
- "Device boot error - Thermal Sensor initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
- dev_warn(hdev->dev,
- "Device boot warning - Skipped DRAM initialization\n");
- if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
- dev_warn(hdev->dev,
- "Device boot error - Skipped waiting for BMC\n");
- if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
- dev_err(hdev->dev,
- "Device boot error - Serdes data from BMC not available\n");
- if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
- dev_err(hdev->dev,
- "Device boot error - NIC F/W initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
- dev_warn(hdev->dev,
- "Device boot warning - security not ready\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
- dev_err(hdev->dev, "Device boot error - security failure\n");
- if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
- dev_err(hdev->dev, "Device boot error - eFuse failure\n");
-
- security_val = RREG32(cpu_security_boot_status_reg);
- if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
- dev_dbg(hdev->dev, "Device security status %#x\n",
- security_val);
-}
-
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{
/* Some of the status codes below are deprecated in newer f/w
@@ -659,6 +689,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
prop->fw_security_disabled = true;
}
+ dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
+ security_status);
+
dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
@@ -753,6 +786,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (prop->fw_boot_cpu_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
+
+ dev_dbg(hdev->dev,
+ "Firmware boot CPU security status %#x\n",
+ prop->fw_boot_cpu_security_map);
}
dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
@@ -826,6 +863,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
+ rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+ if (rc)
+ return rc;
+
/* Clear reset status since we need to read again from app */
prop->hard_reset_done_by_fw = false;
@@ -837,6 +878,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (prop->fw_app_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
+
+ dev_dbg(hdev->dev,
+ "Firmware application CPU security status %#x\n",
+ prop->fw_app_security_map);
}
dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
@@ -844,6 +889,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
dev_info(hdev->dev, "Successfully loaded firmware to device\n");
+ return 0;
+
out:
fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 41af347157e0..d933878b24d1 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -28,17 +28,18 @@
#define HL_NAME "habanalabs"
/* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:62] - Encode mmap type
+ * bits[63:61] - Encode mmap type
* bits[45:0] - mmap offset value
*
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
* defines are w.r.t to PAGE_SIZE
*/
-#define HL_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK (0x3ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)
-#define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFull >> PAGE_SHIFT)
+#define HL_MMAP_OFFSET_VALUE_MASK (0x1FFFFFFFFFFFull >> PAGE_SHIFT)
#define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK)
#define HL_PENDING_RESET_PER_SEC 10
@@ -408,6 +409,9 @@ struct hl_mmu_properties {
* @sync_stream_first_mon: first monitor available for sync stream use
* @first_available_user_sob: first sob available for the user
* @first_available_user_mon: first monitor available for the user
+ * @first_available_user_msix_interrupt: first available msix interrupt
+ * reserved for the user
+ * @first_available_cq: first available CQ for the user.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
@@ -416,6 +420,7 @@ struct hl_mmu_properties {
* from BOOT_DEV_STS0
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
+ * @num_functional_hbms: number of functional HBMs in each DCORE.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -468,18 +473,22 @@ struct asic_fixed_properties {
u16 sync_stream_first_mon;
u16 first_available_user_sob[HL_MAX_DCORES];
u16 first_available_user_mon[HL_MAX_DCORES];
+ u16 first_available_user_msix_interrupt;
+ u16 first_available_cq[HL_MAX_DCORES];
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 fw_security_disabled;
u8 fw_security_status_valid;
u8 dram_supports_virtual_memory;
u8 hard_reset_done_by_fw;
+ u8 num_functional_hbms;
};
/**
* struct hl_fence - software synchronization primitive
* @completion: fence is implemented using completion
* @refcount: refcount for this fence
+ * @cs_sequence: sequence of the corresponding command submission
* @error: mark this fence with error
* @timestamp: timestamp upon completion
*
@@ -487,6 +496,7 @@ struct asic_fixed_properties {
struct hl_fence {
struct completion completion;
struct kref refcount;
+ u64 cs_sequence;
int error;
ktime_t timestamp;
};
@@ -846,6 +856,19 @@ enum div_select_defs {
* @collective_wait_init_cs: Generate collective master/slave packets
* and place them in the relevant cs jobs
* @collective_wait_create_jobs: allocate collective wait cs jobs
+ * @scramble_addr: Routine to scramble the address prior of mapping it
+ * in the MMU.
+ * @descramble_addr: Routine to de-scramble the address prior of
+ * showing it to users.
+ * @ack_protection_bits_errors: ack and dump all security violations
+ * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
+ * also returns the size of the block if caller supplies
+ * a valid pointer for it
+ * @hw_block_mmap: mmap a HW block with a given id.
+ * @enable_events_from_fw: send interrupt to firmware to notify them the
+ * driver is ready to receive asynchronous events. This
+ * function should be called during the first init and
+ * after every hard-reset of the device
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -918,8 +941,8 @@ struct hl_asic_funcs {
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, void *data);
- bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
- struct seq_file *s);
+ bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
+ u8 mask_len, struct seq_file *s);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -955,6 +978,14 @@ struct hl_asic_funcs {
int (*collective_wait_create_jobs)(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id);
+ u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
+ u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
+ void (*ack_protection_bits_errors)(struct hl_device *hdev);
+ int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
+ u32 *block_size, u32 *block_id);
+ int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+ u32 block_id, u32 block_size);
+ void (*enable_events_from_fw)(struct hl_device *hdev);
};
@@ -1012,6 +1043,20 @@ struct hl_cs_counters_atomic {
};
/**
+ * struct hl_pending_cb - pending command buffer structure
+ * @cb_node: cb node in pending cb list
+ * @cb: command buffer to send in next submission
+ * @cb_size: command buffer size
+ * @hw_queue_id: destination queue id
+ */
+struct hl_pending_cb {
+ struct list_head cb_node;
+ struct hl_cb *cb;
+ u32 cb_size;
+ u32 hw_queue_id;
+};
+
+/**
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
* descriptor (hl_vm_phys_pg_list or hl_userptr).
@@ -1026,6 +1071,8 @@ struct hl_cs_counters_atomic {
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
* MMU hash or walking the PGT requires talking this lock.
* @debugfs_list: node in debugfs list of contexts.
+ * pending_cb_list: list of pending command buffers waiting to be sent upon
+ * next user command submission context.
* @cs_counters: context command submission counters.
* @cb_va_pool: device VA pool for command buffers which are mapped to the
* device's MMU.
@@ -1034,11 +1081,17 @@ struct hl_cs_counters_atomic {
* index to cs_pending array.
* @dram_default_hops: array that holds all hops addresses needed for default
* DRAM mapping.
+ * @pending_cb_lock: spinlock to protect pending cb list
* @cs_lock: spinlock to protect cs_sequence.
* @dram_phys_mem: amount of used physical DRAM memory by this context.
* @thread_ctx_switch_token: token to prevent multiple threads of the same
* context from running the context switch phase.
* Only a single thread should run it.
+ * @thread_pending_cb_token: token to prevent multiple threads from processing
+ * the pending CB list. Only a single thread should
+ * process the list since it is protected by a
+ * spinlock and we don't want to halt the entire
+ * command submission sequence.
* @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
* the context switch phase from moving to their
* execution phase before the context switch phase
@@ -1057,13 +1110,16 @@ struct hl_ctx {
struct mutex mem_hash_lock;
struct mutex mmu_lock;
struct list_head debugfs_list;
+ struct list_head pending_cb_list;
struct hl_cs_counters_atomic cs_counters;
struct gen_pool *cb_va_pool;
u64 cs_sequence;
u64 *dram_default_hops;
+ spinlock_t pending_cb_lock;
spinlock_t cs_lock;
atomic64_t dram_phys_mem;
atomic_t thread_ctx_switch_token;
+ atomic_t thread_pending_cb_token;
u32 thread_ctx_switch_wait_token;
u32 asid;
u32 handle;
@@ -1124,8 +1180,11 @@ struct hl_userptr {
* @finish_work: workqueue object to run when CS is completed by H/W.
* @work_tdr: delayed work node for TDR.
* @mirror_node : node in device mirror list of command submissions.
+ * @staged_cs_node: node in the staged cs list.
* @debugfs_list: node in debugfs list of command submissions.
* @sequence: the sequence number of this CS.
+ * @staged_sequence: the sequence of the staged submission this CS is part of,
+ * relevant only if staged_cs is set.
* @type: CS_TYPE_*.
* @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device.
@@ -1133,7 +1192,11 @@ struct hl_userptr {
* @tdr_active: true if TDR was activated for this CS (to prevent
* double TDR activation).
* @aborted: true if CS was aborted due to some device error.
- * @timestamp: true if a timestmap must be captured upon completion
+ * @timestamp: true if a timestmap must be captured upon completion.
+ * @staged_last: true if this is the last staged CS and needs completion.
+ * @staged_first: true if this is the first staged CS and we need to receive
+ * timeout for this CS.
+ * @staged_cs: true if this CS is part of a staged submission.
*/
struct hl_cs {
u16 *jobs_in_queue_cnt;
@@ -1146,8 +1209,10 @@ struct hl_cs {
struct work_struct finish_work;
struct delayed_work work_tdr;
struct list_head mirror_node;
+ struct list_head staged_cs_node;
struct list_head debugfs_list;
u64 sequence;
+ u64 staged_sequence;
enum hl_cs_type type;
u8 submitted;
u8 completed;
@@ -1155,6 +1220,9 @@ struct hl_cs {
u8 tdr_active;
u8 aborted;
u8 timestamp;
+ u8 staged_last;
+ u8 staged_first;
+ u8 staged_cs;
};
/**
@@ -1225,6 +1293,7 @@ struct hl_cs_job {
* MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
* have streams so the engine can't be busy by another
* stream.
+ * @completion: true if we need completion for this CS.
*/
struct hl_cs_parser {
struct hl_cb *user_cb;
@@ -1239,6 +1308,7 @@ struct hl_cs_parser {
u8 job_id;
u8 is_kernel_allocated_cb;
u8 contains_dma_pkt;
+ u8 completion;
};
/*
@@ -1688,12 +1758,20 @@ struct hl_mmu_per_hop_info {
* struct hl_mmu_hop_info - A structure describing the TLB hops and their
* hop-entries that were created in order to translate a virtual address to a
* physical one.
+ * @scrambled_vaddr: The value of the virtual address after scrambling. This
+ * address replaces the original virtual-address when mapped
+ * in the MMU tables.
+ * @unscrambled_paddr: The un-scrambled physical address.
* @hop_info: Array holding the per-hop information used for the translation.
* @used_hops: The number of hops used for the translation.
+ * @range_type: virtual address range type.
*/
struct hl_mmu_hop_info {
+ u64 scrambled_vaddr;
+ u64 unscrambled_paddr;
struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
u32 used_hops;
+ enum hl_va_range_type range_type;
};
/**
@@ -1766,7 +1844,6 @@ struct hl_mmu_funcs {
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
* @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
* @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
@@ -1844,6 +1921,7 @@ struct hl_mmu_funcs {
* user processes
* @device_fini_pending: true if device_fini was called and might be
* waiting for the reset thread to finish
+ * @supports_staged_submission: true if staged submissions are supported
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1881,7 +1959,6 @@ struct hl_device {
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
struct hl_vm vm;
- struct mutex mmu_cache_lock;
struct device *hwmon_dev;
enum hl_pm_mng_profile pm_mng_profile;
struct hwmon_chip_info *hl_chip_info;
@@ -1950,6 +2027,7 @@ struct hl_device {
u8 needs_reset;
u8 process_kill_trial_cnt;
u8 device_fini_pending;
+ u8 supports_staged_submission;
/* Parameters for bring-up */
u64 nic_ports_mask;
@@ -2067,7 +2145,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
int hl_hw_queue_schedule_cs(struct hl_cs *cs);
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
-void hl_int_hw_queue_update_ci(struct hl_cs *cs);
+void hl_hw_queue_update_ci(struct hl_cs *cs);
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
@@ -2123,6 +2201,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
bool map_cb, u64 *handle);
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
u32 handle);
void hl_cb_put(struct hl_cb *cb);
@@ -2136,6 +2215,7 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
void hl_cs_rollback_all(struct hl_device *hdev);
+void hl_pending_cb_list_flush(struct hl_ctx *ctx);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
@@ -2143,6 +2223,10 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
void hl_fence_put(struct hl_fence *fence);
void hl_fence_get(struct hl_fence *fence);
void cs_get(struct hl_cs *cs);
+bool cs_needs_completion(struct hl_cs *cs);
+bool cs_needs_timeout(struct hl_cs *cs);
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -2184,6 +2268,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops);
+u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
+u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
@@ -2201,7 +2287,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_cpucp_info_get(struct hl_device *hdev,
- u32 cpu_security_boot_status_reg);
+ u32 cpu_security_boot_status_reg,
+ u32 boot_err0_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index d25892d61ec9..03af61cecd37 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -57,12 +57,23 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
hw_ip.sram_base_address = prop->sram_user_base_address;
- hw_ip.dram_base_address = prop->dram_user_base_address;
+ hw_ip.dram_base_address =
+ hdev->mmu_enable && prop->dram_supports_virtual_memory ?
+ prop->dmmu.start_addr : prop->dram_user_base_address;
hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
hw_ip.sram_size = prop->sram_size - sram_kmd_size;
- hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+
+ if (hdev->mmu_enable)
+ hw_ip.dram_size =
+ DIV_ROUND_DOWN_ULL(prop->dram_size - dram_kmd_size,
+ prop->dram_page_size) *
+ prop->dram_page_size;
+ else
+ hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+
if (hw_ip.dram_size > PAGE_SIZE)
hw_ip.dram_enabled = 1;
+ hw_ip.dram_page_size = prop->dram_page_size;
hw_ip.num_of_events = prop->num_of_events;
memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
@@ -79,6 +90,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
+ hw_ip.first_available_interrupt_id =
+ prop->first_available_user_msix_interrupt;
return copy_to_user(out, &hw_ip,
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
}
@@ -132,9 +145,10 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
return -EINVAL;
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
- &hw_idle.busy_engines_mask_ext, NULL);
+ hw_idle.busy_engines_mask_ext,
+ HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
hw_idle.busy_engines_mask =
- lower_32_bits(hw_idle.busy_engines_mask_ext);
+ lower_32_bits(hw_idle.busy_engines_mask_ext[0]);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -383,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
prop->first_available_user_sob[args->dcore_id];
sm_info.first_available_monitor =
prop->first_available_user_mon[args->dcore_id];
-
+ sm_info.first_available_cq =
+ prop->first_available_cq[args->dcore_id];
return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
sizeof(sm_info))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 76217258780a..0f335182267f 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -38,7 +38,7 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
return (abs(delta) - queue_len);
}
-void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+void hl_hw_queue_update_ci(struct hl_cs *cs)
{
struct hl_device *hdev = cs->ctx->hdev;
struct hl_hw_queue *q;
@@ -53,8 +53,13 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
return;
+ /* We must increment CI for every queue that will never get a
+ * completion, there are 2 scenarios this can happen:
+ * 1. All queues of a non completion CS will never get a completion.
+ * 2. Internal queues never gets completion.
+ */
for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
- if (q->queue_type == QUEUE_TYPE_INT)
+ if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
}
}
@@ -292,6 +297,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
len = job->job_cb_size;
ptr = cb->bus_address;
+ /* Skip completion flow in case this is a non completion CS */
+ if (!cs_needs_completion(job->cs))
+ goto submit_bd;
+
cq_pkt.data = cpu_to_le32(
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
& CQ_ENTRY_SHADOW_INDEX_MASK) |
@@ -318,6 +327,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
cq->pi = hl_cq_inc_ptr(cq->pi);
+submit_bd:
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
@@ -525,6 +535,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
struct hl_cs_job *job, *tmp;
struct hl_hw_queue *q;
int rc = 0, i, cq_cnt;
+ bool first_entry;
u32 max_queues;
cntr = &hdev->aggregated_cs_counters;
@@ -548,7 +559,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
switch (q->queue_type) {
case QUEUE_TYPE_EXT:
rc = ext_queue_sanity_checks(hdev, q,
- cs->jobs_in_queue_cnt[i], true);
+ cs->jobs_in_queue_cnt[i],
+ cs_needs_completion(cs) ?
+ true : false);
break;
case QUEUE_TYPE_INT:
rc = int_queue_sanity_checks(hdev, q,
@@ -583,12 +596,38 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
hdev->asic_funcs->collective_wait_init_cs(cs);
spin_lock(&hdev->cs_mirror_lock);
+
+ /* Verify staged CS exists and add to the staged list */
+ if (cs->staged_cs && !cs->staged_first) {
+ struct hl_cs *staged_cs;
+
+ staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+ if (!staged_cs) {
+ dev_err(hdev->dev,
+ "Cannot find staged submission sequence %llu",
+ cs->staged_sequence);
+ rc = -EINVAL;
+ goto unlock_cs_mirror;
+ }
+
+ if (is_staged_cs_last_exists(hdev, staged_cs)) {
+ dev_err(hdev->dev,
+ "Staged submission sequence %llu already submitted",
+ cs->staged_sequence);
+ rc = -EINVAL;
+ goto unlock_cs_mirror;
+ }
+
+ list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+ }
+
list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
/* Queue TDR if the CS is the first entry and if timeout is wanted */
+ first_entry = list_first_entry(&hdev->cs_mirror_list,
+ struct hl_cs, mirror_node) == cs;
if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
- (list_first_entry(&hdev->cs_mirror_list,
- struct hl_cs, mirror_node) == cs)) {
+ first_entry && cs_needs_timeout(cs)) {
cs->tdr_active = true;
schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
@@ -623,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
goto out;
+unlock_cs_mirror:
+ spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
q = &hdev->kernel_queues[0];
for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 245c0159159f..1f5910517b0e 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -14,6 +14,9 @@
#define HL_MMU_DEBUG 0
+/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
+#define DRAM_POOL_PAGE_SIZE SZ_8M
+
/*
* The va ranges in context object contain a list with the available chunks of
* device virtual memory.
@@ -38,15 +41,14 @@
*/
/*
- * alloc_device_memory - allocate device memory
- *
- * @ctx : current context
- * @args : host parameters containing the requested size
- * @ret_handle : result handle
+ * alloc_device_memory() - allocate device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the requested size.
+ * @ret_handle: result handle.
*
* This function does the following:
- * - Allocate the requested size rounded up to 'dram_page_size' pages
- * - Return unique handle
+ * - Allocate the requested size rounded up to 'dram_page_size' pages.
+ * - Return unique handle for later map/unmap/free.
*/
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
u32 *ret_handle)
@@ -55,15 +57,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
u64 paddr = 0, total_size, num_pgs, i;
- u32 num_curr_pgs, page_size, page_shift;
+ u32 num_curr_pgs, page_size;
int handle, rc;
bool contiguous;
num_curr_pgs = 0;
page_size = hdev->asic_prop.dram_page_size;
- page_shift = __ffs(page_size);
- num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
- total_size = num_pgs << page_shift;
+ num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
+ total_size = num_pgs * page_size;
if (!total_size) {
dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
@@ -182,17 +183,17 @@ pages_pack_err:
return rc;
}
-/*
- * dma_map_host_va - DMA mapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @p_userptr: pointer to result userptr structure
+/**
+ * dma_map_host_va() - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @p_userptr: pointer to result userptr structure.
*
* This function does the following:
- * - Allocate userptr structure
- * - Pin the given host memory using the userptr structure
- * - Perform DMA mapping to have the DMA addresses of the pages
+ * - Allocate userptr structure.
+ * - Pin the given host memory using the userptr structure.
+ * - Perform DMA mapping to have the DMA addresses of the pages.
*/
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr **p_userptr)
@@ -236,14 +237,14 @@ userptr_err:
return rc;
}
-/*
- * dma_unmap_host_va - DMA unmapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @userptr: userptr to free
+/**
+ * dma_unmap_host_va() - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @userptr: userptr to free.
*
* This function does the following:
- * - Unpins the physical pages
- * - Frees the userptr structure
+ * - Unpins the physical pages.
+ * - Frees the userptr structure.
*/
static void dma_unmap_host_va(struct hl_device *hdev,
struct hl_userptr *userptr)
@@ -252,14 +253,13 @@ static void dma_unmap_host_va(struct hl_device *hdev,
kfree(userptr);
}
-/*
- * dram_pg_pool_do_release - free DRAM pages pool
- *
- * @ref : pointer to reference object
+/**
+ * dram_pg_pool_do_release() - free DRAM pages pool
+ * @ref: pointer to reference object.
*
* This function does the following:
- * - Frees the idr structure of physical pages handles
- * - Frees the generic pool of DRAM physical pages
+ * - Frees the idr structure of physical pages handles.
+ * - Frees the generic pool of DRAM physical pages.
*/
static void dram_pg_pool_do_release(struct kref *ref)
{
@@ -274,15 +274,15 @@ static void dram_pg_pool_do_release(struct kref *ref)
gen_pool_destroy(vm->dram_pg_pool);
}
-/*
- * free_phys_pg_pack - free physical page pack
- * @hdev: habanalabs device structure
- * @phys_pg_pack: physical page pack to free
+/**
+ * free_phys_pg_pack() - free physical page pack.
+ * @hdev: habanalabs device structure.
+ * @phys_pg_pack: physical page pack to free.
*
* This function does the following:
* - For DRAM memory only, iterate over the pack and free each physical block
- * structure by returning it to the general pool
- * - Free the hl_vm_phys_pg_pack structure
+ * structure by returning it to the general pool.
+ * - Free the hl_vm_phys_pg_pack structure.
*/
static void free_phys_pg_pack(struct hl_device *hdev,
struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -313,20 +313,20 @@ static void free_phys_pg_pack(struct hl_device *hdev,
kfree(phys_pg_pack);
}
-/*
- * free_device_memory - free device memory
- *
- * @ctx : current context
- * @handle : handle of the memory chunk to free
+/**
+ * free_device_memory() - free device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the requested size.
*
* This function does the following:
- * - Free the device memory related to the given handle
+ * - Free the device memory related to the given handle.
*/
-static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
+ u32 handle = args->free.handle;
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
@@ -361,16 +361,15 @@ static int free_device_memory(struct hl_ctx *ctx, u32 handle)
return 0;
}
-/*
- * clear_va_list_locked - free virtual addresses list
- *
- * @hdev : habanalabs device structure
- * @va_list : list of virtual addresses to free
+/**
+ * clear_va_list_locked() - free virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to free.
*
* This function does the following:
- * - Iterate over the list and free each virtual addresses block
+ * - Iterate over the list and free each virtual addresses block.
*
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
*/
static void clear_va_list_locked(struct hl_device *hdev,
struct list_head *va_list)
@@ -383,16 +382,15 @@ static void clear_va_list_locked(struct hl_device *hdev,
}
}
-/*
- * print_va_list_locked - print virtual addresses list
- *
- * @hdev : habanalabs device structure
- * @va_list : list of virtual addresses to print
+/**
+ * print_va_list_locked() - print virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to print.
*
* This function does the following:
- * - Iterate over the list and print each virtual addresses block
+ * - Iterate over the list and print each virtual addresses block.
*
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
*/
static void print_va_list_locked(struct hl_device *hdev,
struct list_head *va_list)
@@ -409,18 +407,17 @@ static void print_va_list_locked(struct hl_device *hdev,
#endif
}
-/*
- * merge_va_blocks_locked - merge a virtual block if possible
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @va_block : virtual block to merge with adjacent blocks
+/**
+ * merge_va_blocks_locked() - merge a virtual block if possible.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @va_block: virtual block to merge with adjacent blocks.
*
* This function does the following:
* - Merge the given blocks with the adjacent blocks if their virtual ranges
- * create a contiguous virtual range
+ * create a contiguous virtual range.
*
- * This Function should be called only when va_list lock is taken
+ * This Function should be called only when va_list lock is taken.
*/
static void merge_va_blocks_locked(struct hl_device *hdev,
struct list_head *va_list, struct hl_vm_va_block *va_block)
@@ -445,19 +442,18 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
}
}
-/*
- * add_va_block_locked - add a virtual block to the virtual addresses list
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @start : start virtual address
- * @end : end virtual address
+/**
+ * add_va_block_locked() - add a virtual block to the virtual addresses list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
*
* This function does the following:
- * - Add the given block to the virtual blocks list and merge with other
- * blocks if a contiguous virtual block can be created
+ * - Add the given block to the virtual blocks list and merge with other blocks
+ * if a contiguous virtual block can be created.
*
- * This Function should be called only when va_list lock is taken
+ * This Function should be called only when va_list lock is taken.
*/
static int add_va_block_locked(struct hl_device *hdev,
struct list_head *va_list, u64 start, u64 end)
@@ -501,16 +497,15 @@ static int add_va_block_locked(struct hl_device *hdev,
return 0;
}
-/*
- * add_va_block - wrapper for add_va_block_locked
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @start : start virtual address
- * @end : end virtual address
+/**
+ * add_va_block() - wrapper for add_va_block_locked.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
*
* This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
*/
static inline int add_va_block(struct hl_device *hdev,
struct hl_va_range *va_range, u64 start, u64 end)
@@ -524,8 +519,9 @@ static inline int add_va_block(struct hl_device *hdev,
return rc;
}
-/*
+/**
* get_va_block() - get a virtual block for the given size and alignment.
+ *
* @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range.
* @size: requested block size.
@@ -534,33 +530,51 @@ static inline int add_va_block(struct hl_device *hdev,
*
* This function does the following:
* - Iterate on the virtual block list to find a suitable virtual block for the
- * given size and alignment.
+ * given size, hint address and alignment.
* - Reserve the requested block and update the list.
* - Return the start address of the virtual block.
*/
-static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
- u64 size, u64 hint_addr, u32 va_block_align)
+static u64 get_va_block(struct hl_device *hdev,
+ struct hl_va_range *va_range,
+ u64 size, u64 hint_addr, u32 va_block_align)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
- u64 valid_start, valid_size, prev_start, prev_end, align_mask,
- res_valid_start = 0, res_valid_size = 0;
+ u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
+ align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
bool add_prev = false;
+ bool is_align_pow_2 = is_power_of_2(va_range->page_size);
+
+ if (is_align_pow_2)
+ align_mask = ~((u64)va_block_align - 1);
+ else
+ /*
+ * with non-power-of-2 range we work only with page granularity
+ * and the start address is page aligned,
+ * so no need for alignment checking.
+ */
+ size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
+ va_range->page_size;
- align_mask = ~((u64)va_block_align - 1);
+ tmp_hint_addr = hint_addr;
- /* check if hint_addr is aligned */
- if (hint_addr & (va_block_align - 1))
+ /* Check if we need to ignore hint address */
+ if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
+ (!is_align_pow_2 &&
+ do_div(tmp_hint_addr, va_range->page_size))) {
+ dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n",
+ hint_addr);
hint_addr = 0;
+ }
mutex_lock(&va_range->lock);
print_va_list_locked(hdev, &va_range->list);
list_for_each_entry(va_block, &va_range->list, node) {
- /* calc the first possible aligned addr */
+ /* Calc the first possible aligned addr */
valid_start = va_block->start;
- if (valid_start & (va_block_align - 1)) {
+ if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
valid_start &= align_mask;
valid_start += va_block_align;
if (valid_start > va_block->end)
@@ -568,35 +582,41 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
}
valid_size = va_block->end - valid_start;
+ if (valid_size < size)
+ continue;
- if (valid_size >= size &&
- (!new_va_block || valid_size < res_valid_size)) {
+ /* Pick the minimal length block which has the required size */
+ if (!new_va_block || (valid_size < reserved_valid_size)) {
new_va_block = va_block;
- res_valid_start = valid_start;
- res_valid_size = valid_size;
+ reserved_valid_start = valid_start;
+ reserved_valid_size = valid_size;
}
if (hint_addr && hint_addr >= valid_start &&
- ((hint_addr + size) <= va_block->end)) {
+ (hint_addr + size) <= va_block->end) {
new_va_block = va_block;
- res_valid_start = hint_addr;
- res_valid_size = valid_size;
+ reserved_valid_start = hint_addr;
+ reserved_valid_size = valid_size;
break;
}
}
if (!new_va_block) {
dev_err(hdev->dev, "no available va block for size %llu\n",
- size);
+ size);
goto out;
}
- if (res_valid_start > new_va_block->start) {
+ /*
+ * Check if there is some leftover range due to reserving the new
+ * va block, then return it to the main virtual addresses list.
+ */
+ if (reserved_valid_start > new_va_block->start) {
prev_start = new_va_block->start;
- prev_end = res_valid_start - 1;
+ prev_end = reserved_valid_start - 1;
- new_va_block->start = res_valid_start;
- new_va_block->size = res_valid_size;
+ new_va_block->start = reserved_valid_start;
+ new_va_block->size = reserved_valid_size;
add_prev = true;
}
@@ -617,7 +637,7 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
out:
mutex_unlock(&va_range->lock);
- return res_valid_start;
+ return reserved_valid_start;
}
/*
@@ -644,9 +664,9 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
/**
* hl_get_va_range_type() - get va_range type for the given address and size.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @type: returned va_range type
+ * @address: the start address of the area we want to validate.
+ * @size: the size in bytes of the area we want to validate.
+ * @type: returned va_range type.
*
* Return: true if the area is inside a valid range, false otherwise.
*/
@@ -667,16 +687,15 @@ static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
return -EINVAL;
}
-/*
- * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block
- *
+/**
+ * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
* @hdev: pointer to the habanalabs device structure
- * @ctx: current context
- * @start: start virtual address
- * @end: end virtual address
+ * @ctx: pointer to the context structure.
+ * @start: start virtual address.
+ * @end: end virtual address.
*
* This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
*/
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
u64 start_addr, u64 size)
@@ -701,11 +720,10 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
-/*
- * get_sg_info - get number of pages and the DMA address from SG list
- *
- * @sg : the SG list
- * @dma_addr : pointer to DMA address to return
+/**
+ * get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
*
* Calculate the number of consecutive pages described by the SG list. Take the
* offset of the address in the first page, add to it the length and round it up
@@ -719,17 +737,17 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
(PAGE_SIZE - 1)) >> PAGE_SHIFT;
}
-/*
- * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- * memory
- * @ctx: current context
- * @userptr: userptr to initialize from
- * @pphys_pg_pack: result pointer
+/**
+ * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
+ * memory
+ * @ctx: pointer to the context structure.
+ * @userptr: userptr to initialize from.
+ * @pphys_pg_pack: result pointer.
*
* This function does the following:
- * - Pin the physical pages related to the given virtual block
+ * - Pin the physical pages related to the given virtual block.
* - Create a physical page pack from the physical pages related to the given
- * virtual block
+ * virtual block.
*/
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_userptr *userptr,
@@ -821,16 +839,16 @@ page_pack_arr_mem_err:
return rc;
}
-/*
- * map_phys_pg_pack - maps the physical page pack.
- * @ctx: current context
- * @vaddr: start address of the virtual area to map from
- * @phys_pg_pack: the pack of physical pages to map to
+/**
+ * map_phys_pg_pack() - maps the physical page pack..
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to map from.
+ * @phys_pg_pack: the pack of physical pages to map to.
*
* This function does the following:
- * - Maps each chunk of virtual memory to matching physical chunk
- * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise
+ * - Maps each chunk of virtual memory to matching physical chunk.
+ * - Stores number of successful mappings in the given argument.
+ * - Returns 0 on success, error code otherwise.
*/
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -875,11 +893,11 @@ err:
return rc;
}
-/*
- * unmap_phys_pg_pack - unmaps the physical page pack
- * @ctx: current context
- * @vaddr: start address of the virtual area to unmap
- * @phys_pg_pack: the pack of physical pages to unmap
+/**
+ * unmap_phys_pg_pack() - unmaps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to unmap.
+ * @phys_pg_pack: the pack of physical pages to unmap.
*/
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -913,7 +931,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
}
static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
- u64 *paddr)
+ u64 *paddr)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
@@ -936,19 +954,18 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
return 0;
}
-/*
- * map_device_va - map the given memory
- *
- * @ctx : current context
- * @args : host parameters with handle/host virtual address
- * @device_addr : pointer to result device virtual address
+/**
+ * map_device_va() - map the given memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with handle/host virtual address.
+ * @device_addr: pointer to result device virtual address.
*
* This function does the following:
* - If given a physical device memory handle, map to a device virtual block
- * and return the start address of this block
+ * and return the start address of this block.
* - If given a host virtual address and size, find the related physical pages,
* map a device virtual block to this pages and return the start address of
- * this block
+ * this block.
*/
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
u64 *device_addr)
@@ -1034,7 +1051,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
hint_addr = args->map_device.hint_addr;
- /* DRAM VA alignment is the same as the DRAM page size */
+ /* DRAM VA alignment is the same as the MMU page size */
va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
va_block_align = hdev->asic_prop.dmmu.page_size;
}
@@ -1125,24 +1142,26 @@ init_page_pack_err:
return rc;
}
-/*
- * unmap_device_va - unmap the given device virtual address
- *
- * @ctx : current context
- * @vaddr : device virtual address to unmap
- * @ctx_free : true if in context free flow, false otherwise.
+/**
+ * unmap_device_va() - unmap the given device virtual address.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with device virtual address to unmap.
+ * @ctx_free: true if in context free flow, false otherwise.
*
* This function does the following:
- * - Unmap the physical pages related to the given virtual address
- * - return the device virtual block to the virtual block list
+ * - unmap the physical pages related to the given virtual address.
+ * - return the device virtual block to the virtual block list.
*/
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
+static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+ bool ctx_free)
{
struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
struct hl_vm_hash_node *hnode = NULL;
struct hl_userptr *userptr = NULL;
struct hl_va_range *va_range;
+ u64 vaddr = args->unmap.device_virt_addr;
enum vm_type_t *vm_type;
bool is_userptr;
int rc = 0;
@@ -1201,7 +1220,13 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
goto mapping_cnt_err;
}
- vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
+ if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
+ vaddr = prop->dram_base_address +
+ DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
+ phys_pg_pack->page_size) *
+ phys_pg_pack->page_size;
+ else
+ vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
mutex_lock(&ctx->mmu_lock);
@@ -1264,12 +1289,90 @@ vm_type_err:
return rc;
}
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
+ u32 *size)
+{
+ u32 block_id = 0;
+ int rc;
+
+ rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+
+ *handle = block_id | HL_MMAP_TYPE_BLOCK;
+ *handle <<= PAGE_SHIFT;
+
+ return rc;
+}
+
+static void hw_block_vm_close(struct vm_area_struct *vma)
+{
+ struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+
+ hl_ctx_put(ctx);
+ vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hw_block_vm_ops = {
+ .close = hw_block_vm_close
+};
+
+/**
+ * hl_hw_block_mmap() - mmap a hw block to user.
+ * @hpriv: pointer to the private data of the fd
+ * @vma: pointer to vm_area_struct of the process
+ *
+ * Driver increments context reference for every HW block mapped in order
+ * to prevent user from closing FD without unmapping first
+ */
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 block_id, block_size;
+ int rc;
+
+ /* We use the page offset to hold the block id and thus we need to clear
+ * it before doing the mmap itself
+ */
+ block_id = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+
+ /* Driver only allows mapping of a complete HW block */
+ block_size = vma->vm_end - vma->vm_start;
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#else
+ if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#endif
+ dev_err(hdev->dev,
+ "user pointer is invalid - 0x%lx\n",
+ vma->vm_start);
+
+ return -EINVAL;
+ }
+
+ vma->vm_ops = &hw_block_vm_ops;
+ vma->vm_private_data = hpriv->ctx;
+
+ hl_ctx_get(hdev, hpriv->ctx);
+
+ rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
+ if (rc) {
+ hl_ctx_put(hpriv->ctx);
+ return rc;
+ }
+
+ vma->vm_pgoff = block_id;
+
+ return 0;
+}
+
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
- u64 device_addr = 0;
- u32 handle = 0;
+ u64 block_handle, device_addr = 0;
+ u32 handle = 0, block_size;
int rc;
switch (args->in.op) {
@@ -1292,7 +1395,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
break;
case HL_MEM_OP_FREE:
- rc = free_device_memory(ctx, args->in.free.handle);
+ rc = free_device_memory(ctx, &args->in);
break;
case HL_MEM_OP_MAP:
@@ -1301,7 +1404,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
rc = 0;
} else {
rc = get_paddr_from_handle(ctx, &args->in,
- &device_addr);
+ &device_addr);
}
memset(args, 0, sizeof(*args));
@@ -1312,6 +1415,13 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
rc = 0;
break;
+ case HL_MEM_OP_MAP_BLOCK:
+ rc = map_block(hdev, args->in.map_block.block_addr,
+ &block_handle, &block_size);
+ args->out.block_handle = block_handle;
+ args->out.block_size = block_size;
+ break;
+
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -ENOTTY;
@@ -1328,8 +1438,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
- u64 device_addr = 0;
- u32 handle = 0;
+ u64 block_handle, device_addr = 0;
+ u32 handle = 0, block_size;
int rc;
if (!hl_device_operational(hdev, &status)) {
@@ -1400,7 +1510,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
goto out;
}
- rc = free_device_memory(ctx, args->in.free.handle);
+ rc = free_device_memory(ctx, &args->in);
break;
case HL_MEM_OP_MAP:
@@ -1411,8 +1521,14 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
break;
case HL_MEM_OP_UNMAP:
- rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
- false);
+ rc = unmap_device_va(ctx, &args->in, false);
+ break;
+
+ case HL_MEM_OP_MAP_BLOCK:
+ rc = map_block(hdev, args->in.map_block.block_addr,
+ &block_handle, &block_size);
+ args->out.block_handle = block_handle;
+ args->out.block_size = block_size;
break;
default:
@@ -1473,16 +1589,16 @@ destroy_pages:
return rc;
}
-/*
- * hl_pin_host_memory - pins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @userptr: pointer to hl_userptr structure
+/**
+ * hl_pin_host_memory() - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @userptr: pointer to hl_userptr structure.
*
* This function does the following:
- * - Pins the physical pages
- * - Create an SG list from those pages
+ * - Pins the physical pages.
+ * - Create an SG list from those pages.
*/
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr *userptr)
@@ -1571,11 +1687,10 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
kfree(userptr->sgt);
}
-/*
- * hl_userptr_delete_list - clear userptr list
- *
- * @hdev : pointer to the habanalabs device structure
- * @userptr_list : pointer to the list to clear
+/**
+ * hl_userptr_delete_list() - clear userptr list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to clear.
*
* This function does the following:
* - Iterates over the list and unpins the host memory and frees the userptr
@@ -1594,12 +1709,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
INIT_LIST_HEAD(userptr_list);
}
-/*
- * hl_userptr_is_pinned - returns whether the given userptr is pinned
- *
- * @hdev : pointer to the habanalabs device structure
- * @userptr_list : pointer to the list to clear
- * @userptr : pointer to userptr to check
+/**
+ * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to clear.
+ * @userptr: pointer to userptr to check.
*
* This function does the following:
* - Iterates over the list and checks if the given userptr is in it, means is
@@ -1617,12 +1731,12 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
return false;
}
-/*
- * va_range_init - initialize virtual addresses range
- * @hdev: pointer to the habanalabs device structure
- * @va_range: pointer to the range to initialize
- * @start: range start address
- * @end: range end address
+/**
+ * va_range_init() - initialize virtual addresses range.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_range: pointer to the range to initialize.
+ * @start: range start address.
+ * @end: range end address.
*
* This function does the following:
* - Initializes the virtual addresses list of the given range with the given
@@ -1635,15 +1749,21 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
INIT_LIST_HEAD(&va_range->list);
- /* PAGE_SIZE alignment */
+ /*
+ * PAGE_SIZE alignment
+ * it is the callers responsibility to align the addresses if the
+ * page size is not a power of 2
+ */
- if (start & (PAGE_SIZE - 1)) {
- start &= PAGE_MASK;
- start += PAGE_SIZE;
- }
+ if (is_power_of_2(page_size)) {
+ if (start & (PAGE_SIZE - 1)) {
+ start &= PAGE_MASK;
+ start += PAGE_SIZE;
+ }
- if (end & (PAGE_SIZE - 1))
- end &= PAGE_MASK;
+ if (end & (PAGE_SIZE - 1))
+ end &= PAGE_MASK;
+ }
if (start >= end) {
dev_err(hdev->dev, "too small vm range for va list\n");
@@ -1664,13 +1784,13 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
return 0;
}
-/*
- * va_range_fini() - clear a virtual addresses range
- * @hdev: pointer to the habanalabs structure
- * va_range: pointer to virtual addresses range
+/**
+ * va_range_fini() - clear a virtual addresses range.
+ * @hdev: pointer to the habanalabs structure.
+ * va_range: pointer to virtual addresses rang.e
*
* This function does the following:
- * - Frees the virtual addresses block list and its lock
+ * - Frees the virtual addresses block list and its lock.
*/
static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{
@@ -1682,22 +1802,22 @@ static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
kfree(va_range);
}
-/*
- * vm_ctx_init_with_ranges() - initialize virtual memory for context
- * @ctx: pointer to the habanalabs context structure
+/**
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context.
+ * @ctx: pointer to the habanalabs context structure.
* @host_range_start: host virtual addresses range start.
* @host_range_end: host virtual addresses range end.
* @host_huge_range_start: host virtual addresses range start for memory
- * allocated with huge pages.
+ * allocated with huge pages.
* @host_huge_range_end: host virtual addresses range end for memory allocated
* with huge pages.
* @dram_range_start: dram virtual addresses range start.
* @dram_range_end: dram virtual addresses range end.
*
* This function initializes the following:
- * - MMU for context
- * - Virtual address to area descriptor hashtable
- * - Virtual block list of available virtual memory
+ * - MMU for context.
+ * - Virtual address to area descriptor hashtable.
+ * - Virtual block list of available virtual memory.
*/
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
u64 host_range_start,
@@ -1818,7 +1938,8 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
dram_range_start = prop->dmmu.start_addr;
dram_range_end = prop->dmmu.end_addr;
- dram_page_size = prop->dmmu.page_size;
+ dram_page_size = prop->dram_page_size ?
+ prop->dram_page_size : prop->dmmu.page_size;
host_range_start = prop->pmmu.start_addr;
host_range_end = prop->pmmu.end_addr;
host_page_size = prop->pmmu.page_size;
@@ -1832,15 +1953,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
dram_range_start, dram_range_end, dram_page_size);
}
-/*
- * hl_vm_ctx_fini - virtual memory teardown of context
- *
- * @ctx : pointer to the habanalabs context structure
+/**
+ * hl_vm_ctx_fini() - virtual memory teardown of context.
+ * @ctx: pointer to the habanalabs context structure.
*
* This function perform teardown the following:
- * - Virtual block list of available virtual memory
- * - Virtual address to area descriptor hashtable
- * - MMU for context
+ * - Virtual block list of available virtual memory.
+ * - Virtual address to area descriptor hashtable.
+ * - MMU for context.
*
* In addition this function does the following:
* - Unmaps the existing hashtable nodes if the hashtable is not empty. The
@@ -1859,9 +1979,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
struct hl_vm_phys_pg_pack *phys_pg_list;
struct hl_vm_hash_node *hnode;
struct hlist_node *tmp_node;
+ struct hl_mem_in args;
int i;
- if (!ctx->hdev->mmu_enable)
+ if (!hdev->mmu_enable)
return;
hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
@@ -1878,13 +1999,18 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
dev_dbg(hdev->dev,
"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
hnode->vaddr, ctx->asid);
- unmap_device_va(ctx, hnode->vaddr, true);
+ args.unmap.device_virt_addr = hnode->vaddr;
+ unmap_device_va(ctx, &args, true);
}
+ mutex_lock(&ctx->mmu_lock);
+
/* invalidate the cache once after the unmapping loop */
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+ mutex_unlock(&ctx->mmu_lock);
+
spin_lock(&vm->idr_lock);
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
if (phys_pg_list->asid == ctx->asid) {
@@ -1911,19 +2037,19 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
* because the user notifies us on allocations. If the user is no more,
* all DRAM is available
*/
- if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
- atomic64_set(&ctx->hdev->dram_used_mem, 0);
+ if (ctx->asid != HL_KERNEL_ASID_ID &&
+ !hdev->asic_prop.dram_supports_virtual_memory)
+ atomic64_set(&hdev->dram_used_mem, 0);
}
-/*
- * hl_vm_init - initialize virtual memory module
- *
- * @hdev : pointer to the habanalabs device structure
+/**
+ * hl_vm_init() - initialize virtual memory module.
+ * @hdev: pointer to the habanalabs device structure.
*
* This function initializes the following:
- * - MMU module
- * - DRAM physical pages pool of 2MB
- * - Idr for device memory allocation handles
+ * - MMU module.
+ * - DRAM physical pages pool of 2MB.
+ * - Idr for device memory allocation handles.
*/
int hl_vm_init(struct hl_device *hdev)
{
@@ -1931,7 +2057,13 @@ int hl_vm_init(struct hl_device *hdev)
struct hl_vm *vm = &hdev->vm;
int rc;
- vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+ if (is_power_of_2(prop->dram_page_size))
+ vm->dram_pg_pool =
+ gen_pool_create(__ffs(prop->dram_page_size), -1);
+ else
+ vm->dram_pg_pool =
+ gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
+
if (!vm->dram_pg_pool) {
dev_err(hdev->dev, "Failed to create dram page pool\n");
return -ENOMEM;
@@ -1964,15 +2096,14 @@ pool_add_err:
return rc;
}
-/*
- * hl_vm_fini - virtual memory module teardown
- *
- * @hdev : pointer to the habanalabs device structure
+/**
+ * hl_vm_fini() - virtual memory module teardown.
+ * @hdev: pointer to the habanalabs device structure.
*
* This function perform teardown to the following:
- * - Idr for device memory allocation handles
- * - DRAM physical pages pool of 2MB
- * - MMU module
+ * - Idr for device memory allocation handles.
+ * - DRAM physical pages pool of 2MB.
+ * - MMU module.
*/
void hl_vm_fini(struct hl_device *hdev)
{
diff --git a/drivers/misc/habanalabs/common/mmu/Makefile b/drivers/misc/habanalabs/common/mmu/Makefile
new file mode 100644
index 000000000000..d852c3874658
--- /dev/null
+++ b/drivers/misc/habanalabs/common/mmu/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 28a4638741d8..71703a32350f 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -7,7 +7,7 @@
#include <linux/slab.h>
-#include "habanalabs.h"
+#include "../habanalabs.h"
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
@@ -166,7 +166,6 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
mmu_prop = &prop->pmmu;
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
-
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
@@ -174,11 +173,21 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
- dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't unmap\n",
- page_size, mmu_prop->page_size >> 10);
+ /*
+ * MMU page size may differ from DRAM page size.
+ * In such case work with the DRAM page size and let the MMU
+ * scrambling routine to handle this mismatch when
+ * calculating the address to remove from the MMU page table
+ */
+ if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
+ real_page_size = prop->dram_page_size;
+ } else {
+ dev_err(hdev->dev,
+ "page size of %u is not %uKB aligned, can't unmap\n",
+ page_size, mmu_prop->page_size >> 10);
- return -EFAULT;
+ return -EFAULT;
+ }
}
npages = page_size / real_page_size;
@@ -253,6 +262,17 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
*/
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
+ } else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
+ (prop->dram_page_size < mmu_prop->page_size)) {
+ /*
+ * MMU page size may differ from DRAM page size.
+ * In such case work with the DRAM page size and let the MMU
+ * scrambling routine handle this mismatch when calculating
+ * the address to place in the MMU page table. (in that case
+ * also make sure that the dram_page_size smaller than the
+ * mmu page size)
+ */
+ real_page_size = prop->dram_page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %uKB aligned, can't map\n",
@@ -261,9 +281,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
return -EFAULT;
}
- WARN_ONCE((phys_addr & (real_page_size - 1)),
- "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
- phys_addr, real_page_size);
+ /*
+ * Verify that the phys and virt addresses are aligned with the
+ * MMU page size (in dram this means checking the address and MMU
+ * after scrambling)
+ */
+ if ((is_dram_addr &&
+ ((hdev->asic_funcs->scramble_addr(hdev, phys_addr) &
+ (mmu_prop->page_size - 1)) ||
+ (hdev->asic_funcs->scramble_addr(hdev, virt_addr) &
+ (mmu_prop->page_size - 1)))) ||
+ (!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
+ (virt_addr & (real_page_size - 1)))))
+ dev_crit(hdev->dev,
+ "Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
+ phys_addr, virt_addr, real_page_size);
npages = page_size / real_page_size;
real_virt_addr = virt_addr;
@@ -444,19 +476,53 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
}
+static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
+ struct hl_mmu_hop_info *hops,
+ u64 *phys_addr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
+ u32 hop0_shift_off;
+ void *p;
+
+ /* last hop holds the phys address and flags */
+ if (hops->unscrambled_paddr)
+ tmp_phys_addr = hops->unscrambled_paddr;
+ else
+ tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;
+
+ if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
+ p = &prop->pmmu_huge;
+ else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
+ p = &prop->pmmu;
+ else /* HL_VA_RANGE_TYPE_DRAM */
+ p = &prop->dmmu;
+
+ /*
+ * find the correct hop shift field in hl_mmu_properties structure
+ * in order to determine the right maks for the page offset.
+ */
+ hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
+ p = (char *)p + hop0_shift_off;
+ p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
+ hop_shift = *(u64 *)p;
+ offset_mask = (1ull << hop_shift) - 1;
+ addr_mask = ~(offset_mask);
+ *phys_addr = (tmp_phys_addr & addr_mask) |
+ (virt_addr & offset_mask);
+}
+
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
{
struct hl_mmu_hop_info hops;
- u64 tmp_addr;
int rc;
rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
if (rc)
return rc;
- /* last hop holds the phys address and flags */
- tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val;
- *phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK);
+ hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops, phys_addr);
return 0;
}
@@ -473,6 +539,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
if (!hdev->mmu_enable)
return -EOPNOTSUPP;
+ hops->scrambled_vaddr = virt_addr; /* assume no scrambling */
+
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr,
prop->dmmu.end_addr);
@@ -491,6 +559,11 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
mutex_unlock(&ctx->mmu_lock);
+ /* add page offset to physical address */
+ if (hops->unscrambled_paddr)
+ hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
+ &hops->unscrambled_paddr);
+
return rc;
}
@@ -512,3 +585,28 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
return 0;
}
+
+/**
+ * hl_mmu_scramble_addr() - The generic mmu address scrambling routine.
+ * @hdev: pointer to device data.
+ * @addr: The address to scramble.
+ *
+ * Return: The scrambled address.
+ */
+u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr)
+{
+ return addr;
+}
+
+/**
+ * hl_mmu_descramble_addr() - The generic mmu address descrambling
+ * routine.
+ * @hdev: pointer to device data.
+ * @addr: The address to descramble.
+ *
+ * Return: The un-scrambled address.
+ */
+u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
+{
+ return addr;
+}
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 06d8a44dd5d4..c5e93ff32586 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -5,8 +5,8 @@
* All Rights Reserved.
*/
-#include "habanalabs.h"
-#include "../include/hw_ip/mmu/mmu_general.h"
+#include "../habanalabs.h"
+#include "../../include/hw_ip/mmu/mmu_general.h"
#include <linux/slab.h>
diff --git a/drivers/misc/habanalabs/common/pci/Makefile b/drivers/misc/habanalabs/common/pci/Makefile
new file mode 100644
index 000000000000..dc922a686683
--- /dev/null
+++ b/drivers/misc/habanalabs/common/pci/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+HL_COMMON_PCI_FILES := common/pci/pci.o
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index b4725e6101f6..b799f9258fb0 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -5,8 +5,8 @@
* All Rights Reserved.
*/
-#include "habanalabs.h"
-#include "../include/hw_ip/pci/pci_general.h"
+#include "../habanalabs.h"
+#include "../../include/hw_ip/pci/pci_general.h"
#include <linux/pci.h>
@@ -308,40 +308,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
}
/**
- * hl_pci_set_dma_mask() - Set DMA masks for the device.
- * @hdev: Pointer to hl_device structure.
- *
- * This function sets the DMA masks (regular and consistent) for a specified
- * value. If it doesn't succeed, it tries to set it to a fall-back value
- *
- * Return: 0 on success, non-zero for failure.
- */
-static int hl_pci_set_dma_mask(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
- int rc;
-
- /* set DMA mask */
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
- if (rc) {
- dev_err(hdev->dev,
- "Failed to set pci dma mask to %d bits, error %d\n",
- hdev->dma_mask, rc);
- return rc;
- }
-
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
- if (rc) {
- dev_err(hdev->dev,
- "Failed to set pci consistent dma mask to %d bits, error %d\n",
- hdev->dma_mask, rc);
- return rc;
- }
-
- return 0;
-}
-
-/**
* hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure.
*
@@ -377,9 +343,14 @@ int hl_pci_init(struct hl_device *hdev)
goto unmap_pci_bars;
}
- rc = hl_pci_set_dma_mask(hdev);
- if (rc)
+ rc = dma_set_mask_and_coherent(&pdev->dev,
+ DMA_BIT_MASK(hdev->dma_mask));
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to set dma mask to %d bits, error %d\n",
+ hdev->dma_mask, rc);
goto unmap_pci_bars;
+ }
return 0;