diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-26 11:03:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-26 11:03:17 -0700 |
commit | 8e3a3249502d8ff92d73d827fb41dd44c5a16f76 (patch) | |
tree | 033e620f253013f37426c51fdb1699e65dc9770d /drivers/misc/habanalabs/common | |
parent | 90035c28f17d59be660b9992757d09853ab203ec (diff) | |
parent | e2cb6b891ad2b8caa9131e3be70f45243df82a80 (diff) |
Merge tag 'char-misc-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc driver updates from Greg KH:
"Here is the big set of various smaller driver subsystem updates for
5.13-rc1.
Major bits in here are:
- habanalabs driver updates
- hwtracing driver updates
- interconnect driver updates
- mhi driver updates
- extcon driver updates
- fpga driver updates
- new binder features added
- nvmem driver updates
- phy driver updates
- soundwire driver updates
- smaller misc and char driver fixes and updates.
- bluetooth driver bugfix that maintainer wanted to go through this
tree.
All of these have been in linux-next with no reported issues"
* tag 'char-misc-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (330 commits)
bluetooth: eliminate the potential race condition when removing the HCI controller
coresight: etm-perf: Fix define build issue when built as module
phy: Revert "phy: ti: j721e-wiz: add missing of_node_put"
phy: ti: j721e-wiz: Add missing include linux/slab.h
phy: phy-twl4030-usb: Fix possible use-after-free in twl4030_usb_remove()
stm class: Use correct UUID APIs
intel_th: pci: Add Alder Lake-M support
intel_th: pci: Add Rocket Lake CPU support
intel_th: Consistency and off-by-one fix
intel_th: Constify attribute_group structs
intel_th: Constify all drvdata references
stm class: Remove an unused function
habanalabs/gaudi: Fix uninitialized return code rc when read size is zero
greybus: es2: fix kernel-doc warnings
mei: me: add Alder Lake P device id.
dw-xdata-pcie: Update outdated info and improve text format
dw-xdata-pcie: Fix documentation build warns
fbdev: zero-fill colormap in fbcmap.c
firmware: qcom-scm: Fix QCOM_SCM configuration
speakup: i18n: Switch to kmemdup_nul() in spk_msg_set()
...
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r-- | drivers/misc/habanalabs/common/command_buffer.c | 12 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c | 368 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/context.c | 14 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/debugfs.c | 224 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/device.c | 221 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/firmware_if.c | 238 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 184 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_drv.c | 28 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_ioctl.c | 35 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/hw_queue.c | 10 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/irq.c | 56 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/memory.c | 182 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/mmu/mmu.c | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/pci/pci.c | 52 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/sysfs.c | 33 |
15 files changed, 1291 insertions, 369 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index d9adb9a5e4d8..719168c980a4 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -181,7 +181,7 @@ static void cb_release(struct kref *ref) static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int ctx_id, bool internal_cb) { - struct hl_cb *cb; + struct hl_cb *cb = NULL; u32 cb_offset; void *p; @@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, * the kernel's copy. Hence, we must never sleep in this code section * and must use GFP_ATOMIC for all memory allocations. */ - if (ctx_id == HL_KERNEL_ASID_ID) + if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled) cb = kzalloc(sizeof(*cb), GFP_ATOMIC); - else + + if (!cb) cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) @@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, } else if (ctx_id == HL_KERNEL_ASID_ID) { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC); + if (!p) + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + cb_size, &cb->bus_address, GFP_KERNEL); } else { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, @@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); + if (rc < 0) + rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL); spin_unlock(&mgr->cb_lock); if (rc < 0) { diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 7bd4a03b3429..ff8791a651fd 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -84,6 +84,38 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) return 0; } +static void sob_reset_work(struct work_struct *work) +{ + struct hl_cs_compl *hl_cs_cmpl = + container_of(work, struct hl_cs_compl, sob_reset_work); + struct hl_device *hdev = hl_cs_cmpl->hdev; + + /* + * A signal CS can get completion while the corresponding wait + * for signal CS is on its way to the PQ. The wait for signal CS + * will get stuck if the signal CS incremented the SOB to its + * max value and there are no pending (submitted) waits on this + * SOB. + * We do the following to void this situation: + * 1. The wait for signal CS must get a ref for the signal CS as + * soon as possible in cs_ioctl_signal_wait() and put it + * before being submitted to the PQ but after it incremented + * the SOB refcnt in init_signal_wait_cs(). + * 2. Signal/Wait for signal CS will decrement the SOB refcnt + * here. + * These two measures guarantee that the wait for signal CS will + * reset the SOB upon completion rather than the signal CS and + * hence the above scenario is avoided. + */ + kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + + if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->reset_sob_group(hdev, + hl_cs_cmpl->sob_group); + + kfree(hl_cs_cmpl); +} + static void hl_fence_release(struct kref *kref) { struct hl_fence *fence = @@ -109,28 +141,9 @@ static void hl_fence_release(struct kref *kref) hl_cs_cmpl->hw_sob->sob_id, hl_cs_cmpl->sob_val); - /* - * A signal CS can get completion while the corresponding wait - * for signal CS is on its way to the PQ. The wait for signal CS - * will get stuck if the signal CS incremented the SOB to its - * max value and there are no pending (submitted) waits on this - * SOB. - * We do the following to void this situation: - * 1. The wait for signal CS must get a ref for the signal CS as - * soon as possible in cs_ioctl_signal_wait() and put it - * before being submitted to the PQ but after it incremented - * the SOB refcnt in init_signal_wait_cs(). - * 2. Signal/Wait for signal CS will decrement the SOB refcnt - * here. - * These two measures guarantee that the wait for signal CS will - * reset the SOB upon completion rather than the signal CS and - * hence the above scenario is avoided. - */ - kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work); - if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->reset_sob_group(hdev, - hl_cs_cmpl->sob_group); + return; } free: @@ -454,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) if (next_entry_found && !next->tdr_active) { next->tdr_active = true; - schedule_delayed_work(&next->work_tdr, - hdev->timeout_jiffies); + schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); } spin_unlock(&hdev->cs_mirror_lock); @@ -492,24 +504,6 @@ static void cs_do_release(struct kref *ref) goto out; } - hdev->asic_funcs->hw_queues_lock(hdev); - - hdev->cs_active_cnt--; - if (!hdev->cs_active_cnt) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; - ts->busy_to_idle_ts = ktime_get(); - - if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) - hdev->idle_busy_ts_idx = 0; - } else if (hdev->cs_active_cnt < 0) { - dev_crit(hdev->dev, "CS active cnt %d is negative\n", - hdev->cs_active_cnt); - } - - hdev->asic_funcs->hw_queues_unlock(hdev); - /* Need to update CI for all queue jobs that does not get completion */ hl_hw_queue_update_ci(cs); @@ -620,14 +614,14 @@ static void cs_timedout(struct work_struct *work) cs_put(cs); if (hdev->reset_on_lockup) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); else hdev->needs_reset = true; } static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_cs_type cs_type, u64 user_sequence, - struct hl_cs **cs_new) + struct hl_cs **cs_new, u32 flags, u32 timeout) { struct hl_cs_counters_atomic *cntr; struct hl_fence *other = NULL; @@ -638,6 +632,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + if (!cs) + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -651,12 +648,17 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->submitted = false; cs->completed = false; cs->type = cs_type; + cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); + cs->timeout_jiffies = timeout; INIT_LIST_HEAD(&cs->job_list); INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); kref_init(&cs->refcount); spin_lock_init(&cs->job_lock); cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + if (!cs_cmpl) + cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL); + if (!cs_cmpl) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -664,9 +666,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_cs; } + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); + + if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + rc = -ENOMEM; + goto free_cs_cmpl; + } + cs_cmpl->hdev = hdev; cs_cmpl->type = cs->type; spin_lock_init(&cs_cmpl->lock); + INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work); cs->fence = &cs_cmpl->base_fence; spin_lock(&ctx->cs_lock); @@ -696,15 +712,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_fence; } - cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, - sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); - if (!cs->jobs_in_queue_cnt) { - atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); - atomic64_inc(&cntr->out_of_mem_drop_cnt); - rc = -ENOMEM; - goto free_fence; - } - /* init hl_fence */ hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); @@ -727,6 +734,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, free_fence: spin_unlock(&ctx->cs_lock); + kfree(cs->jobs_in_queue_cnt); +free_cs_cmpl: kfree(cs_cmpl); free_cs: kfree(cs); @@ -749,6 +758,8 @@ void hl_cs_rollback_all(struct hl_device *hdev) int i; struct hl_cs *cs, *tmp; + flush_workqueue(hdev->sob_reset_wq); + /* flush all completions before iterating over the CS mirror list in * order to avoid a race with the release functions */ @@ -778,6 +789,44 @@ void hl_pending_cb_list_flush(struct hl_ctx *ctx) } } +static void +wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) +{ + struct hl_user_pending_interrupt *pend; + + spin_lock(&interrupt->wait_list_lock); + list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); + } + spin_unlock(&interrupt->wait_list_lock); +} + +void hl_release_pending_user_interrupts(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_user_interrupt *interrupt; + int i; + + if (!prop->user_interrupt_count) + return; + + /* We iterate through the user interrupt requests and waking up all + * user threads waiting for interrupt completion. We iterate the + * list under a lock, this is why all user threads, once awake, + * will wait on the same lock and will release the waiting object upon + * unlock. + */ + + for (i = 0 ; i < prop->user_interrupt_count ; i++) { + interrupt = &hdev->user_interrupt[i]; + wake_pending_user_interrupt_threads(interrupt); + } + + interrupt = &hdev->common_user_interrupt; + wake_pending_user_interrupt_threads(interrupt); +} + static void job_wq_completion(struct work_struct *work) { struct hl_cs_job *job = container_of(work, struct hl_cs_job, @@ -890,6 +939,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, job = kzalloc(sizeof(*job), GFP_ATOMIC); if (!job) + job = kzalloc(sizeof(*job), GFP_KERNEL); + + if (!job) return NULL; kref_init(&job->refcount); @@ -991,6 +1043,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); + if (!*cs_chunk_array) + *cs_chunk_array = kmalloc_array(num_chunks, + sizeof(**cs_chunk_array), GFP_KERNEL); if (!*cs_chunk_array) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -1038,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, } static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, - u32 num_chunks, u64 *cs_seq, u32 flags) + u32 num_chunks, u64 *cs_seq, u32 flags, + u32 timeout) { bool staged_mid, int_queues_only = true; struct hl_device *hdev = hpriv->hdev; @@ -1067,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, staged_mid = false; rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, - staged_mid ? user_sequence : ULLONG_MAX, &cs); + staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, + timeout); if (rc) goto free_cs_chunk_array; - cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); *cs_seq = cs->sequence; hl_debugfs_add_cs(cs); @@ -1269,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv) list_move_tail(&pending_cb->cb_node, &local_cb_list); spin_unlock(&ctx->pending_cb_lock); - rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs); + rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0, + hdev->timeout_jiffies); if (rc) goto add_list_elements; @@ -1370,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, rc = 0; } else { rc = cs_ioctl_default(hpriv, chunks, num_chunks, - cs_seq, 0); + cs_seq, 0, hdev->timeout_jiffies); } mutex_unlock(&hpriv->restore_phase_mutex); @@ -1419,7 +1476,7 @@ wait_again: out: if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); return rc; } @@ -1445,6 +1502,10 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, signal_seq_arr = kmalloc_array(signal_seq_arr_len, sizeof(*signal_seq_arr), GFP_ATOMIC); + if (!signal_seq_arr) + signal_seq_arr = kmalloc_array(signal_seq_arr_len, + sizeof(*signal_seq_arr), + GFP_KERNEL); if (!signal_seq_arr) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -1536,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, void __user *chunks, u32 num_chunks, - u64 *cs_seq, bool timestamp) + u64 *cs_seq, u32 flags, u32 timeout) { struct hl_cs_chunk *cs_chunk_array, *chunk; struct hw_queue_properties *hw_queue_prop; @@ -1642,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } } - rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs); + rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); if (rc) { if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) @@ -1650,8 +1711,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, goto free_cs_chunk_array; } - cs->timestamp = !!timestamp; - /* * Save the signal CS fence for later initialization right before * hanging the wait CS on the queue. @@ -1709,7 +1768,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) enum hl_cs_type cs_type; u64 cs_seq = ULONG_MAX; void __user *chunks; - u32 num_chunks, flags; + u32 num_chunks, flags, timeout; int rc; rc = hl_cs_sanity_checks(hpriv, args); @@ -1735,16 +1794,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) cs_seq = args->in.seq; + timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT + ? msecs_to_jiffies(args->in.timeout * 1000) + : hpriv->hdev->timeout_jiffies; + switch (cs_type) { case CS_TYPE_SIGNAL: case CS_TYPE_WAIT: case CS_TYPE_COLLECTIVE_WAIT: rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, - &cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP); + &cs_seq, args->in.cs_flags, timeout); break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, - args->in.cs_flags); + args->in.cs_flags, timeout); break; } @@ -1818,7 +1881,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, return rc; } -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) +static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { struct hl_device *hdev = hpriv->hdev; union hl_wait_cs_args *args = data; @@ -1873,3 +1936,176 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } + +static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + u32 timeout_us, u64 user_address, + u32 target_value, u16 interrupt_offset, + enum hl_cs_wait_status *status) +{ + struct hl_user_pending_interrupt *pend; + struct hl_user_interrupt *interrupt; + unsigned long timeout; + long completion_rc; + u32 completion_value; + int rc = 0; + + if (timeout_us == U32_MAX) + timeout = timeout_us; + else + timeout = usecs_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + pend = kmalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + hl_ctx_put(ctx); + return -ENOMEM; + } + + hl_fence_init(&pend->fence, ULONG_MAX); + + if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID) + interrupt = &hdev->common_user_interrupt; + else + interrupt = &hdev->user_interrupt[interrupt_offset]; + + spin_lock(&interrupt->wait_list_lock); + if (!hl_device_operational(hdev, NULL)) { + rc = -EPERM; + goto unlock_and_free_fence; + } + + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + dev_err(hdev->dev, + "Failed to copy completion value from user\n"); + rc = -EFAULT; + goto unlock_and_free_fence; + } + + if (completion_value >= target_value) + *status = CS_WAIT_STATUS_COMPLETED; + else + *status = CS_WAIT_STATUS_BUSY; + + if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) + goto unlock_and_free_fence; + + /* Add pending user interrupt to relevant list for the interrupt + * handler to monitor + */ + list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); + spin_unlock(&interrupt->wait_list_lock); + +wait_again: + /* Wait for interrupt handler to signal completion */ + completion_rc = + wait_for_completion_interruptible_timeout( + &pend->fence.completion, timeout); + + /* If timeout did not expire we need to perform the comparison. + * If comparison fails, keep waiting until timeout expires + */ + if (completion_rc > 0) { + if (copy_from_user(&completion_value, + u64_to_user_ptr(user_address), 4)) { + dev_err(hdev->dev, + "Failed to copy completion value from user\n"); + rc = -EFAULT; + goto remove_pending_user_interrupt; + } + + if (completion_value >= target_value) { + *status = CS_WAIT_STATUS_COMPLETED; + } else { + timeout -= jiffies_to_usecs(completion_rc); + goto wait_again; + } + } else { + *status = CS_WAIT_STATUS_BUSY; + } + +remove_pending_user_interrupt: + spin_lock(&interrupt->wait_list_lock); + list_del(&pend->wait_list_node); + +unlock_and_free_fence: + spin_unlock(&interrupt->wait_list_lock); + kfree(pend); + hl_ctx_put(ctx); + + return rc; +} + +static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt; + struct hl_device *hdev = hpriv->hdev; + struct asic_fixed_properties *prop; + union hl_wait_cs_args *args = data; + enum hl_cs_wait_status status; + int rc; + + prop = &hdev->asic_prop; + + if (!prop->user_interrupt_count) { + dev_err(hdev->dev, "no user interrupts allowed"); + return -EPERM; + } + + interrupt_id = + FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); + + first_interrupt = prop->first_available_user_msix_interrupt; + last_interrupt = prop->first_available_user_msix_interrupt + + prop->user_interrupt_count - 1; + + if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) && + interrupt_id != HL_COMMON_USER_INTERRUPT_ID) { + dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); + return -EINVAL; + } + + if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID) + interrupt_offset = HL_COMMON_USER_INTERRUPT_ID; + else + interrupt_offset = interrupt_id - first_interrupt; + + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, + args->in.interrupt_timeout_us, args->in.addr, + args->in.target, interrupt_offset, &status); + + memset(args, 0, sizeof(*args)); + + if (rc) { + dev_err_ratelimited(hdev->dev, + "interrupt_wait_ioctl failed (%d)\n", rc); + + return rc; + } + + switch (status) { + case CS_WAIT_STATUS_COMPLETED: + args->out.status = HL_WAIT_CS_STATUS_COMPLETED; + break; + case CS_WAIT_STATUS_BUSY: + default: + args->out.status = HL_WAIT_CS_STATUS_BUSY; + break; + } + + return 0; +} + +int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + union hl_wait_cs_args *args = data; + u32 flags = args->in.flags; + int rc; + + if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) + rc = hl_interrupt_wait_ioctl(hpriv, data); + else + rc = hl_cs_wait_ioctl(hpriv, data); + + return rc; +} diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index cda871afb8f4..62d705889ca8 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx) */ hl_pending_cb_list_flush(ctx); + /* Release all allocated HW block mapped list entries and destroy + * the mutex. + */ + hl_hw_block_mem_fini(ctx); + /* * If we arrived here, there are no jobs waiting for this context * on its queues so we can safely remove it. @@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (!ctx->cs_pending) return -ENOMEM; + hl_hw_block_mem_init(ctx); + if (is_kernel_ctx) { ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ rc = hl_vm_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "Failed to init mem ctx module\n"); rc = -ENOMEM; - goto err_free_cs_pending; + goto err_hw_block_mem_fini; } rc = hdev->asic_funcs->ctx_init(ctx); @@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (!ctx->asid) { dev_err(hdev->dev, "No free ASID, failed to create context\n"); rc = -ENOMEM; - goto err_free_cs_pending; + goto err_hw_block_mem_fini; } rc = hl_vm_ctx_init(ctx); @@ -214,7 +221,8 @@ err_vm_ctx_fini: err_asid_free: if (ctx->asid != HL_KERNEL_ASID_ID) hl_asid_free(hdev, ctx->asid); -err_free_cs_pending: +err_hw_block_mem_fini: + hl_hw_block_mem_fini(ctx); kfree(ctx->cs_pending); return rc; diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 9f19bee7b592..8381155578a0 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -9,8 +9,8 @@ #include "../include/hw_ip/mmu/mmu_general.h" #include <linux/pci.h> -#include <linux/debugfs.h> #include <linux/uaccess.h> +#include <linux/vmalloc.h> #define MMU_ADDR_BUF_SIZE 40 #define MMU_ASID_BUF_SIZE 10 @@ -229,6 +229,7 @@ static int vm_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_vm_hw_block_list_node *lnode; struct hl_ctx *ctx; struct hl_vm *vm; struct hl_vm_hash_node *hnode; @@ -272,6 +273,21 @@ static int vm_show(struct seq_file *s, void *data) } mutex_unlock(&ctx->mem_hash_lock); + if (ctx->asid != HL_KERNEL_ASID_ID && + !list_empty(&ctx->hw_block_mem_list)) { + seq_puts(s, "\nhw_block mappings:\n\n"); + seq_puts(s, " virtual address size HW block id\n"); + seq_puts(s, "-------------------------------------------\n"); + mutex_lock(&ctx->hw_block_list_lock); + list_for_each_entry(lnode, &ctx->hw_block_mem_list, + node) { + seq_printf(s, + " 0x%-14lx %-6u %-9u\n", + lnode->vaddr, lnode->size, lnode->id); + } + mutex_unlock(&ctx->hw_block_list_lock); + } + vm = &ctx->hdev->vm; spin_lock(&vm->idr_lock); @@ -441,21 +457,86 @@ out: return false; } -static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, - u64 *phys_addr) +static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr, + u32 size) { + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 dram_start_addr, dram_end_addr; + + if (!hdev->mmu_enable) + return false; + + if (prop->dram_supports_virtual_memory) { + dram_start_addr = prop->dmmu.start_addr; + dram_end_addr = prop->dmmu.end_addr; + } else { + dram_start_addr = prop->dram_base_address; + dram_end_addr = prop->dram_end_address; + } + + if (hl_mem_area_inside_range(addr, size, dram_start_addr, + dram_end_addr)) + return true; + + if (hl_mem_area_inside_range(addr, size, prop->sram_base_address, + prop->sram_end_address)) + return true; + + return false; +} + +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, + u64 *phys_addr) +{ + struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_ctx *ctx = hdev->compute_ctx; - int rc = 0; + struct hl_vm_hash_node *hnode; + struct hl_userptr *userptr; + enum vm_type_t *vm_type; + bool valid = false; + u64 end_address; + u32 range_size; + int i, rc = 0; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } + /* Verify address is mapped */ + mutex_lock(&ctx->mem_hash_lock); + hash_for_each(ctx->mem_hash, i, hnode, node) { + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + userptr = hnode->ptr; + range_size = userptr->size; + } else { + phys_pg_pack = hnode->ptr; + range_size = phys_pg_pack->total_size; + } + + end_address = virt_addr + size; + if ((virt_addr >= hnode->vaddr) && + (end_address <= hnode->vaddr + range_size)) { + valid = true; + break; + } + } + mutex_unlock(&ctx->mem_hash_lock); + + if (!valid) { + dev_err(hdev->dev, + "virt addr 0x%llx is not mapped\n", + virt_addr); + return -EINVAL; + } + rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr); if (rc) { - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); + dev_err(hdev->dev, + "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); rc = -EINVAL; } @@ -467,10 +548,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; u64 addr = entry->addr; - u32 val; + bool user_address; + char tmp_buf[32]; ssize_t rc; + u32 val; if (atomic_read(&hdev->in_reset)) { dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); @@ -480,13 +562,14 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, if (*ppos) return 0; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); + rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val); if (rc) { dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); return rc; @@ -503,6 +586,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; + bool user_address; u32 value; ssize_t rc; @@ -515,13 +599,14 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value); + rc = hdev->asic_funcs->debugfs_write32(hdev, addr, user_address, value); if (rc) { dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", value, addr); @@ -536,21 +621,28 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; u64 addr = entry->addr; - u64 val; + bool user_address; + char tmp_buf[32]; ssize_t rc; + u64 val; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } if (*ppos) return 0; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); + rc = hdev->asic_funcs->debugfs_read64(hdev, addr, user_address, &val); if (rc) { dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); return rc; @@ -567,20 +659,27 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; + bool user_address; u64 value; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); + rc = hdev->asic_funcs->debugfs_write64(hdev, addr, user_address, value); if (rc) { dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", value, addr); @@ -590,6 +689,63 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, return count; } +static ssize_t hl_dma_size_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + ssize_t rc; + u32 size; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 16, &size); + if (rc) + return rc; + + if (!size) { + dev_err(hdev->dev, "DMA read failed. size can't be 0\n"); + return -EINVAL; + } + + if (size > SZ_128M) { + dev_err(hdev->dev, + "DMA read failed. size can't be larger than 128MB\n"); + return -EINVAL; + } + + if (!hl_is_device_internal_memory_va(hdev, addr, size)) { + dev_err(hdev->dev, + "DMA read failed. Invalid 0x%010llx + 0x%08x\n", + addr, size); + return -EINVAL; + } + + /* Free the previous allocation, if there was any */ + entry->blob_desc.size = 0; + vfree(entry->blob_desc.data); + + entry->blob_desc.data = vmalloc(size); + if (!entry->blob_desc.data) + return -ENOMEM; + + rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size, + entry->blob_desc.data); + if (rc) { + dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr); + vfree(entry->blob_desc.data); + entry->blob_desc.data = NULL; + return -EIO; + } + + entry->blob_desc.size = size; + + return count; +} + static ssize_t hl_get_power_state(struct file *f, char __user *buf, size_t count, loff_t *ppos) { @@ -871,7 +1027,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, hdev->stop_on_err = value ? 1 : 0; - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); return count; } @@ -899,6 +1055,11 @@ static const struct file_operations hl_data64b_fops = { .write = hl_data_write64 }; +static const struct file_operations hl_dma_size_fops = { + .owner = THIS_MODULE, + .write = hl_dma_size_write +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1001,6 +1162,9 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!dev_entry->entry_arr) return; + dev_entry->blob_desc.size = 0; + dev_entry->blob_desc.data = NULL; + INIT_LIST_HEAD(&dev_entry->file_list); INIT_LIST_HEAD(&dev_entry->cb_list); INIT_LIST_HEAD(&dev_entry->cs_list); @@ -1103,6 +1267,17 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_security_violations_fops); + debugfs_create_file("dma_size", + 0200, + dev_entry->root, + dev_entry, + &hl_dma_size_fops); + + debugfs_create_blob("data_dma", + 0400, + dev_entry->root, + &dev_entry->blob_desc); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, 0444, @@ -1121,6 +1296,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev) debugfs_remove_recursive(entry->root); mutex_destroy(&entry->file_mutex); + + vfree(entry->blob_desc.data); + kfree(entry->entry_arr); } diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 334009e83823..00e92b678828 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -70,6 +70,9 @@ static void hpriv_release(struct kref *ref) mutex_unlock(&hdev->fpriv_list_lock); kfree(hpriv); + + if (hdev->reset_upon_device_release) + hl_device_reset(hdev, 0); } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -77,9 +80,9 @@ void hl_hpriv_get(struct hl_fpriv *hpriv) kref_get(&hpriv->refcount); } -void hl_hpriv_put(struct hl_fpriv *hpriv) +int hl_hpriv_put(struct hl_fpriv *hpriv) { - kref_put(&hpriv->refcount, hpriv_release); + return kref_put(&hpriv->refcount, hpriv_release); } /* @@ -103,10 +106,17 @@ static int hl_device_release(struct inode *inode, struct file *filp) return 0; } - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + /* Each pending user interrupt holds the user's context, hence we + * must release them all before calling hl_ctx_mgr_fini(). + */ + hl_release_pending_user_interrupts(hpriv->hdev); + + hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); - hl_hpriv_put(hpriv); + if (!hl_hpriv_put(hpriv)) + dev_warn(hdev->dev, + "Device is still in use because there are live CS and/or memory mappings\n"); return 0; } @@ -283,7 +293,7 @@ static void device_hard_reset_pending(struct work_struct *work) struct hl_device *hdev = device_reset_work->hdev; int rc; - rc = hl_device_reset(hdev, true, true); + rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD); if ((rc == -EBUSY) && !hdev->device_fini_pending) { dev_info(hdev->dev, "Could not reset device. will try again in %u seconds", @@ -311,11 +321,15 @@ static int device_early_init(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: goya_set_asic_funcs(hdev); - strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); + strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); break; case ASIC_GAUDI: gaudi_set_asic_funcs(hdev); - sprintf(hdev->asic_name, "GAUDI"); + strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name)); + break; + case ASIC_GAUDI_SEC: + gaudi_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -334,7 +348,7 @@ static int device_early_init(struct hl_device *hdev) if (hdev->asic_prop.completion_queues_count) { hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, sizeof(*hdev->cq_wq), - GFP_ATOMIC); + GFP_KERNEL); if (!hdev->cq_wq) { rc = -ENOMEM; goto asid_fini; @@ -358,24 +372,24 @@ static int device_early_init(struct hl_device *hdev) goto free_cq_wq; } - hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), - GFP_KERNEL); - if (!hdev->hl_chip_info) { + hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0); + if (!hdev->sob_reset_wq) { + dev_err(hdev->dev, + "Failed to allocate SOB reset workqueue\n"); rc = -ENOMEM; goto free_eq_wq; } - hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, - sizeof(struct hl_device_idle_busy_ts), - (GFP_KERNEL | __GFP_ZERO)); - if (!hdev->idle_busy_ts_arr) { + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), + GFP_KERNEL); + if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_chip_info; + goto free_sob_reset_wq; } rc = hl_mmu_if_set_funcs(hdev); if (rc) - goto free_idle_busy_ts_arr; + goto free_chip_info; hl_cb_mgr_init(&hdev->kernel_cb_mgr); @@ -404,10 +418,10 @@ static int device_early_init(struct hl_device *hdev) free_cb_mgr: hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); -free_idle_busy_ts_arr: - kfree(hdev->idle_busy_ts_arr); free_chip_info: kfree(hdev->hl_chip_info); +free_sob_reset_wq: + destroy_workqueue(hdev->sob_reset_wq); free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: @@ -441,9 +455,9 @@ static void device_early_fini(struct hl_device *hdev) hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); - kfree(hdev->idle_busy_ts_arr); kfree(hdev->hl_chip_info); + destroy_workqueue(hdev->sob_reset_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); @@ -485,7 +499,7 @@ static void hl_device_heartbeat(struct work_struct *work) goto reschedule; dev_err(hdev->dev, "Device heartbeat failed!\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT); return; @@ -561,100 +575,24 @@ static void device_late_fini(struct hl_device *hdev) hdev->late_init_done = false; } -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +int hl_device_utilization(struct hl_device *hdev, u32 *utilization) { - struct hl_device_idle_busy_ts *ts; - ktime_t zero_ktime, curr = ktime_get(); - u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; - s64 period_us, last_start_us, last_end_us, last_busy_time_us, - total_busy_time_us = 0, total_busy_time_ms; - - zero_ktime = ktime_set(0, 0); - period_us = period_ms * USEC_PER_MSEC; - ts = &hdev->idle_busy_ts_arr[last_index]; - - /* check case that device is currently in idle */ - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && - !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { - - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; - - ts = &hdev->idle_busy_ts_arr[last_index]; - } - - while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { - /* Check if we are in last sample case. i.e. if the sample - * begun before the sampling period. This could be a real - * sample or 0 so need to handle both cases - */ - last_start_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - if (last_start_us > period_us) { - - /* First check two cases: - * 1. If the device is currently busy - * 2. If the device was idle during the whole sampling - * period - */ - - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { - /* Check if the device is currently busy */ - if (ktime_compare(ts->idle_to_busy_ts, - zero_ktime)) - return 100; - - /* We either didn't have any activity or we - * reached an entry which is 0. Either way, - * exit and return what was accumulated so far - */ - break; - } - - /* If sample has finished, check it is relevant */ - last_end_us = ktime_to_us( - ktime_sub(curr, ts->busy_to_idle_ts)); - - if (last_end_us > period_us) - break; - - /* It is relevant so add it but with adjustment */ - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - total_busy_time_us += last_busy_time_us - - (last_start_us - period_us); - break; - } - - /* Check if the sample is finished or still open */ - if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - else - last_busy_time_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - total_busy_time_us += last_busy_time_us; + u64 max_power, curr_power, dc_power, dividend; + int rc; - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + max_power = hdev->asic_prop.max_power_default; + dc_power = hdev->asic_prop.dc_power_default; + rc = hl_fw_cpucp_power_get(hdev, &curr_power); - ts = &hdev->idle_busy_ts_arr[last_index]; + if (rc) + return rc; - overlap_cnt++; - } + curr_power = clamp(curr_power, dc_power, max_power); - total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, - USEC_PER_MSEC); + dividend = (curr_power - dc_power) * 100; + *utilization = (u32) div_u64(dividend, (max_power - dc_power)); - return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); + return 0; } /* @@ -809,7 +747,7 @@ int hl_device_resume(struct hl_device *hdev) hdev->disabled = false; atomic_set(&hdev->in_reset, 0); - rc = hl_device_reset(hdev, true, false); + rc = hl_device_reset(hdev, HL_RESET_HARD); if (rc) { dev_err(hdev->dev, "Failed to reset device during resume\n"); goto disable_device; @@ -915,9 +853,7 @@ static void device_disable_open_processes(struct hl_device *hdev) * hl_device_reset - reset the device * * @hdev: pointer to habanalabs device structure - * @hard_reset: should we do hard reset to all engines or just reset the - * compute/dma engines - * @from_hard_reset_thread: is the caller the hard-reset thread + * @flags: reset flags. * * Block future CS and wait for pending CS to be enqueued * Call ASIC H/W fini @@ -929,9 +865,10 @@ static void device_disable_open_processes(struct hl_device *hdev) * * Returns 0 for success or an error on failure. */ -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread) +int hl_device_reset(struct hl_device *hdev, u32 flags) { + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; + bool hard_reset, from_hard_reset_thread; int i, rc; if (!hdev->init_done) { @@ -940,6 +877,9 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, return 0; } + hard_reset = (flags & HL_RESET_HARD) != 0; + from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0; + if ((!hard_reset) && (!hdev->supports_soft_reset)) { dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n"); hard_reset = true; @@ -960,7 +900,11 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, if (rc) return 0; - if (hard_reset) { + /* + * if reset is due to heartbeat, device CPU is no responsive in + * which case no point sending PCI disable message to it + */ + if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) { /* Disable PCI access from device F/W so he won't send * us additional interrupts. We disable MSI/MSI-X at * the halt_engines function and we can't have the F/W @@ -1030,6 +974,11 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); + /* Release all pending user interrupts, each pending user interrupt + * holds a reference to user context + */ + hl_release_pending_user_interrupts(hdev); + kill_processes: if (hard_reset) { /* Kill processes here after CS rollback. This is because the @@ -1078,14 +1027,6 @@ kill_processes: for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_reset(hdev, &hdev->completion_queue[i]); - hdev->idle_busy_ts_idx = 0; - hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); - hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); - - if (hdev->cs_active_cnt) - dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", - hdev->cs_active_cnt); - mutex_lock(&hdev->fpriv_list_lock); /* Make sure the context switch phase will run again */ @@ -1151,6 +1092,16 @@ kill_processes: goto out_err; } + /* If device is not idle fail the reset process */ + if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + dev_err(hdev->dev, + "device is not idle (mask %#llx %#llx) after reset\n", + idle_mask[0], idle_mask[1]); + rc = -EIO; + goto out_err; + } + /* Check that the communication with the device is working */ rc = hdev->asic_funcs->test_queues(hdev); if (rc) { @@ -1235,7 +1186,7 @@ out_err: */ int hl_device_init(struct hl_device *hdev, struct class *hclass) { - int i, rc, cq_cnt, cq_ready_cnt; + int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; char *name; bool add_cdev_sysfs_on_err = false; @@ -1274,13 +1225,26 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto free_dev_ctrl; + user_interrupt_cnt = hdev->asic_prop.user_interrupt_count; + + if (user_interrupt_cnt) { + hdev->user_interrupt = kcalloc(user_interrupt_cnt, + sizeof(*hdev->user_interrupt), + GFP_KERNEL); + + if (!hdev->user_interrupt) { + rc = -ENOMEM; + goto early_fini; + } + } + /* * Start calling ASIC initialization. First S/W then H/W and finally * late init */ rc = hdev->asic_funcs->sw_init(hdev); if (rc) - goto early_fini; + goto user_interrupts_fini; /* * Initialize the H/W queues. Must be done before hw_init, because @@ -1478,6 +1442,8 @@ hw_queues_destroy: hl_hw_queues_destroy(hdev); sw_fini: hdev->asic_funcs->sw_fini(hdev); +user_interrupts_fini: + kfree(hdev->user_interrupt); early_fini: device_early_fini(hdev); free_dev_ctrl: @@ -1609,6 +1575,7 @@ void hl_device_fini(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_fini(hdev, &hdev->completion_queue[i]); kfree(hdev->completion_queue); + kfree(hdev->user_interrupt); hl_hw_queues_destroy(hdev); diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 09706c571e95..832dd5c5bb06 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -293,6 +293,7 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, u32 cpu_security_boot_status_reg) { u32 err_val, security_val; + bool err_exists = false; /* Some of the firmware status codes are deprecated in newer f/w * versions. In those versions, the errors are reported @@ -307,48 +308,102 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, if (!(err_val & CPU_BOOT_ERR0_ENABLED)) return 0; - if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) + if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) { dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) { dev_err(hdev->dev, "Device boot error - FIT image corrupted\n"); - if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) { dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED; + } if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) { - if (hdev->bmc_enable) - dev_warn(hdev->dev, + if (hdev->bmc_enable) { + dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n"); - else + err_exists = true; + } else { + dev_info(hdev->dev, + "Device boot message - Skipped waiting for BMC\n"); + /* This is an info so we don't want it to disable the + * device + */ err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED; + } } - if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) + if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) { dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n"); - if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) { dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) { dev_warn(hdev->dev, "Device boot warning - security not ready\n"); - if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY; + } + + if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) { dev_err(hdev->dev, "Device boot error - security failure\n"); - if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) { dev_err(hdev->dev, "Device boot error - eFuse failure\n"); - if (err_val & CPU_BOOT_ERR0_PLL_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_PLL_FAIL) { dev_err(hdev->dev, "Device boot error - PLL failure\n"); + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) { + dev_err(hdev->dev, + "Device boot error - device unusable\n"); + err_exists = true; + } security_val = RREG32(cpu_security_boot_status_reg); if (security_val & CPU_BOOT_DEV_STS0_ENABLED) dev_dbg(hdev->dev, "Device security status %#x\n", security_val); - if (err_val & ~CPU_BOOT_ERR0_ENABLED) + if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) { + dev_err(hdev->dev, + "Device boot error - unknown error 0x%08x\n", + err_val); + err_exists = true; + } + + if (err_exists) return -EIO; return 0; @@ -419,6 +474,73 @@ out: return rc; } +static int hl_fw_send_msi_info_msg(struct hl_device *hdev) +{ + struct cpucp_array_data_packet *pkt; + size_t total_pkt_size, data_size; + u64 result; + int rc; + + /* skip sending this info for unsupported ASICs */ + if (!hdev->asic_funcs->get_msi_info) + return 0; + + data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32); + total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size; + + /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ + total_pkt_size = (total_pkt_size + 0x7) & ~0x7; + + /* total_pkt_size is casted to u16 later on */ + if (total_pkt_size > USHRT_MAX) { + dev_err(hdev->dev, "CPUCP array data is too big\n"); + return -EINVAL; + } + + pkt = kzalloc(total_pkt_size, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES); + + hdev->asic_funcs->get_msi_info((u32 *)&pkt->data); + + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt, + total_pkt_size, 0, &result); + + /* + * in case packet result is invalid it means that FW does not support + * this feature and will use default/hard coded MSI values. no reason + * to stop the boot + */ + if (rc && result == cpucp_packet_invalid) + rc = 0; + + if (rc) + dev_err(hdev->dev, "failed to send CPUCP array data\n"); + + kfree(pkt); + + return rc; +} + +int hl_fw_cpucp_handshake(struct hl_device *hdev, + u32 cpu_security_boot_status_reg, + u32 boot_err0_reg) +{ + int rc; + + rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg, + boot_err0_reg); + if (rc) + return rc; + + return hl_fw_send_msi_info_msg(hdev); +} + int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { struct cpucp_packet pkt = {}; @@ -539,18 +661,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u8 pll_byte, pll_bit_off; + bool dynamic_pll; + + if (input_pll_index >= PLL_MAX) { + dev_err(hdev->dev, "PLL index %d is out of range\n", + input_pll_index); + return -EINVAL; + } + + dynamic_pll = prop->fw_security_status_valid && + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); + + if (!dynamic_pll) { + /* + * in case we are working with legacy FW (each asic has unique + * PLL numbering) extract the legacy numbering + */ + *pll_index = hdev->legacy_pll_map[input_pll_index]; + return 0; + } + + /* PLL map is a u8 array */ + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; + pll_bit_off = input_pll_index & 0x7; + + if (!(pll_byte & BIT(pll_bit_off))) { + dev_err(hdev->dev, "PLL index %d is not supported\n", + input_pll_index); + return -EINVAL; + } + + *pll_index = input_pll_index; + + return 0; +} + +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; + enum pll_index used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_type = __cpu_to_le16(pll_index); + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); @@ -565,6 +732,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, return rc; } +int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, "Failed to read power, error %d\n", rc); + return rc; + } + + *power = result; + + return rc; +} + static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) { /* Some of the status codes below are deprecated in newer f/w @@ -623,7 +813,11 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 status, security_status; int rc; - if (!hdev->cpu_enable) + /* pldm was added for cases in which we use preboot on pldm and want + * to load boot fit, but we can't wait for preboot because it runs + * very slowly + */ + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm) return 0; /* Need to check two possible scenarios: @@ -677,16 +871,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { prop->fw_security_status_valid = 1; + /* FW security should be derived from PCI ID, we keep this + * check for backward compatibility + */ if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN) prop->fw_security_disabled = false; - else - prop->fw_security_disabled = true; if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) prop->hard_reset_done_by_fw = true; } else { prop->fw_security_status_valid = 0; - prop->fw_security_disabled = true; } dev_dbg(hdev->dev, "Firmware preboot security status %#x\n", @@ -710,7 +904,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 status; int rc; - if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU)) + if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) return 0; dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", @@ -801,7 +995,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } - if (!(hdev->fw_loading & FW_TYPE_LINUX)) { + if (!(hdev->fw_components & FW_TYPE_LINUX)) { dev_info(hdev->dev, "Skip loading Linux F/W\n"); goto out; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 4b321e4f8059..44e89da30b4a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -19,6 +19,7 @@ #include <linux/dma-direction.h> #include <linux/scatterlist.h> #include <linux/hashtable.h> +#include <linux/debugfs.h> #include <linux/bitfield.h> #include <linux/genalloc.h> #include <linux/sched/signal.h> @@ -61,7 +62,7 @@ #define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ -#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 +#define HL_COMMON_USER_INTERRUPT_ID 0xFFF /* Memory */ #define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ @@ -102,6 +103,23 @@ enum hl_mmu_page_table_location { #define HL_MAX_DCORES 4 +/* + * Reset Flags + * + * - HL_RESET_HARD + * If set do hard reset to all engines. If not set reset just + * compute/DMA engines. + * + * - HL_RESET_FROM_RESET_THREAD + * Set if the caller is the hard-reset thread + * + * - HL_RESET_HEARTBEAT + * Set if reset is due to heartbeat + */ +#define HL_RESET_HARD (1 << 0) +#define HL_RESET_FROM_RESET_THREAD (1 << 1) +#define HL_RESET_HEARTBEAT (1 << 2) + #define HL_MAX_SOBS_PER_MONITOR 8 /** @@ -169,15 +187,19 @@ enum hl_fw_component { }; /** - * enum hl_fw_types - F/W types to load + * enum hl_fw_types - F/W types present in the system * @FW_TYPE_LINUX: Linux image for device CPU * @FW_TYPE_BOOT_CPU: Boot image for device CPU + * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system + * (preboot, ppboot etc...) * @FW_TYPE_ALL_TYPES: Mask for all types */ enum hl_fw_types { FW_TYPE_LINUX = 0x1, FW_TYPE_BOOT_CPU = 0x2, - FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU) + FW_TYPE_PREBOOT_CPU = 0x4, + FW_TYPE_ALL_TYPES = + (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU) }; /** @@ -368,6 +390,7 @@ struct hl_mmu_properties { * @dram_size: DRAM total size. * @dram_pci_bar_size: size of PCI bar towards DRAM. * @max_power_default: max power of the device after reset + * @dc_power_default: power consumed by the device in mode idle. * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * fault. * @pcie_dbi_base_address: Base address of the PCIE_DBI block. @@ -412,6 +435,7 @@ struct hl_mmu_properties { * @first_available_user_msix_interrupt: first available msix interrupt * reserved for the user * @first_available_cq: first available CQ for the user. + * @user_interrupt_count: number of user interrupts. * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. * @fw_security_disabled: true if security measures are disabled in firmware, @@ -421,6 +445,7 @@ struct hl_mmu_properties { * @dram_supports_virtual_memory: is there an MMU towards the DRAM * @hard_reset_done_by_fw: true if firmware is handling hard reset flow * @num_functional_hbms: number of functional HBMs in each DCORE. + * @iatu_done_by_fw: true if iATU configuration is being done by FW. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -439,6 +464,7 @@ struct asic_fixed_properties { u64 dram_size; u64 dram_pci_bar_size; u64 max_power_default; + u64 dc_power_default; u64 dram_size_for_default_page_mapping; u64 pcie_dbi_base_address; u64 pcie_aux_dbi_reg_addr; @@ -475,6 +501,7 @@ struct asic_fixed_properties { u16 first_available_user_mon[HL_MAX_DCORES]; u16 first_available_user_msix_interrupt; u16 first_available_cq[HL_MAX_DCORES]; + u16 user_interrupt_count; u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_disabled; @@ -482,6 +509,7 @@ struct asic_fixed_properties { u8 dram_supports_virtual_memory; u8 hard_reset_done_by_fw; u8 num_functional_hbms; + u8 iatu_done_by_fw; }; /** @@ -503,6 +531,7 @@ struct hl_fence { /** * struct hl_cs_compl - command submission completion object. + * @sob_reset_work: workqueue object to run SOB reset flow. * @base_fence: hl fence object. * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. @@ -513,6 +542,7 @@ struct hl_fence { * @sob_group: the SOB group that is used in this collective wait CS. */ struct hl_cs_compl { + struct work_struct sob_reset_work; struct hl_fence base_fence; spinlock_t lock; struct hl_device *hdev; @@ -690,6 +720,31 @@ struct hl_cq { }; /** + * struct hl_user_interrupt - holds user interrupt information + * @hdev: pointer to the device structure + * @wait_list_head: head to the list of user threads pending on this interrupt + * @wait_list_lock: protects wait_list_head + * @interrupt_id: msix interrupt id + */ +struct hl_user_interrupt { + struct hl_device *hdev; + struct list_head wait_list_head; + spinlock_t wait_list_lock; + u32 interrupt_id; +}; + +/** + * struct hl_user_pending_interrupt - holds a context to a user thread + * pending on an interrupt + * @wait_list_node: node in the list of user threads pending on an interrupt + * @fence: hl fence object for interrupt completion + */ +struct hl_user_pending_interrupt { + struct list_head wait_list_node; + struct hl_fence fence; +}; + +/** * struct hl_eq - describes the event queue (single one per device) * @hdev: pointer to the device structure * @kernel_address: holds the queue's kernel virtual address @@ -713,11 +768,13 @@ struct hl_eq { * @ASIC_INVALID: Invalid ASIC type. * @ASIC_GOYA: Goya device. * @ASIC_GAUDI: Gaudi device. + * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). */ enum hl_asic_type { ASIC_INVALID, ASIC_GOYA, - ASIC_GAUDI + ASIC_GAUDI, + ASIC_GAUDI_SEC }; struct hl_cs_parser; @@ -802,8 +859,12 @@ enum div_select_defs { * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. * @restore_phase_topology: clear all SOBs amd MONs. - * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. - * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. + * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory. + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory. + * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory. + * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory. + * @debugfs_read_dma: debug interface for reading up to 2MB from the device's + * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. * @set_pll_profile: change PLL profile (manual/automatic). @@ -919,10 +980,16 @@ struct hl_asic_funcs { void (*update_eq_ci)(struct hl_device *hdev, u32 val); int (*context_switch)(struct hl_device *hdev, u32 asid); void (*restore_phase_topology)(struct hl_device *hdev); - int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); - int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); - int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); - int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); + int (*debugfs_read32)(struct hl_device *hdev, u64 addr, + bool user_address, u32 *val); + int (*debugfs_write32)(struct hl_device *hdev, u64 addr, + bool user_address, u32 val); + int (*debugfs_read64)(struct hl_device *hdev, u64 addr, + bool user_address, u64 *val); + int (*debugfs_write64)(struct hl_device *hdev, u64 addr, + bool user_address, u64 val); + int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_attr_grp); void (*handle_eqe)(struct hl_device *hdev, @@ -986,6 +1053,7 @@ struct hl_asic_funcs { int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); + void (*get_msi_info)(u32 *table); }; @@ -1070,9 +1138,11 @@ struct hl_pending_cb { * @mem_hash_lock: protects the mem_hash. * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the * MMU hash or walking the PGT requires talking this lock. + * @hw_block_list_lock: protects the HW block memory list. * @debugfs_list: node in debugfs list of contexts. * pending_cb_list: list of pending command buffers waiting to be sent upon * next user command submission context. + * @hw_block_mem_list: list of HW block virtual mapped addresses. * @cs_counters: context command submission counters. * @cb_va_pool: device VA pool for command buffers which are mapped to the * device's MMU. @@ -1109,8 +1179,10 @@ struct hl_ctx { struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; struct mutex mmu_lock; + struct mutex hw_block_list_lock; struct list_head debugfs_list; struct list_head pending_cb_list; + struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; struct gen_pool *cb_va_pool; u64 cs_sequence; @@ -1185,6 +1257,7 @@ struct hl_userptr { * @sequence: the sequence number of this CS. * @staged_sequence: the sequence of the staged submission this CS is part of, * relevant only if staged_cs is set. + * @timeout_jiffies: cs timeout in jiffies. * @type: CS_TYPE_*. * @submitted: true if CS was submitted to H/W. * @completed: true if CS was completed by device. @@ -1213,6 +1286,7 @@ struct hl_cs { struct list_head debugfs_list; u64 sequence; u64 staged_sequence; + u64 timeout_jiffies; enum hl_cs_type type; u8 submitted; u8 completed; @@ -1330,6 +1404,23 @@ struct hl_vm_hash_node { }; /** + * struct hl_vm_hw_block_list_node - list element from user virtual address to + * HW block id. + * @node: node to hang on the list in context object. + * @ctx: the context this node belongs to. + * @vaddr: virtual address of the HW block. + * @size: size of the block. + * @id: HW block id (handle). + */ +struct hl_vm_hw_block_list_node { + struct list_head node; + struct hl_ctx *ctx; + unsigned long vaddr; + u32 size; + u32 id; +}; + +/** * struct hl_vm_phys_pg_pack - physical page pack. * @vm_type: describes the type of the virtual area descriptor. * @pages: the physical page array. @@ -1490,12 +1581,13 @@ struct hl_debugfs_entry { * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. * @ctx_mem_hash_spinlock: protects cb_list. + * @blob_desc: descriptor of blob * @addr: next address to read/write from/to in read/write32. * @mmu_addr: next virtual address to translate to physical address in mmu_show. * @mmu_asid: ASID to use while translating in mmu_show. * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read. + * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. + * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. */ struct hl_dbg_device_entry { struct dentry *root; @@ -1513,6 +1605,7 @@ struct hl_dbg_device_entry { spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; spinlock_t ctx_mem_hash_spinlock; + struct debugfs_blob_wrapper blob_desc; u64 addr; u64 mmu_addr; u32 mmu_asid; @@ -1684,16 +1777,6 @@ struct hl_device_reset_work { }; /** - * struct hl_device_idle_busy_ts - used for calculating device utilization rate. - * @idle_to_busy_ts: timestamp where device changed from idle to busy. - * @busy_to_idle_ts: timestamp where device changed from busy to idle. - */ -struct hl_device_idle_busy_ts { - ktime_t idle_to_busy_ts; - ktime_t busy_to_idle_ts; -}; - -/** * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop * information. * @virt_addr: the virtual address of the hop. @@ -1821,9 +1904,16 @@ struct hl_mmu_funcs { * @asic_name: ASIC specific name. * @asic_type: ASIC specific type. * @completion_queue: array of hl_cq. + * @user_interrupt: array of hl_user_interrupt. upon the corresponding user + * interrupt, driver will monitor the list of fences + * registered to this interrupt. + * @common_user_interrupt: common user interrupt for all user interrupts. + * upon any user interrupt, driver will monitor the + * list of fences registered to this common structure. * @cq_wq: work queues of completion queues for executing work in process * context. * @eq_wq: work queue of event queue for executing work in process context. + * @sob_reset_wq: work queue for sob reset executions. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. @@ -1857,11 +1947,11 @@ struct hl_mmu_funcs { * when a user opens the device * @fpriv_list_lock: protects the fpriv_list * @compute_ctx: current compute context executing. - * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy - * and vice-versa * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. + * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and + * static (asic specific) PLL indexes. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -1874,13 +1964,10 @@ struct hl_mmu_funcs { * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. - * @cs_active_cnt: number of active command submissions on this device (active - * means already in H/W queues) * @major: habanalabs kernel driver major. * @high_pll: high PLL profile frequency. * @soft_reset_cnt: number of soft reset since the driver was loaded. * @hard_reset_cnt: number of hard reset since the driver was loaded. - * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr * @clk_throttling_reason: bitmask represents the current clk throttling reasons * @id: device minor. * @id_control: minor of the control device @@ -1937,8 +2024,11 @@ struct hl_device { char status[HL_DEV_STS_MAX][HL_STR_MAX]; enum hl_asic_type asic_type; struct hl_cq *completion_queue; + struct hl_user_interrupt *user_interrupt; + struct hl_user_interrupt common_user_interrupt; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; + struct workqueue_struct *sob_reset_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; @@ -1976,13 +2066,13 @@ struct hl_device { struct hl_ctx *compute_ctx; - struct hl_device_idle_busy_ts *idle_busy_ts_arr; - struct hl_cs_counters_atomic aggregated_cs_counters; struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; + enum pll_index *legacy_pll_map; + atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -1990,12 +2080,10 @@ struct hl_device { atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; - int cs_active_cnt; u32 major; u32 high_pll; u32 soft_reset_cnt; u32 hard_reset_cnt; - u32 idle_busy_ts_idx; u32 clk_throttling_reason; u16 id; u16 id_control; @@ -2029,10 +2117,9 @@ struct hl_device { /* Parameters for bring-up */ u64 nic_ports_mask; - u64 fw_loading; + u64 fw_components; u8 mmu_enable; u8 mmu_huge_page_opt; - u8 cpu_enable; u8 reset_pcilink; u8 cpu_queues_enable; u8 pldm; @@ -2043,6 +2130,7 @@ struct hl_device { u8 bmc_enable; u8 rl_enable; u8 reset_on_preboot_fail; + u8 reset_upon_device_release; }; @@ -2157,6 +2245,8 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); +irqreturn_t hl_irq_handler_user_cq(int irq, void *arg); +irqreturn_t hl_irq_handler_default(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); @@ -2178,12 +2268,11 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass); void hl_device_fini(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev); -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread); +int hl_device_reset(struct hl_device *hdev, u32 flags); void hl_hpriv_get(struct hl_fpriv *hpriv); -void hl_hpriv_put(struct hl_fpriv *hpriv); +int hl_hpriv_put(struct hl_fpriv *hpriv); int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); +int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr); @@ -2235,6 +2324,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx); int hl_vm_init(struct hl_device *hdev); void hl_vm_fini(struct hl_device *hdev); +void hl_hw_block_mem_init(struct hl_ctx *ctx); +void hl_hw_block_mem_fini(struct hl_ctx *ctx); + u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_va_range_type type, u32 size, u32 alignment); int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, @@ -2287,13 +2379,19 @@ int hl_fw_send_heartbeat(struct hl_device *hdev); int hl_fw_cpucp_info_get(struct hl_device *hdev, u32 cpu_security_boot_status_reg, u32 boot_err0_reg); +int hl_fw_cpucp_handshake(struct hl_device *hdev, + u32 cpu_security_boot_status_reg, + u32 boot_err0_reg); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index); +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr); +int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg, @@ -2304,6 +2402,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); +int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, struct hl_inbound_pci_region *pci_region); @@ -2312,8 +2411,10 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, + bool curr); +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, + u64 freq); int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); int hl_set_temperature(struct hl_device *hdev, @@ -2334,6 +2435,7 @@ int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); +void hl_release_pending_user_interrupts(struct hl_device *hdev); #ifdef CONFIG_DEBUG_FS @@ -2434,7 +2536,7 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data); int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); #endif /* HABANALABSP_H_ */ diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 032d114f01ea..7135f1e03864 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -27,13 +27,13 @@ static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); -static int timeout_locked = 5; +static int timeout_locked = 30; static int reset_on_lockup = 1; static int memory_scrub = 1; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, - "Device lockup timeout in seconds (0 = disabled, default 5s)"); + "Device lockup timeout in seconds (0 = disabled, default 30s)"); module_param(reset_on_lockup, int, 0444); MODULE_PARM_DESC(reset_on_lockup, @@ -47,10 +47,12 @@ MODULE_PARM_DESC(memory_scrub, #define PCI_IDS_GOYA 0x0001 #define PCI_IDS_GAUDI 0x1000 +#define PCI_IDS_GAUDI_SEC 0x1010 static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, { 0, } }; MODULE_DEVICE_TABLE(pci, ids); @@ -74,6 +76,9 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GAUDI: asic_type = ASIC_GAUDI; break; + case PCI_IDS_GAUDI_SEC: + asic_type = ASIC_GAUDI_SEC; + break; default: asic_type = ASIC_INVALID; break; @@ -82,6 +87,16 @@ static enum hl_asic_type get_asic_type(u16 device) return asic_type; } +static bool is_asic_secured(enum hl_asic_type asic_type) +{ + switch (asic_type) { + case ASIC_GAUDI_SEC: + return true; + default: + return false; + } +} + /* * hl_device_open - open function for habanalabs device * @@ -234,8 +249,7 @@ out_err: static void set_driver_behavior_per_device(struct hl_device *hdev) { - hdev->cpu_enable = 1; - hdev->fw_loading = FW_TYPE_ALL_TYPES; + hdev->fw_components = FW_TYPE_ALL_TYPES; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; hdev->mmu_enable = 1; @@ -288,6 +302,12 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_type = asic_type; } + if (pdev) + hdev->asic_prop.fw_security_disabled = + !is_asic_secured(pdev->device); + else + hdev->asic_prop.fw_security_disabled = true; + /* Assign status description string */ strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 083a30969c5f..33841c272eb6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -226,19 +226,14 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) struct hl_info_device_utilization device_util = {0}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; if ((!max_size) || (!out)) return -EINVAL; - if ((args->period_ms < 100) || (args->period_ms > 1000) || - (args->period_ms % 100)) { - dev_err(hdev->dev, - "period %u must be between 100 - 1000 and must be divisible by 100\n", - args->period_ms); + rc = hl_device_utilization(hdev, &device_util.utilization); + if (rc) return -EINVAL; - } - - device_util.utilization = hl_device_utilization(hdev, args->period_ms); return copy_to_user(out, &device_util, min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0; @@ -446,6 +441,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0; } +static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_power_info power_info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_cpucp_power_get(hdev, &power_info.power); + if (rc) + return rc; + + return copy_to_user(out, &power_info, + min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -526,6 +540,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_PLL_FREQUENCY: return pll_frequency_info(hpriv, args); + case HL_INFO_POWER: + return power_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; @@ -596,7 +613,7 @@ static const struct hl_ioctl_desc hl_ioctls[] = { HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), - HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), + HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl), HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) }; diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 0f335182267f..173438461835 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -629,20 +629,12 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && first_entry && cs_needs_timeout(cs)) { cs->tdr_active = true; - schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies); + schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies); } spin_unlock(&hdev->cs_mirror_lock); - if (!hdev->cs_active_cnt++) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; - ts->busy_to_idle_ts = ktime_set(0, 0); - ts->idle_to_busy_ts = ktime_get(); - } - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) switch (job->queue_type) { case QUEUE_TYPE_EXT: diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index 44a0522b59b9..27129868c711 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -137,6 +137,62 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) return IRQ_HANDLED; } +static void handle_user_cq(struct hl_device *hdev, + struct hl_user_interrupt *user_cq) +{ + struct hl_user_pending_interrupt *pend; + + spin_lock(&user_cq->wait_list_lock); + list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) + complete_all(&pend->fence.completion); + spin_unlock(&user_cq->wait_list_lock); +} + +/** + * hl_irq_handler_user_cq - irq handler for user completion queues + * + * @irq: irq number + * @arg: pointer to user interrupt structure + * + */ +irqreturn_t hl_irq_handler_user_cq(int irq, void *arg) +{ + struct hl_user_interrupt *user_cq = arg; + struct hl_device *hdev = user_cq->hdev; + + dev_dbg(hdev->dev, + "got user completion interrupt id %u", + user_cq->interrupt_id); + + /* Handle user cq interrupts registered on all interrupts */ + handle_user_cq(hdev, &hdev->common_user_interrupt); + + /* Handle user cq interrupts registered on this specific interrupt */ + handle_user_cq(hdev, user_cq); + + return IRQ_HANDLED; +} + +/** + * hl_irq_handler_default - default irq handler + * + * @irq: irq number + * @arg: pointer to user interrupt structure + * + */ +irqreturn_t hl_irq_handler_default(int irq, void *arg) +{ + struct hl_user_interrupt *user_interrupt = arg; + struct hl_device *hdev = user_interrupt->hdev; + u32 interrupt_id = user_interrupt->interrupt_id; + + dev_err(hdev->dev, + "got invalid user interrupt %u", + interrupt_id); + + return IRQ_HANDLED; +} + /** * hl_irq_handler_eq - irq handler for event queue * diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 1f5910517b0e..2938cbbafbbc 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, num_pgs, total_size); return -ENOMEM; } - - if (hdev->memory_scrub) { - rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr, - total_size); - if (rc) { - dev_err(hdev->dev, - "Failed to scrub contiguous device memory\n"); - goto pages_pack_err; - } - } } phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); @@ -128,24 +118,13 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, goto page_err; } - if (hdev->memory_scrub) { - rc = hdev->asic_funcs->scrub_device_mem(hdev, - phys_pg_pack->pages[i], - page_size); - if (rc) { - dev_err(hdev->dev, - "Failed to scrub device memory\n"); - goto page_err; - } - } - num_curr_pgs++; } } spin_lock(&vm->idr_lock); handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, - GFP_ATOMIC); + GFP_KERNEL); spin_unlock(&vm->idr_lock); if (handle < 0) { @@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref) * @phys_pg_pack: physical page pack to free. * * This function does the following: - * - For DRAM memory only, iterate over the pack and free each physical block - * structure by returning it to the general pool. + * - For DRAM memory only + * - iterate over the pack, scrub and free each physical block structure by + * returning it to the general pool. + * In case of error during scrubbing, initiate hard reset. + * Once hard reset is triggered, scrubbing is bypassed while freeing the + * memory continues. * - Free the hl_vm_phys_pg_pack structure. */ -static void free_phys_pg_pack(struct hl_device *hdev, +static int free_phys_pg_pack(struct hl_device *hdev, struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; u64 i; + int rc = 0; - if (!phys_pg_pack->created_from_userptr) { - if (phys_pg_pack->contiguous) { - gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + if (phys_pg_pack->created_from_userptr) + goto end; + + if (phys_pg_pack->contiguous) { + if (hdev->memory_scrub && !hdev->disabled) { + rc = hdev->asic_funcs->scrub_device_mem(hdev, + phys_pg_pack->pages[0], phys_pg_pack->total_size); + if (rc) + dev_err(hdev->dev, + "Failed to scrub contiguous device memory\n"); + } - for (i = 0; i < phys_pg_pack->npages ; i++) - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); - } else { - for (i = 0 ; i < phys_pg_pack->npages ; i++) { - gen_pool_free(vm->dram_pg_pool, + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + phys_pg_pack->total_size); + + for (i = 0; i < phys_pg_pack->npages ; i++) + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } else { + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + if (hdev->memory_scrub && !hdev->disabled && rc == 0) { + rc = hdev->asic_funcs->scrub_device_mem( + hdev, phys_pg_pack->pages[i], phys_pg_pack->page_size); - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); + if (rc) + dev_err(hdev->dev, + "Failed to scrub device memory\n"); } + gen_pool_free(vm->dram_pg_pool, + phys_pg_pack->pages[i], + phys_pg_pack->page_size); + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); } } + if (rc && !hdev->disabled) + hl_device_reset(hdev, HL_RESET_HARD); + +end: kvfree(phys_pg_pack->pages); kfree(phys_pg_pack); + + return rc; } /** @@ -349,7 +358,7 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args) atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); - free_phys_pg_pack(hdev, phys_pg_pack); + return free_phys_pg_pack(hdev, phys_pg_pack); } else { spin_unlock(&vm->idr_lock); dev_err(hdev->dev, @@ -857,6 +866,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; u32 page_size = phys_pg_pack->page_size; int rc = 0; + bool is_host_addr; for (i = 0 ; i < phys_pg_pack->npages ; i++) { paddr = phys_pg_pack->pages[i]; @@ -878,6 +888,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, return 0; err: + is_host_addr = !hl_is_dram_va(hdev, vaddr); + next_vaddr = vaddr; for (i = 0 ; i < mapped_pg_cnt ; i++) { if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, @@ -888,6 +900,17 @@ err: phys_pg_pack->pages[i], page_size); next_vaddr += page_size; + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + * + * In addition, on host num of pages could be huge, + * because page size could be 4KB, so when unmapping host + * pages sleep every 32K pages to avoid soft lockup + */ + if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) + usleep_range(50, 200); } return rc; @@ -921,9 +944,9 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, * unmapping on Palladium can be really long, so avoid a CPU * soft lockup bug by sleeping a little between unmapping pages * - * In addition, when unmapping host memory we pass through - * the Linux kernel to unpin the pages and that takes a long - * time. Therefore, sleep every 32K pages to avoid soft lockup + * In addition, on host num of pages could be huge, + * because page size could be 4KB, so when unmapping host + * pages sleep every 32K pages to avoid soft lockup */ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) usleep_range(50, 200); @@ -1117,9 +1140,9 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, *device_addr = ret_vaddr; if (is_userptr) - free_phys_pg_pack(hdev, phys_pg_pack); + rc = free_phys_pg_pack(hdev, phys_pg_pack); - return 0; + return rc; map_err: if (add_va_block(hdev, va_range, ret_vaddr, @@ -1272,7 +1295,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, kfree(hnode); if (is_userptr) { - free_phys_pg_pack(hdev, phys_pg_pack); + rc = free_phys_pg_pack(hdev, phys_pg_pack); dma_unmap_host_va(hdev, userptr); } @@ -1305,9 +1328,15 @@ static int map_block(struct hl_device *hdev, u64 address, u64 *handle, static void hw_block_vm_close(struct vm_area_struct *vma) { - struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data; + struct hl_vm_hw_block_list_node *lnode = + (struct hl_vm_hw_block_list_node *) vma->vm_private_data; + struct hl_ctx *ctx = lnode->ctx; + mutex_lock(&ctx->hw_block_list_lock); + list_del(&lnode->node); + mutex_unlock(&ctx->hw_block_list_lock); hl_ctx_put(ctx); + kfree(lnode); vma->vm_private_data = NULL; } @@ -1325,7 +1354,9 @@ static const struct vm_operations_struct hw_block_vm_ops = { */ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) { + struct hl_vm_hw_block_list_node *lnode; struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; u32 block_id, block_size; int rc; @@ -1351,17 +1382,31 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) return -EINVAL; } + lnode = kzalloc(sizeof(*lnode), GFP_KERNEL); + if (!lnode) + return -ENOMEM; + vma->vm_ops = &hw_block_vm_ops; - vma->vm_private_data = hpriv->ctx; + vma->vm_private_data = lnode; - hl_ctx_get(hdev, hpriv->ctx); + hl_ctx_get(hdev, ctx); rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); if (rc) { - hl_ctx_put(hpriv->ctx); + hl_ctx_put(ctx); + kfree(lnode); return rc; } + lnode->ctx = ctx; + lnode->vaddr = vma->vm_start; + lnode->size = block_size; + lnode->id = block_id; + + mutex_lock(&ctx->hw_block_list_lock); + list_add_tail(&lnode->node, &ctx->hw_block_mem_list); + mutex_unlock(&ctx->hw_block_list_lock); + vma->vm_pgoff = block_id; return 0; @@ -1574,7 +1619,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, rc = sg_alloc_table_from_pages(userptr->sgt, userptr->pages, - npages, offset, size, GFP_ATOMIC); + npages, offset, size, GFP_KERNEL); if (rc < 0) { dev_err(hdev->dev, "failed to create SG table from pages\n"); goto put_pages; @@ -1624,11 +1669,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } - /* - * This function can be called also from data path, hence use atomic - * always as it is not a big allocation. - */ - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); if (!userptr->sgt) return -ENOMEM; @@ -2122,3 +2163,38 @@ void hl_vm_fini(struct hl_device *hdev) vm->init_done = false; } + +/** + * hl_hw_block_mem_init() - HW block memory initialization. + * @ctx: pointer to the habanalabs context structure. + * + * This function initializes the HW block virtual mapped addresses list and + * it's lock. + */ +void hl_hw_block_mem_init(struct hl_ctx *ctx) +{ + mutex_init(&ctx->hw_block_list_lock); + INIT_LIST_HEAD(&ctx->hw_block_mem_list); +} + +/** + * hl_hw_block_mem_fini() - HW block memory teardown. + * @ctx: pointer to the habanalabs context structure. + * + * This function clears the HW block virtual mapped addresses list and destroys + * it's lock. + */ +void hl_hw_block_mem_fini(struct hl_ctx *ctx) +{ + struct hl_vm_hw_block_list_node *lnode, *tmp; + + if (!list_empty(&ctx->hw_block_mem_list)) + dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n"); + + list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) { + list_del(&lnode->node); + kfree(lnode); + } + + mutex_destroy(&ctx->hw_block_list_lock); +} diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 93c9e5f587e1..b37189956b14 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -532,6 +532,8 @@ int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) struct hl_mmu_hop_info hops; int rc; + memset(&hops, 0, sizeof(hops)); + rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops); if (rc) return rc; @@ -589,6 +591,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: case ASIC_GAUDI: + case ASIC_GAUDI_SEC: hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); break; default: diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index b799f9258fb0..e941b7eef346 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -85,6 +85,58 @@ static void hl_pci_bars_unmap(struct hl_device *hdev) pci_release_regions(pdev); } +int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u64 msec; + u32 val; + + if (hdev->pldm) + msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC; + else + msec = HL_PCI_ELBI_TIMEOUT_MSEC; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, 0); + + timeout = ktime_add_ms(ktime_get(), msec); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + + return 0; + } + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error reading from ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI read didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI read has undefined bits in status\n"); + return -EIO; +} + /** * hl_pci_elbi_write() - Write through the ELBI interface. * @hdev: Pointer to hl_device structure. diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 4366d8f93842..9fa61573a89d 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,12 +9,18 @@ #include <linux/pci.h> -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, + bool curr) { struct cpucp_packet pkt; + u32 used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); if (curr) @@ -23,7 +29,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) else pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); @@ -31,23 +37,29 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) if (rc) { dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", - pll_index, rc); + used_pll_idx, rc); return rc; } return (long) result; } -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, + u64 freq) { struct cpucp_packet pkt; + u32 used_pll_idx; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -56,7 +68,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) if (rc) dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", - pll_index, rc); + used_pll_idx, rc); } u64 hl_get_max_power(struct hl_device *hdev) @@ -203,7 +215,7 @@ static ssize_t soft_reset_store(struct device *dev, dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n"); - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); out: return count; @@ -226,7 +238,7 @@ static ssize_t hard_reset_store(struct device *dev, dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); out: return count; @@ -245,6 +257,9 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI: str = "GAUDI"; break; + case ASIC_GAUDI_SEC: + str = "GAUDI SEC"; + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); @@ -344,7 +359,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t offset, size_t max_size) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct hl_device *hdev = dev_get_drvdata(dev); char *data; int rc; |