diff options
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r-- | drivers/misc/habanalabs/common/Makefile | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/command_buffer.c | 413 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c | 89 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/context.c | 4 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/debugfs.c | 304 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/device.c | 280 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/firmware_if.c | 86 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 415 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_drv.c | 44 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_ioctl.c | 108 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/irq.c | 14 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/memory.c | 289 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/memory_mgr.c | 349 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/mmu/mmu.c | 296 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/mmu/mmu_v1.c | 297 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/pci/pci.c | 10 |
16 files changed, 1813 insertions, 1187 deletions
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile index 6ebe3c7001ff..934a3a4aedc9 100644 --- a/drivers/misc/habanalabs/common/Makefile +++ b/drivers/misc/habanalabs/common/Makefile @@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_buffer.o common/hw_queue.o common/irq.o \ common/sysfs.o common/hwmon.o common/memory.o \ common/command_submission.o common/firmware_if.o \ - common/state_dump.o + common/state_dump.o common/memory_mgr.o diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index a507110f6443..e13b2b39c058 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -160,24 +160,6 @@ static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb) } } -static void cb_release(struct kref *ref) -{ - struct hl_device *hdev; - struct hl_cb *cb; - - cb = container_of(ref, struct hl_cb, refcount); - hdev = cb->hdev; - - hl_debugfs_remove_cb(cb); - - if (cb->is_mmu_mapped) - cb_unmap_mem(cb->ctx, cb); - - hl_ctx_put(cb->ctx); - - cb_do_release(hdev, cb); -} - static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int ctx_id, bool internal_cb) { @@ -238,168 +220,175 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, return cb; } -int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, - struct hl_ctx *ctx, u32 cb_size, bool internal_cb, - bool map_cb, u64 *handle) +struct hl_cb_mmap_mem_alloc_args { + struct hl_device *hdev; + struct hl_ctx *ctx; + u32 cb_size; + bool internal_cb; + bool map_cb; +}; + +static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf) { - struct hl_cb *cb; - bool alloc_new_cb = true; - int rc, ctx_id = ctx->asid; + struct hl_cb *cb = buf->private; - /* - * Can't use generic function to check this because of special case - * where we create a CB as part of the reset 
process - */ - if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) { - dev_warn_ratelimited(hdev->dev, - "Device is disabled or in reset. Can't create new CBs\n"); - rc = -EBUSY; - goto out_err; - } + hl_debugfs_remove_cb(cb); - if (cb_size > SZ_2M) { - dev_err(hdev->dev, "CB size %d must be less than %d\n", - cb_size, SZ_2M); - rc = -EINVAL; - goto out_err; - } + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); + + hl_ctx_put(cb->ctx); - if (!internal_cb) { + cb_do_release(cb->hdev, cb); +} + +static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args) +{ + struct hl_cb_mmap_mem_alloc_args *cb_args = args; + struct hl_cb *cb; + int rc, ctx_id = cb_args->ctx->asid; + bool alloc_new_cb = true; + + if (!cb_args->internal_cb) { /* Minimum allocation must be PAGE SIZE */ - if (cb_size < PAGE_SIZE) - cb_size = PAGE_SIZE; + if (cb_args->cb_size < PAGE_SIZE) + cb_args->cb_size = PAGE_SIZE; if (ctx_id == HL_KERNEL_ASID_ID && - cb_size <= hdev->asic_prop.cb_pool_cb_size) { + cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) { - spin_lock(&hdev->cb_pool_lock); - if (!list_empty(&hdev->cb_pool)) { - cb = list_first_entry(&hdev->cb_pool, + spin_lock(&cb_args->hdev->cb_pool_lock); + if (!list_empty(&cb_args->hdev->cb_pool)) { + cb = list_first_entry(&cb_args->hdev->cb_pool, typeof(*cb), pool_list); list_del(&cb->pool_list); - spin_unlock(&hdev->cb_pool_lock); + spin_unlock(&cb_args->hdev->cb_pool_lock); alloc_new_cb = false; } else { - spin_unlock(&hdev->cb_pool_lock); - dev_dbg(hdev->dev, "CB pool is empty\n"); + spin_unlock(&cb_args->hdev->cb_pool_lock); + dev_dbg(cb_args->hdev->dev, "CB pool is empty\n"); } } } if (alloc_new_cb) { - cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb); - if (!cb) { - rc = -ENOMEM; - goto out_err; - } + cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb); + if (!cb) + return -ENOMEM; } - cb->hdev = hdev; - cb->ctx = ctx; - hl_ctx_get(hdev, 
cb->ctx); + cb->hdev = cb_args->hdev; + cb->ctx = cb_args->ctx; + cb->buf = buf; + cb->buf->mappable_size = cb->size; + cb->buf->private = cb; + + hl_ctx_get(cb->ctx); - if (map_cb) { + if (cb_args->map_cb) { if (ctx_id == HL_KERNEL_ASID_ID) { - dev_err(hdev->dev, + dev_err(cb_args->hdev->dev, "CB mapping is not supported for kernel context\n"); rc = -EINVAL; goto release_cb; } - rc = cb_map_mem(ctx, cb); + rc = cb_map_mem(cb_args->ctx, cb); if (rc) goto release_cb; } - spin_lock(&mgr->cb_lock); - rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); - spin_unlock(&mgr->cb_lock); - - if (rc < 0) { - dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); - goto unmap_mem; - } - - cb->id = (u64) rc; - - kref_init(&cb->refcount); - spin_lock_init(&cb->lock); - - /* - * idr is 32-bit so we can safely OR it with a mask that is above - * 32 bit - */ - *handle = cb->id | HL_MMAP_TYPE_CB; - *handle <<= PAGE_SHIFT; - hl_debugfs_add_cb(cb); return 0; -unmap_mem: - if (cb->is_mmu_mapped) - cb_unmap_mem(cb->ctx, cb); release_cb: hl_ctx_put(cb->ctx); - cb_do_release(hdev, cb); -out_err: - *handle = 0; + cb_do_release(cb_args->hdev, cb); return rc; } -int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) +static int hl_cb_mmap(struct hl_mmap_mem_buf *buf, + struct vm_area_struct *vma, void *args) { - struct hl_cb *cb; - u32 handle; - int rc = 0; + struct hl_cb *cb = buf->private; - /* - * handle was given to user to do mmap, I need to shift it back to - * how the idr module gave it to me - */ - cb_handle >>= PAGE_SHIFT; - handle = (u32) cb_handle; + return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address, + cb->bus_address, cb->size); +} - spin_lock(&mgr->cb_lock); +static struct hl_mmap_mem_buf_behavior cb_behavior = { + .topic = "CB", + .mem_id = HL_MMAP_TYPE_CB, + .alloc = hl_cb_mmap_mem_alloc, + .release = hl_cb_mmap_mem_release, + .mmap = hl_cb_mmap, +}; - cb = idr_find(&mgr->cb_handles, handle); - if (cb) { - 
idr_remove(&mgr->cb_handles, handle); - spin_unlock(&mgr->cb_lock); - kref_put(&cb->refcount, cb_release); - } else { - spin_unlock(&mgr->cb_lock); - dev_err(hdev->dev, - "CB destroy failed, no match to handle 0x%x\n", handle); - rc = -EINVAL; +int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, + struct hl_ctx *ctx, u32 cb_size, bool internal_cb, + bool map_cb, u64 *handle) +{ + struct hl_cb_mmap_mem_alloc_args args = { + .hdev = hdev, + .ctx = ctx, + .cb_size = cb_size, + .internal_cb = internal_cb, + .map_cb = map_cb, + }; + struct hl_mmap_mem_buf *buf; + int ctx_id = ctx->asid; + + if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) { + dev_warn_ratelimited(hdev->dev, + "Device is disabled or in reset. Can't create new CBs\n"); + return -EBUSY; } - return rc; + if (cb_size > SZ_2M) { + dev_err(hdev->dev, "CB size %d must be less than %d\n", + cb_size, SZ_2M); + return -EINVAL; + } + + buf = hl_mmap_mem_buf_alloc( + mmg, &cb_behavior, + ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args); + if (!buf) + return -ENOMEM; + + *handle = buf->handle; + + return 0; +} + +int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle) +{ + int rc; + + rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle); + if (rc < 0) + return rc; /* Invalid handle */ + + if (rc == 0) + dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle); + + return 0; } -static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va) +static int hl_cb_info(struct hl_mem_mgr *mmg, + u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va) { struct hl_vm_va_block *va_block; struct hl_cb *cb; - u32 handle; int rc = 0; - /* The CB handle was given to user to do mmap, so need to shift it back - * to the value which was allocated by the IDR module. 
- */ - cb_handle >>= PAGE_SHIFT; - handle = (u32) cb_handle; - - spin_lock(&mgr->cb_lock); - - cb = idr_find(&mgr->cb_handles, handle); + cb = hl_cb_get(mmg, handle); if (!cb) { - dev_err(hdev->dev, - "CB info failed, no match to handle 0x%x\n", handle); - rc = -EINVAL; - goto out; + dev_err(mmg->dev, + "CB info failed, no match to handle 0x%llx\n", handle); + return -EINVAL; } if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { @@ -407,7 +396,7 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, if (va_block) { *device_va = va_block->start; } else { - dev_err(hdev->dev, "CB is not mapped to the device's MMU\n"); + dev_err(mmg->dev, "CB is not mapped to the device's MMU\n"); rc = -EINVAL; goto out; } @@ -416,7 +405,7 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, } out: - spin_unlock(&mgr->cb_lock); + hl_cb_put(cb); return rc; } @@ -444,7 +433,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) args->in.cb_size, HL_MAX_CB_SIZE); rc = -EINVAL; } else { - rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx, + rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx, args->in.cb_size, false, !!(args->in.flags & HL_CB_FLAGS_MAP), &handle); @@ -455,12 +444,12 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) break; case HL_CB_OP_DESTROY: - rc = hl_cb_destroy(hdev, &hpriv->cb_mgr, + rc = hl_cb_destroy(&hpriv->mem_mgr, args->in.cb_handle); break; case HL_CB_OP_INFO: - rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle, + rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle, args->in.flags, &usage_cnt, &device_va); @@ -483,163 +472,20 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) return rc; } -static void cb_vm_close(struct vm_area_struct *vma) +struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle) { - struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data; - long new_mmap_size; - - new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start); - - if (new_mmap_size > 0) { - cb->mmap_size = new_mmap_size; - 
return; - } - - spin_lock(&cb->lock); - cb->mmap = false; - spin_unlock(&cb->lock); - - hl_cb_put(cb); - vma->vm_private_data = NULL; -} - -static const struct vm_operations_struct cb_vm_ops = { - .close = cb_vm_close -}; - -int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_cb *cb; - u32 handle, user_cb_size; - int rc; - - /* We use the page offset to hold the idr and thus we need to clear - * it before doing the mmap itself - */ - handle = vma->vm_pgoff; - vma->vm_pgoff = 0; - - /* reference was taken here */ - cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle); - if (!cb) { - dev_err(hdev->dev, - "CB mmap failed, no match to handle 0x%x\n", handle); - return -EINVAL; - } - - /* Validation check */ - user_cb_size = vma->vm_end - vma->vm_start; - if (user_cb_size != ALIGN(cb->size, PAGE_SIZE)) { - dev_err(hdev->dev, - "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n", - vma->vm_end - vma->vm_start, cb->size); - rc = -EINVAL; - goto put_cb; - } - - if (!access_ok((void __user *) (uintptr_t) vma->vm_start, - user_cb_size)) { - dev_err(hdev->dev, - "user pointer is invalid - 0x%lx\n", - vma->vm_start); - - rc = -EINVAL; - goto put_cb; - } - - spin_lock(&cb->lock); + struct hl_mmap_mem_buf *buf; - if (cb->mmap) { - dev_err(hdev->dev, - "CB mmap failed, CB already mmaped to user\n"); - rc = -EINVAL; - goto release_lock; - } - - cb->mmap = true; - - spin_unlock(&cb->lock); - - vma->vm_ops = &cb_vm_ops; - - /* - * Note: We're transferring the cb reference to - * vma->vm_private_data here. 
- */ - - vma->vm_private_data = cb; - - rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address, - cb->bus_address, cb->size); - if (rc) { - spin_lock(&cb->lock); - cb->mmap = false; - goto release_lock; - } - - cb->mmap_size = cb->size; - vma->vm_pgoff = handle; - - return 0; - -release_lock: - spin_unlock(&cb->lock); -put_cb: - hl_cb_put(cb); - return rc; -} - -struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 handle) -{ - struct hl_cb *cb; - - spin_lock(&mgr->cb_lock); - cb = idr_find(&mgr->cb_handles, handle); - - if (!cb) { - spin_unlock(&mgr->cb_lock); - dev_warn(hdev->dev, - "CB get failed, no match to handle 0x%x\n", handle); + buf = hl_mmap_mem_buf_get(mmg, handle); + if (!buf) return NULL; - } - - kref_get(&cb->refcount); - - spin_unlock(&mgr->cb_lock); - - return cb; + return buf->private; } void hl_cb_put(struct hl_cb *cb) { - kref_put(&cb->refcount, cb_release); -} - -void hl_cb_mgr_init(struct hl_cb_mgr *mgr) -{ - spin_lock_init(&mgr->cb_lock); - idr_init(&mgr->cb_handles); -} - -void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) -{ - struct hl_cb *cb; - struct idr *idp; - u32 id; - - idp = &mgr->cb_handles; - - idr_for_each_entry(idp, cb, id) { - if (kref_put(&cb->refcount, cb_release) != 1) - dev_err(hdev->dev, - "CB %d for CTX ID %d is still alive\n", - id, cb->ctx->asid); - } - - idr_destroy(&mgr->cb_handles); + hl_mmap_mem_buf_put(cb->buf); } struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, @@ -649,7 +495,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, struct hl_cb *cb; int rc; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size, internal_cb, false, &cb_handle); if (rc) { dev_err(hdev->dev, @@ -657,8 +503,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, return NULL; } - cb_handle >>= PAGE_SHIFT; - cb = hl_cb_get(hdev, 
&hdev->kernel_cb_mgr, (u32) cb_handle); + cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle); /* hl_cb_get should never fail here */ if (!cb) { dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n", @@ -669,7 +514,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, return cb; destroy_cb: - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle); return NULL; } diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index d93ef9f1c45c..fb30b7de4aab 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -407,8 +407,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) { - bool next_entry_found = false; - struct hl_cs *next, *first_cs; + struct hl_cs *next = NULL, *iter, *first_cs; if (!cs_needs_timeout(cs)) return; @@ -443,13 +442,13 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) spin_lock(&hdev->cs_mirror_lock); /* queue TDR for next CS */ - list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node) - if (cs_needs_timeout(next)) { - next_entry_found = true; + list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) + if (cs_needs_timeout(iter)) { + next = iter; break; } - if (next_entry_found && !next->tdr_active) { + if (next && !next->tdr_active) { next->tdr_active = true; schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); } @@ -736,11 +735,10 @@ static void cs_timedout(struct work_struct *work) hdev = cs->ctx->hdev; /* Save only the first CS timeout parameters */ - rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1); + rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_disable, 0, 1); if (!rc) { - hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; - hdev->last_error.cs_timeout_timestamp 
= ktime_get(); - hdev->last_error.cs_timeout_seq = cs->sequence; + hdev->last_error.cs_timeout.timestamp = ktime_get(); + hdev->last_error.cs_timeout.seq = cs->sequence; } switch (cs->type) { @@ -806,7 +804,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, } /* increment refcnt for context */ - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); cs->ctx = ctx; cs->submitted = false; @@ -958,9 +956,9 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { - if (pend->ts_reg_info.ts_buff) { + if (pend->ts_reg_info.buf) { list_del(&pend->wait_list_node); - hl_ts_put(pend->ts_reg_info.ts_buff); + hl_mmap_mem_buf_put(pend->ts_reg_info.buf); hl_cb_put(pend->ts_reg_info.cq_cb); } else { pend->fence.error = -EIO; @@ -1072,17 +1070,14 @@ static int validate_queue_index(struct hl_device *hdev, } static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, - struct hl_cb_mgr *cb_mgr, + struct hl_mem_mgr *mmg, struct hl_cs_chunk *chunk) { struct hl_cb *cb; - u32 cb_handle; - cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); - - cb = hl_cb_get(hdev, cb_mgr, cb_handle); + cb = hl_cb_get(mmg, chunk->cb_handle); if (!cb) { - dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); + dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); return NULL; } @@ -1344,7 +1339,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (is_kernel_allocated_cb) { - cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); + cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); if (!cb) { atomic64_inc( &ctx->cs_counters.validation_drop_cnt); @@ -1772,7 +1767,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, */ job->patched_cb = job->user_cb; job->job_cb_size = job->user_cb_size; - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + 
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); /* increment refcount as for external queues we get completion */ cs_get(cs); @@ -1834,7 +1829,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, handle->count = count; - hl_ctx_get(hdev, hpriv->ctx); + hl_ctx_get(hpriv->ctx); handle->ctx = hpriv->ctx; mgr = &hpriv->ctx->sig_mgr; @@ -2528,7 +2523,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, if (timestamp) *timestamp = 0; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); fence = hl_ctx_get_fence(ctx, seq); @@ -2668,7 +2663,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { struct multi_cs_completion *mcs_compl; struct hl_device *hdev = hpriv->hdev; - struct multi_cs_data mcs_data = {0}; + struct multi_cs_data mcs_data = {}; union hl_wait_cs_args *args = data; struct hl_ctx *ctx = hpriv->ctx; struct hl_fence **fence_arr; @@ -2719,7 +2714,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) mcs_data.fence_arr = fence_arr; mcs_data.arr_len = seq_arr_len; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); /* wait (with timeout) for the first CS to be completed */ mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); @@ -2868,12 +2863,13 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } -static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff, +static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, struct hl_cb *cq_cb, u64 ts_offset, u64 cq_offset, u64 target_value, spinlock_t *wait_list_lock, struct hl_user_pending_interrupt **pend) { + struct hl_ts_buff *ts_buff = buf->private; struct hl_user_pending_interrupt *requested_offset_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + ts_offset; @@ -2885,7 +2881,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff, /* Validate ts_offset not exceeding last max */ if (requested_offset_record > cb_last) { - 
dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n", + dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", (u64)(uintptr_t)cb_last); return -EINVAL; } @@ -2904,18 +2900,21 @@ start_over: list_del(&requested_offset_record->wait_list_node); spin_unlock_irqrestore(wait_list_lock, flags); - hl_ts_put(requested_offset_record->ts_reg_info.ts_buff); + hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); - dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n"); + dev_dbg(buf->mmg->dev, + "ts node removed from interrupt list now can re-use\n"); } else { - dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n"); + dev_dbg(buf->mmg->dev, + "ts node in middle of irq handling\n"); /* irq handling in the middle give it time to finish */ spin_unlock_irqrestore(wait_list_lock, flags); usleep_range(1, 10); if (++iter_counter == MAX_TS_ITER_NUM) { - dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n"); + dev_err(buf->mmg->dev, + "handling registration interrupt took too long!!\n"); return -EINVAL; } @@ -2927,7 +2926,7 @@ start_over: /* Fill up the new registration node info */ requested_offset_record->ts_reg_info.in_use = 1; - requested_offset_record->ts_reg_info.ts_buff = ts_buff; + requested_offset_record->ts_reg_info.buf = buf; requested_offset_record->ts_reg_info.cq_cb = cq_cb; requested_offset_record->ts_reg_info.timestamp_kernel_addr = (u64 *) ts_buff->user_buff_address + ts_offset; @@ -2937,21 +2936,20 @@ start_over: *pend = requested_offset_record; - dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n", + dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB(0x%llx)\n", (u64)(uintptr_t)requested_offset_record); return 0; } static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr, + struct hl_mem_mgr 
*cb_mmg, struct hl_mem_mgr *mmg, u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, u64 target_value, struct hl_user_interrupt *interrupt, bool register_ts_record, u64 ts_handle, u64 ts_offset, u32 *status, u64 *timestamp) { - u32 cq_patched_handle, ts_patched_handle; struct hl_user_pending_interrupt *pend; - struct hl_ts_buff *ts_buff; + struct hl_mmap_mem_buf *buf; struct hl_cb *cq_cb; unsigned long timeout, flags; long completion_rc; @@ -2959,10 +2957,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, timeout = hl_usecs64_to_jiffies(timeout_us); - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); - cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT); - cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle); + cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); if (!cq_cb) { rc = -EINVAL; goto put_ctx; @@ -2971,16 +2968,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, if (register_ts_record) { dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", interrupt->interrupt_id, ts_offset, cq_counters_offset); - - ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT); - ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle); - if (!ts_buff) { + buf = hl_mmap_mem_buf_get(mmg, ts_handle); + if (!buf) { rc = -EINVAL; goto put_cq_cb; } /* Find first available record */ - rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset, + rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, cq_counters_offset, target_value, &interrupt->wait_list_lock, &pend); if (rc) @@ -3087,7 +3082,7 @@ ts_registration_exit: return rc; put_ts_buff: - hl_ts_put(ts_buff); + hl_mmap_mem_buf_put(buf); put_cq_cb: hl_cb_put(cq_cb); put_ctx: @@ -3111,7 +3106,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ timeout = hl_usecs64_to_jiffies(timeout_us); - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); pend = kzalloc(sizeof(*pend), 
GFP_KERNEL); if (!pend) { @@ -3249,7 +3244,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt]; if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr, + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, args->in.interrupt_timeout_us, args->in.cq_counters_handle, args->in.cq_counters_offset, args->in.target, interrupt, diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index c6360e33bce8..ed2cfd0c6e99 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -262,7 +262,7 @@ err_hw_block_mem_fini: return rc; } -void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx) +void hl_ctx_get(struct hl_ctx *ctx) { kref_get(&ctx->refcount); } @@ -284,7 +284,7 @@ struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev) * immediately once we find him */ ctx = hpriv->ctx; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); break; } diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index f18495545854..c6744bfc6da4 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <linux/iommu.h> #define MMU_ADDR_BUF_SIZE 40 #define MMU_ASID_BUF_SIZE 10 @@ -125,9 +126,9 @@ static int command_buffers_show(struct seq_file *s, void *data) } seq_printf(s, " %03llu %d 0x%08x %d %d %d\n", - cb->id, cb->ctx->asid, cb->size, - kref_read(&cb->refcount), - cb->mmap, atomic_read(&cb->cs_cnt)); + cb->buf->handle, cb->ctx->asid, cb->size, + kref_read(&cb->buf->refcount), + atomic_read(&cb->buf->mmap), atomic_read(&cb->cs_cnt)); } spin_unlock(&dev_entry->cb_spinlock); @@ -369,8 +370,7 @@ static int 
userptr_lookup_show(struct seq_file *s, void *data) if (dev_entry->userptr_lookup >= userptr->addr && dev_entry->userptr_lookup < userptr->addr + userptr->size) { total_npages = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, - i) { + for_each_sgtable_dma_sg(userptr->sgt, sg, i) { npages = hl_get_sg_info(sg, &dma_addr); sg_start = userptr->addr + total_npages * PAGE_SIZE; @@ -538,6 +538,39 @@ static int engines_show(struct seq_file *s, void *data) return 0; } +static ssize_t hl_memory_scrub(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 val = entry->memory_scrub_val; + int rc; + + if (!hl_device_operational(hdev, NULL)) { + dev_warn_ratelimited(hdev->dev, "Can't scrub memory, device is not operational\n"); + return -EIO; + } + + mutex_lock(&hdev->fpriv_list_lock); + if (hdev->is_compute_ctx_active) { + mutex_unlock(&hdev->fpriv_list_lock); + dev_err(hdev->dev, "can't scrub dram, context exist\n"); + return -EBUSY; + } + hdev->is_in_dram_scrub = true; + mutex_unlock(&hdev->fpriv_list_lock); + + rc = hdev->asic_funcs->scrub_device_dram(hdev, val); + + mutex_lock(&hdev->fpriv_list_lock); + hdev->is_in_dram_scrub = false; + mutex_unlock(&hdev->fpriv_list_lock); + + if (rc) + return rc; + return count; +} + static bool hl_is_device_va(struct hl_device *hdev, u64 addr) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -647,13 +680,105 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, return rc; } +static int hl_access_dev_mem_by_region(struct hl_device *hdev, u64 addr, + u64 *val, enum debugfs_access_type acc_type, bool *found) +{ + size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ? 
+ sizeof(u64) : sizeof(u32); + struct pci_mem_region *mem_reg; + int i; + + for (i = 0; i < PCI_REGION_NUMBER; i++) { + mem_reg = &hdev->pci_mem_region[i]; + if (!mem_reg->used) + continue; + if (addr >= mem_reg->region_base && + addr <= mem_reg->region_base + mem_reg->region_size - acc_size) { + *found = true; + return hdev->asic_funcs->access_dev_mem(hdev, mem_reg, i, + addr, val, acc_type); + } + } + return 0; +} + +static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 offset = prop->device_dma_offset_for_host_access; + + switch (acc_type) { + case DEBUGFS_READ32: + *val = *(u32 *) phys_to_virt(addr - offset); + break; + case DEBUGFS_WRITE32: + *(u32 *) phys_to_virt(addr - offset) = *val; + break; + case DEBUGFS_READ64: + *val = *(u64 *) phys_to_virt(addr - offset); + break; + case DEBUGFS_WRITE64: + *(u64 *) phys_to_virt(addr - offset) = *val; + break; + default: + dev_err(hdev->dev, "hostmem access-type %d id not supported\n", acc_type); + break; + } +} + +static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type) +{ + size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ? 
+ sizeof(u64) : sizeof(u32); + u64 host_start = hdev->asic_prop.host_base_address; + u64 host_end = hdev->asic_prop.host_end_address; + bool user_address, found = false; + int rc; + + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, acc_size, &addr); + if (rc) + return rc; + } + + rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found); + if (rc) { + dev_err(hdev->dev, + "Failed reading addr %#llx from dev mem (%d)\n", + addr, rc); + return rc; + } + + if (found) + return 0; + + if (!user_address || device_iommu_mapped(&hdev->pdev->dev)) { + rc = -EINVAL; + goto err; + } + + if (addr >= host_start && addr <= host_end - acc_size) { + hl_access_host_mem(hdev, addr, val, acc_type); + } else { + rc = -EINVAL; + goto err; + } + + return 0; +err: + dev_err(hdev->dev, "invalid addr %#llx\n", addr); + return rc; +} + static ssize_t hl_data_read32(struct file *f, char __user *buf, size_t count, loff_t *ppos) { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u64 addr = entry->addr; - bool user_address; + u64 value64, addr = entry->addr; char tmp_buf[32]; ssize_t rc; u32 val; @@ -666,18 +791,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, if (*ppos) return 0; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val); - if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_READ32); + if (rc) return rc; - } + + val = value64; /* downcast back to 32 */ sprintf(tmp_buf, "0x%08x\n", val); return simple_read_from_buffer(buf, count, ppos, tmp_buf, @@ -689,8 +807,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct 
hl_device *hdev = entry->hdev; - u64 addr = entry->addr; - bool user_address; + u64 value64, addr = entry->addr; u32 value; ssize_t rc; @@ -703,19 +820,10 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_write32(hdev, addr, user_address, value); - if (rc) { - dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", - value, addr); + value64 = value; + rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_WRITE32); + if (rc) return rc; - } return count; } @@ -726,7 +834,6 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; - bool user_address; char tmp_buf[32]; ssize_t rc; u64 val; @@ -739,18 +846,9 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, if (*ppos) return 0; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_read64(hdev, addr, user_address, &val); - if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + rc = hl_access_mem(hdev, addr, &val, DEBUGFS_READ64); + if (rc) return rc; - } sprintf(tmp_buf, "0x%016llx\n", val); return simple_read_from_buffer(buf, count, ppos, tmp_buf, @@ -763,7 +861,6 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; - bool user_address; u64 value; ssize_t rc; @@ -776,19 +873,9 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, if (rc) return rc; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = 
device_va_to_pa(hdev, addr, sizeof(value), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_write64(hdev, addr, user_address, value); - if (rc) { - dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", - value, addr); + rc = hl_access_mem(hdev, addr, &value, DEBUGFS_WRITE64); + if (rc) return rc; - } return count; } @@ -829,23 +916,67 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf, } /* Free the previous allocation, if there was any */ - entry->blob_desc.size = 0; - vfree(entry->blob_desc.data); + entry->data_dma_blob_desc.size = 0; + vfree(entry->data_dma_blob_desc.data); - entry->blob_desc.data = vmalloc(size); - if (!entry->blob_desc.data) + entry->data_dma_blob_desc.data = vmalloc(size); + if (!entry->data_dma_blob_desc.data) return -ENOMEM; rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size, - entry->blob_desc.data); + entry->data_dma_blob_desc.data); if (rc) { dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr); - vfree(entry->blob_desc.data); - entry->blob_desc.data = NULL; + vfree(entry->data_dma_blob_desc.data); + entry->data_dma_blob_desc.data = NULL; + return -EIO; + } + + entry->data_dma_blob_desc.size = size; + + return count; +} + +static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 size, trig; + ssize_t rc; + + if (hdev->reset_info.in_reset) { + dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 10, &trig); + if (rc) + return rc; + + if (trig != 1) { + dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n"); + return -EINVAL; + } + + size = sizeof(struct cpucp_monitor_dump); + + /* Free the previous allocation, if there was any */ + entry->mon_dump_blob_desc.size = 0; + vfree(entry->mon_dump_blob_desc.data); + + 
entry->mon_dump_blob_desc.data = vmalloc(size); + if (!entry->mon_dump_blob_desc.data) + return -ENOMEM; + + rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data); + if (rc) { + dev_err(hdev->dev, "Failed to dump monitors\n"); + vfree(entry->mon_dump_blob_desc.data); + entry->mon_dump_blob_desc.data = NULL; return -EIO; } - entry->blob_desc.size = size; + entry->mon_dump_blob_desc.size = size; return count; } @@ -1218,6 +1349,11 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf, return count; } +static const struct file_operations hl_mem_scrub_fops = { + .owner = THIS_MODULE, + .write = hl_memory_scrub, +}; + static const struct file_operations hl_data32b_fops = { .owner = THIS_MODULE, .read = hl_data_read32, @@ -1235,6 +1371,11 @@ static const struct file_operations hl_dma_size_fops = { .write = hl_dma_size_write }; +static const struct file_operations hl_monitor_dump_fops = { + .owner = THIS_MODULE, + .write = hl_monitor_dump_trigger +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1350,8 +1491,10 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!dev_entry->entry_arr) return; - dev_entry->blob_desc.size = 0; - dev_entry->blob_desc.data = NULL; + dev_entry->data_dma_blob_desc.size = 0; + dev_entry->data_dma_blob_desc.data = NULL; + dev_entry->mon_dump_blob_desc.size = 0; + dev_entry->mon_dump_blob_desc.data = NULL; INIT_LIST_HEAD(&dev_entry->file_list); INIT_LIST_HEAD(&dev_entry->cb_list); @@ -1370,6 +1513,17 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root); + debugfs_create_x64("memory_scrub_val", + 0644, + dev_entry->root, + &dev_entry->memory_scrub_val); + + debugfs_create_file("memory_scrub", + 0200, + dev_entry->root, + dev_entry, + &hl_mem_scrub_fops); + debugfs_create_x64("addr", 0644, dev_entry->root, @@ -1470,7 +1624,18 @@ void 
hl_debugfs_add_device(struct hl_device *hdev) debugfs_create_blob("data_dma", 0400, dev_entry->root, - &dev_entry->blob_desc); + &dev_entry->data_dma_blob_desc); + + debugfs_create_file("monitor_dump_trig", + 0200, + dev_entry->root, + dev_entry, + &hl_monitor_dump_fops); + + debugfs_create_blob("monitor_dump", + 0400, + dev_entry->root, + &dev_entry->mon_dump_blob_desc); debugfs_create_x8("skip_reset_on_timeout", 0644, @@ -1509,7 +1674,8 @@ void hl_debugfs_remove_device(struct hl_device *hdev) mutex_destroy(&entry->file_mutex); - vfree(entry->blob_desc.data); + vfree(entry->data_dma_blob_desc.data); + vfree(entry->mon_dump_blob_desc.data); for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i) vfree(entry->state_dump[i]); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index dc9341a64541..b4f14c6d3970 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -15,6 +15,182 @@ #define HL_RESET_DELAY_USEC 10000 /* 10ms */ +/* + * hl_set_dram_bar- sets the bar to allow later access to address + * + * @hdev: pointer to habanalabs device structure + * @addr: the address the caller wants to access. + * + * @return: the old BAR base address on success, U64_MAX for failure. + * The caller should set it back to the old address after use. + * + * In case the bar space does not cover the whole address space, + * the bar base address should be set to allow access to a given address. + * This function can be called also if the bar doesn't need to be set, + * in that case it just won't change the base. 
+ */ +static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 bar_base_addr; + + bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); + + return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); +} + + +static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type, enum pci_region region_type) +{ + struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; + u64 old_base, rc; + + if (region_type == PCI_REGION_DRAM) { + old_base = hl_set_dram_bar(hdev, addr); + if (old_base == U64_MAX) + return -EIO; + } + + switch (acc_type) { + case DEBUGFS_READ8: + *val = readb(hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_WRITE8: + writeb(*val, hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_READ32: + *val = readl(hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_WRITE32: + writel(*val, hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_READ64: + *val = readq(hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_WRITE64: + writeq(*val, hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + } + + if (region_type == PCI_REGION_DRAM) { + rc = hl_set_dram_bar(hdev, old_base); + if (rc == U64_MAX) + return -EIO; + } + + return 0; +} + +int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int rc, i; + + rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0); + if (rc) + return rc; + + /* Shift to the device's base physical address of host memory if 
necessary */ + if (prop->device_dma_offset_for_host_access) + for_each_sgtable_dma_sg(sgt, sg, i) + sg->dma_address += prop->device_dma_offset_for_host_access; + + return 0; +} + +void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int i; + + /* Cancel the device's base physical address of host memory if necessary */ + if (prop->device_dma_offset_for_host_access) + for_each_sgtable_dma_sg(sgt, sg, i) + sg->dma_address -= prop->device_dma_offset_for_host_access; + + dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0); +} + +/* + * hl_access_cfg_region - access the config region + * + * @hdev: pointer to habanalabs device structure + * @addr: the address to access + * @val: the value to write from or read to + * @acc_type: the type of access (read/write 64/32) + */ +int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type) +{ + struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG]; + u32 val_h, val_l; + + if (!IS_ALIGNED(addr, sizeof(u32))) { + dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32)); + return -EINVAL; + } + + switch (acc_type) { + case DEBUGFS_READ32: + *val = RREG32(addr - cfg_region->region_base); + break; + case DEBUGFS_WRITE32: + WREG32(addr - cfg_region->region_base, *val); + break; + case DEBUGFS_READ64: + val_l = RREG32(addr - cfg_region->region_base); + val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base); + + *val = (((u64) val_h) << 32) | val_l; + break; + case DEBUGFS_WRITE64: + WREG32(addr - cfg_region->region_base, lower_32_bits(*val)); + WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val)); + break; + default: + dev_err(hdev->dev, "access type %d is not supported\n", acc_type); + return -EOPNOTSUPP; + } + + return 0; +} + +/* + * hl_access_dev_mem - access device memory + * + * @hdev: 
pointer to habanalabs device structure + * @region: the memory region the address belongs to + * @region_type: the type of the region the address belongs to + * @addr: the address to access + * @val: the value to write from or read to + * @acc_type: the type of access (r/w, 32/64) + */ +int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region, + enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type) +{ + switch (region_type) { + case PCI_REGION_CFG: + return hl_access_cfg_region(hdev, addr, val, acc_type); + case PCI_REGION_SRAM: + case PCI_REGION_DRAM: + return hl_access_sram_dram_region(hdev, addr, val, acc_type, + region_type); + default: + return -EFAULT; + } + + return 0; +} + enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; @@ -107,6 +283,14 @@ static void hpriv_release(struct kref *ref) hdev->is_compute_ctx_active = false; mutex_unlock(&hdev->fpriv_list_lock); + hdev->compute_ctx_in_release = 0; + + /* release the eventfd */ + if (hpriv->notifier_event.eventfd) + eventfd_ctx_put(hpriv->notifier_event.eventfd); + + mutex_destroy(&hpriv->notifier_event.lock); + kfree(hpriv); } @@ -146,10 +330,11 @@ static int hl_device_release(struct inode *inode, struct file *filp) */ hl_release_pending_user_interrupts(hpriv->hdev); - hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); - hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr); hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); + hdev->compute_ctx_in_release = 1; + if (!hl_hpriv_put(hpriv)) dev_notice(hdev->dev, "User process closed FD but device still in use\n"); @@ -176,6 +361,11 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_ctrl_list_lock); out: + /* release the eventfd */ + if (hpriv->notifier_event.eventfd) + eventfd_ctx_put(hpriv->notifier_event.eventfd); + + mutex_destroy(&hpriv->notifier_event.lock); 
put_pid(hpriv->taskpid); kfree(hpriv); @@ -204,17 +394,15 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma) } vm_pgoff = vma->vm_pgoff; - vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); switch (vm_pgoff & HL_MMAP_TYPE_MASK) { - case HL_MMAP_TYPE_CB: - return hl_cb_mmap(hpriv, vma); - case HL_MMAP_TYPE_BLOCK: + vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); return hl_hw_block_mmap(hpriv, vma); + case HL_MMAP_TYPE_CB: case HL_MMAP_TYPE_TS_BUFF: - return hl_ts_mmap(hpriv, vma); + return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL); } return -EINVAL; @@ -424,18 +612,25 @@ static int device_early_init(struct hl_device *hdev) goto free_eq_wq; } + hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); + if (!hdev->pf_wq) { + dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); + rc = -ENOMEM; + goto free_ts_free_wq; + } + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_ts_free_wq; + goto free_pf_wq; } rc = hl_mmu_if_set_funcs(hdev); if (rc) goto free_chip_info; - hl_cb_mgr_init(&hdev->kernel_cb_mgr); + hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); hdev->device_reset_work.wq = create_singlethread_workqueue("hl_device_reset"); @@ -464,9 +659,11 @@ static int device_early_init(struct hl_device *hdev) return 0; free_cb_mgr: - hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr); free_chip_info: kfree(hdev->hl_chip_info); +free_pf_wq: + destroy_workqueue(hdev->pf_wq); free_ts_free_wq: destroy_workqueue(hdev->ts_free_obj_wq); free_eq_wq: @@ -503,10 +700,11 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->clk_throttling.lock); - hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr); kfree(hdev->hl_chip_info); + destroy_workqueue(hdev->pf_wq); destroy_workqueue(hdev->ts_free_obj_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); 
@@ -703,6 +901,9 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev, skip_wq_flush); + /* flush the MMU prefetch workqueue */ + flush_workqueue(hdev->pf_wq); + /* Release all pending user interrupts, each pending user interrupt * holds a reference to user context */ @@ -847,10 +1048,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool put_task_struct(task); } else { - dev_warn(hdev->dev, - "Can't get task struct for PID so giving up on killing process\n"); - mutex_unlock(fd_lock); - return -ETIME; + /* + * If we got here, it means that process was killed from outside the driver + * right after it started looping on fd_list and before get_pid_task, thus + * we don't need to kill it. + */ + dev_dbg(hdev->dev, + "Can't get task struct for user process, assuming process was killed from outside the driver\n"); } } @@ -1062,9 +1266,9 @@ do_reset: if (hard_reset) dev_info(hdev->dev, "Going to reset device\n"); else if (reset_upon_device_release) - dev_info(hdev->dev, "Going to reset device after release by user\n"); + dev_dbg(hdev->dev, "Going to reset device after release by user\n"); else - dev_info(hdev->dev, "Going to reset engines of inference device\n"); + dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); } again: @@ -1270,7 +1474,10 @@ kill_processes: hdev->reset_info.needs_reset = false; - dev_notice(hdev->dev, "Successfully finished resetting the device\n"); + if (hard_reset) + dev_info(hdev->dev, "Successfully finished resetting the device\n"); + else + dev_dbg(hdev->dev, "Successfully finished resetting the device\n"); if (hard_reset) { hdev->reset_info.hard_reset_cnt++; @@ -1323,6 +1530,43 @@ out_err: return rc; } +static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event) +{ + mutex_lock(&notifier_event->lock); + notifier_event->events_mask |= event; + if
(notifier_event->eventfd) + eventfd_signal(notifier_event->eventfd, 1); + + mutex_unlock(&notifier_event->lock); +} + +/* + * hl_notifier_event_send_all - notify all user processes via eventfd + * + * @hdev: pointer to habanalabs device structure + * @event: the occurred event + * Returns 0 for success or an error on failure. + */ +void hl_notifier_event_send_all(struct hl_device *hdev, u64 event) +{ + struct hl_fpriv *hpriv; + + mutex_lock(&hdev->fpriv_list_lock); + + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + hl_notifier_event_send(&hpriv->notifier_event, event); + + mutex_unlock(&hdev->fpriv_list_lock); + + /* control device */ + mutex_lock(&hdev->fpriv_ctrl_list_lock); + + list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node) + hl_notifier_event_send(&hpriv->notifier_event, event); + + mutex_unlock(&hdev->fpriv_ctrl_list_lock); +} + /* * hl_device_init - main initialization function for habanalabs device * diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 3262126cc7ca..828a36af5b14 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -18,8 +18,9 @@ static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; + u32 ver_offset; - fw_ver = kmalloc(16, GFP_KERNEL); + fw_ver = kmalloc(VERSION_MAX_LEN, GFP_KERNEL); if (!fw_ver) return NULL; @@ -29,9 +30,10 @@ static char *extract_fw_ver_from_str(const char *fw_str) /* Skip the fw- part */ str += 3; + ver_offset = str - fw_str; /* Copy until the next whitespace */ - whitespace = strnstr(str, " ", 15); + whitespace = strnstr(str, " ", VERSION_MAX_LEN - ver_offset); if (!whitespace) goto free_fw_ver; @@ -819,6 +821,54 @@ out: return rc; } +int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data) +{ + struct cpucp_monitor_dump *mon_dump_cpu_addr; + dma_addr_t mon_dump_dma_addr; + struct cpucp_packet pkt = {}; + size_t data_size; + __le32 *src_ptr;
+ u32 *dst_ptr; + u64 result; + int i, rc; + + data_size = sizeof(struct cpucp_monitor_dump); + mon_dump_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, data_size, + &mon_dump_dma_addr); + if (!mon_dump_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP monitor-dump packet\n"); + return -ENOMEM; + } + + memset(mon_dump_cpu_addr, 0, data_size); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(mon_dump_dma_addr); + pkt.data_max_size = cpu_to_le32(data_size); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc); + goto out; + } + + /* result contains the actual size */ + src_ptr = (__le32 *) mon_dump_cpu_addr; + dst_ptr = data; + for (i = 0; i < (data_size / sizeof(u32)); i++) { + *dst_ptr = le32_to_cpu(*src_ptr); + src_ptr++; + dst_ptr++; + } + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr); + + return rc; +} + int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters) { @@ -1539,7 +1589,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, le32_to_cpu(dyn_regs->cpu_cmd_status_to_host), status, FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status, - hdev->fw_poll_interval_usec, + hdev->fw_comms_poll_interval_usec, timeout); if (rc) { @@ -1909,7 +1959,7 @@ static int hl_fw_dynamic_request_descriptor(struct hl_device *hdev, * @fwc: the firmware component * @fw_version: fw component's version string */ -static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, +static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, enum hl_fw_component fwc, const char *fw_version) { @@ -1933,23 +1983,33 @@ static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, 
VERSION_MAX_LEN); if (preboot_ver && preboot_ver != prop->preboot_ver) { strscpy(btl_ver, prop->preboot_ver, - min((int) (preboot_ver - prop->preboot_ver), - 31)); + min((int) (preboot_ver - prop->preboot_ver), 31)); dev_info(hdev->dev, "%s\n", btl_ver); } preboot_ver = extract_fw_ver_from_str(prop->preboot_ver); if (preboot_ver) { - dev_info(hdev->dev, "preboot version %s\n", - preboot_ver); + char major[8]; + int rc; + + dev_info(hdev->dev, "preboot version %s\n", preboot_ver); + sprintf(major, "%.2s", preboot_ver); kfree(preboot_ver); + + rc = kstrtou32(major, 10, &hdev->fw_major_version); + if (rc) { + dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc); + return rc; + } } break; default: dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); - return; + return -EINVAL; } + + return 0; } /** @@ -2121,9 +2181,10 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev, goto release_fw; /* read preboot version */ - hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc, + rc = hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc, fw_loader->dynamic_loader.comm_desc.cur_fw_ver); - + if (rc) + goto release_fw; /* update state according to boot stage */ if (cur_fwc == FW_COMP_BOOT_FIT) { @@ -2390,9 +2451,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, goto protocol_err; /* read preboot version */ - hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT, + return hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT, fw_loader->dynamic_loader.comm_desc.cur_fw_ver); - return 0; } /* load boot fit to FW */ diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 1edaf6ab67bd..b0b0f3f89865 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -21,6 +21,7 @@ #include <linux/hashtable.h> #include <linux/debugfs.h> #include <linux/rwsem.h> +#include <linux/eventfd.h> #include <linux/bitfield.h> #include <linux/genalloc.h> 
#include <linux/sched/signal.h> @@ -61,8 +62,10 @@ #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */ #define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */ +#define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */ #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ @@ -394,18 +397,8 @@ enum hl_device_hw_state { * struct hl_mmu_properties - ASIC specific MMU address translation properties. * @start_addr: virtual start address of the memory region. * @end_addr: virtual end address of the memory region. - * @hop0_shift: shift of hop 0 mask. - * @hop1_shift: shift of hop 1 mask. - * @hop2_shift: shift of hop 2 mask. - * @hop3_shift: shift of hop 3 mask. - * @hop4_shift: shift of hop 4 mask. - * @hop5_shift: shift of hop 5 mask. - * @hop0_mask: mask to get the PTE address in hop 0. - * @hop1_mask: mask to get the PTE address in hop 1. - * @hop2_mask: mask to get the PTE address in hop 2. - * @hop3_mask: mask to get the PTE address in hop 3. - * @hop4_mask: mask to get the PTE address in hop 4. - * @hop5_mask: mask to get the PTE address in hop 5. + * @hop_shifts: array holds HOPs shifts. + * @hop_masks: array holds HOPs masks. * @last_mask: mask to get the bit indicating this is the last hop. * @pgt_size: size for page tables. * @page_size: default page size used to allocate memory. @@ -418,18 +411,8 @@ enum hl_device_hw_state { struct hl_mmu_properties { u64 start_addr; u64 end_addr; - u64 hop0_shift; - u64 hop1_shift; - u64 hop2_shift; - u64 hop3_shift; - u64 hop4_shift; - u64 hop5_shift; - u64 hop0_mask; - u64 hop1_mask; - u64 hop2_mask; - u64 hop3_mask; - u64 hop4_mask; - u64 hop5_mask; + u64 hop_shifts[MMU_HOP_MAX]; + u64 hop_masks[MMU_HOP_MAX]; u64 last_mask; u64 pgt_size; u32 page_size; @@ -486,8 +469,10 @@ struct hl_hints_range { * the device's MMU. 
* @dram_hints_align_mask: dram va hint addresses alignment mask which is used * for hints validity check. - * device_dma_offset_for_host_access: the offset to add to host DMA addresses - * to enable the device to access them. + * @device_dma_offset_for_host_access: the offset to add to host DMA addresses + * to enable the device to access them. + * @host_base_address: host physical start address for host DMA from device + * @host_end_address: host physical end address for host DMA from device * @max_freq_value: current max clk frequency. * @clk_pll_index: clock PLL index that specify which PLL determines the clock * we display to the user @@ -528,6 +513,10 @@ struct hl_hints_range { * @fw_app_cpu_boot_dev_sts1: bitmap representation of application security * status reported by FW, bit description can be * found in CPU_BOOT_DEV_STS1 + * @device_mem_alloc_default_page_size: may be different than dram_page_size only for ASICs for + * which the property supports_user_set_page_size is true + * (i.e. the DRAM supports multiple page sizes), otherwise + * it will shall be equal to dram_page_size. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -568,6 +557,7 @@ struct hl_hints_range { * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. * @set_max_power_on_device_init: true if need to set max power in F/W on device init. * @supports_user_set_page_size: true if user can set the allocation page size. 
+ * @dma_mask: the dma mask to be set for this device */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -599,6 +589,8 @@ struct asic_fixed_properties { u64 cb_va_end_addr; u64 dram_hints_align_mask; u64 device_dma_offset_for_host_access; + u64 host_base_address; + u64 host_end_address; u64 max_freq_value; u32 clk_pll_index; u32 mmu_pgt_size; @@ -626,6 +618,7 @@ struct asic_fixed_properties { u32 fw_bootfit_cpu_boot_dev_sts1; u32 fw_app_cpu_boot_dev_sts0; u32 fw_app_cpu_boot_dev_sts1; + u32 device_mem_alloc_default_page_size; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -654,6 +647,7 @@ struct asic_fixed_properties { u8 configurable_stop_on_err; u8 set_max_power_on_device_init; u8 supports_user_set_page_size; + u8 dma_mask; }; /** @@ -711,85 +705,102 @@ struct hl_cs_compl { */ /** - * struct hl_cb_mgr - describes a Command Buffer Manager. - * @cb_lock: protects cb_handles. - * @cb_handles: an idr to hold all command buffer handles. - */ -struct hl_cb_mgr { - spinlock_t cb_lock; - struct idr cb_handles; /* protected by cb_lock */ -}; - -/** - * struct hl_ts_mgr - describes the timestamp registration memory manager. - * @ts_lock: protects ts_handles. - * @ts_handles: an idr to hold all ts bufferes handles. - */ -struct hl_ts_mgr { - spinlock_t ts_lock; - struct idr ts_handles; -}; - -/** * struct hl_ts_buff - describes a timestamp buffer. - * @refcount: reference counter for usage of the buffer. - * @hdev: pointer to device this buffer belongs to. - * @mmap: true if the buff is currently mapped to user. * @kernel_buff_address: Holds the internal buffer's kernel virtual address. * @user_buff_address: Holds the user buffer's kernel virtual address. - * @id: the buffer ID. - * @mmap_size: Holds the buffer size that was mmaped. * @kernel_buff_size: Holds the internal kernel buffer size. - * @user_buff_size: Holds the user buffer size. 
*/ struct hl_ts_buff { - struct kref refcount; - struct hl_device *hdev; - atomic_t mmap; void *kernel_buff_address; void *user_buff_address; - u32 id; - u32 mmap_size; u32 kernel_buff_size; - u32 user_buff_size; +}; + +struct hl_mmap_mem_buf; + +/** + * struct hl_mem_mgr - describes unified memory manager for mappable memory chunks. + * @dev: back pointer to the owning device + * @lock: protects handles + * @handles: an idr holding all active handles to the memory buffers in the system. + */ +struct hl_mem_mgr { + struct device *dev; + spinlock_t lock; + struct idr handles; +}; + +/** + * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior + * @topic: string identifier used for logging + * @mem_id: memory type identifier, embedded in the handle and used to identify + * the memory type by handle. + * @alloc: callback executed on buffer allocation, shall allocate the memory, + * set it under buffer private, and set mappable size. + * @mmap: callback executed on mmap, must map the buffer to vma + * @release: callback executed on release, must free the resources used by the buffer + */ +struct hl_mmap_mem_buf_behavior { + const char *topic; + u64 mem_id; + + int (*alloc)(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args); + int (*mmap)(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args); + void (*release)(struct hl_mmap_mem_buf *buf); +}; + +/** + * struct hl_mmap_mem_buf - describes a single unified memory buffer + * @behavior: buffer behavior + * @mmg: back pointer to the unified memory manager + * @refcount: reference counter for buffer users + * @private: pointer to buffer behavior private data + * @mmap: atomic boolean indicating whether or not the buffer is mapped right now + * @real_mapped_size: the actual size of buffer mapped, after part of it may be released, + * may change at runtime. + * @mappable_size: the original mappable size of the buffer, does not change after + * the allocation. 
+ * @handle: the buffer id in mmg handles store + */ +struct hl_mmap_mem_buf { + struct hl_mmap_mem_buf_behavior *behavior; + struct hl_mem_mgr *mmg; + struct kref refcount; + void *private; + atomic_t mmap; + u64 real_mapped_size; + u64 mappable_size; + u64 handle; }; /** * struct hl_cb - describes a Command Buffer. - * @refcount: reference counter for usage of the CB. * @hdev: pointer to device this CB belongs to. * @ctx: pointer to the CB owner's context. - * @lock: spinlock to protect mmap flows. + * @buf: back pointer to the parent mappable memory buffer * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to * the device's MMU. - * @id: the CB's ID. * @kernel_address: Holds the CB's kernel virtual address. * @bus_address: Holds the CB's DMA address. - * @mmap_size: Holds the CB's size that was mmaped. * @size: holds the CB's size. * @cs_cnt: holds number of CS that this CB participates in. - * @mmap: true if the CB is currently mmaped to user. * @is_pool: true if CB was acquired from the pool, false otherwise. * @is_internal: internaly allocated * @is_mmu_mapped: true if the CB is mapped to the device's MMU. 
*/ struct hl_cb { - struct kref refcount; struct hl_device *hdev; struct hl_ctx *ctx; - spinlock_t lock; + struct hl_mmap_mem_buf *buf; struct list_head debugfs_list; struct list_head pool_list; struct list_head va_block_list; - u64 id; void *kernel_address; dma_addr_t bus_address; - u32 mmap_size; u32 size; atomic_t cs_cnt; - u8 mmap; u8 is_pool; u8 is_internal; u8 is_mmu_mapped; @@ -935,12 +946,12 @@ struct hl_user_interrupt { * struct timestamp_reg_free_node - holds the timestamp registration free objects node * @free_objects_node: node in the list free_obj_jobs * @cq_cb: pointer to cq command buffer to be freed - * @ts_buff: pointer to timestamp buffer to be freed + * @buf: pointer to timestamp buffer to be freed */ struct timestamp_reg_free_node { struct list_head free_objects_node; struct hl_cb *cq_cb; - struct hl_ts_buff *ts_buff; + struct hl_mmap_mem_buf *buf; }; /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job @@ -957,8 +968,8 @@ struct timestamp_reg_work_obj { }; /* struct timestamp_reg_info - holds the timestamp registration related data. - * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers. - * relevant only when doing timestamps records registration. + * @buf: pointer to the timestamp buffer which include both user/kernel buffers. + * relevant only when doing timestamps records registration. * @cq_cb: pointer to CQ counter CB. * @timestamp_kernel_addr: timestamp handle address, where to set timestamp * relevant only when doing timestamps records @@ -969,7 +980,7 @@ struct timestamp_reg_work_obj { * allocating records dynamically. 
*/ struct timestamp_reg_info { - struct hl_ts_buff *ts_buff; + struct hl_mmap_mem_buf *buf; struct hl_cb *cq_cb; u64 *timestamp_kernel_addr; u8 in_use; @@ -1068,6 +1079,15 @@ enum div_select_defs { DIV_SEL_DIVIDED_PLL = 3, }; +enum debugfs_access_type { + DEBUGFS_READ8, + DEBUGFS_WRITE8, + DEBUGFS_READ32, + DEBUGFS_WRITE32, + DEBUGFS_READ64, + DEBUGFS_WRITE64, +}; + enum pci_region { PCI_REGION_CFG, PCI_REGION_SRAM, @@ -1229,6 +1249,7 @@ struct fw_load_mgr { * its implementation is not trivial when the driver * is loaded in simulation mode (not upstreamed). * @scrub_device_mem: Scrub device memory given an address and size + * @scrub_device_dram: Scrub the dram memory of the device. * @get_int_queue_base: get the internal queue base address. * @test_queues: run simple test on all queues for sanity check. * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. @@ -1236,18 +1257,14 @@ struct fw_load_mgr { * @asic_dma_pool_free: free small DMA allocation from pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. - * @hl_dma_unmap_sg: DMA unmap scatter-gather list. + * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table. * @cs_parser: parse Command Submission. - * @asic_dma_map_sg: DMA map scatter-gather list. + * @asic_dma_map_sgtable: DMA map scatter-gather table. * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. * @restore_phase_topology: clear all SOBs amd MONs. - * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory. - * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory. - * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory. 
- * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory. * @debugfs_read_dma: debug interface for reading up to 2MB from the device's * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. @@ -1257,8 +1274,8 @@ struct fw_load_mgr { * @write_pte: write MMU page table entry to DRAM. * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft * (L1 only) or hard (L0 & L1) flush. - * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with - * ASID-VA-size mask. + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask. + * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask. * @send_heartbeat: send is-alive packet to CPU-CP and verify response. * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. @@ -1267,6 +1284,7 @@ struct fw_load_mgr { * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. + * @get_monitor_dump: retrieve monitor registers dump from F/W. * @send_cpu_message: send message to F/W. If the message is timedout, the * driver will eventually reset the device. The timeout can * be determined by the calling function or it can be 0 and @@ -1289,8 +1307,6 @@ struct fw_load_mgr { * @gen_wait_cb: Generate a wait CB. * @reset_sob: Reset a SOB. * @reset_sob_group: Reset SOB group - * @set_dma_mask_from_fw: set the DMA mask in the driver according to the - * firmware configuration * @get_device_time: Get the device time. * @collective_wait_init_cs: Generate collective master/slave packets * and place them in the relevant cs jobs @@ -1319,6 +1335,9 @@ struct fw_load_mgr { * @get_stream_master_qid_arr: get pointer to stream masters QID array * @is_valid_dram_page_size: return true if page size is supported in device * memory allocation, otherwise false. 
+ * @get_valid_dram_page_orders: get valid device memory allocation page orders + * @access_dev_mem: access device memory + * @set_dram_bar_base: set the base of the DRAM BAR */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1342,6 +1361,7 @@ struct hl_asic_funcs { void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, void *cpu_addr, dma_addr_t dma_handle); int (*scrub_device_mem)(struct hl_device *hdev, u64 addr, u64 size); + int (*scrub_device_dram)(struct hl_device *hdev, u64 val); void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id, dma_addr_t *dma_handle, u16 *queue_len); int (*test_queues)(struct hl_device *hdev); @@ -1353,12 +1373,11 @@ struct hl_asic_funcs { size_t size, dma_addr_t *dma_handle); void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, size_t size, void *vaddr); - void (*hl_dma_unmap_sg)(struct hl_device *hdev, - struct scatterlist *sgl, int nents, + void (*hl_dma_unmap_sgtable)(struct hl_device *hdev, + struct sg_table *sgt, enum dma_data_direction dir); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); - int (*asic_dma_map_sg)(struct hl_device *hdev, - struct scatterlist *sgl, int nents, + int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); u32 (*get_dma_desc_list_size)(struct hl_device *hdev, struct sg_table *sgt); @@ -1369,14 +1388,6 @@ struct hl_asic_funcs { void (*update_eq_ci)(struct hl_device *hdev, u32 val); int (*context_switch)(struct hl_device *hdev, u32 asid); void (*restore_phase_topology)(struct hl_device *hdev); - int (*debugfs_read32)(struct hl_device *hdev, u64 addr, - bool user_address, u32 *val); - int (*debugfs_write32)(struct hl_device *hdev, u64 addr, - bool user_address, u32 val); - int (*debugfs_read64)(struct hl_device *hdev, u64 addr, - bool user_address, u64 *val); - int (*debugfs_write64)(struct hl_device *hdev, u64 addr, - bool user_address, u64 val); int (*debugfs_read_dma)(struct 
hl_device *hdev, u64 addr, u32 size, void *blob_addr); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, @@ -1391,6 +1402,7 @@ struct hl_asic_funcs { u32 flags); int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); + int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, @@ -1399,8 +1411,8 @@ struct hl_asic_funcs { void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); u32 (*get_pci_id)(struct hl_device *hdev); - int (*get_eeprom_data)(struct hl_device *hdev, void *data, - size_t max_size); + int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); + int (*get_monitor_dump)(struct hl_device *hdev, void *data); int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, u16 len, u32 timeout, u64 *result); int (*pci_bars_map)(struct hl_device *hdev); @@ -1421,7 +1433,6 @@ struct hl_asic_funcs { struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group); - void (*set_dma_mask_from_fw)(struct hl_device *hdev); u64 (*get_device_time)(struct hl_device *hdev); int (*collective_wait_init_cs)(struct hl_cs *cs); int (*collective_wait_create_jobs)(struct hl_device *hdev, @@ -1445,6 +1456,12 @@ struct hl_asic_funcs { void (*set_pci_memory_regions)(struct hl_device *hdev); u32* (*get_stream_master_qid_arr)(void); bool (*is_valid_dram_page_size)(u32 page_size); + int (*mmu_get_real_page_size)(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, + u32 page_size, u32 *real_page_size, bool is_dram_addr); + void (*get_valid_dram_page_orders)(struct hl_info_dev_memalloc_page_sizes *info); + int 
(*access_dev_mem)(struct hl_device *hdev, struct pci_mem_region *region, + enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); + u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); }; @@ -1915,6 +1932,18 @@ struct hl_debug_params { bool enable; }; +/** + * struct hl_notifier_event - holds the notifier data structure + * @eventfd: the event file descriptor to raise the notifications + * @lock: mutex lock to protect the notifier data flows + * @events_mask: indicates the bitmap events + */ +struct hl_notifier_event { + struct eventfd_ctx *eventfd; + struct mutex lock; + u64 events_mask; +}; + /* * FILE PRIVATE STRUCTURE */ @@ -1926,25 +1955,25 @@ struct hl_debug_params { * @taskpid: current process ID. * @ctx: current executing context. TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. - * @cb_mgr: command buffer manager to handle multiple buffers for this FD. - * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers. + * @mem_mgr: manager descriptor for memory exportable via mmap + * @notifier_event: notifier eventfd towards user process * @debugfs_list: list of relevant ASIC debugfs. * @dev_node: node in the device list of file private data * @refcount: number of related contexts. * @restore_phase_mutex: lock for context switch and restore phase. 
*/ struct hl_fpriv { - struct hl_device *hdev; - struct file *filp; - struct pid *taskpid; - struct hl_ctx *ctx; - struct hl_ctx_mgr ctx_mgr; - struct hl_cb_mgr cb_mgr; - struct hl_ts_mgr ts_mem_mgr; - struct list_head debugfs_list; - struct list_head dev_node; - struct kref refcount; - struct mutex restore_phase_mutex; + struct hl_device *hdev; + struct file *filp; + struct pid *taskpid; + struct hl_ctx *ctx; + struct hl_ctx_mgr ctx_mgr; + struct hl_mem_mgr mem_mgr; + struct hl_notifier_event notifier_event; + struct list_head debugfs_list; + struct list_head dev_node; + struct kref refcount; + struct mutex restore_phase_mutex; }; @@ -1992,12 +2021,14 @@ struct hl_debugfs_entry { * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. * @ctx_mem_hash_spinlock: protects cb_list. - * @blob_desc: descriptor of blob + * @data_dma_blob_desc: data DMA descriptor of blob. + * @mon_dump_blob_desc: monitor dump descriptor of blob. * @state_dump: data of the system states in case of a bad cs. * @state_dump_sem: protects state_dump. * @addr: next address to read/write from/to in read/write32. * @mmu_addr: next virtual address to translate to physical address in mmu_show. * @userptr_lookup: the target user ptr to look up for on demand. + * @memory_scrub_val: the value to which the dram will be scrubbed to using cb scrub_device_dram * @mmu_asid: ASID to use while translating in mmu_show. * @state_dump_head: index of the latest state dump * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. 
@@ -2021,12 +2052,14 @@ struct hl_dbg_device_entry { spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; spinlock_t ctx_mem_hash_spinlock; - struct debugfs_blob_wrapper blob_desc; + struct debugfs_blob_wrapper data_dma_blob_desc; + struct debugfs_blob_wrapper mon_dump_blob_desc; char *state_dump[HL_STATE_DUMP_HIST_LEN]; struct rw_semaphore state_dump_sem; u64 addr; u64 mmu_addr; u64 userptr_lookup; + u64 memory_scrub_val; u32 mmu_asid; u32 state_dump_head; u8 i2c_bus; @@ -2442,6 +2475,24 @@ struct hl_mmu_funcs { }; /** + * struct hl_prefetch_work - prefetch work structure handler + * @pf_work: actual work struct. + * @ctx: compute context. + * @va: virtual address to pre-fetch. + * @size: pre-fetch size. + * @flags: operation flags. + * @asid: ASID for maintenance operation. + */ +struct hl_prefetch_work { + struct work_struct pf_work; + struct hl_ctx *ctx; + u64 va; + u64 size; + u32 flags; + u32 asid; +}; + +/* * number of user contexts allowed to call wait_for_multi_cs ioctl in * parallel */ @@ -2517,37 +2568,50 @@ struct hl_clk_throttle { }; /** - * struct last_error_session_info - info about last session in which CS timeout or - * razwi error occurred. - * @open_dev_timestamp: device open timestamp. - * @cs_timeout_timestamp: CS timeout timestamp. - * @razwi_timestamp: razwi timestamp. - * @cs_write_disable: if set writing to CS parameters in the structure is disabled so the - * first (root cause) CS timeout will not be overwritten. - * @razwi_write_disable: if set writing to razwi parameters in the structure is disabled so the - * first (root cause) razwi will not be overwritten. - * @cs_timeout_seq: CS timeout sequence number. - * @razwi_addr: address that caused razwi. - * @razwi_engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does - * not have engine id it will be set to U16_MAX. - * @razwi_engine_id_2: second engine id of razwi initiator. 
Might happen that razwi have 2 possible - * engines which one them caused the razwi. In that case, it will contain the - * second possible engine id, otherwise it will be set to U16_MAX. - * @razwi_non_engine_initiator: in case the initiator of the razwi does not have engine id. - * @razwi_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + * struct cs_timeout_info - info of last CS timeout occurred. + * @timestamp: CS timeout timestamp. + * @write_disable: if set writing to CS parameters in the structure is disabled so, + * the first (root cause) CS timeout will not be overwritten. + * @seq: CS timeout sequence number. + */ +struct cs_timeout_info { + ktime_t timestamp; + atomic_t write_disable; + u64 seq; +}; + +/** + * struct razwi_info - info about last razwi error occurred. + * @timestamp: razwi timestamp. + * @write_disable: if set writing to razwi parameters in the structure is disabled so the + * first (root cause) razwi will not be overwritten. + * @addr: address that caused razwi. + * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does + * not have engine id it will be set to U16_MAX. + * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible + * engines which one them caused the razwi. In that case, it will contain the + * second possible engine id, otherwise it will be set to U16_MAX. + * @non_engine_initiator: in case the initiator of the razwi does not have engine id. + * @type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + */ +struct razwi_info { + ktime_t timestamp; + atomic_t write_disable; + u64 addr; + u16 engine_id_1; + u16 engine_id_2; + u8 non_engine_initiator; + u8 type; +}; + +/** + * struct last_error_session_info - info about last session errors occurred. + * @cs_timeout: CS timeout error last information. + * @razwi: razwi last information. 
*/ struct last_error_session_info { - ktime_t open_dev_timestamp; - ktime_t cs_timeout_timestamp; - ktime_t razwi_timestamp; - atomic_t cs_write_disable; - atomic_t razwi_write_disable; - u64 cs_timeout_seq; - u64 razwi_addr; - u16 razwi_engine_id_1; - u16 razwi_engine_id_2; - u8 razwi_non_engine_initiator; - u8 razwi_type; + struct cs_timeout_info cs_timeout; + struct razwi_info razwi; }; /** @@ -2614,11 +2678,12 @@ struct hl_reset_info { * context. * @eq_wq: work queue of event queue for executing work in process context. * @ts_free_obj_wq: work queue for timestamp registration objects release. + * @pf_wq: work queue for MMU pre-fetch operations. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. * @cs_mirror_lock: protects cs_mirror_list. - * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs. + * @kernel_mem_mgr: memory manager for memory buffers with lifespan of driver. * @event_queue: event queue for IRQ from CPU-CP. * @dma_pool: DMA pool for small allocations. * @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address. @@ -2656,9 +2721,10 @@ struct hl_reset_info { * @state_dump_specs: constants and dictionaries needed to dump system state. * @multi_cs_completion: array of multi-CS completion. * @clk_throttling: holds information about current/previous clock throttling events - * @reset_info: holds current device reset information. * @last_error: holds information about last session in which CS timeout or razwi error occurred. + * @reset_info: holds current device reset information. * @stream_master_qid_arr: pointer to array with QIDs of master streams. + * @fw_major_version: major version of current loaded preboot * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2678,6 +2744,9 @@ struct hl_reset_info { * session. 
* @open_counter: number of successful device open operations. * @fw_poll_interval_usec: FW status poll interval in usec. + * used for CPU boot status + * @fw_comms_poll_interval_usec: FW comms/protocol poll interval in usec. + * used for COMMs protocols cmds(COMMS_STS_*) * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. * @major: habanalabs kernel driver major. @@ -2686,6 +2755,7 @@ struct hl_reset_info { * @id_control: minor of the control device * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. + * @is_in_dram_scrub: true if dram scrub operation is on going. * @disabled: is device disabled. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. @@ -2699,7 +2769,6 @@ struct hl_reset_info { * huge pages. * @init_done: is the initialization of the device done. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) - * @dma_mask: the dma mask that was set for this device * @in_debug: whether the device is in a state where the profiling/tracing infrastructure * can be used. This indication is needed because in some ASICs we need to do * specific operations to enable that infrastructure. @@ -2721,6 +2790,8 @@ struct hl_reset_info { * cases where Linux was not loaded to device CPU * @supports_wait_for_multi_cs: true if wait for multi CS is supported * @is_compute_ctx_active: Whether there is an active compute context executing. + * @compute_ctx_in_release: true if the current compute context is being released. + * @supports_mmu_prefetch: true if prefetch is supported, otherwise false. 
*/ struct hl_device { struct pci_dev *pdev; @@ -2742,11 +2813,12 @@ struct hl_device { struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; struct workqueue_struct *ts_free_obj_wq; + struct workqueue_struct *pf_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; spinlock_t cs_mirror_lock; - struct hl_cb_mgr kernel_cb_mgr; + struct hl_mem_mgr kernel_mem_mgr; struct hl_eq event_queue; struct dma_pool *dma_pool; void *cpu_accessible_dma_mem; @@ -2797,6 +2869,7 @@ struct hl_device { struct hl_reset_info reset_info; u32 *stream_master_qid_arr; + u32 fw_major_version; atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -2807,12 +2880,15 @@ struct hl_device { u64 open_counter; u64 fw_poll_interval_usec; ktime_t last_successful_open_ktime; + u64 fw_comms_poll_interval_usec; + enum cpucp_card_types card_type; u32 major; u32 high_pll; u16 id; u16 id_control; u16 cpu_pci_msb_addr; + u8 is_in_dram_scrub; u8 disabled; u8 late_init_done; u8 hwmon_initialized; @@ -2823,7 +2899,6 @@ struct hl_device { u8 pmmu_huge_range; u8 init_done; u8 device_cpu_disabled; - u8 dma_mask; u8 in_debug; u8 cdev_sysfs_created; u8 stop_on_err; @@ -2839,6 +2914,8 @@ struct hl_device { u8 supports_wait_for_multi_cs; u8 stream_master_qid_arr_size; u8 is_compute_ctx_active; + u8 compute_ctx_in_release; + u8 supports_mmu_prefetch; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -2971,6 +3048,14 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, return ((address <= range_end_address) && (range_start_address <= end_address)); } +uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); +int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); +void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir); +int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type); +int 
hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region, + enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); int hl_device_open(struct inode *inode, struct file *filp); int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, @@ -3013,7 +3098,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv); void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx); int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); void hl_ctx_do_release(struct kref *ref); -void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); +void hl_ctx_get(struct hl_ctx *ctx); int hl_ctx_put(struct hl_ctx *ctx); struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev); struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); @@ -3034,23 +3119,21 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr); +void hl_notifier_event_send_all(struct hl_device *hdev, u64 event); + int hl_sysfs_init(struct hl_device *hdev); void hl_sysfs_fini(struct hl_device *hdev); int hl_hwmon_init(struct hl_device *hdev); void hl_hwmon_fini(struct hl_device *hdev); -int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, +int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, bool map_cb, u64 *handle); -int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); -int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); +int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle); int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); -struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 handle); +struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle); void hl_cb_put(struct hl_cb *cb); -void 
hl_cb_mgr_init(struct hl_cb_mgr *mgr); -void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr); struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, bool internal_cb); int hl_cb_pool_init(struct hl_device *hdev); @@ -3104,6 +3187,8 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx); void hl_mmu_ctx_fini(struct hl_ctx *ctx); int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte); +int hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, + u32 page_size, u32 *real_page_size, bool is_dram_addr); int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte); int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, @@ -3112,6 +3197,7 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags); int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); +int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte); u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, u8 hop_idx, u64 hop_addr, u64 virt_addr); @@ -3149,6 +3235,7 @@ int hl_fw_cpucp_handshake(struct hl_device *hdev, u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg, u32 boot_err1_reg); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); +int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data); int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, @@ -3224,11 +3311,19 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset, const char *format, ...); char *hl_format_as_binary(char *buf, size_t buf_len, u32 n); const char *hl_sync_engine_to_string(enum 
hl_sync_engine_type engine_type); -void hl_ts_mgr_init(struct hl_ts_mgr *mgr); -void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr); -int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); -struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle); -void hl_ts_put(struct hl_ts_buff *buff); + +void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg); +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg); +int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, + void *args); +struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, + u64 handle); +int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle); +int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf); +struct hl_mmap_mem_buf * +hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, + struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp, + void *args); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index ca404ed9d9a7..37edb69a7255 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -134,13 +134,14 @@ int hl_device_open(struct inode *inode, struct file *filp) hpriv->hdev = hdev; filp->private_data = hpriv; hpriv->filp = filp; + + mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->restore_phase_mutex); kref_init(&hpriv->refcount); nonseekable_open(inode, filp); - hl_cb_mgr_init(&hpriv->cb_mgr); hl_ctx_mgr_init(&hpriv->ctx_mgr); - hl_ts_mgr_init(&hpriv->ts_mem_mgr); + hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); @@ -150,7 +151,28 @@ int hl_device_open(struct inode *inode, struct file *filp) dev_err_ratelimited(hdev->dev, "Can't open %s because it is %s\n", dev_name(hdev->dev), hdev->status[status]); - rc = -EPERM; + + if (status == HL_DEVICE_STATUS_IN_RESET) + rc = -EAGAIN; + else + rc = -EPERM; + + goto 
out_err; + } + + if (hdev->is_in_dram_scrub) { + dev_dbg_ratelimited(hdev->dev, + "Can't open %s during dram scrub\n", + dev_name(hdev->dev)); + rc = -EAGAIN; + goto out_err; + } + + if (hdev->compute_ctx_in_release) { + dev_dbg_ratelimited(hdev->dev, + "Can't open %s because another user is still releasing it\n", + dev_name(hdev->dev)); + rc = -EAGAIN; goto out_err; } @@ -173,8 +195,8 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); - atomic_set(&hdev->last_error.cs_write_disable, 0); - atomic_set(&hdev->last_error.razwi_write_disable, 0); + atomic_set(&hdev->last_error.cs_timeout.write_disable, 0); + atomic_set(&hdev->last_error.razwi.write_disable, 0); hdev->open_counter++; hdev->last_successful_open_jif = jiffies; @@ -184,11 +206,11 @@ int hl_device_open(struct inode *inode, struct file *filp) out_err: mutex_unlock(&hdev->fpriv_list_lock); - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); filp->private_data = NULL; mutex_destroy(&hpriv->restore_phase_mutex); + mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid); kfree(hpriv); @@ -222,9 +244,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) hpriv->hdev = hdev; filp->private_data = hpriv; hpriv->filp = filp; + + mutex_init(&hpriv->notifier_event.lock); nonseekable_open(inode, filp); - hpriv->taskpid = find_get_pid(current->pid); + hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); mutex_lock(&hdev->fpriv_ctrl_list_lock); @@ -288,6 +312,7 @@ static int fixup_device_params(struct hl_device *hdev) hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; + hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; hdev->stop_on_err = true; hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; @@ -296,9 +321,6 @@ static 
int fixup_device_params(struct hl_device *hdev) /* Enable only after the initialization of the device */ hdev->disabled = true; - /* Set default DMA mask to 32 bits */ - hdev->dma_mask = 32; - return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index c13a3c2a7013..c7864d6bb0a1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -76,6 +76,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) if (hw_ip.dram_size > PAGE_SIZE) hw_ip.dram_enabled = 1; hw_ip.dram_page_size = prop->dram_page_size; + hw_ip.device_mem_alloc_default_page_size = prop->device_mem_alloc_default_page_size; hw_ip.num_of_events = prop->num_of_events; memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version, @@ -115,6 +116,23 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate, return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0; } +static int events_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + u32 max_size = args->return_size; + u64 events_mask; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((max_size < sizeof(u64)) || (!out)) + return -EINVAL; + + mutex_lock(&hpriv->notifier_event.lock); + events_mask = hpriv->notifier_event.events_mask; + hpriv->notifier_event.events_mask = 0; + mutex_unlock(&hpriv->notifier_event.lock); + + return copy_to_user(out, &events_mask, sizeof(u64)) ? 
-EFAULT : 0; +} + static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; @@ -497,6 +515,8 @@ static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args) open_stats_info.last_open_period_ms = jiffies64_to_msecs( hdev->last_open_session_duration_jif); open_stats_info.open_counter = hdev->open_counter; + open_stats_info.is_compute_ctx_active = hdev->is_compute_ctx_active; + open_stats_info.compute_ctx_in_release = hdev->compute_ctx_in_release; return copy_to_user(out, &open_stats_info, min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0; @@ -549,7 +569,7 @@ static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *a if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp); + info.timestamp = ktime_to_ns(hdev->last_successful_open_ktime); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } @@ -564,8 +584,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.seq = hdev->last_error.cs_timeout_seq; - info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp); + info.seq = hdev->last_error.cs_timeout.seq; + info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; } @@ -580,16 +600,74 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp); - info.addr = hdev->last_error.razwi_addr; - info.engine_id_1 = hdev->last_error.razwi_engine_id_1; - info.engine_id_2 = hdev->last_error.razwi_engine_id_2; - info.no_engine_id = hdev->last_error.razwi_non_engine_initiator; - info.error_type = hdev->last_error.razwi_type; + info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp); + info.addr = hdev->last_error.razwi.addr; + info.engine_id_1 = hdev->last_error.razwi.engine_id_1; + info.engine_id_2 = hdev->last_error.razwi.engine_id_2; + info.no_engine_id = hdev->last_error.razwi.non_engine_initiator; + info.error_type = hdev->last_error.razwi.type; + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; +} + +static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct hl_info_dev_memalloc_page_sizes info = {0}; + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + + if ((!max_size) || (!out)) + return -EINVAL; + + /* + * Future ASICs that will support multiple DRAM page sizes will support only "powers of 2" + * pages (unlike some of the ASICs before supporting multiple page sizes). + * For this reason for all ASICs that not support multiple page size the function will + * return an empty bitmask indicating that multiple page sizes is not supported. + */ + hdev->asic_funcs->get_valid_dram_page_orders(&info); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; } +static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + int rc; + + /* check if there is already a registered on that process */ + mutex_lock(&hpriv->notifier_event.lock); + if (hpriv->notifier_event.eventfd) { + mutex_unlock(&hpriv->notifier_event.lock); + return -EINVAL; + } + + hpriv->notifier_event.eventfd = eventfd_ctx_fdget(args->eventfd); + if (IS_ERR(hpriv->notifier_event.eventfd)) { + rc = PTR_ERR(hpriv->notifier_event.eventfd); + hpriv->notifier_event.eventfd = NULL; + mutex_unlock(&hpriv->notifier_event.lock); + return rc; + } + + mutex_unlock(&hpriv->notifier_event.lock); + return 0; +} + +static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + mutex_lock(&hpriv->notifier_event.lock); + if (!hpriv->notifier_event.eventfd) { + mutex_unlock(&hpriv->notifier_event.lock); + return -EINVAL; + } + + eventfd_ctx_put(hpriv->notifier_event.eventfd); + hpriv->notifier_event.eventfd = NULL; + mutex_unlock(&hpriv->notifier_event.lock); + return 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -640,6 +718,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_RAZWI_EVENT: return razwi_info(hpriv, args); + case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES: + return dev_mem_alloc_page_sizes_info(hpriv, args); + + case HL_INFO_GET_EVENTS: + return events_info(hpriv, args); + default: break; } @@ -690,6 +774,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_DRAM_PENDING_ROWS: return dram_pending_rows_info(hpriv, args); + case HL_INFO_REGISTER_EVENTFD: + return eventfd_register(hpriv, args); + + case HL_INFO_UNREGISTER_EVENTFD: + return eventfd_unregister(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -EINVAL; diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index e2bc128f2291..8500e15ef743 100644 --- a/drivers/misc/habanalabs/common/irq.c 
+++ b/drivers/misc/habanalabs/common/irq.c @@ -152,11 +152,11 @@ static void hl_ts_free_objects(struct work_struct *work) struct hl_device *hdev = job->hdev; list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) { - dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n", - free_obj->ts_buff, + dev_dbg(hdev->dev, "About to put refcount to buf (%p) cq_cb(%p)\n", + free_obj->buf, free_obj->cq_cb); - hl_ts_put(free_obj->ts_buff); + hl_mmap_mem_buf_put(free_obj->buf); hl_cb_put(free_obj->cq_cb); kfree(free_obj); } @@ -210,7 +210,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi /* Putting the refcount for ts_buff and cq_cb objects will be handled * in workqueue context, just add job to free_list. */ - free_node->ts_buff = pend->ts_reg_info.ts_buff; + free_node->buf = pend->ts_reg_info.buf; free_node->cq_cb = pend->ts_reg_info.cq_cb; list_add(&free_node->free_objects_node, *free_list); @@ -244,7 +244,7 @@ static void handle_user_cq(struct hl_device *hdev, list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) { if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || !pend->cq_kernel_addr) { - if (pend->ts_reg_info.ts_buff) { + if (pend->ts_reg_info.buf) { if (!reg_node_handle_fail) { rc = handle_registration_node(hdev, pend, &ts_reg_free_list_head); @@ -282,10 +282,6 @@ irqreturn_t hl_irq_handler_user_cq(int irq, void *arg) struct hl_user_interrupt *user_cq = arg; struct hl_device *hdev = user_cq->hdev; - dev_dbg(hdev->dev, - "got user completion interrupt id %u", - user_cq->interrupt_id); - /* Handle user cq interrupts registered on all interrupts */ handle_user_cq(hdev, &hdev->common_user_interrupt); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index a13506dd8119..663dd7e589d4 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -41,7 
+41,7 @@ static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u return -EINVAL; } } else { - psize = hdev->asic_prop.dram_page_size; + psize = prop->device_mem_alloc_default_page_size; } *page_size = psize; @@ -117,7 +117,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, paddr = gen_pool_alloc(vm->dram_pg_pool, total_size); if (!paddr) { dev_err(hdev->dev, - "failed to allocate %llu contiguous pages with total size of %llu\n", + "Cannot allocate %llu contiguous pages with total size of %llu\n", num_pgs, total_size); return -ENOMEM; } @@ -156,9 +156,10 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, else phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool, page_size); + if (!phys_pg_pack->pages[i]) { dev_err(hdev->dev, - "Failed to allocate device memory (out of memory)\n"); + "Cannot allocate device memory (out of memory)\n"); rc = -ENOMEM; goto page_err; } @@ -237,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, goto pin_err; } - rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, DMA_BIDIRECTIONAL); - if (rc) { - dev_err(hdev->dev, "failed to map sgt with DMA region\n"); - goto dma_map_err; - } - userptr->dma_mapped = true; userptr->dir = DMA_BIDIRECTIONAL; userptr->vm_type = VM_TYPE_USERPTR; *p_userptr = userptr; + rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); + if (rc) { + dev_err(hdev->dev, "failed to map sgt with DMA region\n"); + goto dma_map_err; + } + return 0; dma_map_err: @@ -900,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, * consecutive block. 
*/ total_npages = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + for_each_sgtable_dma_sg(userptr->sgt, sg, i) { npages = hl_get_sg_info(sg, &dma_addr); total_npages += npages; @@ -929,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, phys_pg_pack->total_size = total_npages * page_size; j = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + for_each_sgtable_dma_sg(userptr->sgt, sg, i) { npages = hl_get_sg_info(sg, &dma_addr); /* align down to physical page size and save the offset */ @@ -1102,21 +1102,24 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, * map a device virtual block to this pages and return the start address of * this block. */ -static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, - u64 *device_addr) +static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr) { - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; struct hl_vm_phys_pg_pack *phys_pg_pack; + enum hl_va_range_type va_range_type = 0; + struct hl_device *hdev = ctx->hdev; struct hl_userptr *userptr = NULL; + u32 handle = 0, va_block_align; struct hl_vm_hash_node *hnode; + struct hl_vm *vm = &hdev->vm; struct hl_va_range *va_range; - enum vm_type *vm_type; + bool is_userptr, do_prefetch; u64 ret_vaddr, hint_addr; - u32 handle = 0, va_block_align; + enum vm_type *vm_type; int rc; - bool is_userptr = args->flags & HL_MEM_USERPTR; - enum hl_va_range_type va_range_type = 0; + + /* set map flags */ + is_userptr = args->flags & HL_MEM_USERPTR; + do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH); /* Assume failure */ *device_addr = 0; @@ -1241,19 +1244,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { - mutex_unlock(&ctx->mmu_lock); - dev_err(hdev->dev, "mapping page pack failed for handle %u\n", - handle); + dev_err(hdev->dev, 
"mapping page pack failed for handle %u\n", handle); goto map_err; } rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, ret_vaddr, phys_pg_pack->total_size); + if (rc) + goto map_err; mutex_unlock(&ctx->mmu_lock); - if (rc) - goto map_err; + /* + * prefetch is done upon user's request. it is performed in WQ as and so can + * be outside the MMU lock. the operation itself is already protected by the mmu lock + */ + if (do_prefetch) { + rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr, + phys_pg_pack->total_size); + if (rc) + goto map_err; + } ret_vaddr += phys_pg_pack->offset; @@ -1272,6 +1283,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return rc; map_err: + mutex_unlock(&ctx->mmu_lock); + if (add_va_block(hdev, va_range, ret_vaddr, ret_vaddr + phys_pg_pack->total_size - 1)) dev_warn(hdev->dev, @@ -1509,7 +1522,7 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) vma->vm_ops = &hw_block_vm_ops; vma->vm_private_data = lnode; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); if (rc) { @@ -1819,7 +1832,7 @@ static int export_dmabuf_common(struct hl_ctx *ctx, } hl_dmabuf->ctx = ctx; - hl_ctx_get(hdev, hl_dmabuf->ctx); + hl_ctx_get(hl_dmabuf->ctx); *dmabuf_fd = fd; @@ -2076,164 +2089,34 @@ out: return rc; } -static void ts_buff_release(struct kref *ref) -{ - struct hl_ts_buff *buff; - - buff = container_of(ref, struct hl_ts_buff, refcount); - - vfree(buff->kernel_buff_address); - vfree(buff->user_buff_address); - kfree(buff); -} - -struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, - u32 handle) -{ - struct hl_ts_buff *buff; - - spin_lock(&mgr->ts_lock); - buff = idr_find(&mgr->ts_handles, handle); - if (!buff) { - spin_unlock(&mgr->ts_lock); - dev_warn(hdev->dev, - "TS buff get failed, no match to handle 0x%x\n", handle); - return NULL; - } - 
kref_get(&buff->refcount); - spin_unlock(&mgr->ts_lock); - - return buff; -} - -void hl_ts_put(struct hl_ts_buff *buff) +static void ts_buff_release(struct hl_mmap_mem_buf *buf) { - kref_put(&buff->refcount, ts_buff_release); -} - -static void buff_vm_close(struct vm_area_struct *vma) -{ - struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data; - long new_mmap_size; - - new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start); + struct hl_ts_buff *ts_buff = buf->private; - if (new_mmap_size > 0) { - buff->mmap_size = new_mmap_size; - return; - } - - atomic_set(&buff->mmap, 0); - hl_ts_put(buff); - vma->vm_private_data = NULL; + vfree(ts_buff->kernel_buff_address); + vfree(ts_buff->user_buff_address); + kfree(ts_buff); } -static const struct vm_operations_struct ts_buff_vm_ops = { - .close = buff_vm_close -}; - -int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) +static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args) { - struct hl_device *hdev = hpriv->hdev; - struct hl_ts_buff *buff; - u32 handle, user_buff_size; - int rc; - - /* We use the page offset to hold the idr and thus we need to clear - * it before doing the mmap itself - */ - handle = vma->vm_pgoff; - vma->vm_pgoff = 0; - - buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle); - if (!buff) { - dev_err(hdev->dev, - "TS buff mmap failed, no match to handle 0x%x\n", handle); - return -EINVAL; - } - - /* Validation check */ - user_buff_size = vma->vm_end - vma->vm_start; - if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) { - dev_err(hdev->dev, - "TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n", - user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE)); - rc = -EINVAL; - goto put_buff; - } - -#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK - if (!access_ok(VERIFY_WRITE, - (void __user *) (uintptr_t) vma->vm_start, user_buff_size)) { -#else - if (!access_ok((void __user *) (uintptr_t) vma->vm_start, - user_buff_size)) { -#endif - 
dev_err(hdev->dev, - "user pointer is invalid - 0x%lx\n", - vma->vm_start); - - rc = -EINVAL; - goto put_buff; - } + struct hl_ts_buff *ts_buff = buf->private; - if (atomic_cmpxchg(&buff->mmap, 0, 1)) { - dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n"); - rc = -EINVAL; - goto put_buff; - } - - vma->vm_ops = &ts_buff_vm_ops; - vma->vm_private_data = buff; vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = remap_vmalloc_range(vma, buff->user_buff_address, 0); - if (rc) { - atomic_set(&buff->mmap, 0); - goto put_buff; - } - - buff->mmap_size = buff->user_buff_size; - vma->vm_pgoff = handle; - - return 0; - -put_buff: - hl_ts_put(buff); - return rc; -} - -void hl_ts_mgr_init(struct hl_ts_mgr *mgr) -{ - spin_lock_init(&mgr->ts_lock); - idr_init(&mgr->ts_handles); + return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0); } -void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr) -{ - struct hl_ts_buff *buff; - struct idr *idp; - u32 id; - - idp = &mgr->ts_handles; - - idr_for_each_entry(idp, buff, id) { - if (kref_put(&buff->refcount, ts_buff_release) != 1) - dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n", - id); - } - - idr_destroy(&mgr->ts_handles); -} - -static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements) +static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args) { struct hl_ts_buff *ts_buff = NULL; - u32 size; + u32 size, num_elements; void *p; + num_elements = *(u32 *)args; + ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL); if (!ts_buff) - return NULL; + return -ENOMEM; /* Allocate the user buffer */ size = num_elements * sizeof(u64); @@ -2242,7 +2125,7 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme goto free_mem; ts_buff->user_buff_address = p; - ts_buff->user_buff_size = size; + buf->mappable_size = size; /* Allocate the internal kernel buffer */ size = num_elements * sizeof(struct 
hl_user_pending_interrupt); @@ -2253,15 +2136,25 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme ts_buff->kernel_buff_address = p; ts_buff->kernel_buff_size = size; - return ts_buff; + buf->private = ts_buff; + + return 0; free_user_buff: vfree(ts_buff->user_buff_address); free_mem: kfree(ts_buff); - return NULL; + return -ENOMEM; } +static struct hl_mmap_mem_buf_behavior hl_ts_behavior = { + .topic = "TS", + .mem_id = HL_MMAP_TYPE_TS_BUFF, + .mmap = hl_ts_mmap, + .alloc = hl_ts_alloc_buf, + .release = ts_buff_release, +}; + /** * allocate_timestamps_buffers() - allocate timestamps buffers * This function will allocate ts buffer that will later on be mapped to the user @@ -2278,54 +2171,22 @@ free_mem: */ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle) { - struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr; - struct hl_device *hdev = hpriv->hdev; - struct hl_ts_buff *ts_buff; - int rc = 0; + struct hl_mem_mgr *mmg = &hpriv->mem_mgr; + struct hl_mmap_mem_buf *buf; if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) { - dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n", + dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n", args->num_of_elements, TS_MAX_ELEMENTS_NUM); return -EINVAL; } - /* Allocate ts buffer object - * This object will contain two buffers one that will be mapped to the user - * and another internal buffer for the driver use only, which won't be mapped - * to the user. 
- */ - ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements); - if (!ts_buff) { - rc = -ENOMEM; - goto out_err; - } - - spin_lock(&ts_mgr->ts_lock); - rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC); - spin_unlock(&ts_mgr->ts_lock); - if (rc < 0) { - dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n"); - goto release_ts_buff; - } - - ts_buff->id = rc; - ts_buff->hdev = hdev; - - kref_init(&ts_buff->refcount); - - /* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */ - *handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF; - *handle <<= PAGE_SHIFT; + buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements); + if (!buf) + return -ENOMEM; - dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id); + *handle = buf->handle; return 0; - -release_ts_buff: - kref_put(&ts_buff->refcount, ts_buff_release); -out_err: - *handle = 0; - return rc; } int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) @@ -2587,9 +2448,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) hl_debugfs_remove_userptr(hdev, userptr); if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, - userptr->dir); + hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); kvfree(userptr->pages); diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c new file mode 100644 index 000000000000..ea5f2bd31b0a --- /dev/null +++ b/drivers/misc/habanalabs/common/memory_mgr.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2022 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +/** + * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to + * the buffer descriptor. 
+ * + * @mmg: parent unified memory manager + * @handle: requested buffer handle + * + * Find the buffer in the store and return a pointer to its descriptor. + * Increase buffer refcount. If not found - return NULL. + */ +struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, u64 handle) +{ + struct hl_mmap_mem_buf *buf; + + spin_lock(&mmg->lock); + buf = idr_find(&mmg->handles, lower_32_bits(handle >> PAGE_SHIFT)); + if (!buf) { + spin_unlock(&mmg->lock); + dev_warn(mmg->dev, + "Buff get failed, no match to handle %#llx\n", handle); + return NULL; + } + kref_get(&buf->refcount); + spin_unlock(&mmg->lock); + return buf; +} + +/** + * hl_mmap_mem_buf_destroy - destroy the unused buffer + * + * @buf: memory manager buffer descriptor + * + * Internal function, used as a final step of buffer release. Shall be invoked + * only when the buffer is no longer in use (removed from idr). Will call the + * release callback (if applicable), and free the memory. + */ +static void hl_mmap_mem_buf_destroy(struct hl_mmap_mem_buf *buf) +{ + if (buf->behavior->release) + buf->behavior->release(buf); + + kfree(buf); +} + +/** + * hl_mmap_mem_buf_release - release buffer + * + * @kref: kref that reached 0. + * + * Internal function, used as a kref release callback, when the last user of + * the buffer is released. Shall be called from an interrupt context. + */ +static void hl_mmap_mem_buf_release(struct kref *kref) +{ + struct hl_mmap_mem_buf *buf = + container_of(kref, struct hl_mmap_mem_buf, refcount); + + spin_lock(&buf->mmg->lock); + idr_remove(&buf->mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT)); + spin_unlock(&buf->mmg->lock); + + hl_mmap_mem_buf_destroy(buf); +} + +/** + * hl_mmap_mem_buf_remove_idr_locked - remove handle from idr + * + * @kref: kref that reached 0. + * + * Internal function, used for kref put by handle. Assumes mmg lock is taken. + * Will remove the buffer from idr, without destroying it. 
+ */ +static void hl_mmap_mem_buf_remove_idr_locked(struct kref *kref) +{ + struct hl_mmap_mem_buf *buf = + container_of(kref, struct hl_mmap_mem_buf, refcount); + + idr_remove(&buf->mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT)); +} + +/** + * hl_mmap_mem_buf_put - decrease the reference to the buffer + * + * @buf: memory manager buffer descriptor + * + * Decrease the reference to the buffer, and release it if it was the last one. + * Shall be called from an interrupt context. + */ +int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf) +{ + return kref_put(&buf->refcount, hl_mmap_mem_buf_release); +} + +/** + * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the + * given handle. + * + * @mmg: parent unified memory manager + * @handle: requested buffer handle + * + * Decrease the reference to the buffer, and release it if it was the last one. + * Shall not be called from an interrupt context. Return -EINVAL if handle was + * not found, else return the put outcome (0 or 1). + */ +int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle) +{ + struct hl_mmap_mem_buf *buf; + + spin_lock(&mmg->lock); + buf = idr_find(&mmg->handles, lower_32_bits(handle >> PAGE_SHIFT)); + if (!buf) { + spin_unlock(&mmg->lock); + dev_dbg(mmg->dev, + "Buff put failed, no match to handle %#llx\n", handle); + return -EINVAL; + } + + if (kref_put(&buf->refcount, hl_mmap_mem_buf_remove_idr_locked)) { + spin_unlock(&mmg->lock); + hl_mmap_mem_buf_destroy(buf); + return 1; + } + + spin_unlock(&mmg->lock); + return 0; +} + +/** + * hl_mmap_mem_buf_alloc - allocate a new mappable buffer + * + * @mmg: parent unified memory manager + * @behavior: behavior object describing this buffer polymorphic behavior + * @gfp: gfp flags to use for the memory allocations + * @args: additional args passed to behavior->alloc + * + * Allocate and register a new memory buffer inside the given memory manager. + * Return the pointer to the new buffer on success or NULL on failure. 
+ */ +struct hl_mmap_mem_buf * +hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, + struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp, + void *args) +{ + struct hl_mmap_mem_buf *buf; + int rc; + + buf = kzalloc(sizeof(*buf), gfp); + if (!buf) + return NULL; + + spin_lock(&mmg->lock); + rc = idr_alloc(&mmg->handles, buf, 1, 0, GFP_ATOMIC); + spin_unlock(&mmg->lock); + if (rc < 0) { + dev_err(mmg->dev, + "%s: Failed to allocate IDR for a new buffer, rc=%d\n", + behavior->topic, rc); + goto free_buf; + } + + buf->mmg = mmg; + buf->behavior = behavior; + buf->handle = (((u64)rc | buf->behavior->mem_id) << PAGE_SHIFT); + kref_init(&buf->refcount); + + rc = buf->behavior->alloc(buf, gfp, args); + if (rc) { + dev_err(mmg->dev, "%s: Failure in buffer alloc callback %d\n", + behavior->topic, rc); + goto remove_idr; + } + + return buf; + +remove_idr: + spin_lock(&mmg->lock); + idr_remove(&mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT)); + spin_unlock(&mmg->lock); +free_buf: + kfree(buf); + return NULL; +} + +/** + * hl_mmap_mem_buf_vm_close - handle mmap close + * + * @vma: the vma object for which mmap was closed. + * + * Put the memory buffer if it is no longer mapped. + */ +static void hl_mmap_mem_buf_vm_close(struct vm_area_struct *vma) +{ + struct hl_mmap_mem_buf *buf = + (struct hl_mmap_mem_buf *)vma->vm_private_data; + long new_mmap_size; + + new_mmap_size = buf->real_mapped_size - (vma->vm_end - vma->vm_start); + + if (new_mmap_size > 0) { + buf->real_mapped_size = new_mmap_size; + return; + } + + atomic_set(&buf->mmap, 0); + hl_mmap_mem_buf_put(buf); + vma->vm_private_data = NULL; +} + +static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = { + .close = hl_mmap_mem_buf_vm_close +}; + +/** + * hl_mem_mgr_mmap - map the given buffer to the user + * + * @mmg: unified memory manager + * @vma: the vma object to which the buffer is mapped. 
+ * @args: additional args passed to behavior->mmap + * + * Map the buffer specified by the vma->vm_pgoff to the given vma. + */ +int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, + void *args) +{ + struct hl_mmap_mem_buf *buf; + u64 user_mem_size; + u64 handle; + int rc; + + /* We use the page offset to hold the idr and thus we need to clear + * it before doing the mmap itself + */ + handle = vma->vm_pgoff << PAGE_SHIFT; + vma->vm_pgoff = 0; + + /* Reference was taken here */ + buf = hl_mmap_mem_buf_get(mmg, handle); + if (!buf) { + dev_err(mmg->dev, + "Memory mmap failed, no match to handle %#llx\n", handle); + return -EINVAL; + } + + /* Validation check */ + user_mem_size = vma->vm_end - vma->vm_start; + if (user_mem_size != ALIGN(buf->mappable_size, PAGE_SIZE)) { + dev_err(mmg->dev, + "%s: Memory mmap failed, mmap VM size 0x%llx != 0x%llx allocated physical mem size\n", + buf->behavior->topic, user_mem_size, buf->mappable_size); + rc = -EINVAL; + goto put_mem; + } + +#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK + if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start, + user_mem_size)) { +#else + if (!access_ok((void __user *)(uintptr_t)vma->vm_start, + user_mem_size)) { +#endif + dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n", + buf->behavior->topic, vma->vm_start); + + rc = -EINVAL; + goto put_mem; + } + + if (atomic_cmpxchg(&buf->mmap, 0, 1)) { + dev_err(mmg->dev, + "%s, Memory mmap failed, already mmaped to user\n", + buf->behavior->topic); + rc = -EINVAL; + goto put_mem; + } + + vma->vm_ops = &hl_mmap_mem_buf_vm_ops; + + /* Note: We're transferring the memory reference to vma->vm_private_data here. 
*/ + + vma->vm_private_data = buf; + + rc = buf->behavior->mmap(buf, vma, args); + if (rc) { + atomic_set(&buf->mmap, 0); + goto put_mem; + } + + buf->real_mapped_size = buf->mappable_size; + vma->vm_pgoff = handle >> PAGE_SHIFT; + + return 0; + +put_mem: + hl_mmap_mem_buf_put(buf); + return rc; +} + +/** + * hl_mem_mgr_init - initialize unified memory manager + * + * @dev: owner device pointer + * @mmg: structure to initialize + * + * Initialize an instance of unified memory manager + */ +void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg) +{ + mmg->dev = dev; + spin_lock_init(&mmg->lock); + idr_init(&mmg->handles); +} + +/** + * hl_mem_mgr_fini - release unified memory manager + * + * @mmg: parent unified memory manager + * + * Release the unified memory manager. Shall be called from an interrupt context. + */ +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg) +{ + struct hl_mmap_mem_buf *buf; + struct idr *idp; + const char *topic; + u32 id; + + idp = &mmg->handles; + + idr_for_each_entry(idp, buf, id) { + topic = buf->behavior->topic; + if (hl_mmap_mem_buf_put(buf) != 1) + dev_err(mmg->dev, + "%s: Buff handle %u for CTX is still alive\n", + topic, id); + } + + /* TODO: can it happen that some buffer is still in use at this point? */ + + idr_destroy(&mmg->handles); +} diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 810b73421ce1..f3734718d94f 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -9,6 +9,20 @@ #include "../habanalabs.h" +/** + * hl_mmu_get_funcs() - get MMU functions structure + * @hdev: habanalabs device structure. + * @pgt_residency: page table residency. 
+ * @is_dram_addr: true if we need HMMU functions + * + * @return appropriate MMU functions structure + */ +static struct hl_mmu_funcs *hl_mmu_get_funcs(struct hl_device *hdev, int pgt_residency, + bool is_dram_addr) +{ + return &hdev->mmu_func[pgt_residency]; +} + bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -122,6 +136,53 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) } /* + * hl_mmu_get_real_page_size - get real page size to use in map/unmap operation + * + * @hdev: pointer to device data. + * @mmu_prop: MMU properties. + * @page_size: page size + * @real_page_size: set here the actual page size to use for the operation + * @is_dram_addr: true if DRAM address, otherwise false. + * + * @return 0 on success, otherwise non 0 error code + * + * note that this is general implementation that can fit most MMU arch. but as this is used as an + * MMU function: + * 1. it shall not be called directly- only from mmu_func structure instance + * 2. each MMU may modify the implementation internally + */ +int hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, + u32 page_size, u32 *real_page_size, bool is_dram_addr) +{ + /* + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and map them separately. 
+ */ + if ((page_size % mmu_prop->page_size) == 0) { + *real_page_size = mmu_prop->page_size; + return 0; + } + + dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", + page_size, mmu_prop->page_size >> 10); + + return -EFAULT; +} + +static struct hl_mmu_properties *hl_mmu_get_prop(struct hl_device *hdev, u32 page_size, + bool is_dram_addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (is_dram_addr) + return &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + return &prop->pmmu_huge; + + return &prop->pmmu; +} + +/* * hl_mmu_unmap_page - unmaps a virtual addr * * @ctx: pointer to the context structure @@ -142,60 +203,35 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) * For optimization reasons PCI flush may be requested once after unmapping of * large area. */ -int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, - bool flush_pte) +int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte) { struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 real_virt_addr; + struct hl_mmu_funcs *mmu_funcs; + int i, pgt_residency, rc = 0; u32 real_page_size, npages; - int i, rc = 0, pgt_residency; + u64 real_virt_addr; bool is_dram_addr; if (!hdev->mmu_enable) return 0; is_dram_addr = hl_is_dram_va(hdev, virt_addr); - - if (is_dram_addr) - mmu_prop = &prop->dmmu; - else if ((page_size % prop->pmmu_huge.page_size) == 0) - mmu_prop = &prop->pmmu_huge; - else - mmu_prop = &prop->pmmu; + mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr); pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; - /* - * The H/W handles mapping of specific page sizes. Hence if the page - * size is bigger, we break it to sub-pages and unmap them separately. 
- */ - if ((page_size % mmu_prop->page_size) == 0) { - real_page_size = mmu_prop->page_size; - } else { - /* - * MMU page size may differ from DRAM page size. - * In such case work with the DRAM page size and let the MMU - * scrambling routine to handle this mismatch when - * calculating the address to remove from the MMU page table - */ - if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) { - real_page_size = prop->dram_page_size; - } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't unmap\n", - page_size, mmu_prop->page_size >> 10); + mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); - return -EFAULT; - } - } + rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size, + is_dram_addr); + if (rc) + return rc; npages = page_size / real_page_size; real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func[pgt_residency].unmap(ctx, - real_virt_addr, is_dram_addr); + rc = mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr); if (rc) break; @@ -203,7 +239,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, } if (flush_pte) - hdev->mmu_func[pgt_residency].flush(ctx); + mmu_funcs->flush(ctx); return rc; } @@ -230,15 +266,15 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, * For optimization reasons PCI flush may be requested once after mapping of * large area. 
*/ -int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size, bool flush_pte) +int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, + bool flush_pte) { + int i, rc, pgt_residency, mapped_cnt = 0; struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; + struct hl_mmu_funcs *mmu_funcs; u32 real_page_size, npages; - int i, rc, pgt_residency, mapped_cnt = 0; bool is_dram_addr; @@ -246,40 +282,15 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, return 0; is_dram_addr = hl_is_dram_va(hdev, virt_addr); - - if (is_dram_addr) - mmu_prop = &prop->dmmu; - else if ((page_size % prop->pmmu_huge.page_size) == 0) - mmu_prop = &prop->pmmu_huge; - else - mmu_prop = &prop->pmmu; + mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr); pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; + mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); - /* - * The H/W handles mapping of specific page sizes. Hence if the page - * size is bigger, we break it to sub-pages and map them separately. - */ - if ((page_size % mmu_prop->page_size) == 0) { - real_page_size = mmu_prop->page_size; - } else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) && - (prop->dram_page_size < mmu_prop->page_size)) { - /* - * MMU page size may differ from DRAM page size. - * In such case work with the DRAM page size and let the MMU - * scrambling routine handle this mismatch when calculating - * the address to place in the MMU page table. 
(in that case - * also make sure that the dram_page_size smaller than the - * mmu page size) - */ - real_page_size = prop->dram_page_size; - } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't map\n", - page_size, mmu_prop->page_size >> 10); - - return -EFAULT; - } + rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size, + is_dram_addr); + if (rc) + return rc; /* * Verify that the phys and virt addresses are aligned with the @@ -302,9 +313,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, real_phys_addr = phys_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func[pgt_residency].map(ctx, - real_virt_addr, real_phys_addr, - real_page_size, is_dram_addr); + rc = mmu_funcs->map(ctx, real_virt_addr, real_phys_addr, real_page_size, + is_dram_addr); if (rc) goto err; @@ -314,22 +324,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, } if (flush_pte) - hdev->mmu_func[pgt_residency].flush(ctx); + mmu_funcs->flush(ctx); return 0; err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (hdev->mmu_func[pgt_residency].unmap(ctx, - real_virt_addr, is_dram_addr)) + if (mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); real_virt_addr += real_page_size; } - hdev->mmu_func[pgt_residency].flush(ctx); + mmu_funcs->flush(ctx); return rc; } @@ -480,11 +489,9 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops, u64 *phys_addr) { - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr; - u32 hop0_shift_off; - void *p; + struct hl_mmu_properties *mmu_prop; /* last hop holds the phys address and flags */ if (hops->unscrambled_paddr) @@ -493,11 +500,11 @@ static void 
hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val; if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE) - p = &prop->pmmu_huge; + mmu_prop = &prop->pmmu_huge; else if (hops->range_type == HL_VA_RANGE_TYPE_HOST) - p = &prop->pmmu; + mmu_prop = &prop->pmmu; else /* HL_VA_RANGE_TYPE_DRAM */ - p = &prop->dmmu; + mmu_prop = &prop->dmmu; if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) && !is_power_of_2(prop->dram_page_size)) { @@ -508,7 +515,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, /* * Bit arithmetics cannot be used for non power of two page * sizes. In addition, since bit arithmetics is not used, - * we cannot ignore dram base. All that shall be considerd. + * we cannot ignore dram base. All that shall be considered. */ dram_page_size = prop->dram_page_size; @@ -526,10 +533,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, * structure in order to determine the right masks * for the page offset. 
*/ - hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift); - p = (char *)p + hop0_shift_off; - p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); - hop_shift = *(u64 *)p; + hop_shift = mmu_prop->hop_shifts[hops->used_hops - 1]; offset_mask = (1ull << hop_shift) - 1; addr_mask = ~(offset_mask); *phys_addr = (tmp_phys_addr & addr_mask) | @@ -557,40 +561,39 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops) { struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; + struct asic_fixed_properties *prop; struct hl_mmu_properties *mmu_prop; - int rc; + struct hl_mmu_funcs *mmu_funcs; + int pgt_residency, rc; bool is_dram_addr; if (!hdev->mmu_enable) return -EOPNOTSUPP; + prop = &hdev->asic_prop; hops->scrambled_vaddr = virt_addr; /* assume no scrambling */ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); + prop->dmmu.start_addr, + prop->dmmu.end_addr); - /* host-residency is the same in PMMU and HPMMU, use one of them */ + /* host-residency is the same in PMMU and PMMU huge, no need to distinguish here */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + pgt_residency = mmu_prop->host_resident ? 
MMU_HR_PGT : MMU_DR_PGT; + mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); mutex_lock(&ctx->mmu_lock); - - if (mmu_prop->host_resident) - rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx, - virt_addr, hops); - else - rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx, - virt_addr, hops); - + rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops); mutex_unlock(&ctx->mmu_lock); + if (rc) + return rc; + /* add page offset to physical address */ if (hops->unscrambled_paddr) - hl_mmu_pa_page_with_offset(ctx, virt_addr, hops, - &hops->unscrambled_paddr); + hl_mmu_pa_page_with_offset(ctx, virt_addr, hops, &hops->unscrambled_paddr); - return rc; + return 0; } int hl_mmu_if_set_funcs(struct hl_device *hdev) @@ -662,6 +665,55 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, return rc; } +static void hl_mmu_prefetch_work_function(struct work_struct *work) +{ + struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); + struct hl_ctx *ctx = pfw->ctx; + + if (!hl_device_operational(ctx->hdev, NULL)) + goto put_ctx; + + mutex_lock(&ctx->mmu_lock); + + ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, + pfw->va, pfw->size); + + mutex_unlock(&ctx->mmu_lock); + +put_ctx: + /* + * context was taken in the common mmu prefetch function- see comment there about + * context handling. 
+ */ + hl_ctx_put(ctx); + kfree(pfw); +} + +int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size) +{ + struct hl_prefetch_work *handle_pf_work; + + handle_pf_work = kmalloc(sizeof(*handle_pf_work), GFP_KERNEL); + if (!handle_pf_work) + return -ENOMEM; + + INIT_WORK(&handle_pf_work->pf_work, hl_mmu_prefetch_work_function); + handle_pf_work->ctx = ctx; + handle_pf_work->va = va; + handle_pf_work->size = size; + handle_pf_work->flags = flags; + handle_pf_work->asid = asid; + + /* + * as actual prefetch is done in a WQ we must get the context (and put it + * at the end of the work function) + */ + hl_ctx_get(ctx); + queue_work(ctx->hdev->pf_wq, &handle_pf_work->pf_work); + + return 0; +} + u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) { return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX; @@ -670,6 +722,7 @@ u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) /** * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP * @ctx: pointer to the context structure to initialize. + * @mmu_prop: MMU properties. * @hop_idx: HOP index. * @hop_addr: HOP address. * @virt_addr: virtual address for the translation. 
@@ -686,33 +739,8 @@ u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *m return U64_MAX; } - /* currently max number of HOPs is 6 */ - switch (hop_idx) { - case 0: - mask = mmu_prop->hop0_mask; - shift = mmu_prop->hop0_shift; - break; - case 1: - mask = mmu_prop->hop1_mask; - shift = mmu_prop->hop1_shift; - break; - case 2: - mask = mmu_prop->hop2_mask; - shift = mmu_prop->hop2_shift; - break; - case 3: - mask = mmu_prop->hop3_mask; - shift = mmu_prop->hop3_shift; - break; - case 4: - mask = mmu_prop->hop4_mask; - shift = mmu_prop->hop4_shift; - break; - default: - mask = mmu_prop->hop5_mask; - shift = mmu_prop->hop5_shift; - break; - } + shift = mmu_prop->hop_shifts[hop_idx]; + mask = mmu_prop->hop_masks[hop_idx]; return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); } diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c index d03786d0c407..e2d91a69acc2 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c @@ -10,6 +10,8 @@ #include <linux/slab.h> +#define MMU_V1_MAX_HOPS (MMU_HOP4 + 1) + static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) @@ -170,51 +172,15 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) return num_of_ptes_left; } -static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr, u64 mask, u64 shift) -{ - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & mask) >> shift); -} - -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, - mmu_prop->hop0_shift); -} - -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return 
get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, - mmu_prop->hop1_shift); -} - -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) +static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, + u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, - mmu_prop->hop2_shift); -} + u64 mask, shift; -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, - mmu_prop->hop3_shift); -} - -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, - mmu_prop->hop4_shift); + mask = mmu_prop->hop_masks[hop_idx]; + shift = mmu_prop->hop_shifts[hop_idx]; + return hop_addr_arr[hop_idx] + + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); } static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, @@ -516,74 +482,50 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) } } -static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, +static int hl_mmu_v1_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) { + u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0; struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte; bool is_huge, clear_hop3 = true; + int hop_idx; /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? 
&prop->dmmu : &prop->pmmu; - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - - hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; + for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) { + if (hop_idx == MMU_HOP0) { + hop_addr[hop_idx] = get_hop0_addr(ctx); + } else { + hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[hop_idx] == ULLONG_MAX) + goto not_mapped; + } - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; + } is_huge = curr_pte & mmu_prop->last_mask; if (is_dram_addr && !is_huge) { - dev_err(hdev->dev, - "DRAM unmapping should use huge pages only\n"); + dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n"); return -EFAULT; } if (!is_huge) { - hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop4_addr == ULLONG_MAX) + hop_idx = MMU_HOP4; + hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[hop_idx] == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; - + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, 
hop_idx); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; clear_hop3 = false; } @@ -605,39 +547,33 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, goto not_mapped; } - write_final_pte(ctx, hop3_pte_addr, default_pte); - put_pte(ctx, hop3_addr); + hop_idx = MMU_HOP3; + write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); + put_pte(ctx, hop_addr[hop_idx]); } else { if (!(curr_pte & PAGE_PRESENT_MASK)) goto not_mapped; - if (hop4_addr) - clear_pte(ctx, hop4_pte_addr); + if (hop_addr[MMU_HOP4]) + clear_pte(ctx, hop_pte_addr[MMU_HOP4]); else - clear_pte(ctx, hop3_pte_addr); + clear_pte(ctx, hop_pte_addr[MMU_HOP3]); - if (hop4_addr && !put_pte(ctx, hop4_addr)) + if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4])) clear_hop3 = true; if (!clear_hop3) goto mapped; - clear_pte(ctx, hop3_pte_addr); + for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) { + clear_pte(ctx, hop_pte_addr[hop_idx]); - if (put_pte(ctx, hop3_addr)) - goto mapped; + if (hop_idx == MMU_HOP0) + break; - clear_pte(ctx, hop2_pte_addr); - - if (put_pte(ctx, hop2_addr)) - goto mapped; - - clear_pte(ctx, hop1_pte_addr); - - if (put_pte(ctx, hop1_addr)) - goto mapped; - - clear_pte(ctx, hop0_pte_addr); + if (put_pte(ctx, hop_addr[hop_idx])) + goto mapped; + } } mapped: @@ -650,21 +586,15 @@ not_mapped: return -EINVAL; } -static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, +static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, bool is_dram_addr) { + u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0; struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte = 0; - bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new 
= false, is_huge; - int rc = -ENOMEM; + bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false}; + int num_hops, hop_idx, prev_hop, rc = -ENOMEM; /* * This mapping function can map a page or a huge page. For huge page @@ -684,39 +614,21 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, is_huge = false; } - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); - if (hop1_addr == ULLONG_MAX) - goto err; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); - if (hop2_addr == ULLONG_MAX) - goto err; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; + num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS; - hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); - if (hop3_addr == ULLONG_MAX) - goto err; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; - - if (!is_huge) { - hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new); - if (hop4_addr == ULLONG_MAX) - goto err; + for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) { + if (hop_idx == MMU_HOP0) { + hop_addr[hop_idx] = get_hop0_addr(ctx); + } else { + hop_addr[hop_idx] = + get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]); + if (hop_addr[hop_idx] == ULLONG_MAX) + goto err; + } - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; } if (hdev->dram_default_page_mapping && is_dram_addr) { @@ 
-732,30 +644,22 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - if (hop1_new || hop2_new || hop3_new || hop4_new) { - dev_err(hdev->dev, - "DRAM mapping should not allocate more hops\n"); - rc = -EFAULT; - goto err; + for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) { + if (hop_new[hop_idx]) { + dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n"); + rc = -EFAULT; + goto err; + } } } else if (curr_pte & PAGE_PRESENT_MASK) { dev_err(hdev->dev, "mapping already exists for virt_addr 0x%llx\n", virt_addr); - dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); - dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); - dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr); - dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr); - - if (!is_huge) - dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop4_pte_addr, - hop4_pte_addr); + for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) + dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx, + *(u64 *) (uintptr_t) hop_pte_addr[hop_idx], + hop_pte_addr[hop_idx]); rc = -EINVAL; goto err; @@ -764,53 +668,28 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask | PAGE_PRESENT_MASK; - if (is_huge) - write_final_pte(ctx, hop3_pte_addr, curr_pte); - else - write_final_pte(ctx, hop4_pte_addr, curr_pte); + write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte); - if (hop1_new) { - curr_pte = - (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop0_pte_addr, curr_pte); - } - if (hop2_new) { - curr_pte = - (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop1_pte_addr, curr_pte); - get_pte(ctx, hop1_addr); - 
} - if (hop3_new) { - curr_pte = - (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop2_pte_addr, curr_pte); - get_pte(ctx, hop2_addr); - } + for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) { + prev_hop = hop_idx - 1; - if (!is_huge) { - if (hop4_new) { - curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - write_pte(ctx, hop3_pte_addr, curr_pte); - get_pte(ctx, hop3_addr); + if (hop_new[hop_idx]) { + curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop_pte_addr[prev_hop], curr_pte); + if (hop_idx != MMU_HOP1) + get_pte(ctx, hop_addr[prev_hop]); } - - get_pte(ctx, hop4_addr); - } else { - get_pte(ctx, hop3_addr); } + get_pte(ctx, hop_addr[num_hops - 1]); + return 0; err: - if (hop4_new) - free_hop(ctx, hop4_addr); - if (hop3_new) - free_hop(ctx, hop3_addr); - if (hop2_new) - free_hop(ctx, hop2_addr); - if (hop1_new) - free_hop(ctx, hop1_addr); + for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) { + if (hop_new[hop_idx]) + free_hop(ctx, hop_addr[hop_idx]); + } return rc; } @@ -928,8 +807,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) mmu->fini = hl_mmu_v1_fini; mmu->ctx_init = hl_mmu_v1_ctx_init; mmu->ctx_fini = hl_mmu_v1_ctx_fini; - mmu->map = _hl_mmu_v1_map; - mmu->unmap = _hl_mmu_v1_unmap; + mmu->map = hl_mmu_v1_map; + mmu->unmap = hl_mmu_v1_unmap; mmu->flush = flush; mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_in = hl_mmu_v1_swap_in; diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index bb9ce22bafc4..610acd4a8057 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -392,6 +392,7 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr) */ int hl_pci_init(struct hl_device *hdev) { + struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; int rc; @@ -419,17 +420,14 @@ int 
hl_pci_init(struct hl_device *hdev) } /* Driver must sleep in order for FW to finish the iATU configuration */ - if (hdev->asic_prop.iatu_done_by_fw) { + if (hdev->asic_prop.iatu_done_by_fw) usleep_range(2000, 3000); - hdev->asic_funcs->set_dma_mask_from_fw(hdev); - } - rc = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(hdev->dma_mask)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(prop->dma_mask)); if (rc) { dev_err(hdev->dev, "Failed to set dma mask to %d bits, error %d\n", - hdev->dma_mask, rc); + prop->dma_mask, rc); goto unmap_pci_bars; } |