diff options
Diffstat (limited to 'drivers/accel')
45 files changed, 1637 insertions, 500 deletions
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO index 5119bccd1917..ad8ac6e315b6 100644 --- a/drivers/accel/amdxdna/TODO +++ b/drivers/accel/amdxdna/TODO @@ -1,3 +1,2 @@ -- Add import and export BO support - Add debugfs support - Add debug BO support diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 00d215ac866e..2cff5419bd2f 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -361,7 +361,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job) aie2_hwctx_restart(xdna, hwctx); mutex_unlock(&xdna->dev_lock); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct drm_sched_backend_ops sched_ops = { @@ -566,7 +566,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) .size = MAX_CHAIN_CMDBUF_SIZE, }; - abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true); + abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp); if (IS_ERR(abo)) { ret = PTR_ERR(abo); goto free_cmd_bufs; @@ -758,27 +758,42 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu static int aie2_populate_range(struct amdxdna_gem_obj *abo) { struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); - struct mm_struct *mm = abo->mem.notifier.mm; - struct hmm_range range = { 0 }; + struct amdxdna_umap *mapp; unsigned long timeout; + struct mm_struct *mm; + bool found; int ret; - XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx", - abo->mem.userptr, abo->mem.size); - range.notifier = &abo->mem.notifier; - range.start = abo->mem.userptr; - range.end = abo->mem.userptr + abo->mem.size; - range.hmm_pfns = abo->mem.pfns; - range.default_flags = HMM_PFN_REQ_FAULT; + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); +again: + found = false; + down_write(&xdna->notifier_lock); + list_for_each_entry(mapp, &abo->mem.umap_list, node) { + if (mapp->invalid) { + found = true; + break; + } + } - if (!mmget_not_zero(mm)) + if (!found) { + abo->mem.map_invalid = false; + up_write(&xdna->notifier_lock); + return 0; + } + kref_get(&mapp->refcnt); + up_write(&xdna->notifier_lock); + + XDNA_DBG(xdna, "populate memory range %lx %lx", + mapp->vma->vm_start, mapp->vma->vm_end); + mm = mapp->notifier.mm; + if (!mmget_not_zero(mm)) { + amdxdna_umap_put(mapp); return -EFAULT; + } - timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); -again: - range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier); + mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier); mmap_read_lock(mm); - ret = hmm_range_fault(&range); + ret = hmm_range_fault(&mapp->range); mmap_read_unlock(mm); if (ret) { if (time_after(jiffies, timeout)) { @@ -786,21 +801,27 @@ again: goto put_mm; } - if (ret == -EBUSY) + if (ret == -EBUSY) { + amdxdna_umap_put(mapp); goto again; + } goto put_mm; } - down_read(&xdna->notifier_lock); - if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { - up_read(&xdna->notifier_lock); + down_write(&xdna->notifier_lock); + if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) { + up_write(&xdna->notifier_lock); + amdxdna_umap_put(mapp); goto again; } - abo->mem.map_invalid = false; - up_read(&xdna->notifier_lock); + mapp->invalid = false; + up_write(&xdna->notifier_lock); + amdxdna_umap_put(mapp); + goto again; put_mm: + amdxdna_umap_put(mapp); mmput(mm); return ret; } @@ -827,7 +848,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, goto up_sem; } - ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx); + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx, + hwctx->client->filp->client_id); if (ret) { XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); goto free_chain; @@ -908,10 +930,6 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, struct drm_gem_object *gobj = to_gobj(abo); long ret; - down_write(&xdna->notifier_lock); - abo->mem.map_invalid = true; - mmu_interval_set_seq(&abo->mem.notifier, cur_seq); - up_write(&xdna->notifier_lock); ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, true, MAX_SCHEDULE_TIMEOUT); if (!ret || ret == -ERESTARTSYS) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index bf4219e32cc1..82412eec9a4b 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, if (!payload) return -EINVAL; - if (!slot_cf_has_space(offset, payload_len)) + if (!slot_has_space(*buf, offset, payload_len)) return -ENOSPC; buf->cu_idx = cu_idx; @@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) return -EINVAL; - if (!slot_dpu_has_space(offset, arg_sz)) + if (!slot_has_space(*buf, offset, arg_sz)) return -ENOSPC; buf->inst_buf_addr = sn->buffer; @@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, memcpy(buf->args, sn->prop_args, arg_sz); /* Accurate buf size to hint firmware to do necessary copy */ - *size += sizeof(*buf) + arg_sz; + *size = sizeof(*buf) + arg_sz; return 0; } diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 4e02e744b470..6df9065b13f6 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -319,18 +319,16 @@ struct async_event_msg_resp { } __packed; #define MAX_CHAIN_CMDBUF_SIZE SZ_4K -#define slot_cf_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_execbuf_cf, args[0])) +#define slot_has_space(slot, offset, payload_size) \ + (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ + sizeof(typeof(slot))) + struct cmd_chain_slot_execbuf_cf { __u32 cu_idx; __u32 arg_cnt; __u32 args[] __counted_by(arg_cnt); }; -#define slot_dpu_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_dpu, args[0])) struct cmd_chain_slot_dpu { __u64 inst_buf_addr; __u32 inst_size; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 5a058e565b01..c6cf7068d23c 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -512,12 +512,6 @@ static int aie2_init(struct amdxdna_dev *xdna) goto release_fw; } - ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); - if (ret) { - XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret); - goto free_irq; - } - psp_conf.fw_size = fw->size; psp_conf.fw_buf = fw->data; for (i = 0; i < PSP_MAX_REGS; i++) @@ -526,14 +520,14 @@ static int aie2_init(struct amdxdna_dev *xdna) if (!ndev->psp_hdl) { XDNA_ERR(xdna, "failed to create psp"); ret = -ENOMEM; - goto disable_sva; + goto free_irq; } xdna->dev_handle = ndev; ret = aie2_hw_start(xdna); if (ret) { XDNA_ERR(xdna, "start npu failed, ret %d", ret); - goto disable_sva; + goto free_irq; } ret = aie2_mgmt_fw_query(ndev); @@ -584,8 +578,6 @@ async_event_free: aie2_error_async_events_free(ndev); stop_hw: aie2_hw_stop(xdna); -disable_sva: - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); free_irq: pci_free_irq_vectors(pdev); release_fw: @@ -601,7 +593,6 @@ static void aie2_fini(struct amdxdna_dev *xdna) aie2_hw_stop(xdna); aie2_error_async_events_free(ndev); - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); pci_free_irq_vectors(pdev); } diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index dc3a072ce3b6..f28a060a8810 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config * psp->ddev = ddev; memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL); + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); if (!psp->fw_buffer) { drm_err(ddev, "no memory for fw buffer"); return NULL; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 43442b9e273b..be073224bd69 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -496,11 +496,11 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, struct amdxdna_drm_exec_cmd *args) { struct amdxdna_dev *xdna = client->xdna; - u32 *arg_bo_hdls; + u32 *arg_bo_hdls = NULL; u32 cmd_bo_hdl; int ret; - if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) { + if (args->arg_count > MAX_ARG_COUNT) { XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); return -EINVAL; } @@ -512,14 +512,16 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, } cmd_bo_hdl = (u32)args->cmd_handles; - arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); - if (!arg_bo_hdls) - return -ENOMEM; - ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), - args->arg_count * sizeof(u32)); - if (ret) { - ret = -EFAULT; - goto free_cmd_bo_hdls; + if (args->arg_count) { + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); + if (!arg_bo_hdls) + return -ENOMEM; + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), + args->arg_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_cmd_bo_hdls; + } } ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 606433d73236..0f85a0105178 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -9,7 +9,10 @@ #include <drm/drm_gem.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/gpu_scheduler.h> +#include <linux/dma-buf.h> +#include <linux/dma-direct.h> #include <linux/iosys-map.h> +#include <linux/pagemap.h> #include <linux/vmalloc.h> #include "amdxdna_ctx.h" @@ -18,94 +21,116 @@ #define XDNA_MAX_CMD_BO_SIZE SZ_32K +MODULE_IMPORT_NS("DMA_BUF"); + static int -amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap) +amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo) { struct amdxdna_client *client = abo->client; struct amdxdna_dev *xdna = client->xdna; struct amdxdna_mem *mem = &abo->mem; + struct amdxdna_gem_obj *heap; u64 offset; u32 align; int ret; + mutex_lock(&client->mm_lock); + + heap = client->dev_heap; + if (!heap) { + ret = -EINVAL; + goto unlock_out; + } + + if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) { + XDNA_ERR(xdna, "Invalid dev heap userptr"); + ret = -EINVAL; + goto unlock_out; + } + + if (mem->size == 0 || mem->size > heap->mem.size) { + XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx", + mem->size, heap->mem.size); + ret = -EINVAL; + goto unlock_out; + } + align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift); - ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node, + ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node, mem->size, align, 0, DRM_MM_INSERT_BEST); if (ret) { XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret); - return ret; + goto unlock_out; } mem->dev_addr = abo->mm_node.start; - offset = mem->dev_addr - abo->dev_heap->mem.dev_addr; - mem->userptr = abo->dev_heap->mem.userptr + offset; - mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT]; - mem->nr_pages = mem->size >> PAGE_SHIFT; - - if (use_vmap) { - mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL); - if (!mem->kva) { - XDNA_ERR(xdna, "Failed to vmap"); - drm_mm_remove_node(&abo->mm_node); - return -EFAULT; - } - } + offset = mem->dev_addr - heap->mem.dev_addr; + mem->userptr = heap->mem.userptr + offset; + mem->kva = heap->mem.kva + offset; - return 0; + drm_gem_object_get(to_gobj(heap)); + +unlock_out: + mutex_unlock(&client->mm_lock); + + return ret; } -static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) +static void +amdxdna_gem_destroy_obj(struct amdxdna_gem_obj *abo) { - struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); - struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); - struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva); + mutex_destroy(&abo->lock); + kfree(abo); +} - XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); - if (abo->pinned) - amdxdna_gem_unpin(abo); +static void +amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_gem_obj *heap; - if (abo->type == AMDXDNA_BO_DEV) { - mutex_lock(&abo->client->mm_lock); - drm_mm_remove_node(&abo->mm_node); - mutex_unlock(&abo->client->mm_lock); + mutex_lock(&abo->client->mm_lock); - vunmap(abo->mem.kva); - drm_gem_object_put(to_gobj(abo->dev_heap)); - drm_gem_object_release(gobj); - mutex_destroy(&abo->lock); - kfree(abo); - return; - } + drm_mm_remove_node(&abo->mm_node); - if (abo->type == AMDXDNA_BO_DEV_HEAP) - drm_mm_takedown(&abo->mm); + heap = abo->client->dev_heap; + drm_gem_object_put(to_gobj(heap)); - drm_gem_vunmap_unlocked(gobj, &map); - mutex_destroy(&abo->lock); - drm_gem_shmem_free(&abo->base); + mutex_unlock(&abo->client->mm_lock); } -static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = { - .free = amdxdna_gem_obj_free, -}; - static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) { - struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj, - mem.notifier); - struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier); + struct amdxdna_gem_obj *abo = mapp->abo; + struct amdxdna_dev *xdna; - XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d", - abo->mem.userptr, abo->mem.size, abo->type); + xdna = to_xdna_dev(to_gobj(abo)->dev); + XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d", + mapp->vma->vm_start, mapp->vma->vm_end, abo->type); if (!mmu_notifier_range_blockable(range)) return false; + down_write(&xdna->notifier_lock); + abo->mem.map_invalid = true; + mapp->invalid = true; + mmu_interval_set_seq(&mapp->notifier, cur_seq); + up_write(&xdna->notifier_lock); + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); + if (range->event == MMU_NOTIFY_UNMAP) { + down_write(&xdna->notifier_lock); + if (!mapp->unmapped) { + queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work); + mapp->unmapped = true; + } + up_write(&xdna->notifier_lock); + } + return true; } @@ -113,102 +138,311 @@ static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = { .invalidate = amdxdna_hmm_invalidate, }; -static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo) +static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) { struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct amdxdna_umap *mapp; + + down_read(&xdna->notifier_lock); + list_for_each_entry(mapp, &abo->mem.umap_list, node) { + if (!vma || mapp->vma == vma) { + if (!mapp->unmapped) { + queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work); + mapp->unmapped = true; + } + if (vma) + break; + } + } + up_read(&xdna->notifier_lock); +} - if (!xdna->dev_info->ops->hmm_invalidate) - return; +static void amdxdna_umap_release(struct kref *ref) +{ + struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt); + struct vm_area_struct *vma = mapp->vma; + struct amdxdna_dev *xdna; + + mmu_interval_notifier_remove(&mapp->notifier); + if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping) + mapping_clear_unevictable(vma->vm_file->f_mapping); + + xdna = to_xdna_dev(to_gobj(mapp->abo)->dev); + down_write(&xdna->notifier_lock); + list_del(&mapp->node); + up_write(&xdna->notifier_lock); - mmu_interval_notifier_remove(&abo->mem.notifier); - kvfree(abo->mem.pfns); - abo->mem.pfns = NULL; + kvfree(mapp->range.hmm_pfns); + kfree(mapp); } -static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr, - size_t len) +void amdxdna_umap_put(struct amdxdna_umap *mapp) +{ + kref_put(&mapp->refcnt, amdxdna_umap_release); +} + +static void amdxdna_hmm_unreg_work(struct work_struct *work) +{ + struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap, + hmm_unreg_work); + + amdxdna_umap_put(mapp); +} + +static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) { struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + unsigned long len = vma->vm_end - vma->vm_start; + unsigned long addr = vma->vm_start; + struct amdxdna_umap *mapp; u32 nr_pages; int ret; if (!xdna->dev_info->ops->hmm_invalidate) return 0; - if (abo->mem.pfns) - return -EEXIST; + mapp = kzalloc(sizeof(*mapp), GFP_KERNEL); + if (!mapp) + return -ENOMEM; nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT; - abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns), - GFP_KERNEL); - if (!abo->mem.pfns) - return -ENOMEM; + mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns), + GFP_KERNEL); + if (!mapp->range.hmm_pfns) { + ret = -ENOMEM; + goto free_map; + } - ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier, + ret = mmu_interval_notifier_insert_locked(&mapp->notifier, current->mm, addr, len, &amdxdna_hmm_ops); if (ret) { XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret); - kvfree(abo->mem.pfns); + goto free_pfns; } - abo->mem.userptr = addr; + mapp->range.notifier = &mapp->notifier; + mapp->range.start = vma->vm_start; + mapp->range.end = vma->vm_end; + mapp->range.default_flags = HMM_PFN_REQ_FAULT; + mapp->vma = vma; + mapp->abo = abo; + kref_init(&mapp->refcnt); + + if (abo->mem.userptr == AMDXDNA_INVALID_ADDR) + abo->mem.userptr = addr; + INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work); + if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping) + mapping_set_unevictable(vma->vm_file->f_mapping); + + down_write(&xdna->notifier_lock); + list_add_tail(&mapp->node, &abo->mem.umap_list); + up_write(&xdna->notifier_lock); + + return 0; + +free_pfns: + kvfree(mapp->range.hmm_pfns); +free_map: + kfree(mapp); return ret; } +static void amdxdna_gem_dev_obj_free(struct drm_gem_object *gobj) +{ + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + + XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); + if (abo->pinned) + amdxdna_gem_unpin(abo); + + amdxdna_gem_heap_free(abo); + drm_gem_object_release(gobj); + amdxdna_gem_destroy_obj(abo); +} + +static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + unsigned long num_pages = vma_pages(vma); + unsigned long offset = 0; + int ret; + + if (!is_import_bo(abo)) { + ret = drm_gem_shmem_mmap(&abo->base, vma); + if (ret) { + XDNA_ERR(xdna, "Failed shmem mmap %d", ret); + return ret; + } + + /* The buffer is based on memory pages. Fix the flag. */ + vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, + &num_pages); + if (ret) { + XDNA_ERR(xdna, "Failed insert pages %d", ret); + vma->vm_ops->close(vma); + return ret; + } + + return 0; + } + + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + ret = dma_buf_mmap(to_gobj(abo)->dma_buf, vma, 0); + if (ret) { + XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret); + return ret; + } + + do { + vm_fault_t fault_ret; + + fault_ret = handle_mm_fault(vma, vma->vm_start + offset, + FAULT_FLAG_WRITE, NULL); + if (fault_ret & VM_FAULT_ERROR) { + vma->vm_ops->close(vma); + XDNA_ERR(xdna, "Fault in page failed"); + return -EFAULT; + } + + offset += PAGE_SIZE; + } while (--num_pages); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + drm_gem_object_put(to_gobj(abo)); + + return 0; +} + static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj, struct vm_area_struct *vma) { + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); - unsigned long num_pages; int ret; - ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size); + ret = amdxdna_hmm_register(abo, vma); if (ret) return ret; + ret = amdxdna_insert_pages(abo, vma); + if (ret) { + XDNA_ERR(xdna, "Failed insert pages, ret %d", ret); + goto hmm_unreg; + } + + XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx", + drm_vma_node_offset_addr(&gobj->vma_node), abo->type, + vma->vm_start, gobj->size); + return 0; + +hmm_unreg: + amdxdna_hmm_unregister(abo, vma); + return ret; +} + +static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + struct drm_gem_object *gobj = dma_buf->priv; + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + unsigned long num_pages = vma_pages(vma); + int ret; + + vma->vm_ops = &drm_gem_shmem_vm_ops; + vma->vm_private_data = gobj; + + drm_gem_object_get(gobj); ret = drm_gem_shmem_mmap(&abo->base, vma); if (ret) - goto hmm_unreg; + goto put_obj; - num_pages = gobj->size >> PAGE_SHIFT; - /* Try to insert the pages */ + /* The buffer is based on memory pages. Fix the flag. */ vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); - ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, + &num_pages); if (ret) - XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret); + goto close_vma; return 0; -hmm_unreg: - amdxdna_hmm_unregister(abo); +close_vma: + vma->vm_ops->close(vma); +put_obj: + drm_gem_object_put(gobj); return ret; } -static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf) +static const struct dma_buf_ops amdxdna_dmabuf_ops = { + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .mmap = amdxdna_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags) { - return drm_gem_shmem_vm_ops.fault(vmf); + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &amdxdna_dmabuf_ops; + exp_info.size = gobj->size; + exp_info.flags = flags; + exp_info.priv = gobj; + exp_info.resv = gobj->resv; + + return drm_gem_dmabuf_export(gobj->dev, &exp_info); } -static void amdxdna_gem_vm_open(struct vm_area_struct *vma) +static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo) { - drm_gem_shmem_vm_ops.open(vma); + dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL); + dma_buf_detach(abo->dma_buf, abo->attach); + dma_buf_put(abo->dma_buf); + drm_gem_object_release(to_gobj(abo)); + kfree(abo); } -static void amdxdna_gem_vm_close(struct vm_area_struct *vma) +static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) { - struct drm_gem_object *gobj = vma->vm_private_data; + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva); + + XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); + + amdxdna_hmm_unregister(abo, NULL); + flush_workqueue(xdna->notifier_wq); + + if (abo->pinned) + amdxdna_gem_unpin(abo); - amdxdna_hmm_unregister(to_xdna_obj(gobj)); - drm_gem_shmem_vm_ops.close(vma); + if (abo->type == AMDXDNA_BO_DEV_HEAP) + drm_mm_takedown(&abo->mm); + + drm_gem_vunmap(gobj, &map); + mutex_destroy(&abo->lock); + + if (is_import_bo(abo)) { + amdxdna_imported_obj_free(abo); + return; + } + + drm_gem_shmem_free(&abo->base); } -static const struct vm_operations_struct amdxdna_gem_vm_ops = { - .fault = amdxdna_gem_vm_fault, - .open = amdxdna_gem_vm_open, - .close = amdxdna_gem_vm_close, +static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = { + .free = amdxdna_gem_dev_obj_free, }; static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { @@ -220,7 +454,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = amdxdna_gem_obj_mmap, - .vm_ops = &amdxdna_gem_vm_ops, + .vm_ops = &drm_gem_shmem_vm_ops, + .export = amdxdna_gem_prime_export, }; static struct amdxdna_gem_obj * @@ -239,6 +474,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size) abo->mem.userptr = AMDXDNA_INVALID_ADDR; abo->mem.dev_addr = AMDXDNA_INVALID_ADDR; abo->mem.size = size; + INIT_LIST_HEAD(&abo->mem.umap_list); return abo; } @@ -258,6 +494,51 @@ amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size) return to_gobj(abo); } +struct drm_gem_object * +amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + struct sg_table *sgt; + int ret; + + get_dma_buf(dma_buf); + + attach = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attach)) { + ret = PTR_ERR(attach); + goto put_buf; + } + + sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto fail_detach; + } + + gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(gobj)) { + ret = PTR_ERR(gobj); + goto fail_unmap; + } + + abo = to_xdna_obj(gobj); + abo->attach = attach; + abo->dma_buf = dma_buf; + + return gobj; + +fail_unmap: + dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL); +fail_detach: + dma_buf_detach(dma_buf, attach); +put_buf: + dma_buf_put(dma_buf); + + return ERR_PTR(ret); +} + static struct amdxdna_gem_obj * amdxdna_drm_alloc_shmem(struct drm_device *dev, struct amdxdna_drm_create_bo *args, @@ -286,6 +567,7 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev, struct drm_file *filp) { struct amdxdna_client *client = filp->driver_priv; + struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL); struct amdxdna_dev *xdna = to_xdna_dev(dev); struct drm_gem_shmem_object *shmem; struct amdxdna_gem_obj *abo; @@ -312,18 +594,26 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev, shmem->map_wc = false; abo = to_xdna_obj(&shmem->base); - abo->type = AMDXDNA_BO_DEV_HEAP; abo->client = client; abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base; drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size); + ret = drm_gem_vmap(to_gobj(abo), &map); + if (ret) { + XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret); + goto release_obj; + } + abo->mem.kva = map.vaddr; + client->dev_heap = abo; drm_gem_object_get(to_gobj(abo)); mutex_unlock(&client->mm_lock); return abo; +release_obj: + drm_gem_object_put(to_gobj(abo)); mm_unlock: mutex_unlock(&client->mm_lock); return ERR_PTR(ret); @@ -332,58 +622,32 @@ mm_unlock: struct amdxdna_gem_obj * amdxdna_drm_alloc_dev_bo(struct drm_device *dev, struct amdxdna_drm_create_bo *args, - struct drm_file *filp, bool use_vmap) + struct drm_file *filp) { struct amdxdna_client *client = filp->driver_priv; struct amdxdna_dev *xdna = to_xdna_dev(dev); size_t aligned_sz = PAGE_ALIGN(args->size); - struct amdxdna_gem_obj *abo, *heap; + struct amdxdna_gem_obj *abo; int ret; - mutex_lock(&client->mm_lock); - heap = client->dev_heap; - if (!heap) { - ret = -EINVAL; - goto mm_unlock; - } - - if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) { - XDNA_ERR(xdna, "Invalid dev heap userptr"); - ret = -EINVAL; - goto mm_unlock; - } - - if (args->size > heap->mem.size) { - XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx", - args->size, heap->mem.size); - ret = -EINVAL; - goto mm_unlock; - } - abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz); - if (IS_ERR(abo)) { - ret = PTR_ERR(abo); - goto mm_unlock; - } + if (IS_ERR(abo)) + return abo; + to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs; abo->type = AMDXDNA_BO_DEV; abo->client = client; - abo->dev_heap = heap; - ret = amdxdna_gem_insert_node_locked(abo, use_vmap); + + ret = amdxdna_gem_heap_alloc(abo); if (ret) { XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret); - goto mm_unlock; + amdxdna_gem_destroy_obj(abo); + return ERR_PTR(ret); } - drm_gem_object_get(to_gobj(heap)); drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz); - mutex_unlock(&client->mm_lock); return abo; - -mm_unlock: - mutex_unlock(&client->mm_lock); - return ERR_PTR(ret); } static struct amdxdna_gem_obj * @@ -391,10 +655,10 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, struct amdxdna_drm_create_bo *args, struct drm_file *filp) { + struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL); struct amdxdna_dev *xdna = to_xdna_dev(dev); struct drm_gem_shmem_object *shmem; struct amdxdna_gem_obj *abo; - struct iosys_map map; int ret; if (args->size > XDNA_MAX_CMD_BO_SIZE) { @@ -417,7 +681,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, abo->type = AMDXDNA_BO_CMD; abo->client = filp->driver_priv; - ret = drm_gem_vmap_unlocked(to_gobj(abo), &map); + ret = drm_gem_vmap(to_gobj(abo), &map); if (ret) { XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); goto release_obj; @@ -451,7 +715,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f abo = amdxdna_drm_create_dev_heap(dev, args, filp); break; case AMDXDNA_BO_DEV: - abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false); + abo = amdxdna_drm_alloc_dev_bo(dev, args, filp); break; case AMDXDNA_BO_CMD: abo = amdxdna_drm_create_cmd_bo(dev, args, filp); @@ -483,17 +747,13 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo) struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); int ret; - switch (abo->type) { - case AMDXDNA_BO_SHMEM: - case AMDXDNA_BO_DEV_HEAP: - ret = drm_gem_shmem_pin(&abo->base); - break; - case AMDXDNA_BO_DEV: - ret = drm_gem_shmem_pin(&abo->dev_heap->base); - break; - default: - ret = -EOPNOTSUPP; - } + if (abo->type == AMDXDNA_BO_DEV) + abo = abo->client->dev_heap; + + if (is_import_bo(abo)) + return 0; + + ret = drm_gem_shmem_pin(&abo->base); XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret); return ret; @@ -503,9 +763,6 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo) { int ret; - if (abo->type == AMDXDNA_BO_DEV) - abo = abo->dev_heap; - mutex_lock(&abo->lock); ret = amdxdna_gem_pin_nolock(abo); mutex_unlock(&abo->lock); @@ -516,7 +773,10 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo) void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo) { if (abo->type == AMDXDNA_BO_DEV) - abo = abo->dev_heap; + abo = abo->client->dev_heap; + + if (is_import_bo(abo)) + return; mutex_lock(&abo->lock); drm_gem_shmem_unpin(&abo->base); @@ -606,10 +866,14 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, goto put_obj; } - if (abo->type == AMDXDNA_BO_DEV) - drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages); - else + if (is_import_bo(abo)) + drm_clflush_sg(abo->base.sgt); + else if (abo->mem.kva) + drm_clflush_virt_range(abo->mem.kva + args->offset, args->size); + else if (abo->base.pages) drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT); + else + drm_WARN(&xdna->ddev, 1, "Can not get flush memory"); amdxdna_gem_unpin(abo); diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h index 8ccc0375dd9d..ae29db94a9d3 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.h +++ b/drivers/accel/amdxdna/amdxdna_gem.h @@ -6,6 +6,20 @@ #ifndef _AMDXDNA_GEM_H_ #define _AMDXDNA_GEM_H_ +#include <linux/hmm.h> + +struct amdxdna_umap { + struct vm_area_struct *vma; + struct mmu_interval_notifier notifier; + struct hmm_range range; + struct work_struct hmm_unreg_work; + struct amdxdna_gem_obj *abo; + struct list_head node; + struct kref refcnt; + bool invalid; + bool unmapped; +}; + struct amdxdna_mem { u64 userptr; void *kva; @@ -13,8 +27,7 @@ struct amdxdna_mem { size_t size; struct page **pages; u32 nr_pages; - struct mmu_interval_notifier notifier; - unsigned long *pfns; + struct list_head umap_list; bool map_invalid; }; @@ -28,12 +41,14 @@ struct amdxdna_gem_obj { /* Below members is uninitialized when needed */ struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */ - struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */ struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */ u32 assigned_hwctx; + struct dma_buf *dma_buf; + struct dma_buf_attachment *attach; }; #define to_gobj(obj) (&(obj)->base.base) +#define is_import_bo(obj) ((obj)->attach) static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj) { @@ -47,12 +62,16 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo) drm_gem_object_put(to_gobj(abo)); } +void amdxdna_umap_put(struct amdxdna_umap *mapp); + struct drm_gem_object * amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size); +struct drm_gem_object * +amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); struct amdxdna_gem_obj * amdxdna_drm_alloc_dev_bo(struct drm_device *dev, struct amdxdna_drm_create_bo *args, - struct drm_file *filp, bool use_vmap); + struct drm_file *filp); int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo); int amdxdna_gem_pin(struct amdxdna_gem_obj *abo); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index f5b8497cf5ad..f2bf1d374cc7 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -226,6 +226,7 @@ const struct drm_driver amdxdna_drm_drv = { .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls), .gem_create_object = amdxdna_gem_create_object_cb, + .gem_prime_import = amdxdna_gem_prime_import, }; static const struct amdxdna_dev_info * @@ -266,12 +267,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) fs_reclaim_release(GFP_KERNEL); } + xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0); + if (!xdna->notifier_wq) + return -ENOMEM; + mutex_lock(&xdna->dev_lock); ret = xdna->dev_info->ops->init(xdna); mutex_unlock(&xdna->dev_lock); if (ret) { XDNA_ERR(xdna, "Hardware init failed, ret %d", ret); - return ret; + goto destroy_notifier_wq; } ret = amdxdna_sysfs_init(xdna); @@ -301,6 +306,8 @@ failed_dev_fini: mutex_lock(&xdna->dev_lock); xdna->dev_info->ops->fini(xdna); mutex_unlock(&xdna->dev_lock); +destroy_notifier_wq: + destroy_workqueue(xdna->notifier_wq); return ret; } @@ -310,6 +317,8 @@ static void amdxdna_remove(struct pci_dev *pdev) struct device *dev = &pdev->dev; struct amdxdna_client *client; + destroy_workqueue(xdna->notifier_wq); + pm_runtime_get_noresume(dev); pm_runtime_forbid(dev); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index 37848a8d8031..ab79600911aa 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -6,6 +6,7 @@ #ifndef _AMDXDNA_PCI_DRV_H_ #define _AMDXDNA_PCI_DRV_H_ +#include <linux/workqueue.h> #include <linux/xarray.h> #define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) @@ -98,6 +99,7 @@ struct amdxdna_dev { struct list_head client_list; struct amdxdna_fw_ver fw_ver; struct rw_semaphore notifier_lock; /* for mmu notifier*/ + struct workqueue_struct *notifier_wq; }; /* diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c index aa826033b0ce..ca3357acd127 100644 --- a/drivers/accel/drm_accel.c +++ b/drivers/accel/drm_accel.c @@ -20,8 +20,6 @@ DEFINE_XARRAY_ALLOC(accel_minors_xa); -static struct dentry *accel_debugfs_root; - static const struct device_type accel_sysfs_device_minor = { .name = "accel_minor" }; @@ -74,17 +72,6 @@ static const struct drm_info_list accel_debugfs_list[] = { #define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list) /** - * accel_debugfs_init() - Initialize debugfs for device - * @dev: Pointer to the device instance. - * - * This function creates a root directory for the device in debugfs. - */ -void accel_debugfs_init(struct drm_device *dev) -{ - drm_debugfs_dev_init(dev, accel_debugfs_root); -} - -/** * accel_debugfs_register() - Register debugfs for device * @dev: Pointer to the device instance. * @@ -194,7 +181,6 @@ static const struct file_operations accel_stub_fops = { void accel_core_exit(void) { unregister_chrdev(ACCEL_MAJOR, "accel"); - debugfs_remove(accel_debugfs_root); accel_sysfs_destroy(); WARN_ON(!xa_empty(&accel_minors_xa)); } @@ -209,8 +195,6 @@ int __init accel_core_init(void) goto error; } - accel_debugfs_root = debugfs_create_dir("accel", NULL); - ret = register_chrdev(ACCEL_MAJOR, "accel", &accel_stub_fops); if (ret < 0) DRM_ERROR("Cannot register ACCEL major: %d\n", ret); diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig index be85336107f9..1919fbb169c7 100644 --- a/drivers/accel/habanalabs/Kconfig +++ b/drivers/accel/habanalabs/Kconfig @@ -6,7 +6,7 @@ config DRM_ACCEL_HABANALABS tristate "HabanaLabs AI accelerators" depends on DRM_ACCEL - depends on X86_64 + depends on X86 && X86_64 depends on PCI && HAS_IOMEM select GENERIC_ALLOCATOR select HWMON diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 68eebed3b050..80fa08bf57bd 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1066,28 +1066,11 @@ static bool is_pci_link_healthy(struct hl_device *hdev) return (device_id == hdev->pdev->device); } -static void stringify_time_of_last_heartbeat(struct hl_device *hdev, char *time_str, size_t size, - bool is_pq_hb) -{ - time64_t seconds = is_pq_hb ? hdev->heartbeat_debug_info.last_pq_heartbeat_ts - : hdev->heartbeat_debug_info.last_eq_heartbeat_ts; - struct tm tm; - - if (!seconds) - return; - - time64_to_tm(seconds, 0, &tm); - - snprintf(time_str, size, "%ld-%02d-%02d %02d:%02d:%02d (UTC)", - tm.tm_year + 1900, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); -} - static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) { struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info; u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1; struct asic_fixed_properties *prop = &hdev->asic_prop; - char pq_time_str[64] = "N/A", eq_time_str[64] = "N/A"; if (!prop->cpucp_info.eq_health_check_supported) return true; @@ -1095,17 +1078,15 @@ static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) if (!hdev->eq_heartbeat_received) { dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); - stringify_time_of_last_heartbeat(hdev, pq_time_str, sizeof(pq_time_str), true); - stringify_time_of_last_heartbeat(hdev, eq_time_str, sizeof(eq_time_str), false); dev_err(hdev->dev, - "EQ: {CI %u, HB counter %u, last HB time: %s}, PQ: {PI: %u, CI: %u (%u), last HB time: %s}\n", + "EQ: {CI %u, HB counter %u, last HB time: %ptTs}, PQ: {PI: %u, CI: %u (%u), last HB time: %ptTs}\n", hdev->event_queue.ci, heartbeat_debug_info->heartbeat_event_counter, - eq_time_str, + &hdev->heartbeat_debug_info.last_eq_heartbeat_ts, hdev->kernel_queues[cpu_q_id].pi, atomic_read(&hdev->kernel_queues[cpu_q_id].ci), atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask, - pq_time_str); + &hdev->heartbeat_debug_info.last_pq_heartbeat_ts); hl_eq_dump(hdev, &hdev->event_queue); diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index 8729a0c57d78..dc80ca921d90 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -17,8 +17,6 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> -#include <asm/msr.h> - /* make sure there is space for all the signed info */ static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ); diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index 9d58efa2ff38..82f66520ec18 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -446,7 +446,7 @@ static DEVICE_ATTR_RO(parent_device); static const struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, .size = PAGE_SIZE, - .read_new = eeprom_read_handler + .read = eeprom_read_handler }; static struct attribute *hl_dev_attrs[] = { @@ -479,7 +479,7 @@ static const struct bin_attribute *const hl_dev_bin_attrs[] = { static struct attribute_group hl_dev_attr_group = { .attrs = hl_dev_attrs, - .bin_attrs_new = hl_dev_bin_attrs, + .bin_attrs = hl_dev_bin_attrs, }; static struct attribute_group hl_dev_clks_attr_group; diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 0825851656a2..cd24ccd20ba6 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -332,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si return -EINVAL; ret = ivpu_rpm_get(vdev); - if (ret) + if (ret < 0) return ret; ivpu_pm_trigger_recovery(vdev, "debugfs"); @@ -383,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent) return -EINVAL; ret = ivpu_rpm_get(vdev); - if (ret) + if (ret < 0) return ret; if (active_percent) @@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t if (ret < 0) return ret; - buf[size] = '\0'; + buf[ret] = '\0'; ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, &process_quantum); if (ret != 4) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 4fa73189502e..3d6d52492536 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <linux/firmware.h> @@ -164,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); + args->value = ivpu_hw_dpu_max_freq_get(vdev); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); @@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev) { int ret; + drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter)); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + /* Update boot params located at first 4KB of FW memory */ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem)); @@ -421,9 +424,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev) { ivpu_hw_irq_disable(vdev); disable_irq(vdev->irq); - cancel_work_sync(&vdev->irq_ipc_work); - cancel_work_sync(&vdev->irq_dct_work); - cancel_work_sync(&vdev->context_abort_work); + flush_work(&vdev->irq_ipc_work); + flush_work(&vdev->irq_dct_work); + flush_work(&vdev->context_abort_work); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); } @@ -573,6 +576,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); + atomic_set(&vdev->job_timeout_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); @@ -700,6 +704,7 @@ static struct pci_device_id ivpu_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_WCL) }, { } }; MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 92753effb1c9..62ab1c654e63 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_DRV_H__ @@ -26,6 +26,7 @@ #define PCI_DEVICE_ID_ARL 0xad1d #define PCI_DEVICE_ID_LNL 0x643e #define PCI_DEVICE_ID_PTL_P 0xb03e +#define PCI_DEVICE_ID_WCL 0xfd3e #define IVPU_HW_IP_37XX 37 #define IVPU_HW_IP_40XX 40 @@ -154,6 +155,7 @@ struct ivpu_device { struct mutex submitted_jobs_lock; /* Protects submitted_jobs */ struct xarray submitted_jobs_xa; struct ivpu_ipc_consumer job_done_consumer; + atomic_t job_timeout_counter; atomic64_t unique_id_counter; @@ -164,6 +166,7 @@ struct ivpu_device { int boot; int jsm; int tdr; + int inference; int autosuspend; int d0i3_entry_msg; int state_dump_msg; @@ -206,10 +209,11 @@ extern bool ivpu_force_snoop; #define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5) #define IVPU_TEST_MODE_MIP_DISABLE BIT(6) #define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8) -#define IVPU_TEST_MODE_TURBO BIT(9) -#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10) -#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(11) -#define IVPU_TEST_MODE_D0I2_DISABLE BIT(12) +#define IVPU_TEST_MODE_TURBO_ENABLE BIT(9) +#define IVPU_TEST_MODE_TURBO_DISABLE BIT(10) +#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11) +#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(12) +#define IVPU_TEST_MODE_D0I2_DISABLE BIT(13) extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); @@ -239,6 +243,7 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev) case PCI_DEVICE_ID_LNL: return IVPU_HW_IP_40XX; case PCI_DEVICE_ID_PTL_P: + case PCI_DEVICE_ID_WCL: return IVPU_HW_IP_50XX; default: dump_stack(); @@ -255,6 +260,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev) return IVPU_HW_BTRS_MTL; case PCI_DEVICE_ID_LNL: case PCI_DEVICE_ID_PTL_P: + case PCI_DEVICE_ID_WCL: return IVPU_HW_BTRS_LNL; default: dump_stack(); diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 7a1bb92d8c81..9db741695401 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <linux/firmware.h> @@ -55,18 +55,18 @@ static struct { int gen; const char *name; } fw_names[] = { - { IVPU_HW_IP_37XX, "vpu_37xx.bin" }, + { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v1.bin" }, { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" }, - { IVPU_HW_IP_40XX, "vpu_40xx.bin" }, + { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v1.bin" }, { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" }, - { IVPU_HW_IP_50XX, "vpu_50xx.bin" }, + { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" }, { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" }, }; /* Production fw_names from the table above */ -MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin"); -MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin"); -MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin"); static int ivpu_fw_request(struct ivpu_device *vdev) { @@ -233,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->dvfs_mode = 0; fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); + if (fw_hdr->preemption_buffer_1_max_size) + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; + else + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; + + if (fw_hdr->preemption_buffer_2_max_size) + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; + else + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); + if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, fw_hdr->image_load_address, @@ -534,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); - ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n", + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", boot_params->power_profile); } @@ -566,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); /* * This param is a debug firmware feature. It switches default clock @@ -637,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; if (IVPU_WA(disable_d0i2)) - boot_params->power_profile = 1; + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 1d0b2bd9d65c..9a3935be1c05 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -39,6 +39,7 @@ struct ivpu_fw_info { u64 read_only_addr; u32 read_only_size; u32 sched_mode; + u64 last_heartbeat; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 8741c73b92ce..59cfcf3eaded 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -28,9 +28,19 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con { ivpu_dbg(vdev, BO, "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n", - action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0, + action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx_id, (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc, - (bool)bo->base.base.import_attach); + (bool)drm_gem_is_imported(&bo->base.base)); +} + +static inline int ivpu_bo_lock(struct ivpu_bo *bo) +{ + return dma_resv_lock(bo->base.base.resv, NULL); +} + +static inline void ivpu_bo_unlock(struct ivpu_bo *bo) +{ + dma_resv_unlock(bo->base.base.resv); } /* @@ -43,22 +53,22 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con int __must_check ivpu_bo_pin(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct sg_table *sgt; int ret = 0; - mutex_lock(&bo->lock); - ivpu_dbg_bo(vdev, bo, "pin"); - drm_WARN_ON(&vdev->drm, !bo->ctx); - if (!bo->mmu_mapped) { - struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base); + sgt = drm_gem_shmem_get_pages_sgt(&bo->base); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); + return ret; + } - if (IS_ERR(sgt)) { - ret = PTR_ERR(sgt); - ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); - goto unlock; - } + ivpu_bo_lock(bo); + if (!bo->mmu_mapped) { + drm_WARN_ON(&vdev->drm, !bo->ctx); ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt, ivpu_bo_is_snooped(bo)); if (ret) { @@ -69,7 +79,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo) } unlock: - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); return ret; } @@ -84,7 +94,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, if (!drm_dev_enter(&vdev->drm, &idx)) return -ENODEV; - mutex_lock(&bo->lock); + ivpu_bo_lock(bo); ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node); if (!ret) { @@ -94,9 +104,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret); } - ivpu_dbg_bo(vdev, bo, "alloc"); - - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); drm_dev_exit(idx); @@ -107,7 +115,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount)); + lockdep_assert(dma_resv_held(bo->base.base.resv) || !kref_read(&bo->base.base.refcount)); if (bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); @@ -122,17 +130,15 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) bo->ctx = NULL; } - if (bo->base.base.import_attach) + if (drm_gem_is_imported(&bo->base.base)) return; - dma_resv_lock(bo->base.base.resv, NULL); if (bo->base.sgt) { dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(bo->base.sgt); kfree(bo->base.sgt); bo->base.sgt = NULL; } - dma_resv_unlock(bo->base.base.resv); } void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) @@ -144,12 +150,12 @@ void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m mutex_lock(&vdev->bo_list_lock); list_for_each_entry(bo, &vdev->bo_list, bo_list_node) { - mutex_lock(&bo->lock); + ivpu_bo_lock(bo); if (bo->ctx == ctx) { ivpu_dbg_bo(vdev, bo, "unbind"); ivpu_bo_unbind_locked(bo); } - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); } mutex_unlock(&vdev->bo_list_lock); } @@ -169,7 +175,6 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */ INIT_LIST_HEAD(&bo->bo_list_node); - mutex_init(&bo->lock); return &bo->base.base; } @@ -215,7 +220,7 @@ fail_detach: return ERR_PTR(ret); } -static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags) +static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, u32 ctx_id) { struct drm_gem_shmem_object *shmem; struct ivpu_bo *bo; @@ -233,6 +238,7 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla return ERR_CAST(shmem); bo = to_ivpu_bo(&shmem->base); + bo->ctx_id = ctx_id; bo->base.map_wc = flags & DRM_IVPU_BO_WC; bo->flags = flags; @@ -240,6 +246,8 @@ static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 fla list_add_tail(&bo->bo_list_node, &vdev->bo_list); mutex_unlock(&vdev->bo_list_lock); + ivpu_dbg_bo(vdev, bo, "alloc"); + return bo; } @@ -277,12 +285,16 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj) list_del(&bo->bo_list_node); mutex_unlock(&vdev->bo_list_lock); - drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) && + !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0); + drm_WARN_ON(&vdev->drm, bo->base.vaddr); ivpu_bo_unbind_locked(bo); - mutex_destroy(&bo->lock); + drm_WARN_ON(&vdev->drm, bo->mmu_mapped); + drm_WARN_ON(&vdev->drm, bo->ctx); - drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1); + drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1); drm_gem_shmem_free(&bo->base); } @@ -314,7 +326,7 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi if (size == 0) return -EINVAL; - bo = ivpu_bo_alloc(vdev, size, args->flags); + bo = ivpu_bo_alloc(vdev, size, args->flags, file_priv->ctx.id); if (IS_ERR(bo)) { ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)", bo, file_priv->ctx.id, args->size, args->flags); @@ -322,7 +334,10 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi } ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); - if (!ret) + if (ret) + ivpu_err(vdev, "Failed to create handle for BO: %pe (ctx %u size %llu flags 0x%x)", + bo, file_priv->ctx.id, args->size, args->flags); + else args->vpu_addr = bo->vpu_addr; drm_gem_object_put(&bo->base.base); @@ -345,7 +360,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end)); drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); - bo = ivpu_bo_alloc(vdev, size, flags); + bo = ivpu_bo_alloc(vdev, size, flags, IVPU_GLOBAL_CONTEXT_MMU_SSID); if (IS_ERR(bo)) { ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", bo, range->start, size, flags); @@ -361,9 +376,9 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, goto err_put; if (flags & DRM_IVPU_BO_MAPPABLE) { - dma_resv_lock(bo->base.base.resv, NULL); - ret = drm_gem_shmem_vmap(&bo->base, &map); - dma_resv_unlock(bo->base.base.resv); + ivpu_bo_lock(bo); + ret = drm_gem_shmem_vmap_locked(&bo->base, &map); + ivpu_bo_unlock(bo); if (ret) goto err_put; @@ -386,9 +401,9 @@ void ivpu_bo_free(struct ivpu_bo *bo) struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); if (bo->flags & DRM_IVPU_BO_MAPPABLE) { - dma_resv_lock(bo->base.base.resv, NULL); - drm_gem_shmem_vunmap(&bo->base, &map); - dma_resv_unlock(bo->base.base.resv); + ivpu_bo_lock(bo); + drm_gem_shmem_vunmap_locked(&bo->base, &map); + ivpu_bo_unlock(bo); } drm_gem_object_put(&bo->base.base); @@ -407,12 +422,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file bo = to_ivpu_bo(obj); - mutex_lock(&bo->lock); + ivpu_bo_lock(bo); args->flags = bo->flags; args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node); args->vpu_addr = bo->vpu_addr; args->size = obj->size; - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); drm_gem_object_put(obj); return ret; @@ -449,10 +464,10 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { - mutex_lock(&bo->lock); + ivpu_bo_lock(bo); drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", - bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size, + bo, bo->ctx_id, bo->vpu_addr, bo->base.base.size, bo->flags, kref_read(&bo->base.base.refcount)); if (bo->base.pages) @@ -461,12 +476,12 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) if (bo->mmu_mapped) drm_printf(p, " mmu_mapped"); - if (bo->base.base.import_attach) + if (drm_gem_is_imported(&bo->base.base)) drm_printf(p, " imported"); drm_printf(p, "\n"); - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); } void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index a222a9ec9d61..aa8ff14f7aae 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -17,10 +17,10 @@ struct ivpu_bo { struct list_head bo_list_node; struct drm_mm_node mm_node; - struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */ u64 vpu_addr; u32 flags; u32 job_status; /* Valid only for command buffer */ + u32 ctx_id; bool mmu_mapped; }; diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index ec9a3629da3a..08dcc31b56f4 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -94,12 +94,14 @@ static void timeouts_init(struct ivpu_device *vdev) vdev->timeout.boot = -1; vdev->timeout.jsm = -1; vdev->timeout.tdr = -1; + vdev->timeout.inference = -1; vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = -1; } else if (ivpu_is_fpga(vdev)) { vdev->timeout.boot = 50; vdev->timeout.jsm = 15000; vdev->timeout.tdr = 30000; + vdev->timeout.inference = 900000; vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = 500; vdev->timeout.state_dump_msg = 10000; @@ -107,6 +109,7 @@ static void timeouts_init(struct ivpu_device *vdev) vdev->timeout.boot = 50; vdev->timeout.jsm = 500; vdev->timeout.tdr = 10000; + vdev->timeout.inference = 300000; vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 100; vdev->timeout.state_dump_msg = 10; @@ -114,12 +117,13 @@ static void timeouts_init(struct ivpu_device *vdev) vdev->timeout.boot = 1000; vdev->timeout.jsm = 500; vdev->timeout.tdr = 2000; + vdev->timeout.inference = 60000; if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) vdev->timeout.autosuspend = 10; else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 100; } } diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 16435f2756d0..d79668fe1609 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_H__ @@ -82,19 +82,19 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) return range->end - range->start; } -static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) { - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); + return ivpu_hw_btrs_dpu_max_freq_get(vdev); } -static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) { - ivpu_hw_ip_irq_clear(vdev); + return ivpu_hw_btrs_dpu_freq_get(vdev); } -static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { - return ivpu_hw_btrs_pll_freq_get(vdev); + ivpu_hw_ip_irq_clear(vdev); } static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 56c56012b980..b236c7234daa 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ +#include <linux/units.h> + #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_hw_btrs.h" @@ -28,17 +30,13 @@ #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) -#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_CONFIG_DEFAULT 0x0 -#define PLL_SIMULATION_FREQ 10000000 -#define PLL_REF_CLK_FREQ 50000000 +#define PLL_REF_CLK_FREQ 50000000ull +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) + #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) #define TIMEOUT_US (150 * USEC_PER_MSEC) @@ -62,6 +60,8 @@ #define DCT_ENABLE 0x1 #define DCT_DISABLE 0x0 +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); + int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) { REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); @@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev) hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; hw->sku = BTRS_MTL_TILE_SKU_BOTH; - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); return 0; } @@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) prepare_wp_request(vdev, &wp, enable); - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); ret = wp_request_send(vdev, &wp); if (ret) { @@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); } +static u32 pll_config_get_mtl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); +} + +static u32 pll_config_get_lnl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); +} + +static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) +{ + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; +} + +static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) +{ + return PLL_RATIO_TO_FREQ(ratio) / 2; +} + +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(ratio); + else + return pll_ratio_to_dpu_freq_lnl(ratio); +} + +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) +{ + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); +} + +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); + else + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); +} + /* Handler for IRQs from Buttress core (irqB) */ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) { @@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) if (!status) return false; - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_mtl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); @@ -633,8 +677,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq) queue_work(system_wq, &vdev->irq_dct_work); } - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_lnl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", @@ -717,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); } -static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) -{ - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; - u32 cpu_clock; - - if ((config & 0xff) == MTL_PLL_RATIO_4_3) - cpu_clock = pll_clock * 2 / 4; - else - cpu_clock = pll_clock * 2 / 5; - - return cpu_clock; -} - -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) -{ - struct ivpu_hw_info *hw = vdev->hw; - - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_ratio_to_freq_mtl(ratio, hw->config); - else - return PLL_RATIO_TO_FREQ(ratio); -} - -static u32 pll_freq_get_mtl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); - pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); -} - -static u32 pll_freq_get_lnl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) -{ - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_freq_get_mtl(vdev); - else - return pll_freq_get_lnl(vdev); -} - u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) { if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 1fd71b4d4ab0..d2d82651976d 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_BTRS_H__ @@ -13,9 +13,8 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); @@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev); void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 823f6a57dc54..2bf9882ab52e 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -683,6 +683,7 @@ static void pwr_island_delay_set(struct ivpu_device *vdev) return; switch (ivpu_device_id(vdev)) { + case PCI_DEVICE_ID_WCL: case PCI_DEVICE_ID_PTL_P: post = high ? 18 : 0; post1 = 0; diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 0e096fd9b95d..39f83225c181 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req struct ivpu_ipc_consumer cons; int ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) && + pm_runtime_enabled(vdev->drm.dev)); ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 004059e4f1e8..060f1fc031d3 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <drm/drm_file.h> @@ -100,6 +100,43 @@ err_free_cmdq: return NULL; } +/** + * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue. + * @cmdq: Pointer to the command queue structure. + * + * Returns the number of entries that can fit in the command queue memory. + */ +static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq) +{ + size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header); + + return size / sizeof(struct vpu_job_queue_entry); +} + +/** + * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode. + * @vdev: Pointer to the ivpu device structure. + * @flags: Input flags to determine the command queue flags. + * + * Returns the calculated command queue flags, considering both the input flags + * and the current test mode settings. + */ +static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags) +{ + u32 cmdq_flags = 0; + + if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX)) + cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + + /* Test mode can override the TURBO flag coming from the application */ + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE) + cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE) + cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + + return cmdq_flags; +} + static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq); @@ -107,8 +144,7 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c kfree(cmdq); } -static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, - bool is_legacy) +static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags) { struct ivpu_device *vdev = file_priv->vdev; struct ivpu_cmdq *cmdq = NULL; @@ -121,10 +157,6 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p ivpu_err(vdev, "Failed to allocate command queue\n"); return NULL; } - - cmdq->priority = priority; - cmdq->is_legacy = is_legacy; - ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, &file_priv->cmdq_id_next, GFP_KERNEL); if (ret < 0) { @@ -132,7 +164,15 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p goto err_free_cmdq; } - ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id); + cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq); + cmdq->priority = priority; + + cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); + cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE; + cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags); + + ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n", + cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags); return cmdq; err_free_cmdq: @@ -188,27 +228,14 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq * return ret; } -static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq) +static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq) { - jobq->header.engine_idx = VPU_ENGINE_COMPUTE; jobq->header.head = 0; jobq->header.tail = 0; - if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { - ivpu_dbg(vdev, JOB, "Turbo mode enabled"); - jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; - } - wmb(); /* Flush WC buffer for jobq->header */ } -static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq) -{ - size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header); - - return size / sizeof(struct vpu_job_queue_entry); -} - static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { struct ivpu_device *vdev = file_priv->vdev; @@ -219,10 +246,7 @@ static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq if (cmdq->db_id) return 0; - cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq); - cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); - - ivpu_cmdq_jobq_init(vdev, cmdq->jobq); + ivpu_cmdq_jobq_reset(vdev, cmdq->jobq); if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority); @@ -247,6 +271,10 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm if (!cmdq->db_id) return 0; + ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); if (!ret) @@ -254,10 +282,6 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm cmdq->id, file_priv->ctx.id); } - ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); - if (!ret) - ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); - xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); cmdq->db_id = 0; @@ -291,9 +315,10 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_pr break; if (!cmdq) { - cmdq = ivpu_cmdq_create(file_priv, priority, true); + cmdq = ivpu_cmdq_create(file_priv, priority, 0); if (!cmdq) return NULL; + cmdq->is_legacy = true; } return cmdq; @@ -470,8 +495,8 @@ static void ivpu_job_destroy(struct ivpu_job *job) struct ivpu_device *vdev = job->vdev; u32 i; - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", - job->job_id, job->file_priv->ctx.id, job->engine_idx); + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); for (i = 0; i < job->bo_count; i++) if (job->bos[i]) @@ -564,8 +589,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 dma_fence_signal(job->done_fence); trace_job("done", job); - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); @@ -664,8 +689,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) } trace_job("submit", job); - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); mutex_unlock(&file_priv->lock); @@ -681,8 +706,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); err_unlock: - mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); ivpu_rpm_put(vdev); return ret; } @@ -777,7 +802,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, goto err_free_handles; } - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count); + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", + file_priv->ctx.id, cmdq_id, buffer_count); job = ivpu_job_create(file_priv, engine, buffer_count); if (!job) { @@ -873,23 +899,31 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file * int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_create *args = data; struct ivpu_cmdq *cmdq; + int ret; - if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false); + cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags); if (cmdq) args->cmdq_id = cmdq->id; mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); + return cmdq ? 0 : -ENOMEM; } @@ -899,28 +933,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_destroy *args = data; struct ivpu_cmdq *cmdq; - u32 cmdq_id; + u32 cmdq_id = 0; int ret; if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); if (!cmdq || cmdq->is_legacy) { ret = -ENOENT; - goto err_unlock; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; } - cmdq_id = cmdq->id; - ivpu_cmdq_destroy(file_priv, cmdq); mutex_unlock(&file_priv->lock); - ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); - return 0; -err_unlock: - mutex_unlock(&file_priv->lock); + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + return ret; } @@ -970,7 +1011,8 @@ void ivpu_context_abort_work_fn(struct work_struct *work) return; if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) - ivpu_jsm_reset_engine(vdev, 0); + if (ivpu_jsm_reset_engine(vdev, 0)) + return; mutex_lock(&vdev->context_list_lock); xa_for_each(&vdev->context_xa, ctx_id, file_priv) { @@ -993,7 +1035,8 @@ void ivpu_context_abort_work_fn(struct work_struct *work) if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) goto runtime_put; - ivpu_jsm_hws_resume_engine(vdev, 0); + if (ivpu_jsm_hws_resume_engine(vdev, 0)) + return; /* * In hardware scheduling mode NPU already has stopped processing jobs * and won't send us any further notifications, thus we have to free job related resources diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 219ab8afefab..0256b2dfefc1 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -7,6 +7,7 @@ #include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" #include "vpu_jsm_api.h" const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) @@ -163,8 +164,10 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) + if (ret) { ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret); + ivpu_pm_trigger_recovery(vdev, "Engine reset failed"); + } return ret; } @@ -354,8 +357,10 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) + if (ret) { ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret); + ivpu_pm_trigger_recovery(vdev, "Engine resume failed"); + } return ret; } diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index ffe7b10f8a76..2a043baf10ca 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -4,6 +4,7 @@ */ #include <drm/drm_file.h> +#include <linux/pm_runtime.h> #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); if (get_instance_by_mask(file_priv, args->metric_group_mask)) { @@ -96,6 +101,8 @@ err_free_ms: kfree(ms); unlock: mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); return ret; } @@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * unlock: mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ret; } @@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file { struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_device *vdev = file_priv->vdev; struct ivpu_ms_instance *ms; + int ret; if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ms ? 0 : -EINVAL; } @@ -281,6 +300,9 @@ unlock: void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) { struct ivpu_ms_instance *ms, *tmp; + struct ivpu_device *vdev = file_priv->vdev; + + pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&file_priv->ms_lock); @@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) free_instance(file_priv, ms); mutex_unlock(&file_priv->ms_lock); + + pm_runtime_put_autosuspend(vdev->drm.dev); } void ivpu_ms_cleanup_all(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index b5891e91f7ab..eacda1dbe840 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -33,6 +33,10 @@ static unsigned long ivpu_tdr_timeout_ms; module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); +static unsigned long ivpu_inference_timeout_ms; +module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644); +MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default"); + #define PM_RESCHEDULE_LIMIT 5 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) @@ -44,6 +48,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; + fw->last_heartbeat = 0; } static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) @@ -189,7 +194,30 @@ static void ivpu_job_timeout_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); struct ivpu_device *vdev = pm->vdev; + unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? ivpu_inference_timeout_ms : + vdev->timeout.inference; + u64 inference_max_retries; + u64 heartbeat; + + if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) { + ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n"); + goto recovery; + } + + inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms); + if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) { + ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n", + inference_max_retries); + goto recovery; + } + vdev->fw->last_heartbeat = heartbeat; + ivpu_start_job_timeout_detection(vdev); + return; + +recovery: + atomic_set(&vdev->job_timeout_counter, 0); ivpu_pm_trigger_recovery(vdev, "TDR"); } @@ -204,6 +232,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev) { cancel_delayed_work_sync(&vdev->pm->job_timeout_work); + atomic_set(&vdev->job_timeout_counter, 0); } int ivpu_pm_suspend_cb(struct device *dev) @@ -428,16 +457,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -445,15 +475,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } @@ -466,7 +497,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work) if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 97102feaf8dd..268ab7744a8b 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include <linux/device.h> #include <linux/err.h> +#include <linux/pm_runtime.h> +#include <linux/units.h> #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -90,10 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR_RO(sched_mode); +/** + * DOC: npu_max_frequency + * + * The npu_max_frequency shows maximum frequency in MHz of the NPU's data + * processing unit + */ +static ssize_t +npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_max_frequency_mhz); + +/** + * DOC: npu_current_frequency_mhz + * + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = 0; + + /* Read frequency only if device is active, otherwise frequency is 0 */ + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { + freq = ivpu_hw_dpu_freq_get(vdev); + + pm_runtime_put_autosuspend(vdev->drm.dev); + } + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_current_frequency_mhz); + static struct attribute *ivpu_dev_attrs[] = { &dev_attr_npu_busy_time_us.attr, &dev_attr_npu_memory_utilization.attr, &dev_attr_sched_mode.attr, + &dev_attr_npu_max_frequency_mhz.attr, + &dev_attr_npu_current_frequency_mhz.attr, NULL, }; diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 908e68ea1c39..218468bbbcad 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -26,7 +26,7 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 26 +#define VPU_BOOT_API_VER_MINOR 28 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -76,8 +76,15 @@ struct vpu_firmware_header { * submission queue size and device capabilities. */ u32 preemption_buffer_2_size; + /* + * Maximum preemption buffer size that the FW can use: no need for the host + * driver to allocate more space than that specified by these fields. + * A value of 0 means no declared limit. + */ + u32 preemption_buffer_1_max_size; + u32 preemption_buffer_2_max_size; /* Space reserved for future preemption-related fields. */ - u32 preemption_reserved[6]; + u32 preemption_reserved[4]; /* FW image read only section start address, 4KB aligned */ u64 ro_section_start_address; /* FW image read only section size, 4KB aligned */ @@ -134,7 +141,7 @@ enum vpu_trace_destination { /* * Processor bit shifts (for loggable HW components). */ -#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_RESERVED 0 #define VPU_TRACE_PROC_BIT_LRT 1 #define VPU_TRACE_PROC_BIT_LNN 2 #define VPU_TRACE_PROC_BIT_SHV_0 3 diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7215c144158c..4b6b2b3d2583 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 25 +#define VPU_JSM_API_VER_MINOR 29 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -53,8 +53,7 @@ * Engine indexes. */ #define VPU_ENGINE_COMPUTE 0 -#define VPU_ENGINE_COPY 1 -#define VPU_ENGINE_NB 2 +#define VPU_ENGINE_NB 1 /* * VPU status values. @@ -126,11 +125,13 @@ enum { * When set, indicates that job queue uses native fences (as inline commands * in job queue). Such queues may also use legacy fences (as commands in batch buffers). * When cleared, indicates the job queue only uses legacy fences. - * NOTE: For queues using native fences, VPU expects that all jobs in the queue - * are immediately followed by an inline command object. This object is expected - * to be a fence signal command in most cases, but can also be a NOP in case the host - * does not need per-job fence signalling. Other inline commands objects can be - * inserted between "job and inline command" pairs. + * NOTES: + * 1. For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + * 2. Native fence queues are only supported on VPU 40xx onwards. */ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), @@ -275,6 +276,8 @@ struct vpu_inline_cmd { u64 value; /* User VA of the log buffer in which to add log entry on completion. */ u64 log_buffer_va; + /* NPU private data. */ + u64 npu_private_data; } fence; /* Other commands do not have a payload. */ /* Payload definition for future inline commands can be inserted here. */ @@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update { /** Metric group mask that identifies metric streamer instance. */ u64 metric_group_mask; /** - * Address and size of the buffer where the VPU will write metric data. If - * the buffer address is 0 or same as the currently used buffer the VPU will - * continue writing metric data to the current buffer. In this case the - * buffer size is ignored and the size of the current buffer is unchanged. - * If the address is non-zero and differs from the current buffer address the - * VPU will immediately switch data collection to the new buffer. + * Address and size of the buffer where the VPU will write metric data. + * This member dictates how the update operation should perform: + * 1. client needs information about the number of collected samples and the + * amount of data written to the current buffer + * 2. client wants to switch to a new buffer + * + * Case 1. is identified by the buffer address being 0 or the same as the + * currently used buffer address. In this case the buffer size is ignored and + * the size of the current buffer is unchanged. The VPU will return an update + * in the vpu_jsm_metric_streamer_done structure. The internal writing position + * into the buffer is not changed. + * + * Case 2. is identified by the address being non-zero and differs from the + * current buffer address. The VPU will immediately switch data collection to + * the new buffer. Then the VPU will return an update in the + * vpu_jsm_metric_streamer_done structure. */ u64 buffer_addr; u64 buffer_size; @@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* * Default quantum in 100ns units for scheduling across processes * within a priority band + * Minimum value supported by NPU is 1ms (10000 in 100ns units). */ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; /* @@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { * in situations when it's starved by the focus band. */ u32 normal_band_percentage; - /* Reserved */ - u32 reserved_0; + /* + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. + */ + u32 tdr_timeout; }; /* @@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { s32 in_process_priority; /* Zero padding / Reserved */ u32 reserved_1; - /* Context quantum relative to other contexts of same priority in the same process */ + /* + * Context quantum relative to other contexts of same priority in the same process + * Minimum value supported by NPU is 1ms (10000 in 100ns units). + */ u64 context_quantum; /* Grace period when preempting context of the same priority within the same process */ u64 grace_period_same_priority; diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig index a9f866230058..5e405a19c157 100644 --- a/drivers/accel/qaic/Kconfig +++ b/drivers/accel/qaic/Kconfig @@ -8,7 +8,6 @@ config DRM_ACCEL_QAIC depends on DRM_ACCEL depends on PCI && HAS_IOMEM depends on MHI_BUS - depends on MMU select CRC32 help Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile index 35e883515629..1106b876f737 100644 --- a/drivers/accel/qaic/Makefile +++ b/drivers/accel/qaic/Makefile @@ -10,6 +10,7 @@ qaic-y := \ qaic_control.o \ qaic_data.o \ qaic_drv.o \ + qaic_ras.o \ qaic_timesync.o \ sahara.o diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index 0dbb8e32e4b9..c31081e42cee 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -167,6 +167,14 @@ struct qaic_device { struct workqueue_struct *bootlog_wq; /* Synchronizes access of pages in MHI bootlog device */ struct mutex bootlog_mutex; + /* MHI RAS channel device */ + struct mhi_device *ras_ch; + /* Correctable error count */ + unsigned int ce_count; + /* Un-correctable error count */ + unsigned int ue_count; + /* Un-correctable non-fatal error count */ + unsigned int ue_nf_count; }; struct qaic_drm_device { @@ -213,8 +221,6 @@ struct qaic_bo { bool sliced; /* Request ID of this BO if it is queued for execution */ u16 req_id; - /* Handle assigned to this BO */ - u32 handle; /* Wait on this for completion of DMA transfer of this BO */ struct completion xfer_done; /* diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 43aba57b48f0..797289e9d780 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -609,7 +609,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc struct scatterlist *sg; int ret = 0; - if (obj->import_attach) + if (drm_gem_is_imported(obj)) return -EINVAL; for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { @@ -630,7 +630,7 @@ static void qaic_free_object(struct drm_gem_object *obj) { struct qaic_bo *bo = to_qaic_bo(obj); - if (obj->import_attach) { + if (drm_gem_is_imported(obj)) { /* DMABUF/PRIME Path */ drm_prime_gem_destroy(obj, NULL); } else { @@ -731,7 +731,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi if (ret) goto free_bo; - bo->handle = args->handle; drm_gem_object_put(obj); srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); @@ -870,7 +869,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, { int ret; - if (bo->base.import_attach) + if (drm_gem_is_imported(&bo->base)) ret = qaic_prepare_import_bo(bo, hdr); else ret = qaic_prepare_export_bo(qdev, bo, hdr); @@ -894,7 +893,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) { - if (bo->base.import_attach) + if (drm_gem_is_imported(&bo->base)) qaic_unprepare_import_bo(bo); else qaic_unprepare_export_bo(qdev, bo); diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index ba0cf2f94732..a991b8198dc4 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -240,7 +240,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d mhi_unprepare: mhi_unprepare_from_transfer(mhi_dev); destroy_workqueue: - flush_workqueue(qdev->bootlog_wq); destroy_workqueue(qdev->bootlog_wq); out: return ret; @@ -253,7 +252,6 @@ static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev) qdev = dev_get_drvdata(&mhi_dev->dev); mhi_unprepare_from_transfer(qdev->bootlog_ch); - flush_workqueue(qdev->bootlog_wq); destroy_workqueue(qdev->bootlog_wq); qdev->bootlog_ch = NULL; } diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 3b415e2c9431..e31bcb0ecfc9 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -29,6 +29,7 @@ #include "mhi_controller.h" #include "qaic.h" #include "qaic_debugfs.h" +#include "qaic_ras.h" #include "qaic_timesync.h" #include "sahara.h" @@ -695,6 +696,10 @@ static int __init qaic_init(void) if (ret) pr_debug("qaic: qaic_bootlog_register failed %d\n", ret); + ret = qaic_ras_register(); + if (ret) + pr_debug("qaic: qaic_ras_register failed %d\n", ret); + return 0; free_mhi: @@ -722,6 +727,7 @@ static void __exit qaic_exit(void) * reinitializing the link_up state after the cleanup is done. */ link_up = true; + qaic_ras_unregister(); qaic_bootlog_unregister(); qaic_timesync_deinit(); sahara_unregister(); diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c new file mode 100644 index 000000000000..914ffc4a9970 --- /dev/null +++ b/drivers/accel/qaic/qaic_ras.c @@ -0,0 +1,642 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ +/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ + +#include <asm/byteorder.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/mhi.h> + +#include "qaic.h" +#include "qaic_ras.h" + +#define MAGIC 0x55AA +#define VERSION 0x2 +#define HDR_SZ 12 +#define NUM_TEMP_LVL 3 +#define POWER_BREAK BIT(0) + +enum msg_type { + MSG_PUSH, /* async push from device */ + MSG_REQ, /* sync request to device */ + MSG_RESP, /* sync response from device */ +}; + +enum err_type { + CE, /* correctable error */ + UE, /* uncorrectable error */ + UE_NF, /* uncorrectable error that is non-fatal, expect a disruption */ + ERR_TYPE_MAX, +}; + +static const char * const err_type_str[] = { + [CE] = "Correctable", + [UE] = "Uncorrectable", + [UE_NF] = "Uncorrectable Non-Fatal", +}; + +static const char * const err_class_str[] = { + [CE] = "Warning", + [UE] = "Fatal", + [UE_NF] = "Warning", +}; + +enum err_source { + SOC_MEM, + PCIE, + DDR, + SYS_BUS1, + SYS_BUS2, + NSP_MEM, + TSENS, +}; + +static const char * const err_src_str[TSENS + 1] = { + [SOC_MEM] = "SoC Memory", + [PCIE] = "PCIE", + [DDR] = "DDR", + [SYS_BUS1] = "System Bus source 1", + [SYS_BUS2] = "System Bus source 2", + [NSP_MEM] = "NSP Memory", + [TSENS] = "Temperature Sensors", +}; + +struct ras_data { + /* header start */ + /* Magic number to validate the message */ + u16 magic; + /* RAS version number */ + u16 ver; + u32 seq_num; + /* RAS message type */ + u8 type; + u8 id; + /* Size of RAS message without the header in byte */ + u16 len; + /* header end */ + s32 result; + /* + * Error source + * 0 : SoC Memory + * 1 : PCIE + * 2 : DDR + * 3 : System Bus source 1 + * 4 : System Bus source 2 + * 5 : NSP Memory + * 6 : Temperature Sensors + */ + u32 source; + /* + * Stores the error type, there are three types of error in RAS + * 0 : correctable error (CE) + * 1 : uncorrectable error (UE) + * 2 : uncorrectable error that is non-fatal (UE_NF) + */ + u32 err_type; + u32 err_threshold; + u32 ce_count; + u32 ue_count; + u32 intr_num; + /* Data specific to error source */ + u8 syndrome[64]; +} __packed; + +struct soc_mem_syndrome { + u64 error_address[8]; +} __packed; + +struct nsp_mem_syndrome { + u32 error_address[8]; + u8 nsp_id; +} __packed; + +struct ddr_syndrome { + u32 count; + u32 irq_status; + u32 data_31_0[2]; + u32 data_63_32[2]; + u32 data_95_64[2]; + u32 data_127_96[2]; + u32 addr_lsb; + u16 addr_msb; + u16 parity_bits; + u16 instance; + u16 err_type; +} __packed; + +struct tsens_syndrome { + u32 threshold_type; + s32 temp; +} __packed; + +struct sysbus1_syndrome { + u32 slave; + u32 err_type; + u16 addr[8]; + u8 instance; +} __packed; + +struct sysbus2_syndrome { + u32 lsb3; + u32 msb3; + u32 lsb2; + u32 msb2; + u32 ext_id; + u16 path; + u16 op_type; + u16 len; + u16 redirect; + u8 valid; + u8 word_error; + u8 non_secure; + u8 opc; + u8 error_code; + u8 trans_type; + u8 addr_space; + u8 instance; +} __packed; + +struct pcie_syndrome { + /* CE info */ + u32 bad_tlp; + u32 bad_dllp; + u32 replay_rollover; + u32 replay_timeout; + u32 rx_err; + u32 internal_ce_count; + /* UE_NF info */ + u32 fc_timeout; + u32 poison_tlp; + u32 ecrc_err; + u32 unsupported_req; + u32 completer_abort; + u32 completion_timeout; + /* UE info */ + u32 addr; + u8 index; + /* + * Flag to indicate specific event of PCIe + * BIT(0): Power break (low power) + * BIT(1) to BIT(7): Reserved + */ + u8 flag; +} __packed; + +static const char * const threshold_type_str[NUM_TEMP_LVL] = { + [0] = "lower", + [1] = "upper", + [2] = "critical", +}; + +static void ras_msg_to_cpu(struct ras_data *msg) +{ + struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; + struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; + struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; + struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; + struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; + struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; + struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; + int i; + + le16_to_cpus(&msg->magic); + le16_to_cpus(&msg->ver); + le32_to_cpus(&msg->seq_num); + le16_to_cpus(&msg->len); + le32_to_cpus(&msg->result); + le32_to_cpus(&msg->source); + le32_to_cpus(&msg->err_type); + le32_to_cpus(&msg->err_threshold); + le32_to_cpus(&msg->ce_count); + le32_to_cpus(&msg->ue_count); + le32_to_cpus(&msg->intr_num); + + switch (msg->source) { + case SOC_MEM: + for (i = 0; i < 8; i++) + le64_to_cpus(&soc_syndrome->error_address[i]); + break; + case PCIE: + le32_to_cpus(&pcie_syndrome->bad_tlp); + le32_to_cpus(&pcie_syndrome->bad_dllp); + le32_to_cpus(&pcie_syndrome->replay_rollover); + le32_to_cpus(&pcie_syndrome->replay_timeout); + le32_to_cpus(&pcie_syndrome->rx_err); + le32_to_cpus(&pcie_syndrome->internal_ce_count); + le32_to_cpus(&pcie_syndrome->fc_timeout); + le32_to_cpus(&pcie_syndrome->poison_tlp); + le32_to_cpus(&pcie_syndrome->ecrc_err); + le32_to_cpus(&pcie_syndrome->unsupported_req); + le32_to_cpus(&pcie_syndrome->completer_abort); + le32_to_cpus(&pcie_syndrome->completion_timeout); + le32_to_cpus(&pcie_syndrome->addr); + break; + case DDR: + le16_to_cpus(&ddr_syndrome->instance); + le16_to_cpus(&ddr_syndrome->err_type); + le32_to_cpus(&ddr_syndrome->count); + le32_to_cpus(&ddr_syndrome->irq_status); + le32_to_cpus(&ddr_syndrome->data_31_0[0]); + le32_to_cpus(&ddr_syndrome->data_31_0[1]); + le32_to_cpus(&ddr_syndrome->data_63_32[0]); + le32_to_cpus(&ddr_syndrome->data_63_32[1]); + le32_to_cpus(&ddr_syndrome->data_95_64[0]); + le32_to_cpus(&ddr_syndrome->data_95_64[1]); + le32_to_cpus(&ddr_syndrome->data_127_96[0]); + le32_to_cpus(&ddr_syndrome->data_127_96[1]); + le16_to_cpus(&ddr_syndrome->parity_bits); + le16_to_cpus(&ddr_syndrome->addr_msb); + le32_to_cpus(&ddr_syndrome->addr_lsb); + break; + case SYS_BUS1: + le32_to_cpus(&sysbus1_syndrome->slave); + le32_to_cpus(&sysbus1_syndrome->err_type); + for (i = 0; i < 8; i++) + le16_to_cpus(&sysbus1_syndrome->addr[i]); + break; + case SYS_BUS2: + le16_to_cpus(&sysbus2_syndrome->op_type); + le16_to_cpus(&sysbus2_syndrome->len); + le16_to_cpus(&sysbus2_syndrome->redirect); + le16_to_cpus(&sysbus2_syndrome->path); + le32_to_cpus(&sysbus2_syndrome->ext_id); + le32_to_cpus(&sysbus2_syndrome->lsb2); + le32_to_cpus(&sysbus2_syndrome->msb2); + le32_to_cpus(&sysbus2_syndrome->lsb3); + le32_to_cpus(&sysbus2_syndrome->msb3); + break; + case NSP_MEM: + for (i = 0; i < 8; i++) + le32_to_cpus(&nsp_syndrome->error_address[i]); + break; + case TSENS: + le32_to_cpus(&tsens_syndrome->threshold_type); + le32_to_cpus(&tsens_syndrome->temp); + break; + } +} + +static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) +{ + struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; + struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; + struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; + struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; + struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; + struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; + struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; + char *class; + char *level; + + if (msg->magic != MAGIC) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic); + return; + } + + if (!msg->ver || msg->ver > VERSION) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver); + return; + } + + if (msg->type != MSG_PUSH) { + pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n"); + return; + } + + if (msg->len != sizeof(*msg) - HDR_SZ) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len); + return; + } + + if (msg->err_type >= ERR_TYPE_MAX) { + pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type); + return; + } + + if (msg->err_type == UE) + level = KERN_ERR; + else + level = KERN_WARNING; + + switch (msg->source) { + case SOC_MEM: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + soc_syndrome->error_address[0], + soc_syndrome->error_address[1], + soc_syndrome->error_address[2], + soc_syndrome->error_address[3], + soc_syndrome->error_address[4], + soc_syndrome->error_address[5], + soc_syndrome->error_address[6], + soc_syndrome->error_address[7]); + break; + case PCIE: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold); + + switch (msg->err_type) { + case CE: + /* + * Modeled after AER prints. This continues the dev_printk() from a few + * lines up. We reduce duplication of code, but also avoid re-printing the + * PCI device info so that the end result looks uniform to the log user. + */ + printk(KERN_WARNING pr_fmt("Syndrome:\n Bad TLP count %d\n Bad DLLP count %d\n Replay Rollover count %d\n Replay Timeout count %d\n Recv Error count %d\n Internal CE count %d\n"), + pcie_syndrome->bad_tlp, + pcie_syndrome->bad_dllp, + pcie_syndrome->replay_rollover, + pcie_syndrome->replay_timeout, + pcie_syndrome->rx_err, + pcie_syndrome->internal_ce_count); + if (msg->ver > 0x1) + pr_warn(" Power break %s\n", + pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF"); + break; + case UE: + printk(KERN_ERR pr_fmt("Syndrome:\n Index %d\n Address 0x%x\n"), + pcie_syndrome->index, pcie_syndrome->addr); + break; + case UE_NF: + printk(KERN_WARNING pr_fmt("Syndrome:\n FC timeout count %d\n Poisoned TLP count %d\n ECRC error count %d\n Unsupported request count %d\n Completer abort count %d\n Completion timeout count %d\n"), + pcie_syndrome->fc_timeout, + pcie_syndrome->poison_tlp, + pcie_syndrome->ecrc_err, + pcie_syndrome->unsupported_req, + pcie_syndrome->completer_abort, + pcie_syndrome->completion_timeout); + break; + default: + break; + } + break; + case DDR: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n Instance %d\n Count %d\n Data 31_0 0x%x 0x%x\n Data 63_32 0x%x 0x%x\n Data 95_64 0x%x 0x%x\n Data 127_96 0x%x 0x%x\n Parity bits 0x%x\n Address msb 0x%x\n Address lsb 0x%x\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + ddr_syndrome->instance, + ddr_syndrome->count, + ddr_syndrome->data_31_0[1], + ddr_syndrome->data_31_0[0], + ddr_syndrome->data_63_32[1], + ddr_syndrome->data_63_32[0], + ddr_syndrome->data_95_64[1], + ddr_syndrome->data_95_64[0], + ddr_syndrome->data_127_96[1], + ddr_syndrome->data_127_96[0], + ddr_syndrome->parity_bits, + ddr_syndrome->addr_msb, + ddr_syndrome->addr_lsb); + break; + case SYS_BUS1: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n %s\n err_type %d\n address0 0x%x\n address1 0x%x\n address2 0x%x\n address3 0x%x\n address4 0x%x\n address5 0x%x\n address6 0x%x\n address7 0x%x\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + sysbus1_syndrome->instance, + sysbus1_syndrome->slave ? "Slave" : "Master", + sysbus1_syndrome->err_type, + sysbus1_syndrome->addr[0], + sysbus1_syndrome->addr[1], + sysbus1_syndrome->addr[2], + sysbus1_syndrome->addr[3], + sysbus1_syndrome->addr[4], + sysbus1_syndrome->addr[5], + sysbus1_syndrome->addr[6], + sysbus1_syndrome->addr[7]); + break; + case SYS_BUS2: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n valid %d\n word error %d\n non-secure %d\n opc %d\n error code %d\n transaction type %d\n address space %d\n operation type %d\n len %d\n redirect %d\n path %d\n ext_id %d\n lsb2 %d\n msb2 %d\n lsb3 %d\n msb3 %d\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + sysbus2_syndrome->instance, + sysbus2_syndrome->valid, + sysbus2_syndrome->word_error, + sysbus2_syndrome->non_secure, + sysbus2_syndrome->opc, + sysbus2_syndrome->error_code, + sysbus2_syndrome->trans_type, + sysbus2_syndrome->addr_space, + sysbus2_syndrome->op_type, + sysbus2_syndrome->len, + sysbus2_syndrome->redirect, + sysbus2_syndrome->path, + sysbus2_syndrome->ext_id, + sysbus2_syndrome->lsb2, + sysbus2_syndrome->msb2, + sysbus2_syndrome->lsb3, + sysbus2_syndrome->msb3); + break; + case NSP_MEM: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n NSP ID %d\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + nsp_syndrome->nsp_id, + nsp_syndrome->error_address[0], + nsp_syndrome->error_address[1], + nsp_syndrome->error_address[2], + nsp_syndrome->error_address[3], + nsp_syndrome->error_address[4], + nsp_syndrome->error_address[5], + nsp_syndrome->error_address[6], + nsp_syndrome->error_address[7]); + break; + case TSENS: + if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n", + tsens_syndrome->threshold_type); + break; + } + + if (msg->err_type) + class = "Fatal"; + else if (tsens_syndrome->threshold_type) + class = "Critical"; + else + class = "Warning"; + + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n %s threshold\n %d deg C\n", + class, + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + threshold_type_str[tsens_syndrome->threshold_type], + tsens_syndrome->temp); + break; + } + + /* Uncorrectable errors are fatal */ + if (msg->err_type == UE) + mhi_soc_reset(qdev->mhi_cntrl); + + switch (msg->err_type) { + case CE: + if (qdev->ce_count != UINT_MAX) + qdev->ce_count++; + break; + case UE: + if (qdev->ce_count != UINT_MAX) + qdev->ue_count++; + break; + case UE_NF: + if (qdev->ce_count != UINT_MAX) + qdev->ue_nf_count++; + break; + default: + /* not possible */ + break; + } +} + +static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count); +} + +static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count); +} + +static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count); +} + +static DEVICE_ATTR_RO(ce_count); +static DEVICE_ATTR_RO(ue_count); +static DEVICE_ATTR_RO(ue_nonfatal_count); + +static struct attribute *ras_attrs[] = { + &dev_attr_ce_count.attr, + &dev_attr_ue_count.attr, + &dev_attr_ue_nonfatal_count.attr, + NULL, +}; + +static struct attribute_group ras_group = { + .attrs = ras_attrs, +}; + +static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct ras_data *resp; + int ret; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) { + mhi_unprepare_from_transfer(mhi_dev); + return -ENOMEM; + } + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT); + if (ret) { + kfree(resp); + mhi_unprepare_from_transfer(mhi_dev); + return ret; + } + + ret = device_add_group(&qdev->pdev->dev, &ras_group); + if (ret) { + mhi_unprepare_from_transfer(mhi_dev); + pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret); + return ret; + } + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->ras_ch = mhi_dev; + + return ret; +} + +static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + qdev->ras_ch = NULL; + device_remove_group(&qdev->pdev->dev, &ras_group); + mhi_unprepare_from_transfer(mhi_dev); +} + +static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {} + +static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct ras_data *msg = mhi_result->buf_addr; + int ret; + + if (mhi_result->transaction_status) { + kfree(msg); + return; + } + + ras_msg_to_cpu(msg); + decode_ras_msg(qdev, msg); + + ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT); + if (ret) { + dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret); + kfree(msg); + } +} + +static const struct mhi_device_id qaic_ras_mhi_match_table[] = { + { .chan = "QAIC_STATUS", }, + {}, +}; + +static struct mhi_driver qaic_ras_mhi_driver = { + .id_table = qaic_ras_mhi_match_table, + .remove = qaic_ras_mhi_remove, + .probe = qaic_ras_mhi_probe, + .ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_ras", + }, +}; + +int qaic_ras_register(void) +{ + return mhi_driver_register(&qaic_ras_mhi_driver); +} + +void qaic_ras_unregister(void) +{ + mhi_driver_unregister(&qaic_ras_mhi_driver); +} diff --git a/drivers/accel/qaic/qaic_ras.h b/drivers/accel/qaic/qaic_ras.h new file mode 100644 index 000000000000..d44a4eeeb060 --- /dev/null +++ b/drivers/accel/qaic/qaic_ras.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */ + +#ifndef __QAIC_RAS_H__ +#define __QAIC_RAS_H__ + +int qaic_ras_register(void); +void qaic_ras_unregister(void); + +#endif /* __QAIC_RAS_H__ */ diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c index 2473c66309d4..3fac540f8e03 100644 --- a/drivers/accel/qaic/qaic_timesync.c +++ b/drivers/accel/qaic/qaic_timesync.c @@ -129,7 +129,7 @@ static void qaic_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_resu static void qaic_timesync_timer(struct timer_list *t) { - struct mqts_dev *mqtsdev = from_timer(mqtsdev, t, timer); + struct mqts_dev *mqtsdev = timer_container_of(mqtsdev, t, timer); struct qts_host_time_sync_msg_data *sync_msg; u64 device_qtimer_us; u64 device_qtimer; @@ -221,7 +221,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev) { struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); - del_timer_sync(&mqtsdev->timer); + timer_delete_sync(&mqtsdev->timer); mhi_unprepare_from_transfer(mqtsdev->mhi_dev); kfree(mqtsdev->sync_msg); kfree(mqtsdev); |