Diffstat (limited to 'drivers/accel')
62 files changed, 2105 insertions, 756 deletions
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index 5119bccd1917..ad8ac6e315b6 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,3 +1,2 @@
-- Add import and export BO support
 - Add debugfs support
 - Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 5f43db02b240..e04549f64d69 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -34,6 +34,8 @@ static void aie2_job_release(struct kref *ref)
 
 	job = container_of(ref, struct amdxdna_sched_job, refcnt);
 	amdxdna_sched_job_cleanup(job);
+	atomic64_inc(&job->hwctx->job_free_cnt);
+	wake_up(&job->hwctx->priv->job_free_wq);
 	if (job->out_fence)
 		dma_fence_put(job->out_fence);
 	kfree(job);
@@ -134,7 +136,8 @@ static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
 	if (!fence)
 		return;
 
-	dma_fence_wait(fence, false);
+	/* Wait up to 2 seconds for fw to finish all pending requests */
+	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
 	dma_fence_put(fence);
 }
 
@@ -185,7 +188,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
 }
 
 static int
-aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
+aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
 {
 	struct amdxdna_sched_job *job = handle;
 	struct amdxdna_gem_obj *cmd_abo;
@@ -203,7 +206,7 @@ aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
 		goto out;
 	}
 
-	status = *data;
+	status = readl(data);
 	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
 	if (status == AIE2_STATUS_SUCCESS)
 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
@@ -216,7 +219,7 @@ out:
 }
 
 static int
-aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
+aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
 {
 	struct amdxdna_sched_job *job = handle;
 	u32 ret = 0;
@@ -230,7 +233,7 @@ aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
 		goto out;
 	}
 
-	status = *data;
+	status = readl(data);
 	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
 
 out:
@@ -239,14 +242,14 @@ out:
 }
 
 static int
-aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
+aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
 {
 	struct amdxdna_sched_job *job = handle;
 	struct amdxdna_gem_obj *cmd_abo;
-	struct cmd_chain_resp *resp;
 	struct amdxdna_dev *xdna;
 	u32 fail_cmd_status;
 	u32 fail_cmd_idx;
+	u32 cmd_status;
 	u32 ret = 0;
 
 	cmd_abo = job->cmd_bo;
@@ -256,17 +259,17 @@ aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
 		goto out;
 	}
 
-	resp = (struct cmd_chain_resp *)data;
+	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
 	xdna = job->hwctx->client->xdna;
-	XDNA_DBG(xdna, "Status 0x%x", resp->status);
-	if (resp->status == AIE2_STATUS_SUCCESS) {
+	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
+	if (cmd_status == AIE2_STATUS_SUCCESS) {
 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
 		goto out;
 	}
 
 	/* Slow path to handle error, read from ringbuf on BAR */
-	fail_cmd_idx = resp->fail_cmd_idx;
-	fail_cmd_status = resp->fail_cmd_status;
+	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
+	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
 	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
 		 fail_cmd_idx, fail_cmd_status);
 
@@ -361,7 +364,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 }
 
-const struct drm_sched_backend_ops sched_ops = {
+static const struct drm_sched_backend_ops sched_ops = {
 	.run_job = aie2_sched_job_run,
 	.free_job = aie2_sched_job_free,
 	.timedout_job = aie2_sched_job_timedout,
@@ -516,6 +519,14 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 {
 	struct amdxdna_client *client = hwctx->client;
 	struct amdxdna_dev *xdna = client->xdna;
+	const struct drm_sched_init_args args = {
+		.ops = &sched_ops,
+		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
+		.credit_limit = HWCTX_MAX_CMDS,
+		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+		.name = hwctx->name,
+		.dev = xdna->ddev.dev,
+	};
 	struct drm_gpu_scheduler *sched;
 	struct amdxdna_hwctx_priv *priv;
 	struct amdxdna_gem_obj *heap;
@@ -573,9 +584,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	might_lock(&priv->io_lock);
 	fs_reclaim_release(GFP_KERNEL);
 
-	ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
-			     HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
-			     NULL, NULL, hwctx->name, xdna->ddev.dev);
+	ret = drm_sched_init(sched, &args);
 	if (ret) {
 		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
 		goto free_cmd_bufs;
@@ -616,6 +625,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	hwctx->status = HWCTX_STAT_INIT;
 	ndev = xdna->dev_handle;
 	ndev->hwctx_num++;
+	init_waitqueue_head(&priv->job_free_wq);
 
 	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
 
@@ -652,25 +662,23 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 	xdna = hwctx->client->xdna;
 	ndev = xdna->dev_handle;
 	ndev->hwctx_num--;
-	drm_sched_wqueue_stop(&hwctx->priv->sched);
 
-	/* Now, scheduler will not send command to device. */
+	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+	drm_sched_entity_destroy(&hwctx->priv->entity);
+
+	aie2_hwctx_wait_for_idle(hwctx);
+
+	/* Request fw to destroy hwctx and cancel the rest pending requests */
 	aie2_release_resource(hwctx);
 
-	/*
-	 * All submitted commands are aborted.
-	 * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run()
-	 * will return NODEV if it is called.
-	 */
-	drm_sched_wqueue_start(&hwctx->priv->sched);
+	/* Wait for all submitted jobs to be completed or canceled */
+	wait_event(hwctx->priv->job_free_wq,
+		   atomic64_read(&hwctx->job_submit_cnt) ==
+		   atomic64_read(&hwctx->job_free_cnt));
 
-	aie2_hwctx_wait_for_idle(hwctx);
-	drm_sched_entity_destroy(&hwctx->priv->entity);
 	drm_sched_fini(&hwctx->priv->sched);
 	aie2_ctx_syncobj_destroy(hwctx);
 
-	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
-
 	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
 		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
 	amdxdna_gem_unpin(hwctx->priv->heap);
@@ -750,27 +758,42 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
 static int aie2_populate_range(struct amdxdna_gem_obj *abo)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
-	struct mm_struct *mm = abo->mem.notifier.mm;
-	struct hmm_range range = { 0 };
+	struct amdxdna_umap *mapp;
 	unsigned long timeout;
+	struct mm_struct *mm;
+	bool found;
 	int ret;
 
-	XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
-		       abo->mem.userptr, abo->mem.size);
-	range.notifier = &abo->mem.notifier;
-	range.start = abo->mem.userptr;
-	range.end = abo->mem.userptr + abo->mem.size;
-	range.hmm_pfns = abo->mem.pfns;
-	range.default_flags = HMM_PFN_REQ_FAULT;
+	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+	found = false;
+	down_write(&xdna->notifier_lock);
+	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+		if (mapp->invalid) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		abo->mem.map_invalid = false;
+		up_write(&xdna->notifier_lock);
+		return 0;
+	}
+	kref_get(&mapp->refcnt);
+	up_write(&xdna->notifier_lock);
 
-	if (!mmget_not_zero(mm))
+	XDNA_DBG(xdna, "populate memory range %lx %lx",
+		 mapp->vma->vm_start, mapp->vma->vm_end);
+	mm = mapp->notifier.mm;
+	if (!mmget_not_zero(mm)) {
+		amdxdna_umap_put(mapp);
 		return -EFAULT;
+	}
 
-	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-again:
-	range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
 	mmap_read_lock(mm);
-	ret = hmm_range_fault(&range);
+	ret = hmm_range_fault(&mapp->range);
 	mmap_read_unlock(mm);
 	if (ret) {
 		if (time_after(jiffies, timeout)) {
@@ -778,21 +801,27 @@ again:
 			goto put_mm;
 		}
 
-		if (ret == -EBUSY)
+		if (ret == -EBUSY) {
+			amdxdna_umap_put(mapp);
 			goto again;
+		}
 
 		goto put_mm;
 	}
 
-	down_read(&xdna->notifier_lock);
-	if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
-		up_read(&xdna->notifier_lock);
+	down_write(&xdna->notifier_lock);
+	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
+		up_write(&xdna->notifier_lock);
+		amdxdna_umap_put(mapp);
 		goto again;
 	}
-	abo->mem.map_invalid = false;
-	up_read(&xdna->notifier_lock);
+	mapp->invalid = false;
+	up_write(&xdna->notifier_lock);
+	amdxdna_umap_put(mapp);
+	goto again;
 
 put_mm:
+	amdxdna_umap_put(mapp);
 	mmput(mm);
 	return ret;
 }
@@ -879,6 +908,7 @@ retry:
 		drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
 
 	aie2_job_put(job);
+	atomic64_inc(&hwctx->job_submit_cnt);
 
 	return 0;
 
@@ -899,10 +929,6 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
 	struct drm_gem_object *gobj = to_gobj(abo);
 	long ret;
 
-	down_write(&xdna->notifier_lock);
-	abo->mem.map_invalid = true;
-	mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
-	up_write(&xdna->notifier_lock);
 	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, true,
 				    MAX_SCHEDULE_TIMEOUT);
 	if (!ret || ret == -ERESTARTSYS)
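The aie2_ctx.c hunks above replace the drm_sched_wqueue_stop()/drm_sched_wqueue_start() teardown dance with a pair of counters: each submission bumps job_submit_cnt, each release bumps job_free_cnt and kicks a waitqueue, and aie2_hwctx_fini() sleeps until the two match. A standalone sketch of that quiesce pattern, with a pthread mutex/condvar standing in for the kernel waitqueue and C11 atomics for atomic64_t; all names here are illustrative, not the driver's API:

	#include <pthread.h>
	#include <stdatomic.h>

	struct hwctx_counters {
		atomic_long job_submit_cnt;
		atomic_long job_free_cnt;
		pthread_mutex_t lock;
		pthread_cond_t job_free_wq;	/* plays the role of priv->job_free_wq */
	};

	/* Submission path: count the job before handing it to the scheduler. */
	static void job_submitted(struct hwctx_counters *c)
	{
		atomic_fetch_add(&c->job_submit_cnt, 1);
	}

	/* Completion path: count the free and wake waiters, as aie2_job_release() does. */
	static void job_released(struct hwctx_counters *c)
	{
		pthread_mutex_lock(&c->lock);
		atomic_fetch_add(&c->job_free_cnt, 1);
		pthread_cond_broadcast(&c->job_free_wq);
		pthread_mutex_unlock(&c->lock);
	}

	/* Teardown path: block until every submitted job has been released. */
	static void wait_jobs_drained(struct hwctx_counters *c)
	{
		pthread_mutex_lock(&c->lock);
		while (atomic_load(&c->job_submit_cnt) != atomic_load(&c->job_free_cnt))
			pthread_cond_wait(&c->job_free_wq, &c->lock);
		pthread_mutex_unlock(&c->lock);
	}
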
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index b1defaa8513b..5ee905632a39 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -209,16 +209,14 @@ static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u3
 	return err_col;
 }
 
-static int aie2_error_async_cb(void *handle, const u32 *data, size_t size)
+static int aie2_error_async_cb(void *handle, void __iomem *data, size_t size)
 {
-	struct async_event_msg_resp *resp;
 	struct async_event *e = handle;
 
 	if (data) {
-		resp = (struct async_event_msg_resp *)data;
-		e->resp.type = resp->type;
+		e->resp.type = readl(data + offsetof(struct async_event_msg_resp, type));
 		wmb(); /* Update status in the end, so that no lock for here */
-		e->resp.status = resp->status;
+		e->resp.status = readl(data + offsetof(struct async_event_msg_resp, status));
 	}
 	queue_work(e->wq, &e->work);
 	return 0;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 9e2c9a44f76a..82412eec9a4b 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -356,7 +356,7 @@ fail:
 }
 
 int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
-				 void *handle, int (*cb)(void*, const u32 *, size_t))
+				 void *handle, int (*cb)(void*, void __iomem *, size_t))
 {
 	struct async_event_msg_req req = { 0 };
 	struct xdna_mailbox_msg msg = {
@@ -435,7 +435,7 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
 }
 
 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
-		 int (*notify_cb)(void *, const u32 *, size_t))
+		 int (*notify_cb)(void *, void __iomem *, size_t))
 {
 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
 	struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
 	if (!payload)
 		return -EINVAL;
 
-	if (!slot_cf_has_space(offset, payload_len))
+	if (!slot_has_space(*buf, offset, payload_len))
 		return -ENOSPC;
 
 	buf->cu_idx = cu_idx;
@@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
 	if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
 		return -EINVAL;
 
-	if (!slot_dpu_has_space(offset, arg_sz))
+	if (!slot_has_space(*buf, offset, arg_sz))
 		return -ENOSPC;
 
 	buf->inst_buf_addr = sn->buffer;
@@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
 	memcpy(buf->args, sn->prop_args, arg_sz);
 
 	/* Accurate buf size to hint firmware to do necessary copy */
-	*size += sizeof(*buf) + arg_sz;
+	*size = sizeof(*buf) + arg_sz;
 	return 0;
 }
 
@@ -640,7 +640,7 @@ aie2_cmd_op_to_msg_op(u32 op)
 
 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
 			       struct amdxdna_sched_job *job,
-			       int (*notify_cb)(void *, const u32 *, size_t))
+			       int (*notify_cb)(void *, void __iomem *, size_t))
 {
 	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
@@ -705,7 +705,7 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
 
 int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
 				struct amdxdna_sched_job *job,
-				int (*notify_cb)(void *, const u32 *, size_t))
+				int (*notify_cb)(void *, void __iomem *, size_t))
 {
 	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
@@ -740,7 +740,7 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
 }
 
 int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
-		 int (*notify_cb)(void *, const u32 *, size_t))
+		 int (*notify_cb)(void *, void __iomem *, size_t))
 {
 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
 	struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 4e02e744b470..6df9065b13f6 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -319,18 +319,16 @@ struct async_event_msg_resp {
 } __packed;
 
 #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_cf_has_space(offset, payload_size) \
-	(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
-	 offsetof(struct cmd_chain_slot_execbuf_cf, args[0]))
+#define slot_has_space(slot, offset, payload_size) \
+	(MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
+	 sizeof(typeof(slot)))
+
 struct cmd_chain_slot_execbuf_cf {
 	__u32 cu_idx;
 	__u32 arg_cnt;
 	__u32 args[] __counted_by(arg_cnt);
 };
 
-#define slot_dpu_has_space(offset, payload_size) \
-	(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
-	 offsetof(struct cmd_chain_slot_dpu, args[0]))
 struct cmd_chain_slot_dpu {
 	__u64 inst_buf_addr;
 	__u32 inst_size;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 5a058e565b01..c6cf7068d23c 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -512,12 +512,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
 		goto release_fw;
 	}
 
-	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
-	if (ret) {
-		XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
-		goto free_irq;
-	}
-
 	psp_conf.fw_size = fw->size;
 	psp_conf.fw_buf = fw->data;
 	for (i = 0; i < PSP_MAX_REGS; i++)
@@ -526,14 +520,14 @@ static int aie2_init(struct amdxdna_dev *xdna)
 	if (!ndev->psp_hdl) {
 		XDNA_ERR(xdna, "failed to create psp");
 		ret = -ENOMEM;
-		goto disable_sva;
+		goto free_irq;
 	}
 	xdna->dev_handle = ndev;
 
 	ret = aie2_hw_start(xdna);
 	if (ret) {
 		XDNA_ERR(xdna, "start npu failed, ret %d", ret);
-		goto disable_sva;
+		goto free_irq;
 	}
 
 	ret = aie2_mgmt_fw_query(ndev);
@@ -584,8 +578,6 @@ async_event_free:
 	aie2_error_async_events_free(ndev);
stop_hw:
 	aie2_hw_stop(xdna);
-disable_sva:
-	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
free_irq:
 	pci_free_irq_vectors(pdev);
release_fw:
@@ -601,7 +593,6 @@ static void aie2_fini(struct amdxdna_dev *xdna)
 
 	aie2_hw_stop(xdna);
 	aie2_error_async_events_free(ndev);
-	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
 	pci_free_irq_vectors(pdev);
 }
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index f2d95531ddc2..385914840eaa 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -271,18 +271,18 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
 int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
-				 void *handle, int (*cb)(void*, const u32 *, size_t));
+				 void *handle, int (*cb)(void*, void __iomem *, size_t));
 int aie2_config_cu(struct amdxdna_hwctx *hwctx);
 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
-		 int (*notify_cb)(void *, const u32 *, size_t));
+		 int (*notify_cb)(void *, void __iomem *, size_t));
 int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
 				struct amdxdna_sched_job *job,
-				int (*notify_cb)(void *, const u32 *, size_t));
+				int (*notify_cb)(void *, void __iomem *, size_t));
 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
 			       struct amdxdna_sched_job *job,
-			       int (*notify_cb)(void *, const u32 *, size_t));
+			       int (*notify_cb)(void *, void __iomem *, size_t));
 int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
-		 int (*notify_cb)(void *, const u32 *, size_t));
+		 int (*notify_cb)(void *, void __iomem *, size_t));
 
 /* aie2_hwctx.c */
 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index 73388443c676..d303701b0ded 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -64,6 +64,7 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	if (ret) {
 		XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
 			 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
+		return ret;
 	}
 	ndev->npuclk_freq = freq;
 
@@ -72,6 +73,7 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	if (ret) {
 		XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
 			 ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
+		return ret;
 	}
 	ndev->hclk_freq = freq;
 	ndev->dpm_level = dpm_level;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index d11b1c83d9c3..be073224bd69 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -220,6 +220,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 	args->syncobj_handle = hwctx->syncobj_hdl;
 	mutex_unlock(&xdna->dev_lock);
 
+	atomic64_set(&hwctx->job_submit_cnt, 0);
+	atomic64_set(&hwctx->job_free_cnt, 0);
 	XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
 	drm_dev_exit(idx);
 	return 0;
@@ -494,11 +496,11 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
 				      struct amdxdna_drm_exec_cmd *args)
 {
 	struct amdxdna_dev *xdna = client->xdna;
-	u32 *arg_bo_hdls;
+	u32 *arg_bo_hdls = NULL;
 	u32 cmd_bo_hdl;
 	int ret;
 
-	if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
+	if (args->arg_count > MAX_ARG_COUNT) {
 		XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
 		return -EINVAL;
 	}
@@ -510,14 +512,16 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
 	}
 
 	cmd_bo_hdl = (u32)args->cmd_handles;
-	arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
-	if (!arg_bo_hdls)
-		return -ENOMEM;
-	ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
-			     args->arg_count * sizeof(u32));
-	if (ret) {
-		ret = -EFAULT;
-		goto free_cmd_bo_hdls;
+	if (args->arg_count) {
+		arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+		if (!arg_bo_hdls)
+			return -ENOMEM;
+		ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+				     args->arg_count * sizeof(u32));
+		if (ret) {
+			ret = -EFAULT;
+			goto free_cmd_bo_hdls;
+		}
 	}
 
 	ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 80b0304193ec..f0a4a8586d85 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -87,6 +87,9 @@ struct amdxdna_hwctx {
 	struct amdxdna_qos_info qos;
 	struct amdxdna_hwctx_param_config_cu *cus;
 	u32 syncobj_hdl;
+
+	atomic64_t job_submit_cnt;
+	atomic64_t job_free_cnt ____cacheline_aligned_in_smp;
 };
 
 #define drm_job_to_xdna_job(j) \
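The aie2_msg_priv.h hunk above collapses slot_cf_has_space()/slot_dpu_has_space() into a single slot_has_space() macro that bounds-checks by pure addition instead of subtracting from the buffer size. With unsigned arithmetic, the subtracted form can wrap to a huge value once offset + payload exceeds the buffer and wrongly report free space. A host-side sketch of that arithmetic; the struct only loosely mirrors cmd_chain_slot_execbuf_cf:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MAX_CHAIN_CMDBUF_SIZE 4096u

	struct slot_cf {
		uint32_t cu_idx;
		uint32_t arg_cnt;
		uint32_t args[];
	};

	/* Old style: the subtraction wraps when (off + payload) exceeds the buffer. */
	#define OLD_HAS_SPACE(off, payload) \
		(MAX_CHAIN_CMDBUF_SIZE - ((off) + (payload)) > offsetof(struct slot_cf, args))

	/* New style: pure addition on the right-hand side, compared to the buffer size. */
	#define NEW_HAS_SPACE(slot, off, payload) \
		(MAX_CHAIN_CMDBUF_SIZE >= (off) + (payload) + sizeof(slot))

	int main(void)
	{
		struct slot_cf slot;
		uint32_t off = 4000, payload = 200;	/* clearly does not fit in 4 KiB */

		printf("old check claims fit=%d (wrapped subtraction)\n",
		       OLD_HAS_SPACE(off, payload));
		printf("new check claims fit=%d\n", (int)NEW_HAS_SPACE(slot, off, payload));
		return 0;
	}

With these inputs the old expression underflows and reports space available, while the new one correctly rejects the slot.
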
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 606433d73236..26831ec69f89 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -9,7 +9,10 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/gpu_scheduler.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direct.h>
 #include <linux/iosys-map.h>
+#include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 
 #include "amdxdna_ctx.h"
@@ -18,6 +21,8 @@
 
 #define XDNA_MAX_CMD_BO_SIZE SZ_32K
 
+MODULE_IMPORT_NS("DMA_BUF");
+
 static int
 amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
 {
@@ -55,57 +60,38 @@ amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
 	return 0;
 }
 
-static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
-{
-	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
-	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
-	struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
-
-	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
-	if (abo->pinned)
-		amdxdna_gem_unpin(abo);
-
-	if (abo->type == AMDXDNA_BO_DEV) {
-		mutex_lock(&abo->client->mm_lock);
-		drm_mm_remove_node(&abo->mm_node);
-		mutex_unlock(&abo->client->mm_lock);
-
-		vunmap(abo->mem.kva);
-		drm_gem_object_put(to_gobj(abo->dev_heap));
-		drm_gem_object_release(gobj);
-		mutex_destroy(&abo->lock);
-		kfree(abo);
-		return;
-	}
-
-	if (abo->type == AMDXDNA_BO_DEV_HEAP)
-		drm_mm_takedown(&abo->mm);
-
-	drm_gem_vunmap_unlocked(gobj, &map);
-	mutex_destroy(&abo->lock);
-	drm_gem_shmem_free(&abo->base);
-}
-
-static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
-	.free = amdxdna_gem_obj_free,
-};
-
 static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
 				   const struct mmu_notifier_range *range,
 				   unsigned long cur_seq)
 {
-	struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj,
-						   mem.notifier);
-	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier);
+	struct amdxdna_gem_obj *abo = mapp->abo;
+	struct amdxdna_dev *xdna;
 
-	XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
-		 abo->mem.userptr, abo->mem.size, abo->type);
+	xdna = to_xdna_dev(to_gobj(abo)->dev);
+	XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d",
+		 mapp->vma->vm_start, mapp->vma->vm_end, abo->type);
 
 	if (!mmu_notifier_range_blockable(range))
 		return false;
 
+	down_write(&xdna->notifier_lock);
+	abo->mem.map_invalid = true;
+	mapp->invalid = true;
+	mmu_interval_set_seq(&mapp->notifier, cur_seq);
+	up_write(&xdna->notifier_lock);
+
 	xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
 
+	if (range->event == MMU_NOTIFY_UNMAP) {
+		down_write(&xdna->notifier_lock);
+		if (!mapp->unmapped) {
+			queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+			mapp->unmapped = true;
+		}
+		up_write(&xdna->notifier_lock);
+	}
+
 	return true;
 }
 
@@ -113,102 +99,310 @@ static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
 	.invalidate = amdxdna_hmm_invalidate,
 };
 
-static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo)
+static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo,
+				   struct vm_area_struct *vma)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	struct amdxdna_umap *mapp;
+
+	down_read(&xdna->notifier_lock);
+	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
+		if (!vma || mapp->vma == vma) {
+			if (!mapp->unmapped) {
+				queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work);
+				mapp->unmapped = true;
+			}
+			if (vma)
+				break;
+		}
+	}
+	up_read(&xdna->notifier_lock);
+}
 
-	if (!xdna->dev_info->ops->hmm_invalidate)
-		return;
+static void amdxdna_umap_release(struct kref *ref)
+{
+	struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt);
+	struct vm_area_struct *vma = mapp->vma;
+	struct amdxdna_dev *xdna;
+
+	mmu_interval_notifier_remove(&mapp->notifier);
+	if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping)
+		mapping_clear_unevictable(vma->vm_file->f_mapping);
+
+	xdna = to_xdna_dev(to_gobj(mapp->abo)->dev);
+	down_write(&xdna->notifier_lock);
+	list_del(&mapp->node);
+	up_write(&xdna->notifier_lock);
+
+	kvfree(mapp->range.hmm_pfns);
+	kfree(mapp);
+}
+
+void amdxdna_umap_put(struct amdxdna_umap *mapp)
+{
+	kref_put(&mapp->refcnt, amdxdna_umap_release);
+}
+
+static void amdxdna_hmm_unreg_work(struct work_struct *work)
+{
+	struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap,
+						 hmm_unreg_work);
 
-	mmu_interval_notifier_remove(&abo->mem.notifier);
-	kvfree(abo->mem.pfns);
-	abo->mem.pfns = NULL;
+	amdxdna_umap_put(mapp);
 }
 
-static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr,
-				size_t len)
+static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
+				struct vm_area_struct *vma)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	unsigned long len = vma->vm_end - vma->vm_start;
+	unsigned long addr = vma->vm_start;
+	struct amdxdna_umap *mapp;
 	u32 nr_pages;
 	int ret;
 
 	if (!xdna->dev_info->ops->hmm_invalidate)
 		return 0;
 
-	if (abo->mem.pfns)
-		return -EEXIST;
+	mapp = kzalloc(sizeof(*mapp), GFP_KERNEL);
+	if (!mapp)
+		return -ENOMEM;
 
 	nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
-	abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns),
-				 GFP_KERNEL);
-	if (!abo->mem.pfns)
-		return -ENOMEM;
+	mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns),
+					GFP_KERNEL);
+	if (!mapp->range.hmm_pfns) {
+		ret = -ENOMEM;
+		goto free_map;
+	}
 
-	ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier,
+	ret = mmu_interval_notifier_insert_locked(&mapp->notifier,
 						  current->mm, addr, len,
 						  &amdxdna_hmm_ops);
 	if (ret) {
 		XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
-		kvfree(abo->mem.pfns);
+		goto free_pfns;
 	}
-	abo->mem.userptr = addr;
 
+	mapp->range.notifier = &mapp->notifier;
+	mapp->range.start = vma->vm_start;
+	mapp->range.end = vma->vm_end;
+	mapp->range.default_flags = HMM_PFN_REQ_FAULT;
+	mapp->vma = vma;
+	mapp->abo = abo;
+	kref_init(&mapp->refcnt);
+
+	if (abo->mem.userptr == AMDXDNA_INVALID_ADDR)
+		abo->mem.userptr = addr;
+	INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work);
+	if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping)
+		mapping_set_unevictable(vma->vm_file->f_mapping);
+
+	down_write(&xdna->notifier_lock);
+	list_add_tail(&mapp->node, &abo->mem.umap_list);
+	up_write(&xdna->notifier_lock);
+
+	return 0;
+
+free_pfns:
+	kvfree(mapp->range.hmm_pfns);
+free_map:
+	kfree(mapp);
 	return ret;
 }
 
+static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo,
+				struct vm_area_struct *vma)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	unsigned long num_pages = vma_pages(vma);
+	unsigned long offset = 0;
+	int ret;
+
+	if (!is_import_bo(abo)) {
+		ret = drm_gem_shmem_mmap(&abo->base, vma);
+		if (ret) {
+			XDNA_ERR(xdna, "Failed shmem mmap %d", ret);
+			return ret;
+		}
+
+		/* The buffer is based on memory pages. Fix the flag. */
+		vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
+		ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+				      &num_pages);
+		if (ret) {
+			XDNA_ERR(xdna, "Failed insert pages %d", ret);
+			vma->vm_ops->close(vma);
+			return ret;
+		}
+
+		return 0;
+	}
+
+	vma->vm_private_data = NULL;
+	vma->vm_ops = NULL;
+	ret = dma_buf_mmap(to_gobj(abo)->dma_buf, vma, 0);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret);
+		return ret;
+	}
+
+	do {
+		vm_fault_t fault_ret;
+
+		fault_ret = handle_mm_fault(vma, vma->vm_start + offset,
+					    FAULT_FLAG_WRITE, NULL);
+		if (fault_ret & VM_FAULT_ERROR) {
+			vma->vm_ops->close(vma);
+			XDNA_ERR(xdna, "Fault in page failed");
+			return -EFAULT;
+		}
+
+		offset += PAGE_SIZE;
+	} while (--num_pages);
+
+	/* Drop the reference drm_gem_mmap_obj() acquired.*/
+	drm_gem_object_put(to_gobj(abo));
+
+	return 0;
+}
+
 static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
 				struct vm_area_struct *vma)
 {
+	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
 	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
-	unsigned long num_pages;
 	int ret;
 
-	ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size);
+	ret = amdxdna_hmm_register(abo, vma);
 	if (ret)
 		return ret;
 
+	ret = amdxdna_insert_pages(abo, vma);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed insert pages, ret %d", ret);
+		goto hmm_unreg;
+	}
+
+	XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx",
+		 drm_vma_node_offset_addr(&gobj->vma_node), abo->type,
+		 vma->vm_start, gobj->size);
+	return 0;
+
+hmm_unreg:
+	amdxdna_hmm_unregister(abo, vma);
+	return ret;
+}
+
+static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+	struct drm_gem_object *gobj = dma_buf->priv;
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+	unsigned long num_pages = vma_pages(vma);
+	int ret;
+
+	vma->vm_ops = &drm_gem_shmem_vm_ops;
+	vma->vm_private_data = gobj;
+
+	drm_gem_object_get(gobj);
 	ret = drm_gem_shmem_mmap(&abo->base, vma);
 	if (ret)
-		goto hmm_unreg;
+		goto put_obj;
 
-	num_pages = gobj->size >> PAGE_SHIFT;
-	/* Try to insert the pages */
+	/* The buffer is based on memory pages. Fix the flag. */
 	vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
-	ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages);
+	ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages,
+			      &num_pages);
 	if (ret)
-		XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret);
+		goto close_vma;
 
 	return 0;
 
-hmm_unreg:
-	amdxdna_hmm_unregister(abo);
+close_vma:
+	vma->vm_ops->close(vma);
+put_obj:
+	drm_gem_object_put(gobj);
 	return ret;
 }
 
-static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf)
+static const struct dma_buf_ops amdxdna_dmabuf_ops = {
+	.attach = drm_gem_map_attach,
+	.detach = drm_gem_map_detach,
+	.map_dma_buf = drm_gem_map_dma_buf,
+	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.release = drm_gem_dmabuf_release,
+	.mmap = amdxdna_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
+static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
 {
-	return drm_gem_shmem_vm_ops.fault(vmf);
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.ops = &amdxdna_dmabuf_ops;
+	exp_info.size = gobj->size;
+	exp_info.flags = flags;
+	exp_info.priv = gobj;
+	exp_info.resv = gobj->resv;
+
+	return drm_gem_dmabuf_export(gobj->dev, &exp_info);
 }
 
-static void amdxdna_gem_vm_open(struct vm_area_struct *vma)
+static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
 {
-	drm_gem_shmem_vm_ops.open(vma);
+	dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL);
+	dma_buf_detach(abo->dma_buf, abo->attach);
+	dma_buf_put(abo->dma_buf);
+	drm_gem_object_release(to_gobj(abo));
+	kfree(abo);
 }
 
-static void amdxdna_gem_vm_close(struct vm_area_struct *vma)
+static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 {
-	struct drm_gem_object *gobj = vma->vm_private_data;
+	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);
+
+	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+
+	amdxdna_hmm_unregister(abo, NULL);
+	flush_workqueue(xdna->notifier_wq);
+
+	if (abo->pinned)
+		amdxdna_gem_unpin(abo);
 
-	amdxdna_hmm_unregister(to_xdna_obj(gobj));
-	drm_gem_shmem_vm_ops.close(vma);
+	if (abo->type == AMDXDNA_BO_DEV) {
+		mutex_lock(&abo->client->mm_lock);
+		drm_mm_remove_node(&abo->mm_node);
+		mutex_unlock(&abo->client->mm_lock);
+
+		vunmap(abo->mem.kva);
+		drm_gem_object_put(to_gobj(abo->dev_heap));
+		drm_gem_object_release(gobj);
+		mutex_destroy(&abo->lock);
+		kfree(abo);
+		return;
+	}
+
+	if (abo->type == AMDXDNA_BO_DEV_HEAP)
+		drm_mm_takedown(&abo->mm);
+
+	drm_gem_vunmap(gobj, &map);
+	mutex_destroy(&abo->lock);
+
+	if (is_import_bo(abo)) {
+		amdxdna_imported_obj_free(abo);
+		return;
+	}
+
+	drm_gem_shmem_free(&abo->base);
 }
 
-static const struct vm_operations_struct amdxdna_gem_vm_ops = {
-	.fault = amdxdna_gem_vm_fault,
-	.open = amdxdna_gem_vm_open,
-	.close = amdxdna_gem_vm_close,
+static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
+	.free = amdxdna_gem_obj_free,
 };
 
 static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
@@ -220,7 +414,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
 	.vmap = drm_gem_shmem_object_vmap,
 	.vunmap = drm_gem_shmem_object_vunmap,
 	.mmap = amdxdna_gem_obj_mmap,
-	.vm_ops = &amdxdna_gem_vm_ops,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+	.export = amdxdna_gem_prime_export,
 };
 
 static struct amdxdna_gem_obj *
@@ -239,6 +434,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
 	abo->mem.userptr = AMDXDNA_INVALID_ADDR;
 	abo->mem.dev_addr = AMDXDNA_INVALID_ADDR;
 	abo->mem.size = size;
+	INIT_LIST_HEAD(&abo->mem.umap_list);
 
 	return abo;
 }
@@ -258,6 +454,51 @@ amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
 	return to_gobj(abo);
 }
 
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attach;
+	struct amdxdna_gem_obj *abo;
+	struct drm_gem_object *gobj;
+	struct sg_table *sgt;
+	int ret;
+
+	get_dma_buf(dma_buf);
+
+	attach = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attach)) {
+		ret = PTR_ERR(attach);
+		goto put_buf;
+	}
+
+	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto fail_detach;
+	}
+
+	gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+	if (IS_ERR(gobj)) {
+		ret = PTR_ERR(gobj);
+		goto fail_unmap;
+	}
+
+	abo = to_xdna_obj(gobj);
+	abo->attach = attach;
+	abo->dma_buf = dma_buf;
+
+	return gobj;
+
+fail_unmap:
+	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
+fail_detach:
+	dma_buf_detach(dma_buf, attach);
+put_buf:
+	dma_buf_put(dma_buf);
+
+	return ERR_PTR(ret);
+}
+
 static struct amdxdna_gem_obj *
 amdxdna_drm_alloc_shmem(struct drm_device *dev,
 			struct amdxdna_drm_create_bo *args,
@@ -417,7 +658,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
 	abo->type = AMDXDNA_BO_CMD;
 	abo->client = filp->driver_priv;
 
-	ret = drm_gem_vmap_unlocked(to_gobj(abo), &map);
+	ret = drm_gem_vmap(to_gobj(abo), &map);
 	if (ret) {
 		XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
 		goto release_obj;
@@ -483,6 +724,9 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
 	int ret;
 
+	if (is_import_bo(abo))
+		return 0;
+
 	switch (abo->type) {
 	case AMDXDNA_BO_SHMEM:
 	case AMDXDNA_BO_DEV_HEAP:
@@ -515,6 +759,9 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
 
 void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
 {
+	if (is_import_bo(abo))
+		return;
+
 	if (abo->type == AMDXDNA_BO_DEV)
 		abo = abo->dev_heap;
 
@@ -606,7 +853,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
 		goto put_obj;
 	}
 
-	if (abo->type == AMDXDNA_BO_DEV)
+	if (is_import_bo(abo))
+		drm_clflush_sg(abo->base.sgt);
+	else if (abo->type == AMDXDNA_BO_DEV)
 		drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
 	else
 		drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index 8ccc0375dd9d..aee97e971d6d 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -6,6 +6,20 @@
 #ifndef _AMDXDNA_GEM_H_
 #define _AMDXDNA_GEM_H_
 
+#include <linux/hmm.h>
+
+struct amdxdna_umap {
+	struct vm_area_struct		*vma;
+	struct mmu_interval_notifier	notifier;
+	struct hmm_range		range;
+	struct work_struct		hmm_unreg_work;
+	struct amdxdna_gem_obj		*abo;
+	struct list_head		node;
+	struct kref			refcnt;
+	bool				invalid;
+	bool				unmapped;
+};
+
 struct amdxdna_mem {
 	u64				userptr;
 	void				*kva;
@@ -13,8 +27,7 @@ struct amdxdna_mem {
 	size_t				size;
 	struct page			**pages;
 	u32				nr_pages;
-	struct mmu_interval_notifier	notifier;
-	unsigned long			*pfns;
+	struct list_head		umap_list;
 	bool				map_invalid;
 };
 
@@ -31,9 +44,12 @@ struct amdxdna_gem_obj {
 	struct amdxdna_gem_obj		*dev_heap; /* For AMDXDNA_BO_DEV */
 	struct drm_mm_node		mm_node; /* For AMDXDNA_BO_DEV */
 	u32				assigned_hwctx;
+	struct dma_buf			*dma_buf;
+	struct dma_buf_attachment	*attach;
 };
 
 #define to_gobj(obj) (&(obj)->base.base)
+#define is_import_bo(obj) ((obj)->attach)
 
 static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
 {
@@ -47,8 +63,12 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
 	drm_gem_object_put(to_gobj(abo));
 }
 
+void amdxdna_umap_put(struct amdxdna_umap *mapp);
+
 struct drm_gem_object *
 amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
 struct amdxdna_gem_obj *
 amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
			 struct amdxdna_drm_create_bo *args,
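With amdxdna_gem_prime_export() and amdxdna_gem_prime_import() wired up above, userspace can move these BOs between devices through the standard DRM PRIME ioctls. A minimal libdrm sketch of that round trip; the device path and the assumption of an already-created BO handle are illustrative, not taken from this patch:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <xf86drm.h>

	int main(void)
	{
		int fd = open("/dev/accel/accel0", O_RDWR);	/* hypothetical node */
		uint32_t handle = 1;	/* assume a BO was created earlier */
		uint32_t imported;
		int dmabuf_fd, ret;

		/* Export: exercises the driver's .export (prime_export) path. */
		ret = drmPrimeHandleToFD(fd, handle, DRM_CLOEXEC | DRM_RDWR, &dmabuf_fd);
		if (ret)
			return 1;

		/* Import, possibly on another device fd: the .gem_prime_import path. */
		ret = drmPrimeFDToHandle(fd, dmabuf_fd, &imported);
		printf("import %s, handle %u\n", ret ? "failed" : "ok", imported);
		return ret;
	}
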
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index e5301fac1397..da1ac89bb78f 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -91,7 +91,7 @@ struct mailbox_pkg {
 struct mailbox_msg {
 	void			*handle;
-	int			(*notify_cb)(void *handle, const u32 *data, size_t size);
+	int			(*notify_cb)(void *handle, void __iomem *data, size_t size);
 	size_t			pkg_size; /* package size in bytes */
 	struct mailbox_pkg	pkg;
 };
@@ -244,7 +244,7 @@ no_space:
 static int
 mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header,
-		 void *data)
+		 void __iomem *data)
 {
 	struct mailbox_msg *mb_msg;
 	int msg_id;
@@ -332,7 +332,7 @@ static int mailbox_get_msg(struct mailbox_channel *mb_chann)
 	memcpy_fromio((u32 *)&header + 1, read_addr, rest);
 	read_addr += rest;
 
-	ret = mailbox_get_resp(mb_chann, &header, (u32 *)read_addr);
+	ret = mailbox_get_resp(mb_chann, &header, read_addr);
 
 	mailbox_set_headptr(mb_chann, head + msg_size);
 	/* After update head, it can equal to ringbuf_size. This is expected. */
@@ -349,8 +349,6 @@ static irqreturn_t mailbox_irq_handler(int irq, void *p)
 	trace_mbox_irq_handle(MAILBOX_NAME, irq);
 	/* Schedule a rx_work to call the callback functions */
 	queue_work(mb_chann->work_q, &mb_chann->rx_work);
-	/* Clear IOHUB register */
-	mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
 
 	return IRQ_HANDLED;
 }
@@ -367,6 +365,9 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
 		return;
 	}
 
+again:
+	mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
+
 	while (1) {
 		/*
		 * If return is 0, keep consuming next message, until there is
@@ -380,10 +381,18 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
 		if (unlikely(ret)) {
 			MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret);
 			WRITE_ONCE(mb_chann->bad_state, true);
-			disable_irq(mb_chann->msix_irq);
-			break;
+			return;
 		}
 	}
+
+	/*
+	 * The hardware will not generate interrupt if firmware creates a new
+	 * response right after driver clears interrupt register. Check
+	 * the interrupt register to make sure there is not any new response
+	 * before exiting.
+	 */
+	if (mailbox_reg_read(mb_chann, mb_chann->iohub_int_addr))
+		goto again;
 }
 
 int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h
index 57954c303bdd..ea367f2fb738 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.h
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -25,7 +25,7 @@ struct mailbox_channel;
 struct xdna_mailbox_msg {
 	u32		opcode;
 	void		*handle;
-	int		(*notify_cb)(void *handle, const u32 *data, size_t size);
+	int		(*notify_cb)(void *handle, void __iomem *data, size_t size);
 	u8		*send_data;
 	size_t		send_size;
 };
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
index 5139a9c96a91..6d0c24513476 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -16,7 +16,7 @@
 #include "amdxdna_mailbox_helper.h"
 #include "amdxdna_pci_drv.h"
 
-int xdna_msg_cb(void *handle, const u32 *data, size_t size)
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size)
 {
 	struct xdna_notify *cb_arg = handle;
 	int ret;
@@ -29,9 +29,9 @@ int xdna_msg_cb(void *handle, const u32 *data, size_t size)
 		goto out;
 	}
 
+	memcpy_fromio(cb_arg->data, data, cb_arg->size);
 	print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
-			     16, 4, data, cb_arg->size, true);
-	memcpy(cb_arg->data, data, cb_arg->size);
+			     16, 4, cb_arg->data, cb_arg->size, true);
out:
 	ret = cb_arg->error;
 	complete(&cb_arg->comp);
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
index 23e1317b79fe..710ff8873d61 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -35,7 +35,7 @@ struct xdna_notify {
 	.notify_cb = xdna_msg_cb, \
 }
 
-int xdna_msg_cb(void *handle, const u32 *data, size_t size);
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size);
 int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
 		       struct xdna_mailbox_msg *msg);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index f5b8497cf5ad..f2bf1d374cc7 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -226,6 +226,7 @@ const struct drm_driver amdxdna_drm_drv = {
 	.num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
 
 	.gem_create_object = amdxdna_gem_create_object_cb,
+	.gem_prime_import = amdxdna_gem_prime_import,
 };
 
 static const struct amdxdna_dev_info *
@@ -266,12 +267,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		fs_reclaim_release(GFP_KERNEL);
 	}
 
+	xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0);
+	if (!xdna->notifier_wq)
+		return -ENOMEM;
+
 	mutex_lock(&xdna->dev_lock);
 	ret = xdna->dev_info->ops->init(xdna);
 	mutex_unlock(&xdna->dev_lock);
 	if (ret) {
 		XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
-		return ret;
+		goto destroy_notifier_wq;
 	}
 
 	ret = amdxdna_sysfs_init(xdna);
@@ -301,6 +306,8 @@ failed_dev_fini:
 	mutex_lock(&xdna->dev_lock);
 	xdna->dev_info->ops->fini(xdna);
 	mutex_unlock(&xdna->dev_lock);
+destroy_notifier_wq:
+	destroy_workqueue(xdna->notifier_wq);
 	return ret;
 }
 
@@ -310,6 +317,8 @@ static void amdxdna_remove(struct pci_dev *pdev)
 	struct device *dev = &pdev->dev;
 	struct amdxdna_client *client;
 
+	destroy_workqueue(xdna->notifier_wq);
+
 	pm_runtime_get_noresume(dev);
 	pm_runtime_forbid(dev);
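The amdxdna_mailbox.c hunks above move the interrupt-register ack out of the hard IRQ handler into mailbox_rx_worker(), and re-read the register before the worker exits, because the hardware raises no new interrupt for a response that lands right after the ack. A compact sketch of that clear/drain/re-check loop; reg_read(), reg_write() and fetch_message() are stand-ins for the real MMIO accessors and ring-buffer parser:

	#include <stdbool.h>

	extern unsigned int reg_read(void);	/* interrupt status register */
	extern void reg_write(unsigned int v);	/* write 0 to acknowledge */
	extern bool fetch_message(void);	/* true while responses remain */

	static void rx_worker(void)
	{
	again:
		/* Acknowledge first, then drain whatever is in the ring. */
		reg_write(0);

		while (fetch_message())
			;

		/*
		 * A response created right after the ack above would not raise
		 * a fresh interrupt, so poll the register once more before
		 * going idle; loop back if anything arrived in the window.
		 */
		if (reg_read())
			goto again;
	}
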
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 37848a8d8031..ab79600911aa 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,6 +6,7 @@
 #ifndef _AMDXDNA_PCI_DRV_H_
 #define _AMDXDNA_PCI_DRV_H_
 
+#include <linux/workqueue.h>
 #include <linux/xarray.h>
 
 #define XDNA_INFO(xdna, fmt, args...)	drm_info(&(xdna)->ddev, fmt, ##args)
@@ -98,6 +99,7 @@ struct amdxdna_dev {
 	struct list_head		client_list;
 	struct amdxdna_fw_ver		fw_ver;
 	struct rw_semaphore		notifier_lock; /* for mmu notifier*/
+	struct workqueue_struct		*notifier_wq;
 };
 
 /*
diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig
index be85336107f9..1919fbb169c7 100644
--- a/drivers/accel/habanalabs/Kconfig
+++ b/drivers/accel/habanalabs/Kconfig
@@ -6,7 +6,7 @@
 config DRM_ACCEL_HABANALABS
 	tristate "HabanaLabs AI accelerators"
 	depends on DRM_ACCEL
-	depends on X86_64
+	depends on X86 && X86_64
 	depends on PCI && HAS_IOMEM
 	select GENERIC_ALLOCATOR
 	select HWMON
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 59823e3c3bf7..dee487724918 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -2586,7 +2586,7 @@ int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 	cs_seq = args->in.seq;
 
 	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
-			? msecs_to_jiffies(args->in.timeout * 1000)
+			? secs_to_jiffies(args->in.timeout)
			: hpriv->hdev->timeout_jiffies;
 
 	switch (cs_type) {
diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index ca7677293a55..4b391807e5f2 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -1403,7 +1403,7 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
 		return rc;
 
 	if (value)
-		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+		hdev->timeout_jiffies = secs_to_jiffies(value);
 	else
 		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 30277ae410d4..68eebed3b050 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2091,7 +2091,7 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
 	dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
 		hdev->device_release_watchdog_timeout_sec);
 	schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
-				msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
+				secs_to_jiffies(hdev->device_release_watchdog_timeout_sec));
 	hdev->reset_info.watchdog_active = 1;
out:
 	spin_unlock(&hdev->reset_info.lock);
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 596c52e8aa26..0035748f3228 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -386,7 +386,7 @@ static int fixup_device_params(struct hl_device *hdev)
 	hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
 
 	if (tmp_timeout)
-		hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
+		hdev->timeout_jiffies = secs_to_jiffies(tmp_timeout);
 	else
 		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8729a0c57d78..dc80ca921d90 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,8 +17,6 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 
-#include <asm/msr.h>
-
 /* make sure there is space for all the signed info */
 static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index e9f8ccc0bbf9..9d58efa2ff38 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -368,7 +368,7 @@ out:
 }
 
 static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
-			struct bin_attribute *attr, char *buf, loff_t offset,
+			const struct bin_attribute *attr, char *buf, loff_t offset,
			size_t max_size)
 {
 	struct device *dev = kobj_to_dev(kobj);
@@ -443,10 +443,10 @@ static DEVICE_ATTR_RO(security_enabled);
 static DEVICE_ATTR_RO(module_id);
 static DEVICE_ATTR_RO(parent_device);
 
-static struct bin_attribute bin_attr_eeprom = {
+static const struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
 	.size = PAGE_SIZE,
-	.read = eeprom_read_handler
+	.read_new = eeprom_read_handler
 };
 
 static struct attribute *hl_dev_attrs[] = {
@@ -472,14 +472,14 @@ static struct attribute *hl_dev_attrs[] = {
 	NULL,
 };
 
-static struct bin_attribute *hl_dev_bin_attrs[] = {
+static const struct bin_attribute *const hl_dev_bin_attrs[] = {
 	&bin_attr_eeprom,
 	NULL
 };
 
 static struct attribute_group hl_dev_attr_group = {
 	.attrs = hl_dev_attrs,
-	.bin_attrs = hl_dev_bin_attrs,
+	.bin_attrs_new = hl_dev_bin_attrs,
 };
 
 static struct attribute_group hl_dev_clks_attr_group;
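The habanalabs hunks above replace msecs_to_jiffies(x * 1000) with secs_to_jiffies(x), dropping the hand-rolled seconds-to-milliseconds multiply. A userspace model of the two conversions, assuming HZ=250 purely for illustration (the real kernel helpers handle more edge cases), shows they agree for whole-second inputs:

	#include <assert.h>

	#define HZ 250UL

	static unsigned long msecs_to_jiffies_model(unsigned long ms)
	{
		return (ms * HZ + 999) / 1000;	/* round up to the next tick */
	}

	static unsigned long secs_to_jiffies_model(unsigned long secs)
	{
		return secs * HZ;	/* no intermediate millisecond step */
	}

	int main(void)
	{
		for (unsigned long s = 1; s < 100; s++)
			assert(msecs_to_jiffies_model(s * 1000) ==
			       secs_to_jiffies_model(s));
		return 0;
	}
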
struct file *file) +{ + return single_open(file, priority_bands_show, inode->i_private); +} + +static ssize_t +priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct seq_file *s = file->private_data; + struct ivpu_device *vdev = s->private; + char buf[64]; + u32 grace_period; + u32 process_grace_period; + u32 process_quantum; + u32 band; + int ret; + + if (size >= sizeof(buf)) + return -EINVAL; + + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, pos, user_buf, size); + if (ret < 0) + return ret; + + buf[ret] = '\0'; + ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, + &process_quantum); + if (ret != 4) + return -EINVAL; + + if (band >= VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT) + return -EINVAL; + + vdev->hw->hws.grace_period[band] = grace_period; + vdev->hw->hws.process_grace_period[band] = process_grace_period; + vdev->hw->hws.process_quantum[band] = process_quantum; + + return size; +} + +static const struct file_operations ivpu_hws_priority_bands_fops = { + .owner = THIS_MODULE, + .open = priority_bands_fops_open, + .write = priority_bands_fops_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void ivpu_debugfs_init(struct ivpu_device *vdev) { struct dentry *debugfs_root = vdev->drm.debugfs_root; @@ -419,6 +502,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) &fw_trace_hw_comp_mask_fops); debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev, &fw_trace_level_fops); + debugfs_create_file("hws_priority_bands", 0200, debugfs_root, vdev, + &ivpu_hws_priority_bands_fops); debugfs_create_file("reset_engine", 0200, debugfs_root, vdev, &ivpu_reset_engine_fops); @@ -430,4 +515,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_root, vdev, &fw_profiling_freq_fops); debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops); } + +#ifdef CONFIG_FAULT_INJECTION + fault_create_debugfs_attr("fail_hw", debugfs_root, &ivpu_hw_failure); +#endif } diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 38cf1c342c72..0e7748c5e117 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <linux/firmware.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/pm_runtime.h> +#include <linux/workqueue.h> #include <generated/utsrelease.h> #include <drm/drm_accel.h> @@ -36,8 +37,6 @@ #define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE #endif -static struct lock_class_key submitted_jobs_xa_lock_class_key; - int ivpu_dbg_mask; module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. 
See IVPU_DBG_* macros."); @@ -128,20 +127,18 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) kref_put(&file_priv->ref, file_priv_release); } -static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param *args) +bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability) { - switch (args->index) { + switch (capability) { case DRM_IVPU_CAP_METRIC_STREAMER: - args->value = 1; - break; + return true; case DRM_IVPU_CAP_DMA_MEMORY_RANGE: - args->value = 1; - break; + return true; + case DRM_IVPU_CAP_MANAGE_CMDQ: + return vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW; default: - return -EINVAL; + return false; } - - return 0; } static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -167,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); + args->value = ivpu_hw_dpu_max_freq_get(vdev); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); @@ -201,7 +198,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->hw->sku; break; case DRM_IVPU_PARAM_CAPABILITIES: - ret = ivpu_get_capabilities(vdev, args); + args->value = ivpu_is_capable(vdev, args->index); break; default: ret = -EINVAL; @@ -310,6 +307,9 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_CMDQ_CREATE, ivpu_cmdq_create_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_CMDQ_DESTROY, ivpu_cmdq_destroy_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_CMDQ_SUBMIT, ivpu_cmdq_submit_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) @@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev) { int ret; + drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter)); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + /* Update boot params located at first 4KB of FW memory */ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem)); @@ -421,6 +424,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev) { ivpu_hw_irq_disable(vdev); disable_irq(vdev->irq); + flush_work(&vdev->irq_ipc_work); + flush_work(&vdev->irq_dct_work); + flush_work(&vdev->context_abort_work); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); } @@ -453,7 +459,7 @@ static const struct drm_driver driver = { .postclose = ivpu_postclose, .gem_create_object = ivpu_gem_create_object, - .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, + .gem_prime_import = ivpu_gem_prime_import, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), @@ -465,54 +471,6 @@ static const struct drm_driver driver = { .major = 1, }; -static void ivpu_context_abort_invalid(struct ivpu_device *vdev) -{ - struct ivpu_file_priv *file_priv; - unsigned long ctx_id; - - mutex_lock(&vdev->context_list_lock); - - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - if (!file_priv->has_mmu_faults || file_priv->aborted) - continue; - - mutex_lock(&file_priv->lock); - ivpu_context_abort_locked(file_priv); - file_priv->aborted = true; - mutex_unlock(&file_priv->lock); - } - - mutex_unlock(&vdev->context_list_lock); -} - -static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg) -{ - 
struct ivpu_device *vdev = arg; - u8 irq_src; - - if (kfifo_is_empty(&vdev->hw->irq.fifo)) - return IRQ_NONE; - - while (kfifo_get(&vdev->hw->irq.fifo, &irq_src)) { - switch (irq_src) { - case IVPU_HW_IRQ_SRC_IPC: - ivpu_ipc_irq_thread_handler(vdev); - break; - case IVPU_HW_IRQ_SRC_MMU_EVTQ: - ivpu_context_abort_invalid(vdev); - break; - case IVPU_HW_IRQ_SRC_DCT: - ivpu_pm_dct_irq_thread_handler(vdev); - break; - default: - ivpu_err_ratelimited(vdev, "Unknown IRQ source: %u\n", irq_src); - break; - } - } - - return IRQ_HANDLED; -} - static int ivpu_irq_init(struct ivpu_device *vdev) { struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); @@ -524,12 +482,16 @@ static int ivpu_irq_init(struct ivpu_device *vdev) return ret; } + INIT_WORK(&vdev->irq_ipc_work, ivpu_ipc_irq_work_fn); + INIT_WORK(&vdev->irq_dct_work, ivpu_pm_irq_dct_work_fn); + INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_work_fn); + ivpu_irq_handlers_init(vdev); vdev->irq = pci_irq_vector(pdev, 0); - ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler, - ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev); + ret = devm_request_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler, + IRQF_NO_AUTOEN, DRIVER_NAME, vdev); if (ret) ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); @@ -614,10 +576,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); + atomic_set(&vdev->job_timeout_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); - lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list); vdev->db_limit.min = IVPU_MIN_DB; @@ -627,6 +589,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (ret) goto err_xa_destroy; + ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock); + if (ret) + goto err_xa_destroy; + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); if (ret) goto err_xa_destroy; diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 3fdff3f6cffd..5497e7030e91 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -58,6 +58,7 @@ #define IVPU_PLATFORM_SILICON 0 #define IVPU_PLATFORM_SIMICS 2 #define IVPU_PLATFORM_FPGA 3 +#define IVPU_PLATFORM_HSLE 4 #define IVPU_PLATFORM_INVALID 8 #define IVPU_SCHED_MODE_AUTO -1 @@ -110,6 +111,7 @@ struct ivpu_wa_table { bool disable_clock_relinquish; bool disable_d0i3_msg; bool wp0_during_power_up; + bool disable_d0i2; }; struct ivpu_hw_info; @@ -142,11 +144,17 @@ struct ivpu_device { struct xa_limit db_limit; u32 db_next; + struct work_struct irq_ipc_work; + struct work_struct irq_dct_work; + struct work_struct context_abort_work; + struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list; + struct mutex submitted_jobs_lock; /* Protects submitted_jobs */ struct xarray submitted_jobs_xa; struct ivpu_ipc_consumer job_done_consumer; + atomic_t job_timeout_counter; atomic64_t unique_id_counter; @@ -200,6 +208,9 @@ extern bool ivpu_force_snoop; #define IVPU_TEST_MODE_MIP_DISABLE BIT(6) #define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8) #define IVPU_TEST_MODE_TURBO BIT(9) +#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10) +#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(11) +#define IVPU_TEST_MODE_D0I2_DISABLE BIT(12) extern 
int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); @@ -208,6 +219,7 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link); int ivpu_boot(struct ivpu_device *vdev); int ivpu_shutdown(struct ivpu_device *vdev); void ivpu_prepare_for_reset(struct ivpu_device *vdev); +bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability); static inline u8 ivpu_revision(struct ivpu_device *vdev) { @@ -282,7 +294,8 @@ static inline bool ivpu_is_simics(struct ivpu_device *vdev) static inline bool ivpu_is_fpga(struct ivpu_device *vdev) { - return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA; + return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA || + ivpu_get_platform(vdev) == IVPU_PLATFORM_HSLE; } static inline bool ivpu_is_force_snoop_enabled(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 6037ec0b3096..ccaaf6c100c0 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <linux/firmware.h> @@ -145,7 +145,10 @@ ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_he if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO) return ivpu_sched_mode; - return VPU_SCHEDULING_MODE_OS; + if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, JSM, 3, 24)) + return VPU_SCHEDULING_MODE_OS; + + return VPU_SCHEDULING_MODE_HW; } static int ivpu_fw_parse(struct ivpu_device *vdev) @@ -230,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->dvfs_mode = 0; fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); + if (fw_hdr->preemption_buffer_1_max_size) + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; + else + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; + + if (fw_hdr->preemption_buffer_2_max_size) + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; + else + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); + if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, fw_hdr->image_load_address, @@ -531,6 +544,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", + boot_params->power_profile); } void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) @@ -561,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); /* * This param is a debug firmware feature. 
It switches default clock @@ -631,6 +645,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_delayed_entry = 1; boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; + if (IVPU_WA(disable_d0i2)) + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 1d0b2bd9d65c..9a3935be1c05 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -39,6 +39,7 @@ struct ivpu_fw_info { u64 read_only_addr; u32 read_only_size; u32 sched_mode; + u64 last_heartbeat; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 16178054e629..e0d242d9f3e5 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -20,6 +20,8 @@ #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +MODULE_IMPORT_NS("DMA_BUF"); + static const struct drm_gem_object_funcs ivpu_gem_funcs; static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action) @@ -28,7 +30,7 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n", action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0, (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc, - (bool)bo->base.base.import_attach); + (bool)drm_gem_is_imported(&bo->base.base)); } /* @@ -120,7 +122,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) bo->ctx = NULL; } - if (bo->base.base.import_attach) + if (drm_gem_is_imported(&bo->base.base)) return; dma_resv_lock(bo->base.base.resv, NULL); @@ -172,6 +174,47 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz return &bo->base.base; } +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct device *attach_dev = dev->dev; + struct dma_buf_attachment *attach; + struct sg_table *sgt; + struct drm_gem_object *obj; + int ret; + + attach = dma_buf_attach(dma_buf, attach_dev); + if (IS_ERR(attach)) + return ERR_CAST(attach); + + get_dma_buf(dma_buf); + + sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto fail_detach; + } + + obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto fail_unmap; + } + + obj->import_attach = attach; + obj->resv = dma_buf->resv; + + return obj; + +fail_unmap: + dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL); +fail_detach: + dma_buf_detach(dma_buf, attach); + dma_buf_put(dma_buf); + + return ERR_PTR(ret); +} + static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags) { struct drm_gem_shmem_object *shmem; @@ -239,7 +282,7 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj) ivpu_bo_unbind_locked(bo); mutex_destroy(&bo->lock); - drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1); + drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1); drm_gem_shmem_free(&bo->base); } @@ -319,7 +362,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, if (flags & DRM_IVPU_BO_MAPPABLE) { dma_resv_lock(bo->base.base.resv, NULL); - ret = drm_gem_shmem_vmap(&bo->base, &map); + ret = 
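The new ivpu_gem_prime_import() above follows the canonical import sequence: attach to the dma-buf, map it to a scatter-gather table, wrap it in a shmem GEM object, then record the attachment. From userspace this path is reached through PRIME fd-to-handle; a hedged libdrm sketch (function and fd names illustrative):

#include <stdint.h>
#include <xf86drm.h>

/* Import a dma-buf exported by another device into the NPU DRM device.
 * drmPrimeFDToHandle() wraps DRM_IOCTL_PRIME_FD_TO_HANDLE, which lands in
 * the driver's .gem_prime_import hook. */
static int import_into_npu(int npu_drm_fd, int dmabuf_fd, uint32_t *handle)
{
	return drmPrimeFDToHandle(npu_drm_fd, dmabuf_fd, handle);
}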
drm_gem_shmem_vmap_locked(&bo->base, &map); dma_resv_unlock(bo->base.base.resv); if (ret) @@ -344,7 +387,7 @@ void ivpu_bo_free(struct ivpu_bo *bo) if (bo->flags & DRM_IVPU_BO_MAPPABLE) { dma_resv_lock(bo->base.base.resv, NULL); - drm_gem_shmem_vunmap(&bo->base, &map); + drm_gem_shmem_vunmap_locked(&bo->base, &map); dma_resv_unlock(bo->base.base.resv); } @@ -418,7 +461,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) if (bo->mmu_mapped) drm_printf(p, " mmu_mapped"); - if (bo->base.base.import_attach) + if (drm_gem_is_imported(&bo->base.base)) drm_printf(p, " imported"); drm_printf(p, "\n"); diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index d975000abd78..a222a9ec9d61 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -28,6 +28,7 @@ int ivpu_bo_pin(struct ivpu_bo *bo); void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, struct ivpu_addr_range *range, u64 size, u32 flags); struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags); diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index 4e1054f3466e..633160470c93 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -9,6 +9,16 @@ #include "ivpu_hw_ip.h" #include <linux/dmi.h> +#include <linux/fault-inject.h> +#include <linux/pm_runtime.h> + +#ifdef CONFIG_FAULT_INJECTION +DECLARE_FAULT_ATTR(ivpu_hw_failure); + +static char *ivpu_fail_hw; +module_param_named_unsafe(fail_hw, ivpu_fail_hw, charp, 0444); +MODULE_PARM_DESC(fail_hw, "<interval>,<probability>,<space>,<times>"); +#endif static char *platform_to_str(u32 platform) { @@ -19,43 +29,36 @@ static char *platform_to_str(u32 platform) return "SIMICS"; case IVPU_PLATFORM_FPGA: return "FPGA"; + case IVPU_PLATFORM_HSLE: + return "HSLE"; default: return "Invalid platform"; } } -static const struct dmi_system_id dmi_platform_simulation[] = { - { - .ident = "Intel Simics", - .matches = { - DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), - DMI_MATCH(DMI_BOARD_SERIAL, "123456789"), - }, - }, - { - .ident = "Intel Simics", - .matches = { - DMI_MATCH(DMI_BOARD_NAME, "Simics"), - }, - }, - { } -}; - static void platform_init(struct ivpu_device *vdev) { - if (dmi_check_system(dmi_platform_simulation)) - vdev->platform = IVPU_PLATFORM_SIMICS; - else - vdev->platform = IVPU_PLATFORM_SILICON; + int platform = ivpu_hw_btrs_platform_read(vdev); - ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", - platform_to_str(vdev->platform), vdev->platform); + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", platform_to_str(platform), platform); + + switch (platform) { + case IVPU_PLATFORM_SILICON: + case IVPU_PLATFORM_SIMICS: + case IVPU_PLATFORM_FPGA: + case IVPU_PLATFORM_HSLE: + vdev->platform = platform; + break; + + default: + ivpu_err(vdev, "Invalid platform type: %d\n", platform); + break; + } } static void wa_init(struct ivpu_device *vdev) { - vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.punit_disabled = false; vdev->wa.clear_runtime_mem = false; if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) @@ -65,14 +68,24 @@ static void wa_init(struct ivpu_device *vdev) ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) 
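The fail_hw parameter added above plugs the standard fault-injection framework into register polling. The same facility in a self-contained form (module and symbol names hypothetical):

#include <linux/fault-inject.h>
#include <linux/module.h>

static DECLARE_FAULT_ATTR(my_failure);

static char *my_fail_spec;
module_param_named_unsafe(fail_me, my_fail_spec, charp, 0444);
MODULE_PARM_DESC(fail_me, "<interval>,<probability>,<space>,<times>");

static int __init my_init(void)
{
	/* Parse "<interval>,<probability>,<space>,<times>" into the attr. */
	if (my_fail_spec)
		setup_fault_attr(&my_failure, my_fail_spec);
	return 0;
}
module_init(my_init);

static int my_hw_op(void)
{
	/* Each call rolls the dice configured via the module parameter. */
	if (should_fail(&my_failure, 1))
		return -ETIMEDOUT;
	return 0;
}

MODULE_LICENSE("GPL");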
vdev->wa.disable_clock_relinquish = true; + if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE) + vdev->wa.disable_clock_relinquish = false; + + if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_DISABLE) + vdev->wa.disable_clock_relinquish = true; + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) vdev->wa.wp0_during_power_up = true; + if (ivpu_test_mode & IVPU_TEST_MODE_D0I2_DISABLE) + vdev->wa.disable_d0i2 = true; + IVPU_PRINT_WA(punit_disabled); IVPU_PRINT_WA(clear_runtime_mem); IVPU_PRINT_WA(interrupt_clear_with_0); IVPU_PRINT_WA(disable_clock_relinquish); IVPU_PRINT_WA(wp0_during_power_up); + IVPU_PRINT_WA(disable_d0i2); } static void timeouts_init(struct ivpu_device *vdev) @@ -84,12 +97,12 @@ static void timeouts_init(struct ivpu_device *vdev) vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = -1; } else if (ivpu_is_fpga(vdev)) { - vdev->timeout.boot = 100000; - vdev->timeout.jsm = 50000; - vdev->timeout.tdr = 2000000; + vdev->timeout.boot = 50; + vdev->timeout.jsm = 15000; + vdev->timeout.tdr = 30000; vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = 500; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 10000; } else if (ivpu_is_simics(vdev)) { vdev->timeout.boot = 50; vdev->timeout.jsm = 500; @@ -106,10 +119,30 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 100; } } +static void priority_bands_init(struct ivpu_device *vdev) +{ + /* Idle */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 0; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 160000; + /* Normal */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 300000; + /* Focus */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 200000; + /* Realtime */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 0; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 200000; +} + static void memory_ranges_init(struct ivpu_device *vdev) { if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { @@ -248,12 +281,18 @@ int ivpu_hw_init(struct ivpu_device *vdev) { ivpu_hw_btrs_info_init(vdev); ivpu_hw_btrs_freq_ratios_init(vdev); + priority_bands_init(vdev); memory_ranges_init(vdev); platform_init(vdev); wa_init(vdev); timeouts_init(vdev); atomic_set(&vdev->hw->firewall_irq_counter, 0); +#ifdef CONFIG_FAULT_INJECTION + if (ivpu_fail_hw) + setup_fault_attr(&ivpu_hw_failure, ivpu_fail_hw); +#endif + return 0; } @@ -285,8 +324,6 @@ void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) void ivpu_irq_handlers_init(struct ivpu_device *vdev) { - INIT_KFIFO(vdev->hw->irq.fifo); - if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_37xx; else @@ -300,7 +337,6 @@ void ivpu_irq_handlers_init(struct ivpu_device *vdev) void ivpu_hw_irq_enable(struct ivpu_device *vdev) { - 
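The per-band tunings that priority_bands_init() programs above, and that ivpu_jsm_hws_setup_priority_bands() later copies into the JSM request, read more clearly as a table; this restates the same values, nothing more:

/* grace_period / process_grace_period / process_quantum per band */
enum { BAND_IDLE, BAND_NORMAL, BAND_FOCUS, BAND_REALTIME, BAND_COUNT };

struct band_tuning {
	u32 grace_period;
	u32 process_grace_period;
	u32 process_quantum;
};

static const struct band_tuning tuning[BAND_COUNT] = {
	[BAND_IDLE]     = {     0, 50000, 160000 },
	[BAND_NORMAL]   = { 50000, 50000, 300000 },
	[BAND_FOCUS]    = { 50000, 50000, 200000 },
	[BAND_REALTIME] = {     0, 50000, 200000 },
};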
kfifo_reset(&vdev->hw->irq.fifo); ivpu_hw_ip_irq_enable(vdev); ivpu_hw_btrs_irq_enable(vdev); } @@ -327,9 +363,9 @@ irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr) /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ ivpu_hw_btrs_global_int_enable(vdev); - if (!kfifo_is_empty(&vdev->hw->irq.fifo)) - return IRQ_WAKE_THREAD; - if (ip_handled || btrs_handled) - return IRQ_HANDLED; - return IRQ_NONE; + if (!ip_handled && !btrs_handled) + return IRQ_NONE; + + pm_runtime_mark_last_busy(vdev->drm.dev); + return IRQ_HANDLED; } diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index fc4dbfc980c8..d79668fe1609 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,23 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_H__ #define __IVPU_HW_H__ -#include <linux/kfifo.h> - #include "ivpu_drv.h" #include "ivpu_hw_btrs.h" #include "ivpu_hw_ip.h" -#define IVPU_HW_IRQ_FIFO_LENGTH 1024 - -#define IVPU_HW_IRQ_SRC_IPC 1 -#define IVPU_HW_IRQ_SRC_MMU_EVTQ 2 -#define IVPU_HW_IRQ_SRC_DCT 3 - struct ivpu_addr_range { resource_size_t start; resource_size_t end; @@ -27,7 +19,6 @@ struct ivpu_hw_info { struct { bool (*btrs_irq_handler)(struct ivpu_device *vdev, int irq); bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq); - DECLARE_KFIFO(fifo, u8, IVPU_HW_IRQ_FIFO_LENGTH); } irq; struct { struct ivpu_addr_range global; @@ -45,6 +36,11 @@ struct ivpu_hw_info { u8 pn_ratio; u32 profiling_freq; } pll; + struct { + u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + } hws; u32 tile_fuse; u32 sku; u16 config; @@ -86,19 +82,19 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) return range->end - range->start; } -static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) { - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); + return ivpu_hw_btrs_dpu_max_freq_get(vdev); } -static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) { - ivpu_hw_ip_irq_clear(vdev); + return ivpu_hw_btrs_dpu_freq_get(vdev); } -static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { - return ivpu_hw_btrs_pll_freq_get(vdev); + ivpu_hw_ip_irq_clear(vdev); } static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 3212c99f3682..b236c7234daa 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ +#include <linux/units.h> + #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_hw_btrs.h" @@ -28,17 +30,13 @@ #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) -#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, 
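Note the new tail of ivpu_hw_irq_handler() above: with no threaded handler left, it only distinguishes "not ours" from "handled", and it refreshes the runtime-PM timestamp so autosuspend is pushed back after device activity. The shape of that idiom, with hypothetical names:

#include <linux/interrupt.h>
#include <linux/pm_runtime.h>

struct my_dev {
	struct device *dev;
};

static bool my_dispatch(struct my_dev *mdev)
{
	/* Queue bottom-half work items; report whether the IRQ was ours. */
	return true;
}

static irqreturn_t my_irq(int irq, void *ptr)
{
	struct my_dev *mdev = ptr;

	if (!my_dispatch(mdev))
		return IRQ_NONE;  /* spurious or not our interrupt */

	/* Device was active: restart the autosuspend delay window. */
	pm_runtime_mark_last_busy(mdev->dev);
	return IRQ_HANDLED;
}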
MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_CONFIG_DEFAULT 0x0 -#define PLL_SIMULATION_FREQ 10000000 -#define PLL_REF_CLK_FREQ 50000000 +#define PLL_REF_CLK_FREQ 50000000ull +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) + #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) #define TIMEOUT_US (150 * USEC_PER_MSEC) @@ -62,6 +60,8 @@ #define DCT_ENABLE 0x1 #define DCT_DISABLE 0x0 +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); + int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) { REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); @@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev) hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; hw->sku = BTRS_MTL_TILE_SKU_BOTH; - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); return 0; } @@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) prepare_wp_request(vdev, &wp, enable); - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); ret = wp_request_send(vdev, &wp); if (ret) { @@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); } +static u32 pll_config_get_mtl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); +} + +static u32 pll_config_get_lnl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); +} + +static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) +{ + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; +} + +static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) +{ + return PLL_RATIO_TO_FREQ(ratio) / 2; +} + +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(ratio); + else + return pll_ratio_to_dpu_freq_lnl(ratio); +} + +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) +{ + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); +} + +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); + else + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); +} + /* Handler for IRQs from Buttress core (irqB) */ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) { @@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) if (!status) return false; - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_mtl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); @@ -630,12 +674,15 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device 
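The frequency helpers above encode: PLL clock = ratio x 50 MHz reference, and DPU clock = 2/3 of that on MTL or 1/2 on LNL. A worked example with illustrative ratio values:

/* Same arithmetic as pll_ratio_to_dpu_freq_{mtl,lnl}(), in MHz. */
static const unsigned int pll_ref_mhz = 50;

static unsigned int dpu_mhz_mtl(unsigned int ratio)
{
	return ratio * pll_ref_mhz * 2 / 3; /* ratio 60 -> 2000 MHz */
}

static unsigned int dpu_mhz_lnl(unsigned int ratio)
{
	return ratio * pll_ref_mhz / 2;     /* ratio 77 -> 1925 MHz */
}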
*vdev, int irq) if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) { ivpu_dbg(vdev, IRQ, "Survivability IRQ\n"); - if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_DCT)) - ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); + queue_work(system_wq, &vdev->irq_dct_work); } - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_lnl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", @@ -718,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); } -static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) -{ - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; - u32 cpu_clock; - - if ((config & 0xff) == MTL_PLL_RATIO_4_3) - cpu_clock = pll_clock * 2 / 4; - else - cpu_clock = pll_clock * 2 / 5; - - return cpu_clock; -} - -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) -{ - struct ivpu_hw_info *hw = vdev->hw; - - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_ratio_to_freq_mtl(ratio, hw->config); - else - return PLL_RATIO_TO_FREQ(ratio); -} - -static u32 pll_freq_get_mtl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); - pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); -} - -static u32 pll_freq_get_lnl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) -{ - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_freq_get_mtl(vdev); - else - return pll_freq_get_lnl(vdev); -} - u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) { if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) @@ -888,3 +881,10 @@ void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev) else return diagnose_failure_lnl(vdev); } + +int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev) +{ + u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + return REG_GET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PLATFORM, reg); +} diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 04f14f50fed6..d2d82651976d 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_BTRS_H__ @@ -13,9 +13,8 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); @@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev); void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); void 
ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); @@ -46,5 +45,6 @@ void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev); void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev); void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev); void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev); +int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev); #endif /* __IVPU_HW_BTRS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h index fc51f3098f97..fff2ef2cada6 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h @@ -86,6 +86,7 @@ #define VPU_HW_BTRS_LNL_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) #define VPU_HW_BTRS_LNL_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) #define VPU_HW_BTRS_LNL_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) +#define VPU_HW_BTRS_LNL_VPU_STATUS_PLATFORM_MASK GENMASK(31, 29) #define VPU_HW_BTRS_LNL_IP_RESET 0x00000160u #define VPU_HW_BTRS_LNL_IP_RESET_TRIGGER_MASK BIT_MASK(0) diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 029dd065614b..823f6a57dc54 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -968,14 +968,14 @@ void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev) static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev) { - u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); + u32 count = readl(vdev->regv + VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); } static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev) { - u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); + u32 count = readl(vdev->regv + VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); } diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h index 79b3f441eac4..66259b0ead02 100644 --- a/drivers/accel/ivpu/ivpu_hw_reg_io.h +++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h @@ -7,6 +7,7 @@ #define __IVPU_HW_REG_IO_H__ #include <linux/bitfield.h> +#include <linux/fault-inject.h> #include <linux/io.h> #include <linux/iopoll.h> @@ -16,13 +17,11 @@ #define REG_IO_ERROR 0xffffffff #define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__) -#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg)) #define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__) #define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__) #define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__) #define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, 
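Platform probing above moves from DMI matching to a register field: bits 31:29 of VPU_STATUS. Decoding such a field is a one-liner with the bitfield helpers:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define STATUS_PLATFORM_MASK GENMASK(31, 29)

/* FIELD_GET() masks and shifts in one step: for reg = 0x80000000,
 * (reg & GENMASK(31, 29)) >> 29 == 4, i.e. IVPU_PLATFORM_HSLE. */
static u32 platform_from_status(u32 reg)
{
	return FIELD_GET(STATUS_PLATFORM_MASK, reg);
}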
vdev->regv, (reg), #reg, __func__) -#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg)) #define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__) #define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__) #define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__) @@ -47,31 +46,42 @@ #define REG_TEST_FLD_NUM(REG, FLD, num, val) \ ((num) == FIELD_GET(REG##_##FLD##_MASK, val)) -#define REGB_POLL_FLD(reg, fld, val, timeout_us) \ -({ \ - u32 var; \ - int r; \ - ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \ - __func__, #reg, reg, #fld, val); \ - r = read_poll_timeout(REGB_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\ - REG_POLL_SLEEP_US, timeout_us, false, (reg)); \ - ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \ - __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \ - r; \ -}) - -#define REGV_POLL_FLD(reg, fld, val, timeout_us) \ -({ \ - u32 var; \ - int r; \ - ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \ - __func__, #reg, reg, #fld, val); \ - r = read_poll_timeout(REGV_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\ - REG_POLL_SLEEP_US, timeout_us, false, (reg)); \ - ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \ - __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \ - r; \ -}) +#define REGB_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \ + ivpu_hw_reg_poll_fld(vdev, vdev->regb, reg, reg##_##fld##_MASK, \ + FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \ + __func__, #reg, #fld) + +#define REGV_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \ + ivpu_hw_reg_poll_fld(vdev, vdev->regv, reg, reg##_##fld##_MASK, \ + FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \ + __func__, #reg, #fld) + +extern struct fault_attr ivpu_hw_failure; + +static inline int __must_check +ivpu_hw_reg_poll_fld(struct ivpu_device *vdev, void __iomem *base, + u32 reg_offset, u32 reg_mask, u32 exp_masked_val, u32 timeout_us, + const char *func_name, const char *reg_name, const char *fld_name) +{ + u32 reg_val; + int ret; + + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s started (exp_val 0x%x)\n", + func_name, reg_name, reg_offset, fld_name, exp_masked_val); + + ret = read_poll_timeout(readl, reg_val, (reg_val & reg_mask) == exp_masked_val, + REG_POLL_SLEEP_US, timeout_us, false, base + reg_offset); + +#ifdef CONFIG_FAULT_INJECTION + if (should_fail(&ivpu_hw_failure, 1)) + ret = -ETIMEDOUT; +#endif + + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s %s (reg_val 0x%08x)\n", + func_name, reg_name, reg_offset, fld_name, ret ? 
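The two near-identical poll macros above collapse into one inline that pre-masks the expected value with FIELD_PREP() and delegates to read_poll_timeout(). The same idiom reduced to its core:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/types.h>

#define MY_STATUS_READY_MASK BIT(0)

/* Spin on readl() until READY == 1, sleeping 50 us between reads and
 * giving up after 1 ms; returns 0 on success or -ETIMEDOUT. */
static int wait_for_ready(void __iomem *base, unsigned int offset)
{
	u32 val;

	return read_poll_timeout(readl, val,
				 (val & MY_STATUS_READY_MASK) ==
				 FIELD_PREP(MY_STATUS_READY_MASK, 1),
				 50, 1000, false, base + offset);
}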
"ETIMEDOUT" : "OK", reg_val); + + return ret; +} static inline u32 ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg, diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 01ebf88fe6ef..39f83225c181 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req struct ivpu_ipc_consumer cons; int ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) && + pm_runtime_enabled(vdev->drm.dev)); ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); @@ -459,13 +460,12 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev) } } - if (!list_empty(&ipc->cb_msg_list)) - if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_IPC)) - ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); + queue_work(system_wq, &vdev->irq_ipc_work); } -void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) +void ivpu_ipc_irq_work_fn(struct work_struct *work) { + struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_ipc_work); struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg, *r; struct list_head cb_msg_list; diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index b4dfb504679b..b524a1985b9d 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -90,7 +90,7 @@ void ivpu_ipc_disable(struct ivpu_device *vdev); void ivpu_ipc_reset(struct ivpu_device *vdev); void ivpu_ipc_irq_handler(struct ivpu_device *vdev); -void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_work_fn(struct work_struct *work); void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel, ivpu_ipc_rx_callback_t callback); diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 7149312f16e1..b28da35c30b6 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/highmem.h> #include <linux/pci.h> +#include <linux/pm_runtime.h> #include <linux/module.h> #include <uapi/drm/ivpu_accel.h> @@ -17,6 +18,7 @@ #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" +#include "ivpu_mmu.h" #include "ivpu_pm.h" #include "ivpu_trace.h" #include "vpu_boot_api.h" @@ -83,23 +85,9 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) if (!cmdq) return NULL; - ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, - GFP_KERNEL); - if (ret < 0) { - ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); - goto err_free_cmdq; - } - - ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, - &file_priv->cmdq_id_next, GFP_KERNEL); - if (ret < 0) { - ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret); - goto err_erase_db_xa; - } - cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!cmdq->mem) - goto err_erase_cmdq_xa; + goto err_free_cmdq; ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq); if (ret) @@ -107,10 +95,6 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) return cmdq; -err_erase_cmdq_xa: - xa_erase(&file_priv->cmdq_xa, cmdq->id); -err_erase_db_xa: - xa_erase(&vdev->db_xa, cmdq->db_id); err_free_cmdq: kfree(cmdq); return NULL; @@ -118,15 +102,44 @@ err_free_cmdq: static void 
ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - if (!cmdq) - return; - ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq); ivpu_bo_free(cmdq->mem); - xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); kfree(cmdq); } +static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, + bool is_legacy) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq = NULL; + int ret; + + lockdep_assert_held(&file_priv->lock); + + cmdq = ivpu_cmdq_alloc(file_priv); + if (!cmdq) { + ivpu_err(vdev, "Failed to allocate command queue\n"); + return NULL; + } + + cmdq->priority = priority; + cmdq->is_legacy = is_legacy; + + ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, + &file_priv->cmdq_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate command queue ID: %d\n", ret); + goto err_free_cmdq; + } + + ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id); + return cmdq; + +err_free_cmdq: + ivpu_cmdq_free(file_priv, cmdq); + return NULL; +} + static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, u8 priority) { @@ -152,6 +165,13 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq * struct ivpu_device *vdev = file_priv->vdev; int ret; + ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, + GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate doorbell ID: %d\n", ret); + return ret; + } + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); @@ -160,41 +180,52 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq * cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (!ret) - ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n", - cmdq->db_id, cmdq->id, file_priv->ctx.id); + ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n", + cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority); + else + xa_erase(&vdev->db_xa, cmdq->db_id); return ret; } -static int -ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority) +static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq) +{ + jobq->header.engine_idx = VPU_ENGINE_COMPUTE; + jobq->header.head = 0; + jobq->header.tail = 0; + + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { + ivpu_dbg(vdev, JOB, "Turbo mode enabled"); + jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + } + + wmb(); /* Flush WC buffer for jobq->header */ +} + +static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq) +{ + size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header); + + return size / sizeof(struct vpu_job_queue_entry); +} + +static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { struct ivpu_device *vdev = file_priv->vdev; - struct vpu_job_queue_header *jobq_header; int ret; lockdep_assert_held(&file_priv->lock); - if (cmdq->db_registered) + if (cmdq->db_id) return 0; - cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) / - sizeof(struct vpu_job_queue_entry)); - + cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq); cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); - jobq_header = &cmdq->jobq->header; - jobq_header->engine_idx = VPU_ENGINE_COMPUTE; - 
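Command queue creation and doorbell registration are now separate steps: ivpu_cmdq_create() takes only a queue ID, while the doorbell ID is allocated on first use by ivpu_register_db() and released on unregister. Both rely on the same cyclic XArray allocator:

#include <linux/xarray.h>

/* Allocate the next free ID within 'limit', continuing after '*next'
 * from the previous call so recently freed IDs are not reused at once.
 * Returns 0 (or 1 after wrapping) on success, negative errno on failure. */
static int alloc_cyclic_id(struct xarray *xa, u32 *id, void *entry,
			   struct xa_limit limit, u32 *next)
{
	return xa_alloc_cyclic(xa, id, entry, limit, next, GFP_KERNEL);
}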
jobq_header->head = 0; - jobq_header->tail = 0; - if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { - ivpu_dbg(vdev, JOB, "Turbo mode enabled"); - jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; - } - wmb(); /* Flush WC buffer for jobq->header */ + ivpu_cmdq_jobq_init(vdev, cmdq->jobq); if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { - ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority); + ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority); if (ret) return ret; } @@ -203,58 +234,83 @@ ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 prio if (ret) return ret; - cmdq->db_registered = true; - return 0; } -static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { struct ivpu_device *vdev = file_priv->vdev; int ret; lockdep_assert_held(&file_priv->lock); - if (!cmdq->db_registered) + if (!cmdq->db_id) return 0; - cmdq->db_registered = false; - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); if (!ret) - ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id); + ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n", + cmdq->id, file_priv->ctx.id); } ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); if (!ret) ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); + cmdq->db_id = 0; + return 0; } -static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u8 priority) +static inline u8 ivpu_job_to_jsm_priority(u8 priority) +{ + if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT) + return VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL; + + return priority - 1; +} + +static void ivpu_cmdq_destroy(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + ivpu_cmdq_unregister(file_priv, cmdq); + xa_erase(&file_priv->cmdq_xa, cmdq->id); + ivpu_cmdq_free(file_priv, cmdq); +} + +static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_priv, u8 priority) { struct ivpu_cmdq *cmdq; - unsigned long cmdq_id; - int ret; + unsigned long id; lockdep_assert_held(&file_priv->lock); - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) - if (cmdq->priority == priority) + xa_for_each(&file_priv->cmdq_xa, id, cmdq) + if (cmdq->is_legacy && cmdq->priority == priority) break; if (!cmdq) { - cmdq = ivpu_cmdq_alloc(file_priv); + cmdq = ivpu_cmdq_create(file_priv, priority, true); if (!cmdq) return NULL; - cmdq->priority = priority; } - ret = ivpu_cmdq_init(file_priv, cmdq, priority); - if (ret) + return cmdq; +} + +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32 cmdq_id) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq; + + lockdep_assert_held(&file_priv->lock); + + cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id); + if (!cmdq) { + ivpu_warn_ratelimited(vdev, "Failed to find command queue with ID: %u\n", cmdq_id); return NULL; + } return cmdq; } @@ -266,11 +322,8 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) lockdep_assert_held(&file_priv->lock); - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { - xa_erase(&file_priv->cmdq_xa, cmdq_id); - ivpu_cmdq_fini(file_priv, cmdq); - ivpu_cmdq_free(file_priv, cmdq); - } + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_destroy(file_priv, cmdq); } /* @@ -286,8 +339,10 @@ static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv) 
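Two small invariants above are easy to miss. First, the db_registered flag is gone because db_id now doubles as the marker: doorbells are allocated from a range that starts above zero, so db_id == 0 reliably means "not registered". Second, DRM job priorities fold into JSM scheduling bands with DEFAULT aliased to NORMAL. The mapping, restated (band values assumed from the enum order IDLE, NORMAL, FOCUS, REALTIME):

/* DRM: DEFAULT=0, IDLE=1, NORMAL=2, FOCUS=3, REALTIME=4
 * JSM band: IDLE=0, NORMAL=1, FOCUS=2, REALTIME=3 */
static u8 drm_prio_to_jsm_band(u8 prio)
{
	return prio ? prio - 1 : 1; /* DEFAULT maps to the NORMAL band */
}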
mutex_lock(&file_priv->lock); - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) - cmdq->db_registered = false; + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { + xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); + cmdq->db_id = 0; + } mutex_unlock(&file_priv->lock); } @@ -305,25 +360,24 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) mutex_unlock(&vdev->context_list_lock); } -static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv) -{ - struct ivpu_cmdq *cmdq; - unsigned long cmdq_id; - - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) - ivpu_cmdq_fini(file_priv, cmdq); -} - void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) { struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; lockdep_assert_held(&file_priv->lock); + ivpu_dbg(vdev, JOB, "Context ID: %u abort\n", file_priv->ctx.id); - ivpu_cmdq_fini_all(file_priv); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_unregister(file_priv, cmdq); if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) ivpu_jsm_context_release(vdev, file_priv->ctx.id); + + ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id); + + file_priv->aborted = true; } static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) @@ -416,8 +470,8 @@ static void ivpu_job_destroy(struct ivpu_job *job) struct ivpu_device *vdev = job->vdev; u32 i; - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", - job->job_id, job->file_priv->ctx.id, job->engine_idx); + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); for (i = 0; i < job->bo_count; i++) if (job->bos[i]) @@ -462,16 +516,14 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device * { struct ivpu_job *job; - xa_lock(&vdev->submitted_jobs_xa); - job = __xa_erase(&vdev->submitted_jobs_xa, job_id); + lockdep_assert_held(&vdev->submitted_jobs_lock); + job = xa_erase(&vdev->submitted_jobs_xa, job_id); if (xa_empty(&vdev->submitted_jobs_xa) && job) { vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts), vdev->busy_time); } - xa_unlock(&vdev->submitted_jobs_xa); - return job; } @@ -479,6 +531,28 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 { struct ivpu_job *job; + lockdep_assert_held(&vdev->submitted_jobs_lock); + + job = xa_load(&vdev->submitted_jobs_xa, job_id); + if (!job) + return -ENOENT; + + if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) { + guard(mutex)(&job->file_priv->lock); + + if (job->file_priv->has_mmu_faults) + return 0; + + /* + * Mark context as faulty and defer destruction of the job to jobs abort thread + * handler to synchronize between both faults and jobs returning context violation + * status and ensure both are handled in the same way + */ + job->file_priv->has_mmu_faults = true; + queue_work(system_wq, &vdev->context_abort_work); + return 0; + } + job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); if (!job) return -ENOENT; @@ -490,13 +564,17 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 dma_fence_signal(job->done_fence); trace_job("done", job); - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); 
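The context-violation branch above deliberately returns without completing the job: the context is flagged and cleanup is deferred to context_abort_work, so MMU faults and violation statuses take the same path. It also uses scope-based locking; guard(mutex) releases the lock on every return path automatically. A standalone sketch of that pattern (struct and names hypothetical):

#include <linux/cleanup.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct my_ctx {
	struct mutex lock;
	bool has_mmu_faults;
};

/* Returns 1 if this call newly flagged the context, 0 if already flagged.
 * No unlock calls needed: the guard runs mutex_unlock() at scope exit. */
static int flag_faulty(struct my_ctx *ctx)
{
	guard(mutex)(&ctx->lock);

	if (ctx->has_mmu_faults)
		return 0;

	ctx->has_mmu_faults = true;
	return 1;
}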
ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); ivpu_rpm_put(vdev); + + if (!xa_empty(&vdev->submitted_jobs_xa)) + ivpu_start_job_timeout_detection(vdev); + return 0; } @@ -505,11 +583,29 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) struct ivpu_job *job; unsigned long id; + mutex_lock(&vdev->submitted_jobs_lock); + xa_for_each(&vdev->submitted_jobs_xa, id, job) ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); + + mutex_unlock(&vdev->submitted_jobs_lock); } -static int ivpu_job_submit(struct ivpu_job *job, u8 priority) +void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) +{ + struct ivpu_job *job; + unsigned long id; + + mutex_lock(&vdev->submitted_jobs_lock); + + xa_for_each(&vdev->submitted_jobs_xa, id, job) + if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id) + ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); + + mutex_unlock(&vdev->submitted_jobs_lock); +} + +static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; @@ -521,25 +617,35 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) if (ret < 0) return ret; + mutex_lock(&vdev->submitted_jobs_lock); mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_acquire(file_priv, priority); + if (cmdq_id == 0) + cmdq = ivpu_cmdq_acquire_legacy(file_priv, priority); + else + cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id); if (!cmdq) { - ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", - file_priv->ctx.id, job->engine_idx, priority); + ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d\n", file_priv->ctx.id); ret = -EINVAL; - goto err_unlock_file_priv; + goto err_unlock; + } + + ret = ivpu_cmdq_register(file_priv, cmdq); + if (ret) { + ivpu_err(vdev, "Failed to register command queue: %d\n", ret); + goto err_unlock; } - xa_lock(&vdev->submitted_jobs_xa); + job->cmdq_id = cmdq->id; + is_first_job = xa_empty(&vdev->submitted_jobs_xa); - ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, - &file_priv->job_id_next, GFP_KERNEL); + ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); if (ret < 0) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; - goto err_unlock_submitted_jobs_xa; + goto err_unlock; } ret = ivpu_cmdq_push_job(cmdq, job); @@ -558,25 +664,25 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) } trace_job("submit", job); - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, priority, + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); - xa_unlock(&vdev->submitted_jobs_xa); - mutex_unlock(&file_priv->lock); - if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + } + + mutex_unlock(&vdev->submitted_jobs_lock); return 0; err_erase_xa: - __xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_unlock_submitted_jobs_xa: - xa_unlock(&vdev->submitted_jobs_xa); -err_unlock_file_priv: + xa_erase(&vdev->submitted_jobs_xa, 
job->job_id); +err_unlock: mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); ivpu_rpm_put(vdev); return ret; } @@ -585,7 +691,7 @@ static int ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, u32 buf_count, u32 commands_offset) { - struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = file_priv->vdev; struct ww_acquire_ctx acquire_ctx; enum dma_resv_usage usage; @@ -647,49 +753,20 @@ unlock_reservations: return ret; } -static inline u8 ivpu_job_to_hws_priority(struct ivpu_file_priv *file_priv, u8 priority) -{ - if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT) - return DRM_IVPU_JOB_PRIORITY_NORMAL; - - return priority - 1; -} - -int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id, + u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset, + u8 priority) { - struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; - struct drm_ivpu_submit *params = data; struct ivpu_job *job; u32 *buf_handles; int idx, ret; - u8 priority; - - if (params->engine != DRM_IVPU_ENGINE_COMPUTE) - return -EINVAL; - if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) - return -EINVAL; - - if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) - return -EINVAL; - - if (!IS_ALIGNED(params->commands_offset, 8)) - return -EINVAL; - - if (!file_priv->ctx.id) - return -EINVAL; - - if (file_priv->has_mmu_faults) - return -EBADFD; - - buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); + buf_handles = kcalloc(buffer_count, sizeof(u32), GFP_KERNEL); if (!buf_handles) return -ENOMEM; - ret = copy_from_user(buf_handles, - (void __user *)params->buffers_ptr, - params->buffer_count * sizeof(u32)); + ret = copy_from_user(buf_handles, buffers_ptr, buffer_count * sizeof(u32)); if (ret) { ret = -EFAULT; goto err_free_handles; @@ -700,27 +777,24 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_free_handles; } - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", - file_priv->ctx.id, params->buffer_count); + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", + file_priv->ctx.id, cmdq_id, buffer_count); - job = ivpu_job_create(file_priv, params->engine, params->buffer_count); + job = ivpu_job_create(file_priv, engine, buffer_count); if (!job) { ivpu_err(vdev, "Failed to create job\n"); ret = -ENOMEM; goto err_exit_dev; } - ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, - params->commands_offset); + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset); if (ret) { ivpu_err(vdev, "Failed to prepare job: %d\n", ret); goto err_destroy_job; } - priority = ivpu_job_to_hws_priority(file_priv, params->priority); - down_read(&vdev->pm->reset_lock); - ret = ivpu_job_submit(job, priority); + ret = ivpu_job_submit(job, priority, cmdq_id); up_read(&vdev->pm->reset_lock); if (ret) goto err_signal_fence; @@ -740,12 +814,137 @@ err_free_handles: return ret; } +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_submit *args = data; + u8 priority; + + if (args->engine != DRM_IVPU_ENGINE_COMPUTE) + return -EINVAL; + + if (args->priority > 
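A subtlety in the shared ivpu_submit() helper above: copy_from_user() returns the number of bytes left uncopied rather than an errno, hence the explicit translation to -EFAULT. Isolated:

#include <linux/types.h>
#include <linux/uaccess.h>

static int fetch_handles(u32 *dst, const void __user *src, u32 count)
{
	/* Any non-zero return means part of the user buffer was unreadable. */
	if (copy_from_user(dst, src, count * sizeof(u32)))
		return -EFAULT;

	return 0;
}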
DRM_IVPU_JOB_PRIORITY_REALTIME) + return -EINVAL; + + if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) + return -EINVAL; + + if (!IS_ALIGNED(args->commands_offset, 8)) + return -EINVAL; + + if (!file_priv->ctx.id) + return -EINVAL; + + if (file_priv->has_mmu_faults) + return -EBADFD; + + priority = ivpu_job_to_jsm_priority(args->priority); + + return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine, + (void __user *)args->buffers_ptr, args->commands_offset, priority); +} + +int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_cmdq_submit *args = data; + + if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + return -ENODEV; + + if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID) + return -EINVAL; + + if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) + return -EINVAL; + + if (!IS_ALIGNED(args->commands_offset, 8)) + return -EINVAL; + + if (!file_priv->ctx.id) + return -EINVAL; + + if (file_priv->has_mmu_faults) + return -EBADFD; + + return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE, + (void __user *)args->buffers_ptr, args->commands_offset, 0); +} + +int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_cmdq_create *args = data; + struct ivpu_cmdq *cmdq; + int ret; + + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + return -ENODEV; + + if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) + return -EINVAL; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&file_priv->lock); + + cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false); + if (cmdq) + args->cmdq_id = cmdq->id; + + mutex_unlock(&file_priv->lock); + + ivpu_rpm_put(vdev); + + return cmdq ? 
0 : -ENOMEM; +} + +int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_cmdq_destroy *args = data; + struct ivpu_cmdq *cmdq; + u32 cmdq_id = 0; + int ret; + + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + return -ENODEV; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&file_priv->lock); + + cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); + if (!cmdq || cmdq->is_legacy) { + ret = -ENOENT; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; + } + + mutex_unlock(&file_priv->lock); + + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + + return ret; +} + static void ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) { struct vpu_ipc_msg_payload_job_done *payload; - int ret; if (!jsm_msg) { ivpu_err(vdev, "IPC message has no JSM payload\n"); @@ -758,9 +957,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, } payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; - ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); - if (!ret && !xa_empty(&vdev->submitted_jobs_xa)) - ivpu_start_job_timeout_detection(vdev); + + mutex_lock(&vdev->submitted_jobs_lock); + ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); + mutex_unlock(&vdev->submitted_jobs_lock); } void ivpu_job_done_consumer_init(struct ivpu_device *vdev) @@ -773,3 +973,55 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev) { ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer); } + +void ivpu_context_abort_work_fn(struct work_struct *work) +{ + struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work); + struct ivpu_file_priv *file_priv; + struct ivpu_job *job; + unsigned long ctx_id; + unsigned long id; + + if (drm_WARN_ON(&vdev->drm, pm_runtime_get_if_active(vdev->drm.dev) <= 0)) + return; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + ivpu_jsm_reset_engine(vdev, 0); + + mutex_lock(&vdev->context_list_lock); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + if (!file_priv->has_mmu_faults || file_priv->aborted) + continue; + + mutex_lock(&file_priv->lock); + ivpu_context_abort_locked(file_priv); + mutex_unlock(&file_priv->lock); + } + mutex_unlock(&vdev->context_list_lock); + + /* + * We will not receive new MMU event interrupts until existing events are discarded + * however, we want to discard these events only after aborting the faulty context + * to avoid generating new faults from that context + */ + ivpu_mmu_discard_events(vdev); + + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + goto runtime_put; + + ivpu_jsm_hws_resume_engine(vdev, 0); + /* + * In hardware scheduling mode NPU already has stopped processing jobs + * and won't send us any further notifications, thus we have to free job related resources + * and notify userspace + */ + mutex_lock(&vdev->submitted_jobs_lock); + xa_for_each(&vdev->submitted_jobs_xa, id, job) + if (job->file_priv->aborted) + ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED); + mutex_unlock(&vdev->submitted_jobs_lock); + +runtime_put: + pm_runtime_mark_last_busy(vdev->drm.dev); + pm_runtime_put_autosuspend(vdev->drm.dev); +} diff --git 
a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 8b19e3f8b4cf..2e301c2eea7b 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -30,8 +30,8 @@ struct ivpu_cmdq { u32 entry_count; u32 id; u32 db_id; - bool db_registered; u8 priority; + bool is_legacy; }; /** @@ -51,6 +51,7 @@ struct ivpu_job { struct ivpu_file_priv *file_priv; struct dma_fence *done_fence; u64 cmd_buf_vpu_addr; + u32 cmdq_id; u32 job_id; u32 engine_idx; size_t bo_count; @@ -58,14 +59,19 @@ struct ivpu_job { }; int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); +void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id); void ivpu_job_done_consumer_init(struct ivpu_device *vdev); void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); +void ivpu_context_abort_work_fn(struct work_struct *work); void ivpu_jobs_abort_all(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 30a40be76930..219ab8afefab 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -7,6 +7,7 @@ #include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" +#include "vpu_jsm_api.h" const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) { @@ -407,26 +408,18 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev) { struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP }; struct vpu_jsm_msg resp; + struct ivpu_hw_info *hw = vdev->hw; + struct vpu_ipc_msg_payload_hws_priority_band_setup *setup = + &req.payload.hws_priority_band_setup; int ret; - /* Idle */ - req.payload.hws_priority_band_setup.grace_period[0] = 0; - req.payload.hws_priority_band_setup.process_grace_period[0] = 50000; - req.payload.hws_priority_band_setup.process_quantum[0] = 160000; - /* Normal */ - req.payload.hws_priority_band_setup.grace_period[1] = 50000; - req.payload.hws_priority_band_setup.process_grace_period[1] = 50000; - req.payload.hws_priority_band_setup.process_quantum[1] = 300000; - /* Focus */ - req.payload.hws_priority_band_setup.grace_period[2] = 50000; - req.payload.hws_priority_band_setup.process_grace_period[2] = 50000; - req.payload.hws_priority_band_setup.process_quantum[2] = 200000; - /* Realtime */ - req.payload.hws_priority_band_setup.grace_period[3] = 0; - req.payload.hws_priority_band_setup.process_grace_period[3] = 50000; - req.payload.hws_priority_band_setup.process_quantum[3] = 200000; - - req.payload.hws_priority_band_setup.normal_band_percentage = 10; + for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE; + band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) { + setup->grace_period[band] = hw->hws.grace_period[band]; + setup->process_grace_period[band] = hw->hws.process_grace_period[band]; + setup->process_quantum[band] = hw->hws.process_quantum[band]; + } + setup->normal_band_percentage = 10; ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); diff --git 
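For reference on the priority-band refactor above: the constants removed from ivpu_jsm_hws_setup_priority_bands() are now expected to come from the per-generation ivpu_hw_info.hws tables, which are populated by hw setup code outside this diff. A sketch of seeding those tables with the former defaults; the helper name is hypothetical, while the values and band order are taken from the removed lines (100ns units):

/* Hypothetical helper; band order per the removed comments: idle, normal, focus, realtime */
static void ivpu_hw_hws_seed_defaults(struct ivpu_hw_info *hw)
{
	static const u32 grace_period[]         = {      0,  50000,  50000,      0 };
	static const u32 process_grace_period[] = {  50000,  50000,  50000,  50000 };
	static const u32 process_quantum[]      = { 160000, 300000, 200000, 200000 };
	int band;

	for (band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE;
	     band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) {
		hw->hws.grace_period[band] = grace_period[band];
		hw->hws.process_grace_period[band] = process_grace_period[band];
		hw->hws.process_quantum[band] = process_quantum[band];
	}
}

This keeps the JSM message assembly generic while letting each hardware generation tune its quanta (the vpu_jsm_api.h comment later in this series notes a 1 ms minimum, i.e. 10000 in 100ns units).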
a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 26ef52fbb93e..5ea010568faa 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -20,6 +20,12 @@ #define IVPU_MMU_REG_CR0 0x00200020u #define IVPU_MMU_REG_CR0ACK 0x00200024u #define IVPU_MMU_REG_CR0ACK_VAL_MASK GENMASK(31, 0) +#define IVPU_MMU_REG_CR0_ATSCHK_MASK BIT(4) +#define IVPU_MMU_REG_CR0_CMDQEN_MASK BIT(3) +#define IVPU_MMU_REG_CR0_EVTQEN_MASK BIT(2) +#define IVPU_MMU_REG_CR0_PRIQEN_MASK BIT(1) +#define IVPU_MMU_REG_CR0_SMMUEN_MASK BIT(0) + #define IVPU_MMU_REG_CR1 0x00200028u #define IVPU_MMU_REG_CR2 0x0020002cu #define IVPU_MMU_REG_IRQ_CTRL 0x00200050u @@ -141,12 +147,6 @@ #define IVPU_MMU_IRQ_EVTQ_EN BIT(2) #define IVPU_MMU_IRQ_GERROR_EN BIT(0) -#define IVPU_MMU_CR0_ATSCHK BIT(4) -#define IVPU_MMU_CR0_CMDQEN BIT(3) -#define IVPU_MMU_CR0_EVTQEN BIT(2) -#define IVPU_MMU_CR0_PRIQEN BIT(1) -#define IVPU_MMU_CR0_SMMUEN BIT(0) - #define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10) #define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8) #define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6) @@ -596,7 +596,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0); REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0); - val = IVPU_MMU_CR0_CMDQEN; + val = REG_SET_FLD(IVPU_MMU_REG_CR0, CMDQEN, 0); ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; @@ -617,12 +617,12 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0); REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0); - val |= IVPU_MMU_CR0_EVTQEN; + val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val); ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; - val |= IVPU_MMU_CR0_ATSCHK; + val = REG_SET_FLD(IVPU_MMU_REG_CR0, ATSCHK, val); ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; @@ -631,7 +631,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) if (ret) return ret; - val |= IVPU_MMU_CR0_SMMUEN; + val = REG_SET_FLD(IVPU_MMU_REG_CR0, SMMUEN, val); return ivpu_mmu_reg_write_cr0(vdev, val); } @@ -725,8 +725,8 @@ static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_d cd[2] = 0; cd[3] = 0x0000000000007444; - /* For global context generate memory fault on VPU */ - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + /* For global and reserved contexts generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID || ssid == IVPU_RESERVED_CONTEXT_MMU_SSID) cd[0] |= IVPU_MMU_CD_0_A; if (valid) @@ -870,28 +870,107 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) return evt; } +static int ivpu_mmu_evtq_set(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(IVPU_MMU_REG_CR0); + + if (enable) + val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val); + else + val = REG_CLR_FLD(IVPU_MMU_REG_CR0, EVTQEN, val); + REGV_WR32(IVPU_MMU_REG_CR0, val); + + return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US); +} + +static int ivpu_mmu_evtq_enable(struct ivpu_device *vdev) +{ + return ivpu_mmu_evtq_set(vdev, true); +} + +static int ivpu_mmu_evtq_disable(struct ivpu_device *vdev) +{ + return ivpu_mmu_evtq_set(vdev, false); +} + +void ivpu_mmu_discard_events(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + + mutex_lock(&mmu->lock); + /* + * Disable event queue (stop MMU from updating the producer) + * to allow synchronization of consumer and producer indexes + */ + ivpu_mmu_evtq_disable(vdev); + + vdev->mmu->evtq.cons = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 
vdev->mmu->evtq.cons); + vdev->mmu->evtq.prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); + + ivpu_mmu_evtq_enable(vdev); + + drm_WARN_ON_ONCE(&vdev->drm, vdev->mmu->evtq.cons != vdev->mmu->evtq.prod); + + mutex_unlock(&mmu->lock); +} + +int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + u64 *entry; + u64 val; + + if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) + return -EINVAL; + + mutex_lock(&mmu->lock); + + entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); + + val = READ_ONCE(entry[0]); + val &= ~IVPU_MMU_CD_0_R; + WRITE_ONCE(entry[0], val); + + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); + + ivpu_mmu_cmdq_write_cfgi_all(vdev); + ivpu_mmu_cmdq_sync(vdev); + + mutex_unlock(&mmu->lock); + + return 0; +} + void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) { + struct ivpu_file_priv *file_priv; u32 *event; u32 ssid; ivpu_dbg(vdev, IRQ, "MMU event queue\n"); - while ((event = ivpu_mmu_get_event(vdev)) != NULL) { - ivpu_mmu_dump_event(vdev, event); - - ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) { + while ((event = ivpu_mmu_get_event(vdev))) { + ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, *event); + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID || + ssid == IVPU_RESERVED_CONTEXT_MMU_SSID) { + ivpu_mmu_dump_event(vdev, event); ivpu_pm_trigger_recovery(vdev, "MMU event"); return; } - ivpu_mmu_user_context_mark_invalid(vdev, ssid); - REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); + file_priv = xa_load(&vdev->context_xa, ssid); + if (file_priv) { + if (!READ_ONCE(file_priv->has_mmu_faults)) { + ivpu_mmu_dump_event(vdev, event); + WRITE_ONCE(file_priv->has_mmu_faults, true); + } + } } - if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ)) - ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); + queue_work(system_wq, &vdev->context_abort_work); } void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index 7afea9cd8731..1ce7529746ad 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -47,5 +47,7 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); void ivpu_mmu_evtq_dump(struct ivpu_device *vdev); +void ivpu_mmu_discard_events(struct ivpu_device *vdev); +int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid); #endif /* __IVPU_MMU_H__ */ diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 0af614dfb6f9..f0267efa55aa 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -635,16 +635,3 @@ void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) ivpu_mmu_cd_clear(vdev, vdev->rctx.id); ivpu_mmu_context_fini(vdev, &vdev->rctx); } - -void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) -{ - struct ivpu_file_priv *file_priv; - - xa_lock(&vdev->context_xa); - - file_priv = xa_load(&vdev->context_xa, ssid); - if (file_priv) - file_priv->has_mmu_faults = true; - - xa_unlock(&vdev->context_xa); -} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index 8042fc067062..f255310968cf 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -37,8 +37,6 @@ void 
ivpu_mmu_global_context_fini(struct ivpu_device *vdev); int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev); void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev); -void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); - int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, u64 size, struct drm_mm_node *node); void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node); diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index ffe7b10f8a76..2a043baf10ca 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -4,6 +4,7 @@ */ #include <drm/drm_file.h> +#include <linux/pm_runtime.h> #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); if (get_instance_by_mask(file_priv, args->metric_group_mask)) { @@ -96,6 +101,8 @@ err_free_ms: kfree(ms); unlock: mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); return ret; } @@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * unlock: mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ret; } @@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file { struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_device *vdev = file_priv->vdev; struct ivpu_ms_instance *ms; + int ret; if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ms ? 
0 : -EINVAL; } @@ -281,6 +300,9 @@ unlock: void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) { struct ivpu_ms_instance *ms, *tmp; + struct ivpu_device *vdev = file_priv->vdev; + + pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&file_priv->ms_lock); @@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) free_instance(file_priv, ms); mutex_unlock(&file_priv->ms_lock); + + pm_runtime_put_autosuspend(vdev->drm.dev); } void ivpu_ms_cleanup_all(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 5060c5dd40d1..ea30db181cd7 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -34,6 +34,7 @@ module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); #define PM_RESCHEDULE_LIMIT 5 +#define PM_TDR_HEARTBEAT_LIMIT 30 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) { @@ -44,6 +45,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; + fw->last_heartbeat = 0; } static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) @@ -177,16 +179,11 @@ void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) return; } - if (ivpu_is_fpga(vdev)) { - ivpu_err(vdev, "Recovery not available on FPGA\n"); - return; - } - /* Trigger recovery if it's not in progress */ if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) { ivpu_hw_diagnose_failure(vdev); ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */ - queue_work(system_long_wq, &vdev->pm->recovery_work); + queue_work(system_unbound_wq, &vdev->pm->recovery_work); } } @@ -194,7 +191,24 @@ static void ivpu_job_timeout_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); struct ivpu_device *vdev = pm->vdev; + u64 heartbeat; + if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) { + ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n"); + goto recovery; + } + + if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) { + ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n"); + goto recovery; + } + + vdev->fw->last_heartbeat = heartbeat; + ivpu_start_job_timeout_detection(vdev); + return; + +recovery: + atomic_set(&vdev->job_timeout_counter, 0); ivpu_pm_trigger_recovery(vdev, "TDR"); } @@ -209,6 +223,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev) { cancel_delayed_work_sync(&vdev->pm->job_timeout_work); + atomic_set(&vdev->job_timeout_counter, 0); } int ivpu_pm_suspend_cb(struct device *dev) @@ -433,16 +448,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -450,27 +466,29 
@@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } -void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev) +void ivpu_pm_irq_dct_work_fn(struct work_struct *work) { + struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_dct_work); bool enable; int ret; if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index b70efe6c36e4..89b264cc0e3e 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -45,6 +45,6 @@ void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); int ivpu_pm_dct_init(struct ivpu_device *vdev); int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent); int ivpu_pm_dct_disable(struct ivpu_device *vdev); -void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev); +void ivpu_pm_irq_dct_work_fn(struct work_struct *work); #endif /* __IVPU_PM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 616477fc17fa..268ab7744a8b 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -1,17 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include <linux/device.h> #include <linux/err.h> +#include <linux/pm_runtime.h> +#include <linux/units.h> #include "ivpu_drv.h" +#include "ivpu_gem.h" #include "ivpu_fw.h" #include "ivpu_hw.h" #include "ivpu_sysfs.h" -/* +/** + * DOC: npu_busy_time_us + * * npu_busy_time_us is the time that the device spent executing jobs. * The time is counted when and only when there are jobs submitted to firmware. * @@ -30,11 +35,12 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b struct ivpu_device *vdev = to_ivpu_device(drm); ktime_t total, now = 0; - xa_lock(&vdev->submitted_jobs_xa); + mutex_lock(&vdev->submitted_jobs_lock); + total = vdev->busy_time; if (!xa_empty(&vdev->submitted_jobs_xa)) now = ktime_sub(ktime_get(), vdev->busy_start_ts); - xa_unlock(&vdev->submitted_jobs_xa); + mutex_unlock(&vdev->submitted_jobs_lock); return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now))); } @@ -42,6 +48,30 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b static DEVICE_ATTR_RO(npu_busy_time_us); /** + * DOC: npu_memory_utilization + * + * The npu_memory_utilization attribute reports the current NPU memory utilization, in bytes. 
+ * + */ +static ssize_t +npu_memory_utilization_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + struct ivpu_bo *bo; + u64 total_npu_memory = 0; + + mutex_lock(&vdev->bo_list_lock); + list_for_each_entry(bo, &vdev->bo_list, bo_list_node) + total_npu_memory += bo->base.base.size; + mutex_unlock(&vdev->bo_list_lock); + + return sysfs_emit(buf, "%lld\n", total_npu_memory); +} + +static DEVICE_ATTR_RO(npu_memory_utilization); + +/** * DOC: sched_mode * * The sched_mode is used to report current NPU scheduling mode. @@ -62,9 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR_RO(sched_mode); +/** + * DOC: npu_max_frequency_mhz + * + * The npu_max_frequency_mhz shows the maximum frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_max_frequency_mhz); + +/** + * DOC: npu_current_frequency_mhz + * + * The npu_current_frequency_mhz shows the current frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = 0; + + /* Read the frequency only if the device is active, otherwise report 0 */ + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { + freq = ivpu_hw_dpu_freq_get(vdev); + + pm_runtime_put_autosuspend(vdev->drm.dev); + } + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_current_frequency_mhz); + static struct attribute *ivpu_dev_attrs[] = { &dev_attr_npu_busy_time_us.attr, + &dev_attr_npu_memory_utilization.attr, &dev_attr_sched_mode.attr, + &dev_attr_npu_max_frequency_mhz.attr, + &dev_attr_npu_current_frequency_mhz.attr, NULL, };
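Given the expanded attribute set above, a minimal userspace reader might look like this. Illustrative only: the sysfs path below is an assumption (the attributes hang off the accel device's parent device; the accel node index depends on enumeration order).

#include <stdio.h>

/* Read one of the ivpu sysfs attributes as a number; returns -1 on error. */
static long long ivpu_read_attr(const char *name)
{
	char path[256];
	long long val;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/accel/accel0/device/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%lld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

/*
 * e.g. ivpu_read_attr("npu_busy_time_us"), ivpu_read_attr("npu_memory_utilization"),
 * ivpu_read_attr("npu_current_frequency_mhz"). Per the show() callbacks above, the
 * current frequency reads as 0 while the device is runtime-suspended, and the busy
 * time keeps accumulating while jobs are in flight.
 */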
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 908e68ea1c39..218468bbbcad 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -26,7 +26,7 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 26 +#define VPU_BOOT_API_VER_MINOR 28 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -76,8 +76,15 @@ struct vpu_firmware_header { * submission queue size and device capabilities. */ u32 preemption_buffer_2_size; + /* + * Maximum preemption buffer size that the FW can use: no need for the host + * driver to allocate more space than that specified by these fields. + * A value of 0 means no declared limit. + */ + u32 preemption_buffer_1_max_size; + u32 preemption_buffer_2_max_size; /* Space reserved for future preemption-related fields. */ - u32 preemption_reserved[6]; + u32 preemption_reserved[4]; /* FW image read only section start address, 4KB aligned */ u64 ro_section_start_address; /* FW image read only section size, 4KB aligned */ @@ -134,7 +141,7 @@ enum vpu_trace_destination { /* * Processor bit shifts (for loggable HW components). */ -#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_RESERVED 0 #define VPU_TRACE_PROC_BIT_LRT 1 #define VPU_TRACE_PROC_BIT_LNN 2 #define VPU_TRACE_PROC_BIT_SHV_0 3 diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7215c144158c..4b6b2b3d2583 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 25 +#define VPU_JSM_API_VER_MINOR 29 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -53,8 +53,7 @@ * Engine indexes. */ #define VPU_ENGINE_COMPUTE 0 -#define VPU_ENGINE_COPY 1 -#define VPU_ENGINE_NB 2 +#define VPU_ENGINE_NB 1 /* * VPU status values. @@ -126,11 +125,13 @@ enum { * When set, indicates that job queue uses native fences (as inline commands * in job queue). Such queues may also use legacy fences (as commands in batch buffers). * When cleared, indicates the job queue only uses legacy fences. - * NOTE: For queues using native fences, VPU expects that all jobs in the queue - * are immediately followed by an inline command object. This object is expected - * to be a fence signal command in most cases, but can also be a NOP in case the host - * does not need per-job fence signalling. Other inline commands objects can be - * inserted between "job and inline command" pairs. + * NOTES: + * 1. For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline command objects can be + * inserted between "job and inline command" pairs. + * 2. Native fence queues are only supported on VPU 40xx onwards. */ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), @@ -275,6 +276,8 @@ struct vpu_inline_cmd { u64 value; /* User VA of the log buffer in which to add log entry on completion. */ u64 log_buffer_va; + /* NPU private data. */ + u64 npu_private_data; } fence; /* Other commands do not have a payload. */ /* Payload definition for future inline commands can be inserted here. */ @@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update { /** Metric group mask that identifies metric streamer instance. */ u64 metric_group_mask; /** - * Address and size of the buffer where the VPU will write metric data. If - * the buffer address is 0 or same as the currently used buffer the VPU will - * continue writing metric data to the current buffer. In this case the - * buffer size is ignored and the size of the current buffer is unchanged. - * If the address is non-zero and differs from the current buffer address the - * VPU will immediately switch data collection to the new buffer. + * Address and size of the buffer where the VPU will write metric data. + * This member determines how the update operation behaves: + * 1. the client needs information about the number of collected samples and the + * amount of data written to the current buffer + * 2. the client wants to switch to a new buffer + + * Case 1. is identified by the buffer address being 0 or the same as the + * currently used buffer address. In this case the buffer size is ignored and + * the size of the current buffer is unchanged. The VPU will return an update + * in the vpu_jsm_metric_streamer_done structure. 
The internal writing position + * into the buffer is not changed. + * + * Case 2. is identified by the address being non-zero and differs from the + * current buffer address. The VPU will immediately switch data collection to + * the new buffer. Then the VPU will return an update in the + * vpu_jsm_metric_streamer_done structure. */ u64 buffer_addr; u64 buffer_size; @@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* * Default quantum in 100ns units for scheduling across processes * within a priority band + * Minimum value supported by NPU is 1ms (10000 in 100ns units). */ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; /* @@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { * in situations when it's starved by the focus band. */ u32 normal_band_percentage; - /* Reserved */ - u32 reserved_0; + /* + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. + */ + u32 tdr_timeout; }; /* @@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { s32 in_process_priority; /* Zero padding / Reserved */ u32 reserved_1; - /* Context quantum relative to other contexts of same priority in the same process */ + /* + * Context quantum relative to other contexts of same priority in the same process + * Minimum value supported by NPU is 1ms (10000 in 100ns units). + */ u64 context_quantum; /* Grace period when preempting context of the same priority within the same process */ u64 grace_period_same_priority; diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c index 8ab82e78dd94..13a14c6c6168 100644 --- a/drivers/accel/qaic/mhi_controller.c +++ b/drivers/accel/qaic/mhi_controller.c @@ -20,6 +20,11 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */ module_param(mhi_timeout_ms, uint, 0600); MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value"); +static const char *fw_image_paths[FAMILY_MAX] = { + [FAMILY_AIC100] = "qcom/aic100/sbl.bin", + [FAMILY_AIC200] = "qcom/aic200/sbl.bin", +}; + static const struct mhi_channel_config aic100_channels[] = { { .name = "QAIC_LOOPBACK", @@ -439,6 +444,297 @@ static const struct mhi_channel_config aic100_channels[] = { }, }; +static const struct mhi_channel_config aic200_channels[] = { + { + .name = "QAIC_LOOPBACK", + .num = 0, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOOPBACK", + .num = 1, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 2, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 3, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + 
.lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 6, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 7, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 10, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 11, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 12, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 13, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 14, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 15, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 16, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 17, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch 
= false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC_PERIODIC", + .num = 22, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC_PERIODIC", + .num = 23, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 24, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 25, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = true, + .wake_capable = false, + }, +}; + static struct mhi_event_config aic100_events[] = { { .num_elements = 32, @@ -454,16 +750,44 @@ static struct mhi_event_config aic100_events[] = { }, }; -static struct mhi_controller_config aic100_config = { - .max_channels = 128, - .timeout_ms = 0, /* controlled by mhi_timeout */ - .buf_len = 0, - .num_channels = ARRAY_SIZE(aic100_channels), - .ch_cfg = aic100_channels, - .num_events = ARRAY_SIZE(aic100_events), - .event_cfg = aic100_events, - .use_bounce_buf = false, - .m2_no_db = false, +static struct mhi_event_config aic200_events[] = { + { + .num_elements = 32, + .irq_moderation_ms = 0, + .irq = 0, + .channel = U32_MAX, + .priority = 1, + .mode = MHI_DB_BRST_DISABLE, + .data_type = MHI_ER_CTRL, + .hardware_event = false, + .client_managed = false, + .offload_channel = false, + }, +}; + +static struct mhi_controller_config mhi_cntrl_configs[] = { + [FAMILY_AIC100] = { + .max_channels = 128, + .timeout_ms = 0, /* controlled by mhi_timeout */ + .buf_len = 0, + .num_channels = ARRAY_SIZE(aic100_channels), + .ch_cfg = aic100_channels, + .num_events = ARRAY_SIZE(aic100_events), + .event_cfg = aic100_events, + .use_bounce_buf = false, + .m2_no_db = false, + }, + [FAMILY_AIC200] = { + .max_channels = 128, + .timeout_ms = 0, /* controlled by mhi_timeout */ + .buf_len = 0, + .num_channels = ARRAY_SIZE(aic200_channels), + .ch_cfg = aic200_channels, + .num_events = ARRAY_SIZE(aic200_events), + .event_cfg = aic200_events, + .use_bounce_buf = false, + .m2_no_db = false, + }, }; static int mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 *out) @@ -545,8 +869,9 @@ static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl) } struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar, - int mhi_irq, bool shared_msi) + int mhi_irq, bool shared_msi, int family) { + struct mhi_controller_config mhi_config = mhi_cntrl_configs[family]; struct mhi_controller *mhi_cntrl; int ret; @@ -581,11 +906,18 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, voi if (shared_msi) 
/* MSI shared with data path, no IRQF_NO_SUSPEND */ mhi_cntrl->irq_flags = IRQF_SHARED; - mhi_cntrl->fw_image = "qcom/aic100/sbl.bin"; + mhi_cntrl->fw_image = fw_image_paths[family]; + + if (family == FAMILY_AIC200) { + mhi_cntrl->name = "AIC200"; + mhi_cntrl->seg_len = SZ_512K; + } else { + mhi_cntrl->name = "AIC100"; + } /* use latest configured timeout */ - aic100_config.timeout_ms = mhi_timeout_ms; - ret = mhi_register_controller(mhi_cntrl, &aic100_config); + mhi_config.timeout_ms = mhi_timeout_ms; + ret = mhi_register_controller(mhi_cntrl, &mhi_config); if (ret) { pci_err(pci_dev, "mhi_register_controller failed %d\n", ret); return ERR_PTR(ret); diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h index 500e7f4af2af..8939f6ae185e 100644 --- a/drivers/accel/qaic/mhi_controller.h +++ b/drivers/accel/qaic/mhi_controller.h @@ -8,7 +8,7 @@ #define MHICONTROLLERQAIC_H_ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar, - int mhi_irq, bool shared_msi); + int mhi_irq, bool shared_msi, int family); void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up); void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl); void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl); diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index 02561b6cecc6..0dbb8e32e4b9 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -32,6 +32,12 @@ #define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */ #define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev) +enum aic_families { + FAMILY_AIC100, + FAMILY_AIC200, + FAMILY_MAX, +}; + enum __packed dev_states { /* Device is offline or will be very soon */ QAIC_OFFLINE, @@ -113,10 +119,10 @@ struct qaic_device { struct pci_dev *pdev; /* Req. 
ID of request that will be queued next in MHI control device */ u32 next_seq_num; - /* Base address of bar 0 */ - void __iomem *bar_0; - /* Base address of bar 2 */ - void __iomem *bar_2; + /* Base address of the MHI bar */ + void __iomem *bar_mhi; + /* Base address of the DBCs bar */ + void __iomem *bar_dbc; /* Controller structure for MHI devices */ struct mhi_controller *mhi_cntrl; /* MHI control channel device */ diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 43aba57b48f0..1bce1af7c72c 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -609,7 +609,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc struct scatterlist *sg; int ret = 0; - if (obj->import_attach) + if (drm_gem_is_imported(obj)) return -EINVAL; for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { @@ -630,7 +630,7 @@ static void qaic_free_object(struct drm_gem_object *obj) { struct qaic_bo *bo = to_qaic_bo(obj); - if (obj->import_attach) { + if (drm_gem_is_imported(obj)) { /* DMABUF/PRIME Path */ drm_prime_gem_destroy(obj, NULL); } else { @@ -870,7 +870,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, { int ret; - if (bo->base.import_attach) + if (drm_gem_is_imported(&bo->base)) ret = qaic_prepare_import_bo(bo, hdr); else ret = qaic_prepare_export_bo(qdev, bo, hdr); @@ -894,7 +894,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) { - if (bo->base.import_attach) + if (drm_gem_is_imported(&bo->base)) qaic_unprepare_import_bo(bo); else qaic_unprepare_export_bo(qdev, bo); diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index ba0cf2f94732..a991b8198dc4 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -240,7 +240,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d mhi_unprepare: mhi_unprepare_from_transfer(mhi_dev); destroy_workqueue: - flush_workqueue(qdev->bootlog_wq); destroy_workqueue(qdev->bootlog_wq); out: return ret; @@ -253,7 +252,6 @@ static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev) qdev = dev_get_drvdata(&mhi_dev->dev); mhi_unprepare_from_transfer(qdev->bootlog_ch); - flush_workqueue(qdev->bootlog_wq); destroy_workqueue(qdev->bootlog_wq); qdev->bootlog_ch = NULL; } diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 81819b9ef8d4..3b415e2c9431 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -34,13 +34,46 @@ MODULE_IMPORT_NS("DMA_BUF"); -#define PCI_DEV_AIC080 0xa080 -#define PCI_DEV_AIC100 0xa100 +#define PCI_DEVICE_ID_QCOM_AIC080 0xa080 +#define PCI_DEVICE_ID_QCOM_AIC100 0xa100 +#define PCI_DEVICE_ID_QCOM_AIC200 0xa110 #define QAIC_NAME "qaic" #define QAIC_DESC "Qualcomm Cloud AI Accelerators" #define CNTL_MAJOR 5 #define CNTL_MINOR 0 +struct qaic_device_config { + /* Indicates the AIC family the device belongs to */ + int family; + /* A bitmask representing the available BARs */ + int bar_mask; + /* An index value used to identify the MHI controller BAR */ + unsigned int mhi_bar_idx; + /* An index value used to identify the DBCs BAR */ + unsigned int dbc_bar_idx; +}; + +static const struct qaic_device_config aic080_config = { + .family = FAMILY_AIC100, + .bar_mask = BIT(0) | BIT(2) | BIT(4), + .mhi_bar_idx = 0, + .dbc_bar_idx = 2, +}; + +static const struct qaic_device_config aic100_config = { + 
.family = FAMILY_AIC100, + .bar_mask = BIT(0) | BIT(2) | BIT(4), + .mhi_bar_idx = 0, + .dbc_bar_idx = 2, +}; + +static const struct qaic_device_config aic200_config = { + .family = FAMILY_AIC200, + .bar_mask = BIT(0) | BIT(1) | BIT(2) | BIT(4), + .mhi_bar_idx = 1, + .dbc_bar_idx = 2, +}; + bool datapath_polling; module_param(datapath_polling, bool, 0400); MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); @@ -352,7 +385,8 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) release_dbc(qdev, i); } -static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id) +static struct qaic_device *create_qdev(struct pci_dev *pdev, + const struct qaic_device_config *config) { struct device *dev = &pdev->dev; struct qaic_drm_device *qddev; @@ -365,12 +399,10 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de return NULL; qdev->dev_state = QAIC_OFFLINE; - if (id->device == PCI_DEV_AIC080 || id->device == PCI_DEV_AIC100) { - qdev->num_dbc = 16; - qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); - if (!qdev->dbc) - return NULL; - } + qdev->num_dbc = 16; + qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); + if (!qdev->dbc) + return NULL; qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm); if (IS_ERR(qddev)) @@ -426,17 +458,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de return qdev; } -static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) +static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev, + const struct qaic_device_config *config) { int bars; int ret; - bars = pci_select_bars(pdev, IORESOURCE_MEM); + bars = pci_select_bars(pdev, IORESOURCE_MEM) & 0x3f; /* make sure the device has the expected BARs */ - if (bars != (BIT(0) | BIT(2) | BIT(4))) { - pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in device. Found 0x%x\n", - __func__, bars); + if (bars != config->bar_mask) { + pci_dbg(pdev, "%s: expected BARs %#x not found in device. 
Found %#x\n", + __func__, config->bar_mask, bars); return -EINVAL; } @@ -449,13 +482,13 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) return ret; dma_set_max_seg_size(&pdev->dev, UINT_MAX); - qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]); - if (IS_ERR(qdev->bar_0)) - return PTR_ERR(qdev->bar_0); + qdev->bar_mhi = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->mhi_bar_idx]); + if (IS_ERR(qdev->bar_mhi)) + return PTR_ERR(qdev->bar_mhi); - qdev->bar_2 = devm_ioremap_resource(&pdev->dev, &pdev->resource[2]); - if (IS_ERR(qdev->bar_2)) - return PTR_ERR(qdev->bar_2); + qdev->bar_dbc = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->dbc_bar_idx]); + if (IS_ERR(qdev->bar_dbc)) + return PTR_ERR(qdev->bar_dbc); /* Managed release since we use pcim_enable_device above */ pci_set_master(pdev); @@ -465,14 +498,15 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev) { + int irq_count = qdev->num_dbc + 1; int mhi_irq; int ret; int i; /* Managed release since we use pcim_enable_device */ - ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI); + ret = pci_alloc_irq_vectors(pdev, irq_count, irq_count, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (ret == -ENOSPC) { - ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (ret < 0) return ret; @@ -485,7 +519,8 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev) * interrupted, it shouldn't race with itself. */ qdev->single_msi = true; - pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n"); + pci_info(pdev, "Allocating %d MSIs failed, operating in 1 MSI mode. 
Performance may be impacted.\n", + irq_count); } else if (ret < 0) { return ret; } @@ -515,21 +550,22 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev) static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct qaic_device_config *config = (struct qaic_device_config *)id->driver_data; struct qaic_device *qdev; int mhi_irq; int ret; int i; - qdev = create_qdev(pdev, id); + qdev = create_qdev(pdev, config); if (!qdev) return -ENOMEM; - ret = init_pci(qdev, pdev); + ret = init_pci(qdev, pdev, config); if (ret) return ret; for (i = 0; i < qdev->num_dbc; ++i) - qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i); + qdev->dbc[i].dbc_base = qdev->bar_dbc + QAIC_DBC_OFF(i); mhi_irq = init_msi(qdev, pdev); if (mhi_irq < 0) @@ -539,8 +575,8 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) return ret; - qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq, - qdev->single_msi); + qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_mhi, mhi_irq, + qdev->single_msi, config->family); if (IS_ERR(qdev->mhi_cntrl)) { ret = PTR_ERR(qdev->mhi_cntrl); qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); @@ -607,8 +643,9 @@ static struct mhi_driver qaic_mhi_driver = { }; static const struct pci_device_id qaic_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC080), }, - { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), }, + { PCI_DEVICE_DATA(QCOM, AIC080, (kernel_ulong_t)&aic080_config), }, + { PCI_DEVICE_DATA(QCOM, AIC100, (kernel_ulong_t)&aic100_config), }, + { PCI_DEVICE_DATA(QCOM, AIC200, (kernel_ulong_t)&aic200_config), }, { } }; MODULE_DEVICE_TABLE(pci, qaic_ids); diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c index 301f4462d51b..972833fabcfc 100644 --- a/drivers/accel/qaic/qaic_timesync.c +++ b/drivers/accel/qaic/qaic_timesync.c @@ -201,7 +201,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi goto free_sync_msg; /* Qtimer register pointer */ - mqtsdev->qtimer_addr = qdev->bar_0 + QTIMER_REG_OFFSET; + mqtsdev->qtimer_addr = qdev->bar_mhi + QTIMER_REG_OFFSET; timer_setup(timer, qaic_timesync_timer, 0); timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms); add_timer(timer); @@ -221,7 +221,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev) { struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); - del_timer_sync(&mqtsdev->timer); + timer_delete_sync(&mqtsdev->timer); mhi_unprepare_from_transfer(mqtsdev->mhi_dev); kfree(mqtsdev->sync_msg); kfree(mqtsdev); diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c index 21d58aed0deb..3ebcc1f7ff58 100644 --- a/drivers/accel/qaic/sahara.c +++ b/drivers/accel/qaic/sahara.c @@ -160,7 +160,7 @@ struct sahara_context { struct work_struct fw_work; struct work_struct dump_work; struct mhi_device *mhi_dev; - const char **image_table; + const char * const *image_table; u32 table_size; u32 active_image_id; const struct firmware *firmware; @@ -177,7 +177,7 @@ struct sahara_context { bool is_mem_dump_mode; }; -static const char *aic100_image_table[] = { +static const char * const aic100_image_table[] = { [1] = "qcom/aic100/fw1.bin", [2] = "qcom/aic100/fw2.bin", [4] = "qcom/aic100/fw4.bin", @@ -188,6 +188,34 @@ static const char *aic100_image_table[] = { [10] = "qcom/aic100/fw10.bin", }; +static const char * const aic200_image_table[] = { + [5] = "qcom/aic200/uefi.elf", + [12] = "qcom/aic200/aic200-nsp.bin", + [23] = 
"qcom/aic200/aop.mbn", + [32] = "qcom/aic200/tz.mbn", + [33] = "qcom/aic200/hypvm.mbn", + [39] = "qcom/aic200/aic200_abl.elf", + [40] = "qcom/aic200/apdp.mbn", + [41] = "qcom/aic200/devcfg.mbn", + [42] = "qcom/aic200/sec.elf", + [43] = "qcom/aic200/aic200-hlos.elf", + [49] = "qcom/aic200/shrm.elf", + [50] = "qcom/aic200/cpucp.elf", + [51] = "qcom/aic200/aop_devcfg.mbn", + [57] = "qcom/aic200/cpucp_dtbs.elf", + [62] = "qcom/aic200/uefi_dtbs.elf", + [63] = "qcom/aic200/xbl_ac_config.mbn", + [64] = "qcom/aic200/tz_ac_config.mbn", + [65] = "qcom/aic200/hyp_ac_config.mbn", + [66] = "qcom/aic200/pdp.elf", + [67] = "qcom/aic200/pdp_cdb.elf", + [68] = "qcom/aic200/sdi.mbn", + [69] = "qcom/aic200/dcd.mbn", + [73] = "qcom/aic200/gearvm.mbn", + [74] = "qcom/aic200/sti.bin", + [75] = "qcom/aic200/pvs.bin", +}; + static int sahara_find_image(struct sahara_context *context, u32 image_id) { int ret; @@ -748,8 +776,15 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_ context->mhi_dev = mhi_dev; INIT_WORK(&context->fw_work, sahara_processing); INIT_WORK(&context->dump_work, sahara_dump_processing); - context->image_table = aic100_image_table; - context->table_size = ARRAY_SIZE(aic100_image_table); + + if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) { + context->image_table = aic200_image_table; + context->table_size = ARRAY_SIZE(aic200_image_table); + } else { + context->image_table = aic100_image_table; + context->table_size = ARRAY_SIZE(aic100_image_table); + } + context->active_image_id = SAHARA_IMAGE_ID_NONE; dev_set_drvdata(&mhi_dev->dev, context); |