Diffstat (limited to 'drivers/accel')
122 files changed, 16514 insertions, 4490 deletions
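Before the patch body, a quick orientation: the Kconfig hunk below adds CONFIG_DRM_ACCEL_AMDXDNA under the accel subsystem, so building the driver as a module reduces to a config fragment along these lines. This is a minimal sketch based only on the dependencies listed in that hunk (plus DRM_ACCEL's own dependency on DRM); the selected symbols (DRM_SCHED, DRM_GEM_SHMEM_HELPER, FW_LOADER, HMM_MIRROR) are pulled in automatically, and the exact set needed will depend on the rest of the kernel configuration:

    # Sketch of a .config fragment for the driver added by this patch
    CONFIG_X86_64=y
    CONFIG_PCI=y
    CONFIG_AMD_IOMMU=y
    CONFIG_DRM=y                  # DRM_ACCEL lives under the DRM subsystem
    CONFIG_DRM_ACCEL=y
    CONFIG_DRM_ACCEL_AMDXDNA=m    # per the Kconfig help text, the module is amdxdna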
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig index 64065fb8922b..5b9490367a39 100644 --- a/drivers/accel/Kconfig +++ b/drivers/accel/Kconfig @@ -24,6 +24,7 @@ menuconfig DRM_ACCEL different device files, called accel/accel* (in /dev, sysfs and debugfs). +source "drivers/accel/amdxdna/Kconfig" source "drivers/accel/habanalabs/Kconfig" source "drivers/accel/ivpu/Kconfig" source "drivers/accel/qaic/Kconfig" diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile index ab3df932937f..a301fb6089d4 100644 --- a/drivers/accel/Makefile +++ b/drivers/accel/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/ obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/ obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/ obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/ diff --git a/drivers/accel/amdxdna/Kconfig b/drivers/accel/amdxdna/Kconfig new file mode 100644 index 000000000000..f39d7a87296c --- /dev/null +++ b/drivers/accel/amdxdna/Kconfig @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_AMDXDNA + tristate "AMD AI Engine" + depends on AMD_IOMMU + depends on DRM_ACCEL + depends on PCI && HAS_IOMEM + depends on X86_64 + select DRM_SCHED + select DRM_GEM_SHMEM_HELPER + select FW_LOADER + select HMM_MIRROR + help + Choose this option to enable support for NPU integrated into AMD + client CPUs like AMD Ryzen AI 300 Series. AMD NPU can be used to + accelerate machine learning applications. + + If "M" is selected, the driver module will be amdxdna. diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile new file mode 100644 index 000000000000..0e9adf6890a0 --- /dev/null +++ b/drivers/accel/amdxdna/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-only + +amdxdna-y := \ + aie2_ctx.o \ + aie2_error.o \ + aie2_message.o \ + aie2_pci.o \ + aie2_pm.o \ + aie2_psp.o \ + aie2_smu.o \ + aie2_solver.o \ + amdxdna_ctx.o \ + amdxdna_gem.o \ + amdxdna_mailbox.o \ + amdxdna_mailbox_helper.o \ + amdxdna_pci_drv.o \ + amdxdna_sysfs.o \ + npu1_regs.o \ + npu2_regs.o \ + npu4_regs.o \ + npu5_regs.o \ + npu6_regs.o +obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO new file mode 100644 index 000000000000..5119bccd1917 --- /dev/null +++ b/drivers/accel/amdxdna/TODO @@ -0,0 +1,3 @@ +- Add import and export BO support +- Add debugfs support +- Add debug BO support diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c new file mode 100644 index 000000000000..5f43db02b240 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -0,0 +1,910 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/drm_syncobj.h> +#include <linux/hmm.h> +#include <linux/types.h> +#include <linux/xarray.h> +#include <trace/events/amdxdna.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "aie2_solver.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +static bool force_cmdlist; +module_param(force_cmdlist, bool, 0600); +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); + +#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ + +static void aie2_job_release(struct kref *ref) +{ + struct amdxdna_sched_job *job; + + job = container_of(ref, struct amdxdna_sched_job, refcnt); + amdxdna_sched_job_cleanup(job); + if (job->out_fence) + dma_fence_put(job->out_fence); + kfree(job); +} + +static void aie2_job_put(struct amdxdna_sched_job *job) +{ + kref_put(&job->refcnt, aie2_job_release); +} + +/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ +static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, + struct drm_sched_job *bad_job) +{ + drm_sched_stop(&hwctx->priv->sched, bad_job); + aie2_destroy_context(xdna->dev_handle, hwctx); +} + +static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_gem_obj *heap = hwctx->priv->heap; + int ret; + + ret = aie2_create_context(xdna->dev_handle, hwctx); + if (ret) { + XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); + goto out; + } + + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, + heap->mem.userptr, heap->mem.size); + if (ret) { + XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); + goto out; + } + + if (hwctx->status != HWCTX_STAT_READY) { + XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); + goto out; + } + + ret = aie2_config_cu(hwctx); + if (ret) { + XDNA_ERR(xdna, "Config cu failed, ret %d", ret); + goto out; + } + +out: + drm_sched_start(&hwctx->priv->sched, 0); + XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); + return ret; +} + +void aie2_restart_ctx(struct amdxdna_client *client) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + mutex_lock(&client->hwctx_lock); + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { + if (hwctx->status != HWCTX_STAT_STOP) + continue; + + hwctx->status = hwctx->old_status; + XDNA_DBG(xdna, "Resetting %s", hwctx->name); + aie2_hwctx_restart(xdna, hwctx); + } + mutex_unlock(&client->hwctx_lock); +} + +static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) +{ + struct dma_fence *fence, *out_fence = NULL; + int ret; + + fence = drm_syncobj_fence_get(hwctx->priv->syncobj); + if (!fence) + return NULL; + + ret = dma_fence_chain_find_seqno(&fence, seq); + if (ret) + goto out; + + out_fence = dma_fence_get(dma_fence_chain_contained(fence)); + +out: + dma_fence_put(fence); + return out_fence; +} + +static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) +{ + struct dma_fence *fence; + + fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); + if (!fence) + return; + + dma_fence_wait(fence, false); + dma_fence_put(fence); +} + +void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + /* + * Command timeout is 
unlikely. But if it happens, it doesn't + * break the system. aie2_hwctx_stop() will destroy mailbox + * and abort all commands. + */ + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + aie2_hwctx_wait_for_idle(hwctx); + aie2_hwctx_stop(xdna, hwctx, NULL); + hwctx->old_status = hwctx->status; + hwctx->status = HWCTX_STAT_STOP; +} + +void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + /* + * The resume path cannot guarantee that mailbox channel can be + * regenerated. If this happen, when submit message to this + * mailbox channel, error will return. + */ + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + hwctx->status = hwctx->old_status; + aie2_hwctx_restart(xdna, hwctx); +} + +static void +aie2_sched_notify(struct amdxdna_sched_job *job) +{ + struct dma_fence *fence = job->fence; + + trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); + job->hwctx->priv->completed++; + dma_fence_signal(fence); + + up(&job->hwctx->priv->job_sem); + job->job_done = true; + dma_fence_put(fence); + mmput_async(job->mm); + aie2_job_put(job); +} + +static int +aie2_sched_resp_handler(void *handle, const u32 *data, size_t size) +{ + struct amdxdna_sched_job *job = handle; + struct amdxdna_gem_obj *cmd_abo; + u32 ret = 0; + u32 status; + + cmd_abo = job->cmd_bo; + + if (unlikely(!data)) + goto out; + + if (unlikely(size != sizeof(u32))) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + ret = -EINVAL; + goto out; + } + + status = *data; + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); + if (status == AIE2_STATUS_SUCCESS) + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); + else + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); + +out: + aie2_sched_notify(job); + return ret; +} + +static int +aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size) +{ + struct amdxdna_sched_job *job = handle; + u32 ret = 0; + u32 status; + + if (unlikely(!data)) + goto out; + + if (unlikely(size != sizeof(u32))) { + ret = -EINVAL; + goto out; + } + + status = *data; + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); + +out: + aie2_sched_notify(job); + return ret; +} + +static int +aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size) +{ + struct amdxdna_sched_job *job = handle; + struct amdxdna_gem_obj *cmd_abo; + struct cmd_chain_resp *resp; + struct amdxdna_dev *xdna; + u32 fail_cmd_status; + u32 fail_cmd_idx; + u32 ret = 0; + + cmd_abo = job->cmd_bo; + if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + ret = -EINVAL; + goto out; + } + + resp = (struct cmd_chain_resp *)data; + xdna = job->hwctx->client->xdna; + XDNA_DBG(xdna, "Status 0x%x", resp->status); + if (resp->status == AIE2_STATUS_SUCCESS) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); + goto out; + } + + /* Slow path to handle error, read from ringbuf on BAR */ + fail_cmd_idx = resp->fail_cmd_idx; + fail_cmd_status = resp->fail_cmd_status; + XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", + fail_cmd_idx, fail_cmd_status); + + if (fail_cmd_status == AIE2_STATUS_SUCCESS) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + ret = -EINVAL; + goto out; + } + amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); + + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { + struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); + + cc->error_index = fail_cmd_idx; + if (cc->error_index >= 
cc->command_count) + cc->error_index = 0; + } +out: + aie2_sched_notify(job); + return ret; +} + +static struct dma_fence * +aie2_sched_job_run(struct drm_sched_job *sched_job) +{ + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct amdxdna_hwctx *hwctx = job->hwctx; + struct dma_fence *fence; + int ret; + + if (!mmget_not_zero(job->mm)) + return ERR_PTR(-ESRCH); + + kref_get(&job->refcnt); + fence = dma_fence_get(job->fence); + + if (unlikely(!cmd_abo)) { + ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler); + goto out; + } + + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); + + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) + ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); + else if (force_cmdlist) + ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); + else + ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); + +out: + if (ret) { + dma_fence_put(job->fence); + aie2_job_put(job); + mmput(job->mm); + fence = ERR_PTR(ret); + } + trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); + + return fence; +} + +static void aie2_sched_job_free(struct drm_sched_job *sched_job) +{ + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); + struct amdxdna_hwctx *hwctx = job->hwctx; + + trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); + if (!job->job_done) + up(&hwctx->priv->job_sem); + + drm_sched_job_cleanup(sched_job); + aie2_job_put(job); +} + +static enum drm_gpu_sched_stat +aie2_sched_job_timedout(struct drm_sched_job *sched_job) +{ + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); + struct amdxdna_hwctx *hwctx = job->hwctx; + struct amdxdna_dev *xdna; + + xdna = hwctx->client->xdna; + trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); + mutex_lock(&xdna->dev_lock); + aie2_hwctx_stop(xdna, hwctx, sched_job); + + aie2_hwctx_restart(xdna, hwctx); + mutex_unlock(&xdna->dev_lock); + + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +const struct drm_sched_backend_ops sched_ops = { + .run_job = aie2_sched_job_run, + .free_job = aie2_sched_job_free, + .timedout_job = aie2_sched_job_timedout, +}; + +static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_dev_hdl *ndev; + int start, end, first, last; + u32 width = 1, entries = 0; + int i; + + if (!hwctx->num_tiles) { + XDNA_ERR(xdna, "Number of tiles is zero"); + return -EINVAL; + } + + ndev = xdna->dev_handle; + if (unlikely(!ndev->metadata.core.row_count)) { + XDNA_WARN(xdna, "Core tile row count is zero"); + return -EINVAL; + } + + hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count; + if (!hwctx->num_col || hwctx->num_col > ndev->total_col) { + XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col); + return -EINVAL; + } + + if (ndev->priv->col_align == COL_ALIGN_NATURE) + width = hwctx->num_col; + + /* + * In range [start, end], find out columns that is multiple of width. + * 'first' is the first column, + * 'last' is the last column, + * 'entries' is the total number of columns. 
+ */ + start = xdna->dev_info->first_col; + end = ndev->total_col - hwctx->num_col; + if (start > 0 && end == 0) { + XDNA_DBG(xdna, "Force start from col 0"); + start = 0; + } + first = start + (width - start % width) % width; + last = end - end % width; + if (last >= first) + entries = (last - first) / width + 1; + XDNA_DBG(xdna, "start %d end %d first %d last %d", + start, end, first, last); + + if (unlikely(!entries)) { + XDNA_ERR(xdna, "Start %d end %d width %d", + start, end, width); + return -EINVAL; + } + + hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL); + if (!hwctx->col_list) + return -ENOMEM; + + hwctx->col_list_len = entries; + hwctx->col_list[0] = first; + for (i = 1; i < entries; i++) + hwctx->col_list[i] = hwctx->col_list[i - 1] + width; + + print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list, + entries * sizeof(*hwctx->col_list), false); + return 0; +} + +static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct alloc_requests *xrs_req; + int ret; + + xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); + if (!xrs_req) + return -ENOMEM; + + xrs_req->cdo.start_cols = hwctx->col_list; + xrs_req->cdo.cols_len = hwctx->col_list_len; + xrs_req->cdo.ncols = hwctx->num_col; + xrs_req->cdo.qos_cap.opc = hwctx->max_opc; + + xrs_req->rqos.gops = hwctx->qos.gops; + xrs_req->rqos.fps = hwctx->qos.fps; + xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; + xrs_req->rqos.latency = hwctx->qos.latency; + xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; + xrs_req->rqos.priority = hwctx->qos.priority; + + xrs_req->rid = (uintptr_t)hwctx; + + ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); + if (ret) + XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); + + kfree(xrs_req); + return ret; +} + +static void aie2_release_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + int ret; + + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); + if (ret) + XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); +} + +static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct drm_file *filp = hwctx->client->filp; + struct drm_syncobj *syncobj; + u32 hdl; + int ret; + + hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; + + ret = drm_syncobj_create(&syncobj, 0, NULL); + if (ret) { + XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); + return ret; + } + ret = drm_syncobj_get_handle(filp, syncobj, &hdl); + if (ret) { + drm_syncobj_put(syncobj); + XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); + return ret; + } + hwctx->priv->syncobj = syncobj; + hwctx->syncobj_hdl = hdl; + + return 0; +} + +static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) +{ + /* + * The syncobj_hdl is owned by user space and will be cleaned up + * separately. 
+ */ + drm_syncobj_put(hwctx->priv->syncobj); +} + +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct drm_gpu_scheduler *sched; + struct amdxdna_hwctx_priv *priv; + struct amdxdna_gem_obj *heap; + struct amdxdna_dev_hdl *ndev; + int i, ret; + + priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + hwctx->priv = priv; + + mutex_lock(&client->mm_lock); + heap = client->dev_heap; + if (!heap) { + XDNA_ERR(xdna, "The client dev heap object not exist"); + mutex_unlock(&client->mm_lock); + ret = -ENOENT; + goto free_priv; + } + drm_gem_object_get(to_gobj(heap)); + mutex_unlock(&client->mm_lock); + priv->heap = heap; + sema_init(&priv->job_sem, HWCTX_MAX_CMDS); + + ret = amdxdna_gem_pin(heap); + if (ret) { + XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret); + goto put_heap; + } + + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { + struct amdxdna_gem_obj *abo; + struct amdxdna_drm_create_bo args = { + .flags = 0, + .type = AMDXDNA_BO_DEV, + .vaddr = 0, + .size = MAX_CHAIN_CMDBUF_SIZE, + }; + + abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true); + if (IS_ERR(abo)) { + ret = PTR_ERR(abo); + goto free_cmd_bufs; + } + + XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", + i, abo->mem.dev_addr, abo->mem.size); + priv->cmd_buf[i] = abo; + } + + sched = &priv->sched; + mutex_init(&priv->io_lock); + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&priv->io_lock); + fs_reclaim_release(GFP_KERNEL); + + ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, + HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT), + NULL, NULL, hwctx->name, xdna->ddev.dev); + if (ret) { + XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); + goto free_cmd_bufs; + } + + ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (ret) { + XDNA_ERR(xdna, "Failed to initial sched entiry. 
ret %d", ret); + goto free_sched; + } + + ret = aie2_hwctx_col_list(hwctx); + if (ret) { + XDNA_ERR(xdna, "Create col list failed, ret %d", ret); + goto free_entity; + } + + ret = aie2_alloc_resource(hwctx); + if (ret) { + XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); + goto free_col_list; + } + + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, + heap->mem.userptr, heap->mem.size); + if (ret) { + XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); + goto release_resource; + } + + ret = aie2_ctx_syncobj_create(hwctx); + if (ret) { + XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); + goto release_resource; + } + + hwctx->status = HWCTX_STAT_INIT; + ndev = xdna->dev_handle; + ndev->hwctx_num++; + + XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); + + return 0; + +release_resource: + aie2_release_resource(hwctx); +free_col_list: + kfree(hwctx->col_list); +free_entity: + drm_sched_entity_destroy(&priv->entity); +free_sched: + drm_sched_fini(&priv->sched); +free_cmd_bufs: + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { + if (!priv->cmd_buf[i]) + continue; + drm_gem_object_put(to_gobj(priv->cmd_buf[i])); + } + amdxdna_gem_unpin(heap); +put_heap: + drm_gem_object_put(to_gobj(heap)); +free_priv: + kfree(priv); + return ret; +} + +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev_hdl *ndev; + struct amdxdna_dev *xdna; + int idx; + + xdna = hwctx->client->xdna; + ndev = xdna->dev_handle; + ndev->hwctx_num--; + drm_sched_wqueue_stop(&hwctx->priv->sched); + + /* Now, scheduler will not send command to device. */ + aie2_release_resource(hwctx); + + /* + * All submitted commands are aborted. + * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run() + * will return NODEV if it is called. 
+ */ + drm_sched_wqueue_start(&hwctx->priv->sched); + + aie2_hwctx_wait_for_idle(hwctx); + drm_sched_entity_destroy(&hwctx->priv->entity); + drm_sched_fini(&hwctx->priv->sched); + aie2_ctx_syncobj_destroy(hwctx); + + XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); + + for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) + drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); + amdxdna_gem_unpin(hwctx->priv->heap); + drm_gem_object_put(to_gobj(hwctx->priv->heap)); + + mutex_destroy(&hwctx->priv->io_lock); + kfree(hwctx->col_list); + kfree(hwctx->priv); + kfree(hwctx->cus); +} + +static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size) +{ + struct amdxdna_hwctx_param_config_cu *config = buf; + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 total_size; + int ret; + + XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); + if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) + return -EINVAL; + + if (hwctx->status != HWCTX_STAT_INIT) { + XDNA_ERR(xdna, "Not support re-config CU"); + return -EINVAL; + } + + if (!config->num_cus) { + XDNA_ERR(xdna, "Number of CU is zero"); + return -EINVAL; + } + + total_size = struct_size(config, cu_configs, config->num_cus); + if (total_size > size) { + XDNA_ERR(xdna, "CU config larger than size"); + return -EINVAL; + } + + hwctx->cus = kmemdup(config, total_size, GFP_KERNEL); + if (!hwctx->cus) + return -ENOMEM; + + ret = aie2_config_cu(hwctx); + if (ret) { + XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret); + goto free_cus; + } + + wmb(); /* To avoid locking in command submit when check status */ + hwctx->status = HWCTX_STAT_READY; + + return 0; + +free_cus: + kfree(hwctx->cus); + hwctx->cus = NULL; + return ret; +} + +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + switch (type) { + case DRM_AMDXDNA_HWCTX_CONFIG_CU: + return aie2_hwctx_cu_config(hwctx, buf, size); + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: + return -EOPNOTSUPP; + default: + XDNA_DBG(xdna, "Not supported type %d", type); + return -EOPNOTSUPP; + } +} + +static int aie2_populate_range(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct mm_struct *mm = abo->mem.notifier.mm; + struct hmm_range range = { 0 }; + unsigned long timeout; + int ret; + + XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx", + abo->mem.userptr, abo->mem.size); + range.notifier = &abo->mem.notifier; + range.start = abo->mem.userptr; + range.end = abo->mem.userptr + abo->mem.size; + range.hmm_pfns = abo->mem.pfns; + range.default_flags = HMM_PFN_REQ_FAULT; + + if (!mmget_not_zero(mm)) + return -EFAULT; + + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); +again: + range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier); + mmap_read_lock(mm); + ret = hmm_range_fault(&range); + mmap_read_unlock(mm); + if (ret) { + if (time_after(jiffies, timeout)) { + ret = -ETIME; + goto put_mm; + } + + if (ret == -EBUSY) + goto again; + + goto put_mm; + } + + down_read(&xdna->notifier_lock); + if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { + up_read(&xdna->notifier_lock); + goto again; + } + abo->mem.map_invalid = false; + up_read(&xdna->notifier_lock); + +put_mm: + mmput(mm); + return ret; +} + +int aie2_cmd_submit(struct amdxdna_hwctx 
*hwctx, struct amdxdna_sched_job *job, u64 *seq) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct ww_acquire_ctx acquire_ctx; + struct dma_fence_chain *chain; + struct amdxdna_gem_obj *abo; + unsigned long timeout = 0; + int ret, i; + + ret = down_interruptible(&hwctx->priv->job_sem); + if (ret) { + XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); + return ret; + } + + chain = dma_fence_chain_alloc(); + if (!chain) { + XDNA_ERR(xdna, "Alloc fence chain failed"); + ret = -ENOMEM; + goto up_sem; + } + + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx); + if (ret) { + XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); + goto free_chain; + } + +retry: + ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + if (ret) { + XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); + goto cleanup_job; + } + + for (i = 0; i < job->bo_cnt; i++) { + ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); + if (ret) { + XDNA_WARN(xdna, "Failed to reserve fences %d", ret); + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + goto cleanup_job; + } + } + + down_read(&xdna->notifier_lock); + for (i = 0; i < job->bo_cnt; i++) { + abo = to_xdna_obj(job->bos[i]); + if (abo->mem.map_invalid) { + up_read(&xdna->notifier_lock); + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + if (!timeout) { + timeout = jiffies + + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + } else if (time_after(jiffies, timeout)) { + ret = -ETIME; + goto cleanup_job; + } + + ret = aie2_populate_range(abo); + if (ret) + goto cleanup_job; + goto retry; + } + } + + mutex_lock(&hwctx->priv->io_lock); + drm_sched_job_arm(&job->base); + job->out_fence = dma_fence_get(&job->base.s_fence->finished); + for (i = 0; i < job->bo_cnt; i++) + dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE); + job->seq = hwctx->priv->seq++; + kref_get(&job->refcnt); + drm_sched_entity_push_job(&job->base); + + *seq = job->seq; + drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq); + mutex_unlock(&hwctx->priv->io_lock); + + up_read(&xdna->notifier_lock); + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + + aie2_job_put(job); + + return 0; + +cleanup_job: + drm_sched_job_cleanup(&job->base); +free_chain: + dma_fence_chain_free(chain); +up_sem: + up(&hwctx->priv->job_sem); + job->job_done = true; + return ret; +} + +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, + unsigned long cur_seq) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct drm_gem_object *gobj = to_gobj(abo); + long ret; + + down_write(&xdna->notifier_lock); + abo->mem.map_invalid = true; + mmu_interval_set_seq(&abo->mem.notifier, cur_seq); + up_write(&xdna->notifier_lock); + ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, + true, MAX_SCHEDULE_TIMEOUT); + if (!ret || ret == -ERESTARTSYS) + XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); +} diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c new file mode 100644 index 000000000000..b1defaa8513b --- /dev/null +++ b/drivers/accel/amdxdna/aie2_error.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/drm_cache.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/kernel.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +struct async_event { + struct amdxdna_dev_hdl *ndev; + struct async_event_msg_resp resp; + struct workqueue_struct *wq; + struct work_struct work; + u8 *buf; + dma_addr_t addr; + u32 size; +}; + +struct async_events { + struct workqueue_struct *wq; + u8 *buf; + dma_addr_t addr; + u32 size; + u32 event_cnt; + struct async_event event[] __counted_by(event_cnt); +}; + +/* + * Below enum, struct and lookup tables are porting from XAIE util header file. + * + * Below data is defined by AIE device and it is used for decode error message + * from the device. + */ + +enum aie_module_type { + AIE_MEM_MOD = 0, + AIE_CORE_MOD, + AIE_PL_MOD, +}; + +enum aie_error_category { + AIE_ERROR_SATURATION = 0, + AIE_ERROR_FP, + AIE_ERROR_STREAM, + AIE_ERROR_ACCESS, + AIE_ERROR_BUS, + AIE_ERROR_INSTRUCTION, + AIE_ERROR_ECC, + AIE_ERROR_LOCK, + AIE_ERROR_DMA, + AIE_ERROR_MEM_PARITY, + /* Unknown is not from XAIE, added for better category */ + AIE_ERROR_UNKNOWN, +}; + +/* Don't pack, unless XAIE side changed */ +struct aie_error { + __u8 row; + __u8 col; + __u32 mod_type; + __u8 event_id; +}; + +struct aie_err_info { + u32 err_cnt; + u32 ret_code; + u32 rsvd; + struct aie_error payload[] __counted_by(err_cnt); +}; + +struct aie_event_category { + u8 event_id; + enum aie_error_category category; +}; + +#define EVENT_CATEGORY(id, cat) { id, cat } +static const struct aie_event_category aie_ml_mem_event_cat[] = { + EVENT_CATEGORY(88U, AIE_ERROR_ECC), + EVENT_CATEGORY(90U, AIE_ERROR_ECC), + EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(97U, AIE_ERROR_DMA), + EVENT_CATEGORY(98U, AIE_ERROR_DMA), + EVENT_CATEGORY(99U, AIE_ERROR_DMA), + EVENT_CATEGORY(100U, AIE_ERROR_DMA), + EVENT_CATEGORY(101U, AIE_ERROR_LOCK), +}; + +static const struct aie_event_category aie_ml_core_event_cat[] = { + EVENT_CATEGORY(55U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(56U, AIE_ERROR_STREAM), + EVENT_CATEGORY(57U, AIE_ERROR_STREAM), + EVENT_CATEGORY(58U, AIE_ERROR_BUS), + EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION), + EVENT_CATEGORY(60U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(62U, AIE_ERROR_ECC), + EVENT_CATEGORY(64U, AIE_ERROR_ECC), + EVENT_CATEGORY(65U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(66U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(67U, AIE_ERROR_LOCK), + EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION), + EVENT_CATEGORY(71U, AIE_ERROR_STREAM), + EVENT_CATEGORY(72U, AIE_ERROR_BUS), +}; + +static const struct aie_event_category aie_ml_mem_tile_event_cat[] = { + EVENT_CATEGORY(130U, AIE_ERROR_ECC), + EVENT_CATEGORY(132U, AIE_ERROR_ECC), + EVENT_CATEGORY(133U, AIE_ERROR_DMA), + EVENT_CATEGORY(134U, AIE_ERROR_DMA), + EVENT_CATEGORY(135U, AIE_ERROR_STREAM), + EVENT_CATEGORY(136U, AIE_ERROR_STREAM), + EVENT_CATEGORY(137U, AIE_ERROR_STREAM), + EVENT_CATEGORY(138U, AIE_ERROR_BUS), + EVENT_CATEGORY(139U, AIE_ERROR_LOCK), +}; + +static const struct aie_event_category aie_ml_shim_tile_event_cat[] = { + EVENT_CATEGORY(64U, AIE_ERROR_BUS), + EVENT_CATEGORY(65U, AIE_ERROR_STREAM), + EVENT_CATEGORY(66U, 
AIE_ERROR_STREAM), + EVENT_CATEGORY(67U, AIE_ERROR_BUS), + EVENT_CATEGORY(68U, AIE_ERROR_BUS), + EVENT_CATEGORY(69U, AIE_ERROR_BUS), + EVENT_CATEGORY(70U, AIE_ERROR_BUS), + EVENT_CATEGORY(71U, AIE_ERROR_BUS), + EVENT_CATEGORY(72U, AIE_ERROR_DMA), + EVENT_CATEGORY(73U, AIE_ERROR_DMA), + EVENT_CATEGORY(74U, AIE_ERROR_LOCK), +}; + +static enum aie_error_category +aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type) +{ + const struct aie_event_category *lut; + int num_entry; + int i; + + switch (mod_type) { + case AIE_PL_MOD: + lut = aie_ml_shim_tile_event_cat; + num_entry = ARRAY_SIZE(aie_ml_shim_tile_event_cat); + break; + case AIE_CORE_MOD: + lut = aie_ml_core_event_cat; + num_entry = ARRAY_SIZE(aie_ml_core_event_cat); + break; + case AIE_MEM_MOD: + if (row == 1) { + lut = aie_ml_mem_tile_event_cat; + num_entry = ARRAY_SIZE(aie_ml_mem_tile_event_cat); + } else { + lut = aie_ml_mem_event_cat; + num_entry = ARRAY_SIZE(aie_ml_mem_event_cat); + } + break; + default: + return AIE_ERROR_UNKNOWN; + } + + for (i = 0; i < num_entry; i++) { + if (event_id != lut[i].event_id) + continue; + + return lut[i].category; + } + + return AIE_ERROR_UNKNOWN; +} + +static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) +{ + struct aie_error *errs = err_info; + u32 err_col = 0; /* assume that AIE has less than 32 columns */ + int i; + + /* Get err column bitmap */ + for (i = 0; i < num_err; i++) { + struct aie_error *err = &errs[i]; + enum aie_error_category cat; + + cat = aie_get_error_category(err->row, err->event_id, err->mod_type); + XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d", + err->row, err->col, err->mod_type, + err->event_id, cat); + + if (err->col >= 32) { + XDNA_WARN(ndev->xdna, "Invalid column number"); + break; + } + + err_col |= (1 << err->col); + } + + return err_col; +} + +static int aie2_error_async_cb(void *handle, const u32 *data, size_t size) +{ + struct async_event_msg_resp *resp; + struct async_event *e = handle; + + if (data) { + resp = (struct async_event_msg_resp *)data; + e->resp.type = resp->type; + wmb(); /* Update status in the end, so that no lock for here */ + e->resp.status = resp->status; + } + queue_work(e->wq, &e->work); + return 0; +} + +static int aie2_error_event_send(struct async_event *e) +{ + drm_clflush_virt_range(e->buf, e->size); /* device can access */ + return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e, + aie2_error_async_cb); +} + +static void aie2_error_worker(struct work_struct *err_work) +{ + struct aie_err_info *info; + struct amdxdna_dev *xdna; + struct async_event *e; + u32 max_err; + u32 err_col; + + e = container_of(err_work, struct async_event, work); + + xdna = e->ndev->xdna; + + if (e->resp.status == MAX_AIE2_STATUS_CODE) + return; + + e->resp.status = MAX_AIE2_STATUS_CODE; + + print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4, + e->buf, 0x100, false); + + info = (struct aie_err_info *)e->buf; + XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code); + + max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error); + if (unlikely(info->err_cnt > max_err)) { + WARN_ONCE(1, "Error count too large %d\n", info->err_cnt); + return; + } + err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt); + if (!err_col) { + XDNA_WARN(xdna, "Did not get error column"); + return; + } + + mutex_lock(&xdna->dev_lock); + /* Re-sent this event to firmware */ + if (aie2_error_event_send(e)) + XDNA_WARN(xdna, "Unable to 
register async event"); + mutex_unlock(&xdna->dev_lock); +} + +int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + struct async_event *e; + int i, ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + for (i = 0; i < ndev->async_events->event_cnt; i++) { + e = &ndev->async_events->event[i]; + ret = aie2_error_event_send(e); + if (ret) + return ret; + } + + return 0; +} + +void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + struct async_events *events; + + events = ndev->async_events; + + mutex_unlock(&xdna->dev_lock); + destroy_workqueue(events->wq); + mutex_lock(&xdna->dev_lock); + + dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, + events->addr, DMA_FROM_DEVICE); + kfree(events); +} + +int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + u32 total_col = ndev->total_col; + u32 total_size = ASYNC_BUF_SIZE * total_col; + struct async_events *events; + int i, ret; + + events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL); + if (!events) + return -ENOMEM; + + events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!events->buf) { + ret = -ENOMEM; + goto free_events; + } + events->size = total_size; + events->event_cnt = total_col; + + events->wq = alloc_ordered_workqueue("async_wq", 0); + if (!events->wq) { + ret = -ENOMEM; + goto free_buf; + } + + for (i = 0; i < events->event_cnt; i++) { + struct async_event *e = &events->event[i]; + u32 offset = i * ASYNC_BUF_SIZE; + + e->ndev = ndev; + e->wq = events->wq; + e->buf = &events->buf[offset]; + e->addr = events->addr + offset; + e->size = ASYNC_BUF_SIZE; + e->resp.status = MAX_AIE2_STATUS_CODE; + INIT_WORK(&e->work, aie2_error_worker); + } + + ndev->async_events = events; + + XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x", + events->event_cnt, events->size); + return 0; + +free_buf: + dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, + events->addr, DMA_FROM_DEVICE); +free_events: + kfree(events); + return ret; +} diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c new file mode 100644 index 000000000000..9e2c9a44f76a --- /dev/null +++ b/drivers/accel/amdxdna/aie2_message.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_cache.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/bitfield.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/xarray.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_mailbox_helper.h" +#include "amdxdna_pci_drv.h" + +#define DECLARE_AIE2_MSG(name, op) \ + DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE) + +static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, + struct xdna_mailbox_msg *msg) +{ + struct amdxdna_dev *xdna = ndev->xdna; + struct xdna_notify *hdl = msg->handle; + int ret; + + if (!ndev->mgmt_chann) + return -ENODEV; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); + if (ret == -ETIME) { + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; + } + + if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) { + XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", + msg->opcode, *hdl->data); + ret = -EINVAL; + } + + return ret; +} + +int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev) +{ + DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND); + + return aie2_send_mgmt_msg_wait(ndev, &msg); +} + +int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) +{ + DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME); + + return aie2_send_mgmt_msg_wait(ndev, &msg); +} + +int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value) +{ + DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG); + int ret; + + req.type = type; + req.value = value; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret); + return ret; + } + + return 0; +} + +int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value) +{ + DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG); + int ret; + + req.type = type; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret); + return ret; + } + + *value = resp.value; + return 0; +} + +int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid) +{ + DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID); + + req.pasid = pasid; + + return aie2_send_mgmt_msg_wait(ndev, &msg); +} + +int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version) +{ + DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed", + resp.major, resp.minor); + + version->major = resp.major; + version->minor = resp.minor; + + return 0; +} + +int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata) +{ + DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO); + int ret; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + metadata->size = resp.info.size; + metadata->cols = resp.info.cols; + metadata->rows = resp.info.rows; + + metadata->version.major = resp.info.major; + metadata->version.minor = resp.info.minor; + + metadata->core.row_count = resp.info.core_rows; + 
metadata->core.row_start = resp.info.core_row_start; + metadata->core.dma_channel_count = resp.info.core_dma_channels; + metadata->core.lock_count = resp.info.core_locks; + metadata->core.event_reg_count = resp.info.core_events; + + metadata->mem.row_count = resp.info.mem_rows; + metadata->mem.row_start = resp.info.mem_row_start; + metadata->mem.dma_channel_count = resp.info.mem_dma_channels; + metadata->mem.lock_count = resp.info.mem_locks; + metadata->mem.event_reg_count = resp.info.mem_events; + + metadata->shim.row_count = resp.info.shim_rows; + metadata->shim.row_start = resp.info.shim_row_start; + metadata->shim.dma_channel_count = resp.info.shim_dma_channels; + metadata->shim.lock_count = resp.info.shim_locks; + metadata->shim.event_reg_count = resp.info.shim_events; + + return 0; +} + +int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, + struct amdxdna_fw_ver *fw_ver) +{ + DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION); + int ret; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + fw_ver->major = resp.major; + fw_ver->minor = resp.minor; + fw_ver->sub = resp.sub; + fw_ver->build = resp.build; + + return 0; +} + +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + struct xdna_mailbox_chann_res x2i; + struct xdna_mailbox_chann_res i2x; + struct cq_pair *cq_pair; + u32 intr_reg; + int ret; + + req.aie_type = 1; + req.start_col = hwctx->start_col; + req.num_col = hwctx->num_col; + req.num_cq_pairs_requested = 1; + req.pasid = hwctx->client->pasid; + req.context_priority = 2; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + hwctx->fw_ctx_id = resp.context_id; + WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + + cq_pair = &resp.cq_pair[0]; + x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); + x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); + x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr); + x2i.rb_size = cq_pair->x2i_q.buf_size; + + i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr); + i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr); + i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr); + i2x.rb_size = cq_pair->i2x_q.buf_size; + + ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id); + if (ret == -EINVAL) { + XDNA_ERR(xdna, "not able to create channel"); + goto out_destroy_context; + } + + intr_reg = i2x.mb_head_ptr_reg + 4; + hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, + intr_reg, ret); + if (!hwctx->priv->mbox_chann) { + XDNA_ERR(xdna, "not able to create channel"); + ret = -EINVAL; + goto out_destroy_context; + } + + XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", + hwctx->name, ret, resp.msix_id); + XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name, + hwctx->fw_ctx_id, hwctx->client->pasid); + + return 0; + +out_destroy_context: + aie2_destroy_context(ndev, hwctx); + return ret; +} + +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + if (hwctx->fw_ctx_id == -1) + return 0; + + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); + + req.context_id = hwctx->fw_ctx_id; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + XDNA_WARN(xdna, "%s 
destroy context failed, ret %d", hwctx->name, ret); + + xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); + XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name, + hwctx->fw_ctx_id); + hwctx->priv->mbox_chann = NULL; + hwctx->fw_ctx_id = -1; + + return ret; +} + +int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size) +{ + DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + req.context_id = context_id; + req.buf_addr = addr; + req.buf_size = size; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx", + context_id, addr, size); + + return 0; +} + +int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, + u32 size, u32 *cols_filled) +{ + DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS); + struct amdxdna_dev *xdna = ndev->xdna; + struct amdxdna_client *client; + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + dma_addr_t dma_addr; + u32 aie_bitmap = 0; + u8 *buff_addr; + int ret, idx; + + buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!buff_addr) + return -ENOMEM; + + /* Go through each hardware context and mark the AIE columns that are active */ + list_for_each_entry(client, &xdna->client_list, node) { + idx = srcu_read_lock(&client->hwctx_srcu); + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) + aie_bitmap |= amdxdna_hwctx_col_map(hwctx); + srcu_read_unlock(&client->hwctx_srcu, idx); + } + + *cols_filled = 0; + req.dump_buff_addr = dma_addr; + req.dump_buff_size = size; + req.num_cols = hweight32(aie_bitmap); + req.aie_bitmap = aie_bitmap; + + drm_clflush_virt_range(buff_addr, size); /* device can access */ + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(xdna, "Error during NPU query, status %d", ret); + goto fail; + } + + if (resp.status != AIE2_STATUS_SUCCESS) { + XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status); + ret = -EINVAL; + goto fail; + } + XDNA_DBG(xdna, "Query NPU status completed"); + + if (size < resp.size) { + ret = -EINVAL; + XDNA_ERR(xdna, "Bad buffer size. Available: %u. 
Needs: %u", size, resp.size); + goto fail; + } + + if (copy_to_user(buf, buff_addr, resp.size)) { + ret = -EFAULT; + XDNA_ERR(xdna, "Failed to copy NPU status to user space"); + goto fail; + } + + *cols_filled = aie_bitmap; + +fail: + dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE); + return ret; +} + +int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, + void *handle, int (*cb)(void*, const u32 *, size_t)) +{ + struct async_event_msg_req req = { 0 }; + struct xdna_mailbox_msg msg = { + .send_data = (u8 *)&req, + .send_size = sizeof(req), + .handle = handle, + .opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG, + .notify_cb = cb, + }; + + req.buf_addr = addr; + req.buf_size = size; + + XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size); + return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT); +} + +int aie2_config_cu(struct amdxdna_hwctx *hwctx) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 shift = xdna->dev_info->dev_mem_buf_shift; + DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU); + struct drm_gem_object *gobj; + struct amdxdna_gem_obj *abo; + int ret, i; + + if (!chann) + return -ENODEV; + + if (hwctx->cus->num_cus > MAX_NUM_CUS) { + XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS); + return -EINVAL; + } + + for (i = 0; i < hwctx->cus->num_cus; i++) { + struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i]; + + if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad))) + return -EINVAL; + + gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo); + if (!gobj) { + XDNA_ERR(xdna, "Lookup GEM object failed"); + return -EINVAL; + } + abo = to_xdna_obj(gobj); + + if (abo->type != AMDXDNA_BO_DEV) { + drm_gem_object_put(gobj); + XDNA_ERR(xdna, "Invalid BO type"); + return -EINVAL; + } + + req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR, + abo->mem.dev_addr >> shift); + req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func); + XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i, + abo->mem.dev_addr, req.cfgs[i]); + drm_gem_object_put(gobj); + } + req.num_cus = hwctx->cus->num_cus; + + ret = xdna_send_msg_wait(xdna, chann, &msg); + if (ret == -ETIME) + aie2_destroy_context(xdna->dev_handle, hwctx); + + if (resp.status == AIE2_STATUS_SUCCESS) { + XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret); + return 0; + } + + XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d", + msg.opcode, resp.status, ret); + return ret; +} + +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + union { + struct execute_buffer_req ebuf; + struct exec_dpu_req dpu; + } req; + struct xdna_mailbox_msg msg; + u32 payload_len; + void *payload; + int cu_idx; + int ret; + u32 op; + + if (!chann) + return -ENODEV; + + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); + if (!payload) { + XDNA_ERR(xdna, "Invalid command, cannot get payload"); + return -EINVAL; + } + + cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); + if (cu_idx < 0) { + XDNA_DBG(xdna, "Invalid cu idx"); + return -EINVAL; + } + + op = amdxdna_cmd_get_op(cmd_abo); + switch (op) { + case ERT_START_CU: + if (unlikely(payload_len > sizeof(req.ebuf.payload))) + XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); + req.ebuf.cu_idx = 
cu_idx; + memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); + msg.send_size = sizeof(req.ebuf); + msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; + break; + case ERT_START_NPU: { + struct amdxdna_cmd_start_npu *sn = payload; + + if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) + XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); + req.dpu.inst_buf_addr = sn->buffer; + req.dpu.inst_size = sn->buffer_size; + req.dpu.inst_prop_cnt = sn->prop_count; + req.dpu.cu_idx = cu_idx; + memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); + msg.send_size = sizeof(req.dpu); + msg.opcode = MSG_OP_EXEC_DPU; + break; + } + default: + XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); + return -EINVAL; + } + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, + 0x40, false); + + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; + } + + return 0; +} + +static int +aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, + struct amdxdna_gem_obj *abo, u32 *size) +{ + struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; + int cu_idx = amdxdna_cmd_get_cu_idx(abo); + u32 payload_len; + void *payload; + + if (cu_idx < 0) + return -EINVAL; + + payload = amdxdna_cmd_get_payload(abo, &payload_len); + if (!payload) + return -EINVAL; + + if (!slot_cf_has_space(offset, payload_len)) + return -ENOSPC; + + buf->cu_idx = cu_idx; + buf->arg_cnt = payload_len / sizeof(u32); + memcpy(buf->args, payload, payload_len); + /* Accurate buf size to hint firmware to do necessary copy */ + *size = sizeof(*buf) + payload_len; + return 0; +} + +static int +aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, + struct amdxdna_gem_obj *abo, u32 *size) +{ + struct cmd_chain_slot_dpu *buf = cmd_buf + offset; + int cu_idx = amdxdna_cmd_get_cu_idx(abo); + struct amdxdna_cmd_start_npu *sn; + u32 payload_len; + void *payload; + u32 arg_sz; + + if (cu_idx < 0) + return -EINVAL; + + payload = amdxdna_cmd_get_payload(abo, &payload_len); + if (!payload) + return -EINVAL; + sn = payload; + arg_sz = payload_len - sizeof(*sn); + if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) + return -EINVAL; + + if (!slot_dpu_has_space(offset, arg_sz)) + return -ENOSPC; + + buf->inst_buf_addr = sn->buffer; + buf->inst_size = sn->buffer_size; + buf->inst_prop_cnt = sn->prop_count; + buf->cu_idx = cu_idx; + buf->arg_cnt = arg_sz / sizeof(u32); + memcpy(buf->args, sn->prop_args, arg_sz); + + /* Accurate buf size to hint firmware to do necessary copy */ + *size += sizeof(*buf) + arg_sz; + return 0; +} + +static int +aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, + struct amdxdna_gem_obj *abo, u32 *size) +{ + u32 this_op = amdxdna_cmd_get_op(abo); + void *cmd_buf = cmdbuf_abo->mem.kva; + int ret; + + if (this_op != op) { + ret = -EINVAL; + goto done; + } + + switch (op) { + case ERT_START_CU: + ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); + break; + case ERT_START_NPU: + ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); + break; + default: + ret = -EOPNOTSUPP; + } + +done: + if (ret) { + XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", + op, ret); + } + return ret; +} + +static inline struct amdxdna_gem_obj * +aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) +{ + int idx = get_job_idx(job->seq); + + return job->hwctx->priv->cmd_buf[idx]; +} + +static 
void +aie2_cmdlist_prepare_request(struct cmd_chain_req *req, + struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) +{ + req->buf_addr = cmdbuf_abo->mem.dev_addr; + req->buf_size = size; + req->count = cnt; + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); + XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", + req->buf_addr, size, cnt); +} + +static inline u32 +aie2_cmd_op_to_msg_op(u32 op) +{ + switch (op) { + case ERT_START_CU: + return MSG_OP_CHAIN_EXEC_BUFFER_CF; + case ERT_START_NPU: + return MSG_OP_CHAIN_EXEC_DPU; + default: + return MSG_OP_MAX_OPCODE; + } +} + +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)) +{ + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_client *client = hwctx->client; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct amdxdna_cmd_chain *payload; + struct xdna_mailbox_msg msg; + struct cmd_chain_req req; + u32 payload_len; + u32 offset = 0; + u32 size; + int ret; + u32 op; + u32 i; + + op = amdxdna_cmd_get_op(cmd_abo); + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); + if (op != ERT_CMD_CHAIN || !payload || + payload_len < struct_size(payload, data, payload->command_count)) + return -EINVAL; + + for (i = 0; i < payload->command_count; i++) { + u32 boh = (u32)(payload->data[i]); + struct amdxdna_gem_obj *abo; + + abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); + if (!abo) { + XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); + return -ENOENT; + } + + /* All sub-cmd should have same op, use the first one. */ + if (i == 0) + op = amdxdna_cmd_get_op(abo); + + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); + amdxdna_gem_put_obj(abo); + if (ret) + return -EINVAL; + + offset += size; + } + + /* The offset is the accumulated total size of the cmd buffer */ + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); + + msg.opcode = aie2_cmd_op_to_msg_op(op); + if (msg.opcode == MSG_OP_MAX_OPCODE) + return -EOPNOTSUPP; + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(hwctx->client->xdna, "Send message failed"); + return ret; + } + + return 0; +} + +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)) +{ + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct xdna_mailbox_msg msg; + struct cmd_chain_req req; + u32 size; + int ret; + u32 op; + + op = amdxdna_cmd_get_op(cmd_abo); + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); + if (ret) + return ret; + + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); + + msg.opcode = aie2_cmd_op_to_msg_op(op); + if (msg.opcode == MSG_OP_MAX_OPCODE) + return -EOPNOTSUPP; + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(hwctx->client->xdna, "Send message failed"); + return ret; + } + + return 0; +} + +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, 
size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct xdna_mailbox_msg msg; + struct sync_bo_req req; + int ret = 0; + + req.src_addr = 0; + req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr; + req.size = abo->mem.size; + + /* Device to Host */ + req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) | + FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM); + + XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed", + req.size, req.src_addr, req.dst_addr); + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + msg.opcode = MSG_OP_SYNC_BO; + + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; + } + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h new file mode 100644 index 000000000000..4e02e744b470 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -0,0 +1,370 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AIE2_MSG_PRIV_H_ +#define _AIE2_MSG_PRIV_H_ + +enum aie2_msg_opcode { + MSG_OP_CREATE_CONTEXT = 0x2, + MSG_OP_DESTROY_CONTEXT = 0x3, + MSG_OP_SYNC_BO = 0x7, + MSG_OP_EXECUTE_BUFFER_CF = 0xC, + MSG_OP_QUERY_COL_STATUS = 0xD, + MSG_OP_QUERY_AIE_TILE_INFO = 0xE, + MSG_OP_QUERY_AIE_VERSION = 0xF, + MSG_OP_EXEC_DPU = 0x10, + MSG_OP_CONFIG_CU = 0x11, + MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12, + MSG_OP_CHAIN_EXEC_DPU = 0x13, + MSG_OP_MAX_XRT_OPCODE, + MSG_OP_SUSPEND = 0x101, + MSG_OP_RESUME = 0x102, + MSG_OP_ASSIGN_MGMT_PASID = 0x103, + MSG_OP_INVOKE_SELF_TEST = 0x104, + MSG_OP_MAP_HOST_BUFFER = 0x106, + MSG_OP_GET_FIRMWARE_VERSION = 0x108, + MSG_OP_SET_RUNTIME_CONFIG = 0x10A, + MSG_OP_GET_RUNTIME_CONFIG = 0x10B, + MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C, + MSG_OP_MAX_DRV_OPCODE, + MSG_OP_GET_PROTOCOL_VERSION = 0x301, + MSG_OP_MAX_OPCODE +}; + +enum aie2_msg_status { + AIE2_STATUS_SUCCESS = 0x0, + /* AIE Error codes */ + AIE2_STATUS_AIE_SATURATION_ERROR = 0x1000001, + AIE2_STATUS_AIE_FP_ERROR = 0x1000002, + AIE2_STATUS_AIE_STREAM_ERROR = 0x1000003, + AIE2_STATUS_AIE_ACCESS_ERROR = 0x1000004, + AIE2_STATUS_AIE_BUS_ERROR = 0x1000005, + AIE2_STATUS_AIE_INSTRUCTION_ERROR = 0x1000006, + AIE2_STATUS_AIE_ECC_ERROR = 0x1000007, + AIE2_STATUS_AIE_LOCK_ERROR = 0x1000008, + AIE2_STATUS_AIE_DMA_ERROR = 0x1000009, + AIE2_STATUS_AIE_MEM_PARITY_ERROR = 0x100000a, + AIE2_STATUS_AIE_PWR_CFG_ERROR = 0x100000b, + AIE2_STATUS_AIE_BACKTRACK_ERROR = 0x100000c, + AIE2_STATUS_MAX_AIE_STATUS_CODE, + /* MGMT ERT Error codes */ + AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE = 0x2000001, + AIE2_STATUS_MGMT_ERT_HASH_MISMATCH, + AIE2_STATUS_MGMT_ERT_NOAVAIL, + AIE2_STATUS_MGMT_ERT_INVALID_PARAM, + AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE, + AIE2_STATUS_MGMT_ERT_BUSY, + AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE, + MAX_MGMT_ERT_STATUS_CODE, + /* APP ERT Error codes */ + AIE2_STATUS_APP_ERT_FIRST_ERROR = 0x3000001, + AIE2_STATUS_APP_INVALID_INSTR, + AIE2_STATUS_APP_LOAD_PDI_FAIL, + MAX_APP_ERT_STATUS_CODE, + /* NPU RTOS Error Codes */ + AIE2_STATUS_INVALID_INPUT_BUFFER = 0x4000001, + AIE2_STATUS_INVALID_COMMAND, + AIE2_STATUS_INVALID_PARAM, + AIE2_STATUS_INVALID_OPERATION = 0x4000006, + AIE2_STATUS_ASYNC_EVENT_MSGS_FULL, + AIE2_STATUS_MAX_RTOS_STATUS_CODE, + MAX_AIE2_STATUS_CODE +}; + 
+struct assign_mgmt_pasid_req { + __u16 pasid; + __u16 reserved; +} __packed; + +struct assign_mgmt_pasid_resp { + enum aie2_msg_status status; +} __packed; + +struct map_host_buffer_req { + __u32 context_id; + __u64 buf_addr; + __u64 buf_size; +} __packed; + +struct map_host_buffer_resp { + enum aie2_msg_status status; +} __packed; + +#define MAX_CQ_PAIRS 2 +struct cq_info { + __u32 head_addr; + __u32 tail_addr; + __u32 buf_addr; + __u32 buf_size; +}; + +struct cq_pair { + struct cq_info x2i_q; + struct cq_info i2x_q; +}; + +struct create_ctx_req { + __u32 aie_type; + __u8 start_col; + __u8 num_col; + __u16 reserved; + __u8 num_cq_pairs_requested; + __u8 reserved1; + __u16 pasid; + __u32 pad[2]; + __u32 sec_comm_target_type; + __u32 context_priority; +} __packed; + +struct create_ctx_resp { + enum aie2_msg_status status; + __u32 context_id; + __u16 msix_id; + __u8 num_cq_pairs_allocated; + __u8 reserved; + struct cq_pair cq_pair[MAX_CQ_PAIRS]; +} __packed; + +struct destroy_ctx_req { + __u32 context_id; +} __packed; + +struct destroy_ctx_resp { + enum aie2_msg_status status; +} __packed; + +struct execute_buffer_req { + __u32 cu_idx; + __u32 payload[19]; +} __packed; + +struct exec_dpu_req { + __u64 inst_buf_addr; + __u32 inst_size; + __u32 inst_prop_cnt; + __u32 cu_idx; + __u32 payload[35]; +} __packed; + +struct execute_buffer_resp { + enum aie2_msg_status status; +} __packed; + +struct aie_tile_info { + __u32 size; + __u16 major; + __u16 minor; + __u16 cols; + __u16 rows; + __u16 core_rows; + __u16 mem_rows; + __u16 shim_rows; + __u16 core_row_start; + __u16 mem_row_start; + __u16 shim_row_start; + __u16 core_dma_channels; + __u16 mem_dma_channels; + __u16 shim_dma_channels; + __u16 core_locks; + __u16 mem_locks; + __u16 shim_locks; + __u16 core_events; + __u16 mem_events; + __u16 shim_events; + __u16 reserved; +}; + +struct aie_tile_info_req { + __u32 reserved; +} __packed; + +struct aie_tile_info_resp { + enum aie2_msg_status status; + struct aie_tile_info info; +} __packed; + +struct aie_version_info_req { + __u32 reserved; +} __packed; + +struct aie_version_info_resp { + enum aie2_msg_status status; + __u16 major; + __u16 minor; +} __packed; + +struct aie_column_info_req { + __u64 dump_buff_addr; + __u32 dump_buff_size; + __u32 num_cols; + __u32 aie_bitmap; +} __packed; + +struct aie_column_info_resp { + enum aie2_msg_status status; + __u32 size; +} __packed; + +struct suspend_req { + __u32 place_holder; +} __packed; + +struct suspend_resp { + enum aie2_msg_status status; +} __packed; + +struct resume_req { + __u32 place_holder; +} __packed; + +struct resume_resp { + enum aie2_msg_status status; +} __packed; + +struct check_header_hash_req { + __u64 hash_high; + __u64 hash_low; +} __packed; + +struct check_header_hash_resp { + enum aie2_msg_status status; +} __packed; + +struct query_error_req { + __u64 buf_addr; + __u32 buf_size; + __u32 next_row; + __u32 next_column; + __u32 next_module; +} __packed; + +struct query_error_resp { + enum aie2_msg_status status; + __u32 num_err; + __u32 has_next_err; + __u32 next_row; + __u32 next_column; + __u32 next_module; +} __packed; + +struct protocol_version_req { + __u32 reserved; +} __packed; + +struct protocol_version_resp { + enum aie2_msg_status status; + __u32 major; + __u32 minor; +} __packed; + +struct firmware_version_req { + __u32 reserved; +} __packed; + +struct firmware_version_resp { + enum aie2_msg_status status; + __u32 major; + __u32 minor; + __u32 sub; + __u32 build; +} __packed; + +#define MAX_NUM_CUS 32 +#define 
AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0) +#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17) +struct config_cu_req { + __u32 num_cus; + __u32 cfgs[MAX_NUM_CUS]; +} __packed; + +struct config_cu_resp { + enum aie2_msg_status status; +} __packed; + +struct set_runtime_cfg_req { + __u32 type; + __u64 value; +} __packed; + +struct set_runtime_cfg_resp { + enum aie2_msg_status status; +} __packed; + +struct get_runtime_cfg_req { + __u32 type; +} __packed; + +struct get_runtime_cfg_resp { + enum aie2_msg_status status; + __u64 value; +} __packed; + +enum async_event_type { + ASYNC_EVENT_TYPE_AIE_ERROR, + ASYNC_EVENT_TYPE_EXCEPTION, + MAX_ASYNC_EVENT_TYPE +}; + +#define ASYNC_BUF_SIZE SZ_8K +struct async_event_msg_req { + __u64 buf_addr; + __u32 buf_size; +} __packed; + +struct async_event_msg_resp { + enum aie2_msg_status status; + enum async_event_type type; +} __packed; + +#define MAX_CHAIN_CMDBUF_SIZE SZ_4K +#define slot_cf_has_space(offset, payload_size) \ + (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ + offsetof(struct cmd_chain_slot_execbuf_cf, args[0])) +struct cmd_chain_slot_execbuf_cf { + __u32 cu_idx; + __u32 arg_cnt; + __u32 args[] __counted_by(arg_cnt); +}; + +#define slot_dpu_has_space(offset, payload_size) \ + (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ + offsetof(struct cmd_chain_slot_dpu, args[0])) +struct cmd_chain_slot_dpu { + __u64 inst_buf_addr; + __u32 inst_size; + __u32 inst_prop_cnt; + __u32 cu_idx; + __u32 arg_cnt; +#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32)) + __u32 args[] __counted_by(arg_cnt); +}; + +struct cmd_chain_req { + __u64 buf_addr; + __u32 buf_size; + __u32 count; +} __packed; + +struct cmd_chain_resp { + enum aie2_msg_status status; + __u32 fail_cmd_idx; + enum aie2_msg_status fail_cmd_status; +} __packed; + +#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0) +#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4) +struct sync_bo_req { + __u64 src_addr; + __u64 dst_addr; + __u32 size; +#define SYNC_BO_DEV_MEM 0 +#define SYNC_BO_HOST_MEM 2 + __u32 type; +} __packed; + +struct sync_bo_resp { + enum aie2_msg_status status; +} __packed; +#endif /* _AIE2_MSG_PRIV_H_ */ diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c new file mode 100644 index 000000000000..5a058e565b01 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -0,0 +1,928 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_drv.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_managed.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/errno.h> +#include <linux/firmware.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/pci.h> +#include <linux/xarray.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "aie2_solver.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +static int aie2_max_col = XRS_MAX_COL; +module_param(aie2_max_col, uint, 0600); +MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used"); + +/* + * The management mailbox channel is allocated by firmware. + * The related register and ring buffer information is on SRAM BAR. + * This struct is the register layout. 
+ */
+#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
+struct mgmt_mbox_chann_info {
+	__u32	x2i_tail;
+	__u32	x2i_head;
+	__u32	x2i_buf;
+	__u32	x2i_buf_sz;
+	__u32	i2x_tail;
+	__u32	i2x_head;
+	__u32	i2x_buf;
+	__u32	i2x_buf_sz;
+	__u32	magic;
+	__u32	msi_id;
+	__u32	prot_major;
+	__u32	prot_minor;
+	__u32	rsvd[4];
+};
+
+static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+
+	/*
+	 * The mailbox behavior supported by the driver is defined by
+	 * ndev->priv->protocol_major and protocol_minor.
+	 *
+	 * When protocol_major and fw_major are different, it means the driver
+	 * and firmware are incompatible.
+	 */
+	if (ndev->priv->protocol_major != fw_major) {
+		XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
+			 fw_major, fw_minor);
+		return -EINVAL;
+	}
+
+	/*
+	 * When protocol_minor is greater than fw_minor, it means the driver
+	 * relies on operations the installed firmware does not support.
+	 */
+	if (ndev->priv->protocol_minor > fw_minor) {
+		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+
+	XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg);
+	XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg);
+	XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr);
+	XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size);
+	XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg);
+	XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg);
+	XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
+	XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size);
+	XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
+	XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
+	XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
+}
+
+static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
+{
+	struct mgmt_mbox_chann_info info_regs;
+	struct xdna_mailbox_chann_res *i2x;
+	struct xdna_mailbox_chann_res *x2i;
+	u32 addr, off;
+	u32 *reg;
+	int ret;
+	int i;
+
+	/*
+	 * Once firmware is alive, it will write management channel
+	 * information in SRAM BAR and write the address of that information
+	 * at FW_ALIVE_OFF offset in SRAM BAR.
+	 *
+	 * Reading a non-zero value from FW_ALIVE_OFF implies that the firmware
+	 * is alive.
+ */ + ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF), + addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT); + if (ret || !addr) + return -ETIME; + + off = AIE2_SRAM_OFF(ndev, addr); + reg = (u32 *)&info_regs; + for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++) + reg[i] = readl(ndev->sram_base + off + i * sizeof(u32)); + + if (info_regs.magic != MGMT_MBOX_MAGIC) { + XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic); + ret = -EINVAL; + goto done; + } + + i2x = &ndev->mgmt_i2x; + x2i = &ndev->mgmt_x2i; + + i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head); + i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail); + i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf); + i2x->rb_size = info_regs.i2x_buf_sz; + + x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head); + x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail); + x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf); + x2i->rb_size = info_regs.x2i_buf_sz; + + ndev->mgmt_chan_idx = info_regs.msi_id; + ndev->mgmt_prot_major = info_regs.prot_major; + ndev->mgmt_prot_minor = info_regs.prot_minor; + + ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor); + +done: + aie2_dump_chann_info_debug(ndev); + + /* Must clear address at FW_ALIVE_OFF */ + writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF)); + + return ret; +} + +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val) +{ + const struct rt_config *cfg; + u32 value; + int ret; + + for (cfg = ndev->priv->rt_config; cfg->type; cfg++) { + if (cfg->category != category) + continue; + + value = val ? *val : cfg->value; + ret = aie2_set_runtime_cfg(ndev, cfg->type, value); + if (ret) { + XDNA_ERR(ndev->xdna, "Set type %d value %d failed", + cfg->type, value); + return ret; + } + } + + return 0; +} + +static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_suspend_fw(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Suspend firmware failed"); + return ret; + } + + ret = aie2_resume_fw(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Resume firmware failed"); + return ret; + } + + return 0; +} + +static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Runtime config failed"); + return ret; + } + + ret = aie2_assign_mgmt_pasid(ndev, 0); + if (ret) { + XDNA_ERR(ndev->xdna, "Can not assign PASID"); + return ret; + } + + ret = aie2_xdna_reset(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Reset firmware failed"); + return ret; + } + + if (!ndev->async_events) + return 0; + + ret = aie2_error_async_events_send(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Send async events failed"); + return ret; + } + + return 0; +} + +static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver); + if (ret) { + XDNA_ERR(ndev->xdna, "query firmware version failed"); + return ret; + } + + ret = aie2_query_aie_version(ndev, &ndev->version); + if (ret) { + XDNA_ERR(ndev->xdna, "Query AIE version failed"); + return ret; + } + + ret = aie2_query_aie_metadata(ndev, &ndev->metadata); + if (ret) { + XDNA_ERR(ndev->xdna, "Query AIE metadata failed"); + return ret; + } + + return 0; +} + +static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev) +{ + if (aie2_suspend_fw(ndev)) + XDNA_ERR(ndev->xdna, "Suspend_fw failed"); + XDNA_DBG(ndev->xdna, "Firmware suspended"); +} + +static int 
aie2_xrs_load(void *cb_arg, struct xrs_action_load *action) +{ + struct amdxdna_hwctx *hwctx = cb_arg; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + + hwctx->start_col = action->part.start_col; + hwctx->num_col = action->part.ncols; + ret = aie2_create_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "create context failed, ret %d", ret); + + return ret; +} + +static int aie2_xrs_unload(void *cb_arg) +{ + struct amdxdna_hwctx *hwctx = cb_arg; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + + ret = aie2_destroy_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "destroy context failed, ret %d", ret); + + return ret; +} + +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) +{ + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + struct amdxdna_dev_hdl *ndev; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + ndev = xdna->dev_handle; + ndev->dft_dpm_level = dpm_level; + if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) + return 0; + + return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); +} + +static struct xrs_action_ops aie2_xrs_actions = { + .load = aie2_xrs_load, + .unload = aie2_xrs_unload, + .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level, +}; + +static void aie2_hw_stop(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + + if (ndev->dev_status <= AIE2_DEV_INIT) { + XDNA_ERR(xdna, "device is already stopped"); + return; + } + + aie2_mgmt_fw_fini(ndev); + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; + drmm_kfree(&xdna->ddev, ndev->mbox); + ndev->mbox = NULL; + aie2_psp_stop(ndev->psp_hdl); + aie2_smu_fini(ndev); + pci_disable_device(pdev); + + ndev->dev_status = AIE2_DEV_INIT; +} + +static int aie2_hw_start(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + struct xdna_mailbox_res mbox_res; + u32 xdna_mailbox_intr_reg; + int mgmt_mb_irq, ret; + + if (ndev->dev_status >= AIE2_DEV_START) { + XDNA_INFO(xdna, "device is already started"); + return 0; + } + + ret = pci_enable_device(pdev); + if (ret) { + XDNA_ERR(xdna, "failed to enable device, ret %d", ret); + return ret; + } + pci_set_master(pdev); + + ret = aie2_smu_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init smu, ret %d", ret); + goto disable_dev; + } + + ret = aie2_psp_start(ndev->psp_hdl); + if (ret) { + XDNA_ERR(xdna, "failed to start psp, ret %d", ret); + goto fini_smu; + } + + ret = aie2_get_mgmt_chann_info(ndev); + if (ret) { + XDNA_ERR(xdna, "firmware is not alive"); + goto stop_psp; + } + + mbox_res.ringbuf_base = ndev->sram_base; + mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar); + mbox_res.mbox_base = ndev->mbox_base; + mbox_res.mbox_size = MBOX_SIZE(ndev); + mbox_res.name = "xdna_mailbox"; + ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res); + if (!ndev->mbox) { + XDNA_ERR(xdna, "failed to create mailbox device"); + ret = -ENODEV; + goto stop_psp; + } + + mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx); + if (mgmt_mb_irq < 0) { + ret = mgmt_mb_irq; + XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret); + goto stop_psp; + } + + xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4; + ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox, + &ndev->mgmt_x2i, + &ndev->mgmt_i2x, + 
xdna_mailbox_intr_reg, + mgmt_mb_irq); + if (!ndev->mgmt_chann) { + XDNA_ERR(xdna, "failed to create management mailbox channel"); + ret = -EINVAL; + goto stop_psp; + } + + ret = aie2_pm_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); + goto destroy_mgmt_chann; + } + + ret = aie2_mgmt_fw_init(ndev); + if (ret) { + XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); + goto destroy_mgmt_chann; + } + + ndev->dev_status = AIE2_DEV_START; + + return 0; + +destroy_mgmt_chann: + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); +stop_psp: + aie2_psp_stop(ndev->psp_hdl); +fini_smu: + aie2_smu_fini(ndev); +disable_dev: + pci_disable_device(pdev); + + return ret; +} + +static int aie2_init(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + void __iomem *tbl[PCI_NUM_RESOURCES] = {0}; + struct init_config xrs_cfg = { 0 }; + struct amdxdna_dev_hdl *ndev; + struct psp_config psp_conf; + const struct firmware *fw; + unsigned long bars = 0; + int i, nvec, ret; + + ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL); + if (!ndev) + return -ENOMEM; + + ndev->priv = xdna->dev_info->dev_priv; + ndev->xdna = xdna; + + ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev); + if (ret) { + XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", + ndev->priv->fw_path, ret); + return ret; + } + + ret = pcim_enable_device(pdev); + if (ret) { + XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret); + goto release_fw; + } + + for (i = 0; i < PSP_MAX_REGS; i++) + set_bit(PSP_REG_BAR(ndev, i), &bars); + + set_bit(xdna->dev_info->sram_bar, &bars); + set_bit(xdna->dev_info->smu_bar, &bars); + set_bit(xdna->dev_info->mbox_bar, &bars); + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!test_bit(i, &bars)) + continue; + tbl[i] = pcim_iomap(pdev, i, 0); + if (!tbl[i]) { + XDNA_ERR(xdna, "map bar %d failed", i); + ret = -ENOMEM; + goto release_fw; + } + } + + ndev->sram_base = tbl[xdna->dev_info->sram_bar]; + ndev->smu_base = tbl[xdna->dev_info->smu_bar]; + ndev->mbox_base = tbl[xdna->dev_info->mbox_bar]; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret); + goto release_fw; + } + + nvec = pci_msix_vec_count(pdev); + if (nvec <= 0) { + XDNA_ERR(xdna, "does not get number of interrupt vector"); + ret = -EINVAL; + goto release_fw; + } + + ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX); + if (ret < 0) { + XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret); + goto release_fw; + } + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + if (ret) { + XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret); + goto free_irq; + } + + psp_conf.fw_size = fw->size; + psp_conf.fw_buf = fw->data; + for (i = 0; i < PSP_MAX_REGS; i++) + psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i); + ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf); + if (!ndev->psp_hdl) { + XDNA_ERR(xdna, "failed to create psp"); + ret = -ENOMEM; + goto disable_sva; + } + xdna->dev_handle = ndev; + + ret = aie2_hw_start(xdna); + if (ret) { + XDNA_ERR(xdna, "start npu failed, ret %d", ret); + goto disable_sva; + } + + ret = aie2_mgmt_fw_query(ndev); + if (ret) { + XDNA_ERR(xdna, "Query firmware failed, ret %d", ret); + goto stop_hw; + } + ndev->total_col = min(aie2_max_col, ndev->metadata.cols); + + xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1; + for (i = 0; i < xrs_cfg.clk_list.num_levels; 
i++) + xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk; + xrs_cfg.sys_eff_factor = 1; + xrs_cfg.ddev = &xdna->ddev; + xrs_cfg.actions = &aie2_xrs_actions; + xrs_cfg.total_col = ndev->total_col; + + xdna->xrs_hdl = xrsm_init(&xrs_cfg); + if (!xdna->xrs_hdl) { + XDNA_ERR(xdna, "Initialize resolver failed"); + ret = -EINVAL; + goto stop_hw; + } + + ret = aie2_error_async_events_alloc(ndev); + if (ret) { + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret); + goto stop_hw; + } + + ret = aie2_error_async_events_send(ndev); + if (ret) { + XDNA_ERR(xdna, "Send async events failed, ret %d", ret); + goto async_event_free; + } + + /* Issue a command to make sure firmware handled async events */ + ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver); + if (ret) { + XDNA_ERR(xdna, "Re-query firmware version failed"); + goto async_event_free; + } + + release_firmware(fw); + return 0; + +async_event_free: + aie2_error_async_events_free(ndev); +stop_hw: + aie2_hw_stop(xdna); +disable_sva: + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); +free_irq: + pci_free_irq_vectors(pdev); +release_fw: + release_firmware(fw); + + return ret; +} + +static void aie2_fini(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + + aie2_hw_stop(xdna); + aie2_error_async_events_free(ndev); + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_free_irq_vectors(pdev); +} + +static int aie2_get_aie_status(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_aie_status status; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + int ret; + + ndev = xdna->dev_handle; + if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) { + XDNA_ERR(xdna, "Failed to copy AIE request into kernel"); + return -EFAULT; + } + + if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) { + XDNA_ERR(xdna, "Invalid buffer size. Given Size: %u. Need Size: %u.", + status.buffer_size, ndev->metadata.cols * ndev->metadata.size); + return -EINVAL; + } + + ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer), + status.buffer_size, &status.cols_filled); + if (ret) { + XDNA_ERR(xdna, "Failed to get AIE status info. 
Ret: %d", ret); + return ret; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) { + XDNA_ERR(xdna, "Failed to copy AIE request info to user space"); + return -EFAULT; + } + + return 0; +} + +static int aie2_get_aie_metadata(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_aie_metadata *meta; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + int ret = 0; + + ndev = xdna->dev_handle; + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->col_size = ndev->metadata.size; + meta->cols = ndev->metadata.cols; + meta->rows = ndev->metadata.rows; + + meta->version.major = ndev->metadata.version.major; + meta->version.minor = ndev->metadata.version.minor; + + meta->core.row_count = ndev->metadata.core.row_count; + meta->core.row_start = ndev->metadata.core.row_start; + meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count; + meta->core.lock_count = ndev->metadata.core.lock_count; + meta->core.event_reg_count = ndev->metadata.core.event_reg_count; + + meta->mem.row_count = ndev->metadata.mem.row_count; + meta->mem.row_start = ndev->metadata.mem.row_start; + meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count; + meta->mem.lock_count = ndev->metadata.mem.lock_count; + meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count; + + meta->shim.row_count = ndev->metadata.shim.row_count; + meta->shim.row_start = ndev->metadata.shim.row_start; + meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count; + meta->shim.lock_count = ndev->metadata.shim.lock_count; + meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count; + + if (copy_to_user(u64_to_user_ptr(args->buffer), meta, sizeof(*meta))) + ret = -EFAULT; + + kfree(meta); + return ret; +} + +static int aie2_get_aie_version(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_aie_version version; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + version.major = ndev->version.major; + version.minor = ndev->version.minor; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +static int aie2_get_firmware_version(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_firmware_version version; + struct amdxdna_dev *xdna = client->xdna; + + version.major = xdna->fw_ver.major; + version.minor = xdna->fw_ver.minor; + version.patch = xdna->fw_ver.sub; + version.build = xdna->fw_ver.build; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +static int aie2_get_power_mode(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_power_mode mode = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + mode.power_mode = ndev->pw_mode; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode))) + return -EFAULT; + + return 0; +} + +static int aie2_get_clock_metadata(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_clock_metadata *clock; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + int ret = 0; + + ndev = xdna->dev_handle; + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return -ENOMEM; + + 
snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name), + "MP-NPU Clock"); + clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq; + snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock"); + clock->h_clock.freq_mhz = ndev->hclk_freq; + + if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock))) + ret = -EFAULT; + + kfree(clock); + return ret; +} + +static int aie2_get_hwctx_status(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_hwctx __user *buf; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_drm_query_hwctx *tmp; + struct amdxdna_client *tmp_client; + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + bool overflow = false; + u32 req_bytes = 0; + u32 hw_i = 0; + int ret = 0; + int idx; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + buf = u64_to_user_ptr(args->buffer); + list_for_each_entry(tmp_client, &xdna->client_list, node) { + idx = srcu_read_lock(&tmp_client->hwctx_srcu); + amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) { + req_bytes += sizeof(*tmp); + if (args->buffer_size < req_bytes) { + /* Continue iterating to get the required size */ + overflow = true; + continue; + } + + memset(tmp, 0, sizeof(*tmp)); + tmp->pid = tmp_client->pid; + tmp->context_id = hwctx->id; + tmp->start_col = hwctx->start_col; + tmp->num_col = hwctx->num_col; + tmp->command_submissions = hwctx->priv->seq; + tmp->command_completions = hwctx->priv->completed; + + if (copy_to_user(&buf[hw_i], tmp, sizeof(*tmp))) { + ret = -EFAULT; + srcu_read_unlock(&tmp_client->hwctx_srcu, idx); + goto out; + } + hw_i++; + } + srcu_read_unlock(&tmp_client->hwctx_srcu, idx); + } + + if (overflow) { + XDNA_ERR(xdna, "Invalid buffer size. 
Given: %u Need: %u.", + args->buffer_size, req_bytes); + ret = -EINVAL; + } + +out: + kfree(tmp); + args->buffer_size = req_bytes; + return ret; +} + +static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) +{ + struct amdxdna_dev *xdna = client->xdna; + int ret, idx; + + if (!drm_dev_enter(&xdna->ddev, &idx)) + return -ENODEV; + + switch (args->param) { + case DRM_AMDXDNA_QUERY_AIE_STATUS: + ret = aie2_get_aie_status(client, args); + break; + case DRM_AMDXDNA_QUERY_AIE_METADATA: + ret = aie2_get_aie_metadata(client, args); + break; + case DRM_AMDXDNA_QUERY_AIE_VERSION: + ret = aie2_get_aie_version(client, args); + break; + case DRM_AMDXDNA_QUERY_CLOCK_METADATA: + ret = aie2_get_clock_metadata(client, args); + break; + case DRM_AMDXDNA_QUERY_HW_CONTEXTS: + ret = aie2_get_hwctx_status(client, args); + break; + case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION: + ret = aie2_get_firmware_version(client, args); + break; + case DRM_AMDXDNA_GET_POWER_MODE: + ret = aie2_get_power_mode(client, args); + break; + default: + XDNA_ERR(xdna, "Not supported request parameter %u", args->param); + ret = -EOPNOTSUPP; + } + XDNA_DBG(xdna, "Got param %d", args->param); + + drm_dev_exit(idx); + return ret; +} + +static int aie2_set_power_mode(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_drm_set_power_mode power_state; + enum amdxdna_power_mode_type power_mode; + struct amdxdna_dev *xdna = client->xdna; + + if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer), + sizeof(power_state))) { + XDNA_ERR(xdna, "Failed to copy power mode request into kernel"); + return -EFAULT; + } + + if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad))) + return -EINVAL; + + power_mode = power_state.power_mode; + if (power_mode > POWER_MODE_TURBO) { + XDNA_ERR(xdna, "Invalid power mode %d", power_mode); + return -EINVAL; + } + + return aie2_pm_set_mode(xdna->dev_handle, power_mode); +} + +static int aie2_set_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev *xdna = client->xdna; + int ret, idx; + + if (!drm_dev_enter(&xdna->ddev, &idx)) + return -ENODEV; + + switch (args->param) { + case DRM_AMDXDNA_SET_POWER_MODE: + ret = aie2_set_power_mode(client, args); + break; + default: + XDNA_ERR(xdna, "Not supported request parameter %u", args->param); + ret = -EOPNOTSUPP; + break; + } + + drm_dev_exit(idx); + return ret; +} + +const struct amdxdna_dev_ops aie2_ops = { + .init = aie2_init, + .fini = aie2_fini, + .resume = aie2_hw_start, + .suspend = aie2_hw_stop, + .get_aie_info = aie2_get_info, + .set_aie_state = aie2_set_state, + .hwctx_init = aie2_hwctx_init, + .hwctx_fini = aie2_hwctx_fini, + .hwctx_config = aie2_hwctx_config, + .cmd_submit = aie2_cmd_submit, + .hmm_invalidate = aie2_hmm_invalidate, + .hwctx_suspend = aie2_hwctx_suspend, + .hwctx_resume = aie2_hwctx_resume, +}; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h new file mode 100644 index 000000000000..f2d95531ddc2 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 
+ */ + +#ifndef _AIE2_PCI_H_ +#define _AIE2_PCI_H_ + +#include <drm/amdxdna_accel.h> +#include <linux/semaphore.h> + +#include "amdxdna_mailbox.h" + +#define AIE2_INTERVAL 20000 /* us */ +#define AIE2_TIMEOUT 1000000 /* us */ + +/* Firmware determines device memory base address and size */ +#define AIE2_DEVM_BASE 0x4000000 +#define AIE2_DEVM_SIZE SZ_64M + +#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev)) + +#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr) +#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr) + +#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx) +#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset) +#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset) + +#define SMU_REG(ndev, idx) \ +({ \ + typeof(ndev) _ndev = ndev; \ + ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \ +}) +#define SRAM_GET_ADDR(ndev, idx) \ +({ \ + typeof(ndev) _ndev = ndev; \ + ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \ +}) + +#define CHAN_SLOT_SZ SZ_8K +#define MBOX_SIZE(ndev) \ +({ \ + typeof(ndev) _ndev = (ndev); \ + ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \ + pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \ +}) + +enum aie2_smu_reg_idx { + SMU_CMD_REG = 0, + SMU_ARG_REG, + SMU_INTR_REG, + SMU_RESP_REG, + SMU_OUT_REG, + SMU_MAX_REGS /* Keep this at the end */ +}; + +enum aie2_sram_reg_idx { + MBOX_CHANN_OFF = 0, + FW_ALIVE_OFF, + SRAM_MAX_INDEX /* Keep this at the end */ +}; + +enum psp_reg_idx { + PSP_CMD_REG = 0, + PSP_ARG0_REG, + PSP_ARG1_REG, + PSP_ARG2_REG, + PSP_NUM_IN_REGS, /* number of input registers */ + PSP_INTR_REG = PSP_NUM_IN_REGS, + PSP_STATUS_REG, + PSP_RESP_REG, + PSP_MAX_REGS /* Keep this at the end */ +}; + +struct amdxdna_client; +struct amdxdna_fw_ver; +struct amdxdna_hwctx; +struct amdxdna_sched_job; + +struct psp_config { + const void *fw_buf; + u32 fw_size; + void __iomem *psp_regs[PSP_MAX_REGS]; +}; + +struct aie_version { + u16 major; + u16 minor; +}; + +struct aie_tile_metadata { + u16 row_count; + u16 row_start; + u16 dma_channel_count; + u16 lock_count; + u16 event_reg_count; +}; + +struct aie_metadata { + u32 size; + u16 cols; + u16 rows; + struct aie_version version; + struct aie_tile_metadata core; + struct aie_tile_metadata mem; + struct aie_tile_metadata shim; +}; + +enum rt_config_category { + AIE2_RT_CFG_INIT, + AIE2_RT_CFG_CLK_GATING, +}; + +struct rt_config { + u32 type; + u32 value; + u32 category; +}; + +struct dpm_clk_freq { + u32 npuclk; + u32 hclk; +}; + +/* + * Define the maximum number of pending commands in a hardware context. + * Must be power of 2! 
+ */ +#define HWCTX_MAX_CMDS 4 +#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) +struct amdxdna_hwctx_priv { + struct amdxdna_gem_obj *heap; + void *mbox_chann; + + struct drm_gpu_scheduler sched; + struct drm_sched_entity entity; + + struct mutex io_lock; /* protect seq and cmd order */ + struct wait_queue_head job_free_wq; + u32 num_pending; + u64 seq; + struct semaphore job_sem; + bool job_done; + + /* Completed job counter */ + u64 completed; + + struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; + struct drm_syncobj *syncobj; +}; + +enum aie2_dev_status { + AIE2_DEV_UNINIT, + AIE2_DEV_INIT, + AIE2_DEV_START, +}; + +struct amdxdna_dev_hdl { + struct amdxdna_dev *xdna; + const struct amdxdna_dev_priv *priv; + void __iomem *sram_base; + void __iomem *smu_base; + void __iomem *mbox_base; + struct psp_device *psp_hdl; + + struct xdna_mailbox_chann_res mgmt_x2i; + struct xdna_mailbox_chann_res mgmt_i2x; + u32 mgmt_chan_idx; + u32 mgmt_prot_major; + u32 mgmt_prot_minor; + + u32 total_col; + struct aie_version version; + struct aie_metadata metadata; + + /* power management and clock*/ + enum amdxdna_power_mode_type pw_mode; + u32 dpm_level; + u32 dft_dpm_level; + u32 max_dpm_level; + u32 clk_gating; + u32 npuclk_freq; + u32 hclk_freq; + + /* Mailbox and the management channel */ + struct mailbox *mbox; + struct mailbox_channel *mgmt_chann; + struct async_events *async_events; + + enum aie2_dev_status dev_status; + u32 hwctx_num; +}; + +#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ + [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE} + +struct aie2_bar_off_pair { + int bar_idx; + u32 offset; +}; + +struct aie2_hw_ops { + int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +}; + +struct amdxdna_dev_priv { + const char *fw_path; + u64 protocol_major; + u64 protocol_minor; + const struct rt_config *rt_config; + const struct dpm_clk_freq *dpm_clk_tbl; + +#define COL_ALIGN_NONE 0 +#define COL_ALIGN_NATURE 1 + u32 col_align; + u32 mbox_dev_addr; + /* If mbox_size is 0, use BAR size. 
See MBOX_SIZE macro */ + u32 mbox_size; + u32 sram_dev_addr; + struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; + struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; + struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS]; + struct aie2_hw_ops hw_ops; +}; + +extern const struct amdxdna_dev_ops aie2_ops; + +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val); + +/* aie2 npu hw config */ +extern const struct dpm_clk_freq npu1_dpm_clk_table[]; +extern const struct dpm_clk_freq npu4_dpm_clk_table[]; +extern const struct rt_config npu1_default_rt_cfg[]; +extern const struct rt_config npu4_default_rt_cfg[]; + +/* aie2_smu.c */ +int aie2_smu_init(struct amdxdna_dev_hdl *ndev); +void aie2_smu_fini(struct amdxdna_dev_hdl *ndev); +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); + +/* aie2_pm.c */ +int aie2_pm_init(struct amdxdna_dev_hdl *ndev); +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); + +/* aie2_psp.c */ +struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); +int aie2_psp_start(struct psp_device *psp); +void aie2_psp_stop(struct psp_device *psp); + +/* aie2_error.c */ +int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); +void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev); +int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev); +int aie2_error_async_msg_thread(void *data); + +/* aie2_message.c */ +int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); +int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); +int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); +int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value); +int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid); +int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version); +int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); +int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, + struct amdxdna_fw_ver *fw_ver); +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); +int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); +int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled); +int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, + void *handle, int (*cb)(void*, const u32 *, size_t)); +int aie2_config_cu(struct amdxdna_hwctx *hwctx); +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)); +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)); +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)); +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, const u32 *, size_t)); + +/* aie2_hwctx.c */ +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); +void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx); +void 
aie2_hwctx_resume(struct amdxdna_hwctx *hwctx); +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); +void aie2_restart_ctx(struct amdxdna_client *client); + +#endif /* _AIE2_PCI_H_ */ diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c new file mode 100644 index 000000000000..426c38fce848 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> + +#include "aie2_pci.h" +#include "amdxdna_pci_drv.h" + +#define AIE2_CLK_GATING_ENABLE 1 +#define AIE2_CLK_GATING_DISABLE 0 + +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val) +{ + int ret; + + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val); + if (ret) + return ret; + + ndev->clk_gating = val; + return 0; +} + +int aie2_pm_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + if (ndev->dev_status != AIE2_DEV_UNINIT) { + /* Resume device */ + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating); + if (ret) + return ret; + + return 0; + } + + while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk) + ndev->max_dpm_level++; + ndev->max_dpm_level--; + + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE); + if (ret) + return ret; + + ndev->pw_mode = POWER_MODE_DEFAULT; + ndev->dft_dpm_level = ndev->max_dpm_level; + + return 0; +} + +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target) +{ + struct amdxdna_dev *xdna = ndev->xdna; + u32 clk_gating, dpm_level; + int ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (ndev->pw_mode == target) + return 0; + + switch (target) { + case POWER_MODE_TURBO: + if (ndev->hwctx_num) { + XDNA_ERR(xdna, "Can not set turbo when there is active hwctx"); + return -EINVAL; + } + + clk_gating = AIE2_CLK_GATING_DISABLE; + dpm_level = ndev->max_dpm_level; + break; + case POWER_MODE_HIGH: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->max_dpm_level; + break; + case POWER_MODE_DEFAULT: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->dft_dpm_level; + break; + default: + return -EOPNOTSUPP; + } + + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, clk_gating); + if (ret) + return ret; + + ndev->pw_mode = target; + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c new file mode 100644 index 000000000000..dc3a072ce3b6 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_managed.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/bitfield.h> +#include <linux/iopoll.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +#define PSP_STATUS_READY BIT(31) + +/* PSP commands */ +#define PSP_VALIDATE 1 +#define PSP_START 2 +#define PSP_RELEASE_TMR 3 + +/* PSP special arguments */ +#define PSP_START_COPY_FW 1 + +/* PSP response error code */ +#define PSP_ERROR_CANCEL 0xFFFF0002 +#define PSP_ERROR_BAD_STATE 0xFFFF0007 + +#define PSP_FW_ALIGN 0x10000 +#define PSP_POLL_INTERVAL 20000 /* us */ +#define PSP_POLL_TIMEOUT 1000000 /* us */ + +#define PSP_REG(p, reg) ((p)->psp_regs[reg]) + +struct psp_device { + struct drm_device *ddev; + struct psp_config conf; + u32 fw_buf_sz; + u64 fw_paddr; + void *fw_buffer; + void __iomem *psp_regs[PSP_MAX_REGS]; +}; + +static int psp_exec(struct psp_device *psp, u32 *reg_vals) +{ + u32 resp_code; + int ret, i; + u32 ready; + + /* Write command and argument registers */ + for (i = 0; i < PSP_NUM_IN_REGS; i++) + writel(reg_vals[i], PSP_REG(psp, i)); + + /* clear and set PSP INTR register to kick off */ + writel(0, PSP_REG(psp, PSP_INTR_REG)); + writel(1, PSP_REG(psp, PSP_INTR_REG)); + + /* PSP should be busy. Wait for ready, so we know task is done. */ + ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready, + FIELD_GET(PSP_STATUS_READY, ready), + PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT); + if (ret) { + drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret); + return ret; + } + + resp_code = readl(PSP_REG(psp, PSP_RESP_REG)); + if (resp_code) { + drm_err(psp->ddev, "fw return error 0x%x", resp_code); + return -EIO; + } + + return 0; +} + +void aie2_psp_stop(struct psp_device *psp) +{ + u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, }; + int ret; + + ret = psp_exec(psp, reg_vals); + if (ret) + drm_err(psp->ddev, "release tmr failed, ret %d", ret); +} + +int aie2_psp_start(struct psp_device *psp) +{ + u32 reg_vals[PSP_NUM_IN_REGS]; + int ret; + + reg_vals[0] = PSP_VALIDATE; + reg_vals[1] = lower_32_bits(psp->fw_paddr); + reg_vals[2] = upper_32_bits(psp->fw_paddr); + reg_vals[3] = psp->fw_buf_sz; + + ret = psp_exec(psp, reg_vals); + if (ret) { + drm_err(psp->ddev, "failed to validate fw, ret %d", ret); + return ret; + } + + memset(reg_vals, 0, sizeof(reg_vals)); + reg_vals[0] = PSP_START; + reg_vals[1] = PSP_START_COPY_FW; + ret = psp_exec(psp, reg_vals); + if (ret) { + drm_err(psp->ddev, "failed to start fw, ret %d", ret); + return ret; + } + + return 0; +} + +struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf) +{ + struct psp_device *psp; + u64 offset; + + psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL); + if (!psp) + return NULL; + + psp->ddev = ddev; + memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); + + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL); + if (!psp->fw_buffer) { + drm_err(ddev, "no memory for fw buffer"); + return NULL; + } + + /* + * AMD Platform Security Processor(PSP) requires host physical + * address to load NPU firmware. 
+ */ + psp->fw_paddr = virt_to_phys(psp->fw_buffer); + offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr; + psp->fw_paddr += offset; + memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size); + + return psp; +} diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c new file mode 100644 index 000000000000..73388443c676 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/iopoll.h> + +#include "aie2_pci.h" +#include "amdxdna_pci_drv.h" + +#define SMU_RESULT_OK 1 + +/* SMU commands */ +#define AIE2_SMU_POWER_ON 0x3 +#define AIE2_SMU_POWER_OFF 0x4 +#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5 +#define AIE2_SMU_SET_HCLK_FREQ 0x6 +#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7 +#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8 + +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, + u32 reg_arg, u32 *out) +{ + u32 resp; + int ret; + + writel(0, SMU_REG(ndev, SMU_RESP_REG)); + writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG)); + writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG)); + + /* Clear and set SMU_INTR_REG to kick off */ + writel(0, SMU_REG(ndev, SMU_INTR_REG)); + writel(1, SMU_REG(ndev, SMU_INTR_REG)); + + ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp, + resp, AIE2_INTERVAL, AIE2_TIMEOUT); + if (ret) { + XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd); + return ret; + } + + if (out) + *out = readl(SMU_REG(ndev, SMU_OUT_REG)); + + if (resp != SMU_RESULT_OK) { + XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp); + return -EINVAL; + } + + return 0; +} + +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +{ + u32 freq; + int ret; + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); + } + ndev->npuclk_freq = freq; + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); + } + ndev->hclk_freq = freq; + ndev->dpm_level = dpm_level; + + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); + + return 0; +} + +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +{ + int ret; + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", + dpm_level, ret); + return ret; + } + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", + dpm_level, ret); + return ret; + } + + ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; + ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; + ndev->dpm_level = dpm_level; + + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); + + return 0; +} + +int aie2_smu_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret); + return ret; + } + + return 0; +} + +void aie2_smu_fini(struct 
amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	ndev->priv->hw_ops.set_dpm(ndev, 0);
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+	if (ret)
+		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
new file mode 100644
index 000000000000..2013d1f13aae
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
+#include "aie2_solver.h"
+
+struct partition_node {
+	struct list_head	list;
+	u32			nshared;	/* # shared requests */
+	u32			start_col;	/* start column */
+	u32			ncols;		/* # columns */
+	bool			exclusive;	/* can not be shared if set */
+};
+
+struct solver_node {
+	struct list_head	list;
+	u64			rid;		/* Request ID from consumer */
+
+	struct partition_node	*pt_node;
+	void			*cb_arg;
+	u32			dpm_level;
+	u32			cols_len;
+	u32			start_cols[] __counted_by(cols_len);
+};
+
+struct solver_rgroup {
+	u32			rgid;
+	u32			nnode;
+	u32			npartition_node;
+
+	DECLARE_BITMAP(resbit, XRS_MAX_COL);
+	struct list_head	node_list;
+	struct list_head	pt_node_list;
+};
+
+struct solver_state {
+	struct solver_rgroup	rgp;
+	struct init_config	cfg;
+	struct xrs_action_ops	*actions;
+};
+
+static u32 calculate_gops(struct aie_qos *rqos)
+{
+	u32 service_rate = 0;
+
+	if (rqos->latency)
+		service_rate = (1000 / rqos->latency);
+
+	if (rqos->fps > service_rate)
+		return rqos->fps * rqos->gops;
+
+	return service_rate * rqos->gops;
+}
+
+/*
+ * qos_meet() - Check whether the QoS request can be met.
+ */
+static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
+{
+	u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
+
+	if (request_gops <= cgops)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * sanity_check() - Do a basic sanity check on the allocation request.
+ */
+static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
+{
+	struct cdo_parts *cdop = &req->cdo;
+	struct aie_qos *rqos = &req->rqos;
+	u32 cu_clk_freq;
+
+	if (cdop->ncols > xrs->cfg.total_col)
+		return -EINVAL;
+
+	/*
+	 * Check that at least one CDO group can meet the
+	 * GOPs requirement.
+	 */
+	cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1];
+
+	if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000))
+		return -EINVAL;
+
+	return 0;
+}
+
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+	/*
+	 * gops is retrieved from the xmodel, so it is always set.
+	 * fps and latency are the configurable params from the application.
+	 */
+	if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0))
+		return true;
+
+	return false;
+}
+
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+	struct solver_rgroup *rgp = &xrs->rgp;
+	struct cdo_parts *cdop = &req->cdo;
+	struct aie_qos *rqos = &req->rqos;
+	u32 freq, max_dpm_level, level;
+	struct solver_node *node;
+
+	max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+	/* If no QoS parameters are passed, set it to the max DPM level */
+	if (!is_valid_qos_dpm_params(rqos)) {
+		level = max_dpm_level;
+		goto set_dpm;
+	}
+
+	/* Find one CDO group that meets the GOPs requirement.
*/ + for (level = 0; level < max_dpm_level; level++) { + freq = xrs->cfg.clk_list.cu_clk_list[level]; + if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000)) + break; + } + + /* set the dpm level which fits all the sessions */ + list_for_each_entry(node, &rgp->node_list, list) { + if (node->dpm_level > level) + level = node->dpm_level; + } + +set_dpm: + *dpm_level = level; + return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level); +} + +static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid) +{ + struct solver_node *node; + + list_for_each_entry(node, &rgp->node_list, list) { + if (node->rid == rid) + return node; + } + + return NULL; +} + +static void remove_partition_node(struct solver_rgroup *rgp, + struct partition_node *pt_node) +{ + pt_node->nshared--; + if (pt_node->nshared > 0) + return; + + list_del(&pt_node->list); + rgp->npartition_node--; + + bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols); + kfree(pt_node); +} + +static void remove_solver_node(struct solver_rgroup *rgp, + struct solver_node *node) +{ + list_del(&node->list); + rgp->nnode--; + + if (node->pt_node) + remove_partition_node(rgp, node->pt_node); + + kfree(node); +} + +static int get_free_partition(struct solver_state *xrs, + struct solver_node *snode, + struct alloc_requests *req) +{ + struct partition_node *pt_node; + u32 ncols = req->cdo.ncols; + u32 col, i; + + for (i = 0; i < snode->cols_len; i++) { + col = snode->start_cols[i]; + if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols) + break; + } + + if (i == snode->cols_len) + return -ENODEV; + + pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL); + if (!pt_node) + return -ENOMEM; + + pt_node->nshared = 1; + pt_node->start_col = col; + pt_node->ncols = ncols; + + /* + * Always set exclusive to false for now. 
+ */ + pt_node->exclusive = false; + + list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list); + xrs->rgp.npartition_node++; + bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols); + + snode->pt_node = pt_node; + + return 0; +} + +static int allocate_partition(struct solver_state *xrs, + struct solver_node *snode, + struct alloc_requests *req) +{ + struct partition_node *pt_node, *rpt_node = NULL; + int idx, ret; + + ret = get_free_partition(xrs, snode, req); + if (!ret) + return ret; + + /* try to get a share-able partition */ + list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) { + if (pt_node->exclusive) + continue; + + if (rpt_node && pt_node->nshared >= rpt_node->nshared) + continue; + + for (idx = 0; idx < snode->cols_len; idx++) { + if (snode->start_cols[idx] != pt_node->start_col) + continue; + + if (req->cdo.ncols != pt_node->ncols) + continue; + + rpt_node = pt_node; + break; + } + } + + if (!rpt_node) + return -ENODEV; + + rpt_node->nshared++; + snode->pt_node = rpt_node; + + return 0; +} + +static struct solver_node *create_solver_node(struct solver_state *xrs, + struct alloc_requests *req) +{ + struct cdo_parts *cdop = &req->cdo; + struct solver_node *node; + int ret; + + node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->rid = req->rid; + node->cols_len = cdop->cols_len; + memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32)); + + ret = allocate_partition(xrs, node, req); + if (ret) + goto free_node; + + list_add_tail(&node->list, &xrs->rgp.node_list); + xrs->rgp.nnode++; + return node; + +free_node: + kfree(node); + return ERR_PTR(ret); +} + +static void fill_load_action(struct solver_state *xrs, + struct solver_node *snode, + struct xrs_action_load *action) +{ + action->rid = snode->rid; + action->part.start_col = snode->pt_node->start_col; + action->part.ncols = snode->pt_node->ncols; +} + +int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg) +{ + struct xrs_action_load load_act; + struct solver_node *snode; + struct solver_state *xrs; + u32 dpm_level; + int ret; + + xrs = (struct solver_state *)hdl; + + ret = sanity_check(xrs, req); + if (ret) { + drm_err(xrs->cfg.ddev, "invalid request"); + return ret; + } + + if (rg_search_node(&xrs->rgp, req->rid)) { + drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid); + return -EEXIST; + } + + snode = create_solver_node(xrs, req); + if (IS_ERR(snode)) + return PTR_ERR(snode); + + fill_load_action(xrs, snode, &load_act); + ret = xrs->cfg.actions->load(cb_arg, &load_act); + if (ret) + goto free_node; + + ret = set_dpm_level(xrs, req, &dpm_level); + if (ret) + goto free_node; + + snode->dpm_level = dpm_level; + snode->cb_arg = cb_arg; + + drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n", + snode->pt_node->start_col, snode->pt_node->ncols); + + return 0; + +free_node: + remove_solver_node(&xrs->rgp, snode); + + return ret; +} + +int xrs_release_resource(void *hdl, u64 rid) +{ + struct solver_state *xrs = hdl; + struct solver_node *node; + + node = rg_search_node(&xrs->rgp, rid); + if (!node) { + drm_err(xrs->cfg.ddev, "node not exist"); + return -ENODEV; + } + + xrs->cfg.actions->unload(node->cb_arg); + remove_solver_node(&xrs->rgp, node); + + return 0; +} + +void *xrsm_init(struct init_config *cfg) +{ + struct solver_rgroup *rgp; + struct solver_state *xrs; + + xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL); + if (!xrs) + return NULL; + + memcpy(&xrs->cfg, cfg, sizeof(*cfg)); + + rgp = 
&xrs->rgp; + INIT_LIST_HEAD(&rgp->node_list); + INIT_LIST_HEAD(&rgp->pt_node_list); + + return xrs; +} diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h new file mode 100644 index 000000000000..a2e3c52229e9 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_solver.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AIE2_SOLVER_H +#define _AIE2_SOLVER_H + +#define XRS_MAX_COL 128 + +/* + * Structure used to describe a partition. A partition is column based + * allocation unit described by its start column and number of columns. + */ +struct aie_part { + u32 start_col; + u32 ncols; +}; + +/* + * The QoS capabilities of a given AIE partition. + */ +struct aie_qos_cap { + u32 opc; /* operations per cycle */ + u32 dma_bw; /* DMA bandwidth */ +}; + +/* + * QoS requirement of a resource allocation. + */ +struct aie_qos { + u32 gops; /* Giga operations */ + u32 fps; /* Frames per second */ + u32 dma_bw; /* DMA bandwidth */ + u32 latency; /* Frame response latency */ + u32 exec_time; /* Frame execution time */ + u32 priority; /* Request priority */ +}; + +/* + * Structure used to describe a relocatable CDO (Configuration Data Object). + */ +struct cdo_parts { + u32 *start_cols; /* Start column array */ + u32 cols_len; /* Length of start column array */ + u32 ncols; /* # of column */ + struct aie_qos_cap qos_cap; /* CDO QoS capabilities */ +}; + +/* + * Structure used to describe a request to allocate. + */ +struct alloc_requests { + u64 rid; + struct cdo_parts cdo; + struct aie_qos rqos; /* Requested QoS */ +}; + +/* + * Load callback argument + */ +struct xrs_action_load { + u32 rid; + struct aie_part part; +}; + +/* + * Define the power level available + * + * POWER_LEVEL_MIN: + * Lowest power level. Usually set when all actions are unloaded. + * + * POWER_LEVEL_n + * Power levels 0 - n, is a step increase in system frequencies + */ +enum power_level { + POWER_LEVEL_MIN = 0x0, + POWER_LEVEL_0 = 0x1, + POWER_LEVEL_1 = 0x2, + POWER_LEVEL_2 = 0x3, + POWER_LEVEL_3 = 0x4, + POWER_LEVEL_4 = 0x5, + POWER_LEVEL_5 = 0x6, + POWER_LEVEL_6 = 0x7, + POWER_LEVEL_7 = 0x8, + POWER_LEVEL_NUM, +}; + +/* + * Structure used to describe the frequency table. + * Resource solver chooses the frequency from the table + * to meet the QOS requirements. + */ +struct clk_list_info { + u32 num_levels; /* available power levels */ + u32 cu_clk_list[POWER_LEVEL_NUM]; /* available aie clock frequencies in Mhz*/ +}; + +struct xrs_action_ops { + int (*load)(void *cb_arg, struct xrs_action_load *action); + int (*unload)(void *cb_arg); + int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level); +}; + +/* + * Structure used to describe information for solver during initialization. + */ +struct init_config { + u32 total_col; + u32 sys_eff_factor; /* system efficiency factor */ + u32 latency_adj; /* latency adjustment in ms */ + struct clk_list_info clk_list; /* List of frequencies available in system */ + struct drm_device *ddev; + struct xrs_action_ops *actions; +}; + +/* + * xrsm_init() - Register resource solver. Resource solver client needs + * to call this function to register itself. + * + * @cfg: The system metrics for resource solver to use + * + * Return: A resource solver handle + * + * Note: We should only create one handle per AIE array to be managed. 
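Based on the structure definitions above and the xrsm_init()/xrs_allocate_resource()/xrs_release_resource() entry points declared below, a solver client would follow roughly the call order sketched here. This is illustrative only: the callback bodies, column layout and request values are hypothetical placeholders, and it omits the QoS and clock-table setup a real caller (the aie2 layer) provides.

static int my_load(void *cb_arg, struct xrs_action_load *action)
{
        /* Program the partition described by action->part; stubbed out here. */
        return 0;
}

static int my_unload(void *cb_arg)
{
        return 0;
}

static int my_set_dpm(struct drm_device *ddev, u32 level)
{
        return 0;
}

static struct xrs_action_ops my_ops = {
        .load = my_load,
        .unload = my_unload,
        .set_dft_dpm_level = my_set_dpm,
};

static int my_solver_client(struct drm_device *ddev)
{
        struct init_config cfg = {
                .total_col = 4,                 /* hypothetical array width */
                .ddev = ddev,
                .actions = &my_ops,
        };
        u32 start_cols[] = { 0, 1, 2 };         /* candidate start columns */
        struct alloc_requests req = {
                .rid = 1,
                .cdo = {
                        .start_cols = start_cols,
                        .cols_len = 3,
                        .ncols = 2,             /* partition width */
                },
        };
        void *xrs;
        int ret;

        xrs = xrsm_init(&cfg);                  /* one handle per AIE array */
        if (!xrs)
                return -ENOMEM;

        ret = xrs_allocate_resource(xrs, &req, NULL /* cb_arg */);
        if (ret)
                return ret;

        /* ... hardware context uses the partition ... */

        return xrs_release_resource(xrs, req.rid);
}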
+ */ +void *xrsm_init(struct init_config *cfg); + +/* + * xrs_allocate_resource() - Request to allocate resources for a given context + * and a partition metadata. (See struct part_meta) + * + * @hdl: Resource solver handle obtained from xrs_init() + * @req: Input to the Resource solver including request id + * and partition metadata. + * @cb_arg: callback argument pointer + * + * Return: 0 when successful. + * Or standard error number when failing + * + * Note: + * There is no lock mechanism inside resource solver. So it is + * the caller's responsibility to lock down XCLBINs and grab + * necessary lock. + */ +int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg); + +/* + * xrs_release_resource() - Request to free resources for a given context. + * + * @hdl: Resource solver handle obtained from xrs_init() + * @rid: The Request ID to identify the requesting context + */ +int xrs_release_resource(void *hdl, u64 rid); +#endif /* _AIE2_SOLVER_H */ diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c new file mode 100644 index 000000000000..d11b1c83d9c3 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/xarray.h> +#include <trace/events/amdxdna.h> + +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_pci_drv.h" + +#define MAX_HWCTX_ID 255 +#define MAX_ARG_COUNT 4095 + +struct amdxdna_fence { + struct dma_fence base; + spinlock_t lock; /* for base */ + struct amdxdna_hwctx *hwctx; +}; + +static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence) +{ + return KBUILD_MODNAME; +} + +static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence) +{ + struct amdxdna_fence *xdna_fence; + + xdna_fence = container_of(fence, struct amdxdna_fence, base); + + return xdna_fence->hwctx->name; +} + +static const struct dma_fence_ops fence_ops = { + .get_driver_name = amdxdna_fence_get_driver_name, + .get_timeline_name = amdxdna_fence_get_timeline_name, +}; + +static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + fence->hwctx = hwctx; + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0); + return &fence->base; +} + +void amdxdna_hwctx_suspend(struct amdxdna_client *client) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + mutex_lock(&client->hwctx_lock); + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) + xdna->dev_info->ops->hwctx_suspend(hwctx); + mutex_unlock(&client->hwctx_lock); +} + +void amdxdna_hwctx_resume(struct amdxdna_client *client) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + mutex_lock(&client->hwctx_lock); + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) + xdna->dev_info->ops->hwctx_resume(hwctx); + mutex_unlock(&client->hwctx_lock); +} + +static void amdxdna_hwctx_destroy_rcu(struct 
amdxdna_hwctx *hwctx, + struct srcu_struct *ss) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + synchronize_srcu(ss); + + /* At this point, user is not able to submit new commands */ + mutex_lock(&xdna->dev_lock); + xdna->dev_info->ops->hwctx_fini(hwctx); + mutex_unlock(&xdna->dev_lock); + + kfree(hwctx->name); + kfree(hwctx); +} + +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + u32 num_masks, count; + + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) + num_masks = 0; + else + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); + + if (size) { + count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); + if (unlikely(count <= num_masks)) { + *size = 0; + return NULL; + } + *size = (count - num_masks) * sizeof(u32); + } + return &cmd->data[num_masks]; +} + +int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + u32 num_masks, i; + u32 *cu_mask; + + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) + return -1; + + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); + cu_mask = cmd->data; + for (i = 0; i < num_masks; i++) { + if (cu_mask[i]) + return ffs(cu_mask[i]) - 1; + } + + return -1; +} + +/* + * This should be called in close() and remove(). DO NOT call in other syscalls. + * This guarantee that when hwctx and resources will be released, if user + * doesn't call amdxdna_drm_destroy_hwctx_ioctl. + */ +void amdxdna_hwctx_remove_all(struct amdxdna_client *client) +{ + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + + mutex_lock(&client->hwctx_lock); + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { + XDNA_DBG(client->xdna, "PID %d close HW context %d", + client->pid, hwctx->id); + xa_erase(&client->hwctx_xa, hwctx->id); + mutex_unlock(&client->hwctx_lock); + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); + mutex_lock(&client->hwctx_lock); + } + mutex_unlock(&client->hwctx_lock); +} + +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_create_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + int ret, idx; + + if (args->ext || args->ext_flags) + return -EINVAL; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; + + hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL); + if (!hwctx) { + ret = -ENOMEM; + goto exit; + } + + if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) { + XDNA_ERR(xdna, "Access QoS info failed"); + ret = -EFAULT; + goto free_hwctx; + } + + hwctx->client = client; + hwctx->fw_ctx_id = -1; + hwctx->num_tiles = args->num_tiles; + hwctx->mem_size = args->mem_size; + hwctx->max_opc = args->max_opc; + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx, + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID), + &client->next_hwctxid, GFP_KERNEL); + if (ret < 0) { + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret); + goto free_hwctx; + } + + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id); + if (!hwctx->name) { + ret = -ENOMEM; + goto rm_id; + } + + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->hwctx_init(hwctx); + if (ret) { + mutex_unlock(&xdna->dev_lock); + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret); + goto free_name; + } + args->handle = hwctx->id; + args->syncobj_handle = hwctx->syncobj_hdl; + mutex_unlock(&xdna->dev_lock); + + XDNA_DBG(xdna, "PID %d create HW 
context %d, ret %d", client->pid, args->handle, ret); + drm_dev_exit(idx); + return 0; + +free_name: + kfree(hwctx->name); +rm_id: + xa_erase(&client->hwctx_xa, hwctx->id); +free_hwctx: + kfree(hwctx); +exit: + drm_dev_exit(idx); + return ret; +} + +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_destroy_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + int ret = 0, idx; + + if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) + return -EINVAL; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; + + hwctx = xa_erase(&client->hwctx_xa, args->handle); + if (!hwctx) { + ret = -EINVAL; + XDNA_DBG(xdna, "PID %d HW context %d not exist", + client->pid, args->handle); + goto out; + } + + /* + * The pushed jobs are handled by DRM scheduler during destroy. + * SRCU to synchronize with exec command ioctls. + */ + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); + + XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); +out: + drm_dev_exit(idx); + return ret; +} + +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_config_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + int ret, idx; + u32 buf_size; + void *buf; + u64 val; + + if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) + return -EINVAL; + + if (!xdna->dev_info->ops->hwctx_config) + return -EOPNOTSUPP; + + val = args->param_val; + buf_size = args->param_val_size; + + switch (args->param_type) { + case DRM_AMDXDNA_HWCTX_CONFIG_CU: + /* For those types that param_val is pointer */ + if (buf_size > PAGE_SIZE) { + XDNA_ERR(xdna, "Config CU param buffer too large"); + return -E2BIG; + } + + /* Hwctx needs to keep buf */ + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) { + kfree(buf); + return -EFAULT; + } + + break; + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: + /* For those types that param_val is a value */ + buf = NULL; + buf_size = 0; + break; + default: + XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type); + return -EINVAL; + } + + mutex_lock(&xdna->dev_lock); + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, args->handle); + if (!hwctx) { + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); + ret = -EINVAL; + goto unlock_srcu; + } + + ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); + +unlock_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + mutex_unlock(&xdna->dev_lock); + kfree(buf); + return ret; +} + +static void +amdxdna_arg_bos_put(struct amdxdna_sched_job *job) +{ + int i; + + for (i = 0; i < job->bo_cnt; i++) { + if (!job->bos[i]) + break; + drm_gem_object_put(job->bos[i]); + } +} + +static int +amdxdna_arg_bos_lookup(struct amdxdna_client *client, + struct amdxdna_sched_job *job, + u32 *bo_hdls, u32 bo_cnt) +{ + struct drm_gem_object *gobj; + int i, ret; + + job->bo_cnt = bo_cnt; + for (i = 0; i < job->bo_cnt; i++) { + struct amdxdna_gem_obj *abo; + + gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]); + if (!gobj) { + ret = -ENOENT; + goto put_shmem_bo; + } + abo = to_xdna_obj(gobj); + + mutex_lock(&abo->lock); + if 
(abo->pinned) { + mutex_unlock(&abo->lock); + job->bos[i] = gobj; + continue; + } + + ret = amdxdna_gem_pin_nolock(abo); + if (ret) { + mutex_unlock(&abo->lock); + drm_gem_object_put(gobj); + goto put_shmem_bo; + } + abo->pinned = true; + mutex_unlock(&abo->lock); + + job->bos[i] = gobj; + } + + return 0; + +put_shmem_bo: + amdxdna_arg_bos_put(job); + return ret; +} + +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) +{ + trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); + amdxdna_arg_bos_put(job); + amdxdna_gem_put_obj(job->cmd_bo); +} + +int amdxdna_cmd_submit(struct amdxdna_client *client, + u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt, + u32 hwctx_hdl, u64 *seq) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_sched_job *job; + struct amdxdna_hwctx *hwctx; + int ret, idx; + + XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt); + job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL); + if (!job) + return -ENOMEM; + + if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) { + job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD); + if (!job->cmd_bo) { + XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl); + ret = -EINVAL; + goto free_job; + } + } else { + job->cmd_bo = NULL; + } + + ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt); + if (ret) { + XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret); + goto cmd_put; + } + + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, hwctx_hdl); + if (!hwctx) { + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", + client->pid, hwctx_hdl); + ret = -EINVAL; + goto unlock_srcu; + } + + if (hwctx->status != HWCTX_STAT_READY) { + XDNA_ERR(xdna, "HW Context is not ready"); + ret = -EINVAL; + goto unlock_srcu; + } + + job->hwctx = hwctx; + job->mm = current->mm; + + job->fence = amdxdna_fence_create(hwctx); + if (!job->fence) { + XDNA_ERR(xdna, "Failed to create fence"); + ret = -ENOMEM; + goto unlock_srcu; + } + kref_init(&job->refcnt); + + ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq); + if (ret) + goto put_fence; + + /* + * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated + * resource after synchronize_srcu(). The submitted jobs should be + * handled by the queue, for example DRM scheduler, in device layer. + * For here we can unlock SRCU. + */ + srcu_read_unlock(&client->hwctx_srcu, idx); + trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed"); + + return 0; + +put_fence: + dma_fence_put(job->fence); +unlock_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + amdxdna_arg_bos_put(job); +cmd_put: + amdxdna_gem_put_obj(job->cmd_bo); +free_job: + kfree(job); + return ret; +} + +/* + * The submit command ioctl submits a command to firmware. One firmware command + * may contain multiple command BOs for processing as a whole. + * The command sequence number is returned which can be used for wait command ioctl. + */ +static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, + struct amdxdna_drm_exec_cmd *args) +{ + struct amdxdna_dev *xdna = client->xdna; + u32 *arg_bo_hdls; + u32 cmd_bo_hdl; + int ret; + + if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) { + XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); + return -EINVAL; + } + + /* Only support single command for now. 
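The hwctx lookup in amdxdna_cmd_submit() and the teardown in amdxdna_hwctx_destroy_rcu() form a classic SRCU publish/unpublish pair: readers find the context in the XArray under srcu_read_lock(), while the destroy path removes the XArray entry first and then waits in synchronize_srcu() before freeing. A condensed sketch of the two sides; queue_job_on() and free_hwctx() are placeholders, not driver functions:

/* Reader side: the hwctx cannot be freed until srcu_read_unlock(). */
static int submit_sketch(struct amdxdna_client *client, u32 hwctx_hdl)
{
        struct amdxdna_hwctx *hwctx;
        int idx, ret = -EINVAL;

        idx = srcu_read_lock(&client->hwctx_srcu);
        hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
        if (hwctx)
                ret = queue_job_on(hwctx);
        srcu_read_unlock(&client->hwctx_srcu, idx);

        return ret;
}

/* Writer side: unpublish first, then wait out every reader above. */
static void destroy_sketch(struct amdxdna_client *client, u32 hwctx_hdl)
{
        struct amdxdna_hwctx *hwctx;

        hwctx = xa_erase(&client->hwctx_xa, hwctx_hdl);
        if (!hwctx)
                return;

        synchronize_srcu(&client->hwctx_srcu); /* no reader can still see it */
        free_hwctx(hwctx);
}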
*/ + if (args->cmd_count != 1) { + XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count); + return -EINVAL; + } + + cmd_bo_hdl = (u32)args->cmd_handles; + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); + if (!arg_bo_hdls) + return -ENOMEM; + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), + args->arg_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_cmd_bo_hdls; + } + + ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, + args->arg_count, args->hwctx, &args->seq); + if (ret) + XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret); + +free_cmd_bo_hdls: + kfree(arg_bo_hdls); + if (!ret) + XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq); + return ret; +} + +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_exec_cmd *args = data; + + if (args->ext || args->ext_flags) + return -EINVAL; + + switch (args->type) { + case AMDXDNA_CMD_SUBMIT_EXEC_BUF: + return amdxdna_drm_submit_execbuf(client, args); + } + + XDNA_ERR(client->xdna, "Invalid command type %d", args->type); + return -EINVAL; +} diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h new file mode 100644 index 000000000000..80b0304193ec --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_CTX_H_ +#define _AMDXDNA_CTX_H_ + +#include <linux/bitfield.h> + +#include "amdxdna_gem.h" + +struct amdxdna_hwctx_priv; + +enum ert_cmd_opcode { + ERT_START_CU = 0, + ERT_CMD_CHAIN = 19, + ERT_START_NPU = 20, +}; + +enum ert_cmd_state { + ERT_CMD_STATE_INVALID, + ERT_CMD_STATE_NEW, + ERT_CMD_STATE_QUEUED, + ERT_CMD_STATE_RUNNING, + ERT_CMD_STATE_COMPLETED, + ERT_CMD_STATE_ERROR, + ERT_CMD_STATE_ABORT, + ERT_CMD_STATE_SUBMITTED, + ERT_CMD_STATE_TIMEOUT, + ERT_CMD_STATE_NORESPONSE, +}; + +/* + * Interpretation of the beginning of data payload for ERT_START_NPU in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. + */ +struct amdxdna_cmd_start_npu { + u64 buffer; /* instruction buffer address */ + u32 buffer_size; /* size of buffer in bytes */ + u32 prop_count; /* properties count */ + u32 prop_args[]; /* properties and regular kernel arguments */ +}; + +/* + * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles. 
+ */ +struct amdxdna_cmd_chain { + u32 command_count; + u32 submit_index; + u32 error_index; + u32 reserved[3]; + u64 data[] __counted_by(command_count); +}; + +/* Exec buffer command header format */ +#define AMDXDNA_CMD_STATE GENMASK(3, 0) +#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) +#define AMDXDNA_CMD_COUNT GENMASK(22, 12) +#define AMDXDNA_CMD_OPCODE GENMASK(27, 23) +struct amdxdna_cmd { + u32 header; + u32 data[]; +}; + +struct amdxdna_hwctx { + struct amdxdna_client *client; + struct amdxdna_hwctx_priv *priv; + char *name; + + u32 id; + u32 max_opc; + u32 num_tiles; + u32 mem_size; + u32 fw_ctx_id; + u32 col_list_len; + u32 *col_list; + u32 start_col; + u32 num_col; +#define HWCTX_STAT_INIT 0 +#define HWCTX_STAT_READY 1 +#define HWCTX_STAT_STOP 2 + u32 status; + u32 old_status; + + struct amdxdna_qos_info qos; + struct amdxdna_hwctx_param_config_cu *cus; + u32 syncobj_hdl; +}; + +#define drm_job_to_xdna_job(j) \ + container_of(j, struct amdxdna_sched_job, base) + +struct amdxdna_sched_job { + struct drm_sched_job base; + struct kref refcnt; + struct amdxdna_hwctx *hwctx; + struct mm_struct *mm; + /* The fence to notice DRM scheduler that job is done by hardware */ + struct dma_fence *fence; + /* user can wait on this fence */ + struct dma_fence *out_fence; + bool job_done; + u64 seq; + struct amdxdna_gem_obj *cmd_bo; + size_t bo_cnt; + struct drm_gem_object *bos[] __counted_by(bo_cnt); +}; + +static inline u32 +amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + + return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header); +} + +static inline void +amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + + cmd->header &= ~AMDXDNA_CMD_STATE; + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s); +} + +static inline enum ert_cmd_state +amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + + return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header); +} + +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); +int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); + +static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx) +{ + return GENMASK(hwctx->start_col + hwctx->num_col - 1, + hwctx->start_col); +} + +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); +void amdxdna_hwctx_remove_all(struct amdxdna_client *client); +void amdxdna_hwctx_suspend(struct amdxdna_client *client); +void amdxdna_hwctx_resume(struct amdxdna_client *client); + +int amdxdna_cmd_submit(struct amdxdna_client *client, + u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt, + u32 hwctx_hdl, u64 *seq); + +int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl, + u64 seq, u32 timeout); + +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +#endif /* _AMDXDNA_CTX_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c new file mode 100644 index 000000000000..606433d73236 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
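As a concrete illustration of the exec-buffer header layout defined above, the snippet below packs a hypothetical ERT_START_CU header with one extra CU-mask word and 16 data words, then recovers the kernel-argument word count the same way amdxdna_cmd_get_payload() does. The values are made up for illustration.

#include <linux/bitfield.h>
#include <linux/types.h>

static u32 example_start_cu_header(void)
{
        u32 header = 0;

        header |= FIELD_PREP(AMDXDNA_CMD_STATE, ERT_CMD_STATE_NEW);
        header |= FIELD_PREP(AMDXDNA_CMD_OPCODE, ERT_START_CU);
        header |= FIELD_PREP(AMDXDNA_CMD_EXTRA_CU_MASK, 1);  /* 1 + 1 CU-mask words */
        header |= FIELD_PREP(AMDXDNA_CMD_COUNT, 16);         /* total words in data[] */

        return header;
}

static u32 example_payload_words(u32 header)
{
        u32 num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, header);
        u32 count = FIELD_GET(AMDXDNA_CMD_COUNT, header);

        return count - num_masks;       /* here: 16 - 2 = 14 argument words */
}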
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_cache.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/gpu_scheduler.h> +#include <linux/iosys-map.h> +#include <linux/vmalloc.h> + +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_pci_drv.h" + +#define XDNA_MAX_CMD_BO_SIZE SZ_32K + +static int +amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap) +{ + struct amdxdna_client *client = abo->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_mem *mem = &abo->mem; + u64 offset; + u32 align; + int ret; + + align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift); + ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node, + mem->size, align, + 0, DRM_MM_INSERT_BEST); + if (ret) { + XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret); + return ret; + } + + mem->dev_addr = abo->mm_node.start; + offset = mem->dev_addr - abo->dev_heap->mem.dev_addr; + mem->userptr = abo->dev_heap->mem.userptr + offset; + mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT]; + mem->nr_pages = mem->size >> PAGE_SHIFT; + + if (use_vmap) { + mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL); + if (!mem->kva) { + XDNA_ERR(xdna, "Failed to vmap"); + drm_mm_remove_node(&abo->mm_node); + return -EFAULT; + } + } + + return 0; +} + +static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) +{ + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva); + + XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); + if (abo->pinned) + amdxdna_gem_unpin(abo); + + if (abo->type == AMDXDNA_BO_DEV) { + mutex_lock(&abo->client->mm_lock); + drm_mm_remove_node(&abo->mm_node); + mutex_unlock(&abo->client->mm_lock); + + vunmap(abo->mem.kva); + drm_gem_object_put(to_gobj(abo->dev_heap)); + drm_gem_object_release(gobj); + mutex_destroy(&abo->lock); + kfree(abo); + return; + } + + if (abo->type == AMDXDNA_BO_DEV_HEAP) + drm_mm_takedown(&abo->mm); + + drm_gem_vunmap_unlocked(gobj, &map); + mutex_destroy(&abo->lock); + drm_gem_shmem_free(&abo->base); +} + +static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = { + .free = amdxdna_gem_obj_free, +}; + +static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj, + mem.notifier); + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + + XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d", + abo->mem.userptr, abo->mem.size, abo->type); + + if (!mmu_notifier_range_blockable(range)) + return false; + + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); + + return true; +} + +static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = { + .invalidate = amdxdna_hmm_invalidate, +}; + +static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + + if (!xdna->dev_info->ops->hmm_invalidate) + return; + + mmu_interval_notifier_remove(&abo->mem.notifier); + kvfree(abo->mem.pfns); + abo->mem.pfns = NULL; +} + +static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr, + size_t len) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + u32 nr_pages; + int ret; + + if (!xdna->dev_info->ops->hmm_invalidate) + return 0; + + if 
(abo->mem.pfns) + return -EEXIST; + + nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT; + abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns), + GFP_KERNEL); + if (!abo->mem.pfns) + return -ENOMEM; + + ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier, + current->mm, + addr, + len, + &amdxdna_hmm_ops); + if (ret) { + XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret); + kvfree(abo->mem.pfns); + } + abo->mem.userptr = addr; + + return ret; +} + +static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj, + struct vm_area_struct *vma) +{ + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + unsigned long num_pages; + int ret; + + ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size); + if (ret) + return ret; + + ret = drm_gem_shmem_mmap(&abo->base, vma); + if (ret) + goto hmm_unreg; + + num_pages = gobj->size >> PAGE_SHIFT; + /* Try to insert the pages */ + vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages); + if (ret) + XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret); + + return 0; + +hmm_unreg: + amdxdna_hmm_unregister(abo); + return ret; +} + +static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf) +{ + return drm_gem_shmem_vm_ops.fault(vmf); +} + +static void amdxdna_gem_vm_open(struct vm_area_struct *vma) +{ + drm_gem_shmem_vm_ops.open(vma); +} + +static void amdxdna_gem_vm_close(struct vm_area_struct *vma) +{ + struct drm_gem_object *gobj = vma->vm_private_data; + + amdxdna_hmm_unregister(to_xdna_obj(gobj)); + drm_gem_shmem_vm_ops.close(vma); +} + +static const struct vm_operations_struct amdxdna_gem_vm_ops = { + .fault = amdxdna_gem_vm_fault, + .open = amdxdna_gem_vm_open, + .close = amdxdna_gem_vm_close, +}; + +static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { + .free = amdxdna_gem_obj_free, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = amdxdna_gem_obj_mmap, + .vm_ops = &amdxdna_gem_vm_ops, +}; + +static struct amdxdna_gem_obj * +amdxdna_gem_create_obj(struct drm_device *dev, size_t size) +{ + struct amdxdna_gem_obj *abo; + + abo = kzalloc(sizeof(*abo), GFP_KERNEL); + if (!abo) + return ERR_PTR(-ENOMEM); + + abo->pinned = false; + abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; + mutex_init(&abo->lock); + + abo->mem.userptr = AMDXDNA_INVALID_ADDR; + abo->mem.dev_addr = AMDXDNA_INVALID_ADDR; + abo->mem.size = size; + + return abo; +} + +/* For drm_driver->gem_create_object callback */ +struct drm_gem_object * +amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size) +{ + struct amdxdna_gem_obj *abo; + + abo = amdxdna_gem_create_obj(dev, size); + if (IS_ERR(abo)) + return ERR_CAST(abo); + + to_gobj(abo)->funcs = &amdxdna_gem_shmem_funcs; + + return to_gobj(abo); +} + +static struct amdxdna_gem_obj * +amdxdna_drm_alloc_shmem(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct drm_gem_shmem_object *shmem; + struct amdxdna_gem_obj *abo; + + shmem = drm_gem_shmem_create(dev, args->size); + if (IS_ERR(shmem)) + return ERR_CAST(shmem); + + shmem->map_wc = false; + + abo = to_xdna_obj(&shmem->base); + abo->client = client; + abo->type = AMDXDNA_BO_SHMEM; + + return abo; +} + +static struct amdxdna_gem_obj * 
+amdxdna_drm_create_dev_heap(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct drm_gem_shmem_object *shmem; + struct amdxdna_gem_obj *abo; + int ret; + + if (args->size > xdna->dev_info->dev_mem_size) { + XDNA_DBG(xdna, "Invalid dev heap size 0x%llx, limit 0x%lx", + args->size, xdna->dev_info->dev_mem_size); + return ERR_PTR(-EINVAL); + } + + mutex_lock(&client->mm_lock); + if (client->dev_heap) { + XDNA_DBG(client->xdna, "dev heap is already created"); + ret = -EBUSY; + goto mm_unlock; + } + + shmem = drm_gem_shmem_create(dev, args->size); + if (IS_ERR(shmem)) { + ret = PTR_ERR(shmem); + goto mm_unlock; + } + + shmem->map_wc = false; + abo = to_xdna_obj(&shmem->base); + + abo->type = AMDXDNA_BO_DEV_HEAP; + abo->client = client; + abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base; + drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size); + + client->dev_heap = abo; + drm_gem_object_get(to_gobj(abo)); + mutex_unlock(&client->mm_lock); + + return abo; + +mm_unlock: + mutex_unlock(&client->mm_lock); + return ERR_PTR(ret); +} + +struct amdxdna_gem_obj * +amdxdna_drm_alloc_dev_bo(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp, bool use_vmap) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + size_t aligned_sz = PAGE_ALIGN(args->size); + struct amdxdna_gem_obj *abo, *heap; + int ret; + + mutex_lock(&client->mm_lock); + heap = client->dev_heap; + if (!heap) { + ret = -EINVAL; + goto mm_unlock; + } + + if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) { + XDNA_ERR(xdna, "Invalid dev heap userptr"); + ret = -EINVAL; + goto mm_unlock; + } + + if (args->size > heap->mem.size) { + XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx", + args->size, heap->mem.size); + ret = -EINVAL; + goto mm_unlock; + } + + abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz); + if (IS_ERR(abo)) { + ret = PTR_ERR(abo); + goto mm_unlock; + } + to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs; + abo->type = AMDXDNA_BO_DEV; + abo->client = client; + abo->dev_heap = heap; + ret = amdxdna_gem_insert_node_locked(abo, use_vmap); + if (ret) { + XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret); + goto mm_unlock; + } + + drm_gem_object_get(to_gobj(heap)); + drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz); + + mutex_unlock(&client->mm_lock); + return abo; + +mm_unlock: + mutex_unlock(&client->mm_lock); + return ERR_PTR(ret); +} + +static struct amdxdna_gem_obj * +amdxdna_drm_create_cmd_bo(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct drm_gem_shmem_object *shmem; + struct amdxdna_gem_obj *abo; + struct iosys_map map; + int ret; + + if (args->size > XDNA_MAX_CMD_BO_SIZE) { + XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size); + return ERR_PTR(-EINVAL); + } + + if (args->size < sizeof(struct amdxdna_cmd)) { + XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size); + return ERR_PTR(-EINVAL); + } + + shmem = drm_gem_shmem_create(dev, args->size); + if (IS_ERR(shmem)) + return ERR_CAST(shmem); + + shmem->map_wc = false; + abo = to_xdna_obj(&shmem->base); + + abo->type = AMDXDNA_BO_CMD; + abo->client = filp->driver_priv; + + ret = drm_gem_vmap_unlocked(to_gobj(abo), &map); + if (ret) { + XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); + 
goto release_obj; + } + abo->mem.kva = map.vaddr; + + return abo; + +release_obj: + drm_gem_shmem_free(shmem); + return ERR_PTR(ret); +} + +int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_create_bo *args = data; + struct amdxdna_gem_obj *abo; + int ret; + + if (args->flags || args->vaddr || !args->size) + return -EINVAL; + + XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx", + args->type, args->vaddr, args->size, args->flags); + switch (args->type) { + case AMDXDNA_BO_SHMEM: + abo = amdxdna_drm_alloc_shmem(dev, args, filp); + break; + case AMDXDNA_BO_DEV_HEAP: + abo = amdxdna_drm_create_dev_heap(dev, args, filp); + break; + case AMDXDNA_BO_DEV: + abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false); + break; + case AMDXDNA_BO_CMD: + abo = amdxdna_drm_create_cmd_bo(dev, args, filp); + break; + default: + return -EINVAL; + } + if (IS_ERR(abo)) + return PTR_ERR(abo); + + /* ready to publish object to userspace */ + ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle); + if (ret) { + XDNA_ERR(xdna, "Create handle failed"); + goto put_obj; + } + + XDNA_DBG(xdna, "BO hdl %d type %d userptr 0x%llx xdna_addr 0x%llx size 0x%lx", + args->handle, args->type, abo->mem.userptr, + abo->mem.dev_addr, abo->mem.size); +put_obj: + /* Dereference object reference. Handle holds it now. */ + drm_gem_object_put(to_gobj(abo)); + return ret; +} + +int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + int ret; + + switch (abo->type) { + case AMDXDNA_BO_SHMEM: + case AMDXDNA_BO_DEV_HEAP: + ret = drm_gem_shmem_pin(&abo->base); + break; + case AMDXDNA_BO_DEV: + ret = drm_gem_shmem_pin(&abo->dev_heap->base); + break; + default: + ret = -EOPNOTSUPP; + } + + XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret); + return ret; +} + +int amdxdna_gem_pin(struct amdxdna_gem_obj *abo) +{ + int ret; + + if (abo->type == AMDXDNA_BO_DEV) + abo = abo->dev_heap; + + mutex_lock(&abo->lock); + ret = amdxdna_gem_pin_nolock(abo); + mutex_unlock(&abo->lock); + + return ret; +} + +void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo) +{ + if (abo->type == AMDXDNA_BO_DEV) + abo = abo->dev_heap; + + mutex_lock(&abo->lock); + drm_gem_shmem_unpin(&abo->base); + mutex_unlock(&abo->lock); +} + +struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, + u32 bo_hdl, u8 bo_type) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + + gobj = drm_gem_object_lookup(client->filp, bo_hdl); + if (!gobj) { + XDNA_DBG(xdna, "Can not find bo %d", bo_hdl); + return NULL; + } + + abo = to_xdna_obj(gobj); + if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type) + return abo; + + drm_gem_object_put(gobj); + return NULL; +} + +int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_drm_get_bo_info *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + int ret = 0; + + if (args->ext || args->ext_flags || args->pad) + return -EINVAL; + + gobj = drm_gem_object_lookup(filp, args->handle); + if (!gobj) { + XDNA_DBG(xdna, "Lookup GEM object %d failed", args->handle); + return -ENOENT; + } + + abo = to_xdna_obj(gobj); + args->vaddr = abo->mem.userptr; + args->xdna_addr = abo->mem.dev_addr; + + if (abo->type != AMDXDNA_BO_DEV) + args->map_offset = 
drm_vma_node_offset_addr(&gobj->vma_node); + else + args->map_offset = AMDXDNA_INVALID_ADDR; + + XDNA_DBG(xdna, "BO hdl %d map_offset 0x%llx vaddr 0x%llx xdna_addr 0x%llx", + args->handle, args->map_offset, args->vaddr, args->xdna_addr); + + drm_gem_object_put(gobj); + return ret; +} + +/* + * The sync bo ioctl is to make sure the CPU cache is in sync with memory. + * This is required because NPU is not cache coherent device. CPU cache + * flushing/invalidation is expensive so it is best to handle this outside + * of the command submission path. This ioctl allows explicit cache + * flushing/invalidation outside of the critical path. + */ +int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, + void *data, struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_sync_bo *args = data; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + int ret; + + gobj = drm_gem_object_lookup(filp, args->handle); + if (!gobj) { + XDNA_ERR(xdna, "Lookup GEM object failed"); + return -ENOENT; + } + abo = to_xdna_obj(gobj); + + ret = amdxdna_gem_pin(abo); + if (ret) { + XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret); + goto put_obj; + } + + if (abo->type == AMDXDNA_BO_DEV) + drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages); + else + drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT); + + amdxdna_gem_unpin(abo); + + XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n", + args->handle, args->offset, args->size); + +put_obj: + drm_gem_object_put(gobj); + return ret; +} diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h new file mode 100644 index 000000000000..8ccc0375dd9d --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_gem.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
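For the AMDXDNA_BO_DEV path above, amdxdna_gem_insert_node_locked() keeps every view of a device BO at the same offset from the corresponding view of its heap. A small worked sketch of that relationship; the struct, function and numbers are hypothetical and not part of the driver:

#include <linux/mm.h>
#include <linux/types.h>

struct dev_bo_view {
        u64 dev_addr;           /* NPU-visible address (the drm_mm node start) */
        u64 userptr;            /* CPU mapping of the heap plus the offset */
        unsigned long page_idx; /* index into the heap's pages[] array */
};

static struct dev_bo_view dev_bo_views(u64 heap_dev_addr, u64 heap_userptr,
                                       u64 node_start)
{
        u64 offset = node_start - heap_dev_addr;

        return (struct dev_bo_view) {
                .dev_addr = node_start,
                .userptr = heap_userptr + offset,
                .page_idx = offset >> PAGE_SHIFT,
        };
}

For example, a heap at device address 0x4000000 mapped at user address 0x7f0000000000 with a node starting at 0x4010000 gives offset 0x10000, user pointer 0x7f0000010000 and page index 16 with 4 KiB pages.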
+ */ + +#ifndef _AMDXDNA_GEM_H_ +#define _AMDXDNA_GEM_H_ + +struct amdxdna_mem { + u64 userptr; + void *kva; + u64 dev_addr; + size_t size; + struct page **pages; + u32 nr_pages; + struct mmu_interval_notifier notifier; + unsigned long *pfns; + bool map_invalid; +}; + +struct amdxdna_gem_obj { + struct drm_gem_shmem_object base; + struct amdxdna_client *client; + u8 type; + bool pinned; + struct mutex lock; /* Protects: pinned */ + struct amdxdna_mem mem; + + /* Below members is uninitialized when needed */ + struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */ + struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */ + struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */ + u32 assigned_hwctx; +}; + +#define to_gobj(obj) (&(obj)->base.base) + +static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj) +{ + return container_of(gobj, struct amdxdna_gem_obj, base.base); +} + +struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, + u32 bo_hdl, u8 bo_type); +static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo) +{ + drm_gem_object_put(to_gobj(abo)); +} + +struct drm_gem_object * +amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size); +struct amdxdna_gem_obj * +amdxdna_drm_alloc_dev_bo(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp, bool use_vmap); + +int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo); +int amdxdna_gem_pin(struct amdxdna_gem_obj *abo); +void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo); + +int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +#endif /* _AMDXDNA_GEM_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c new file mode 100644 index 000000000000..e5301fac1397 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -0,0 +1,562 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/drm_device.h> +#include <drm/drm_managed.h> +#include <linux/bitfield.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/slab.h> +#include <linux/xarray.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/amdxdna.h> + +#include "amdxdna_mailbox.h" + +#define MB_ERR(chann, fmt, args...) \ +({ \ + typeof(chann) _chann = chann; \ + dev_err((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \ + (_chann)->msix_irq, ##args); \ +}) +#define MB_DBG(chann, fmt, args...) \ +({ \ + typeof(chann) _chann = chann; \ + dev_dbg((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \ + (_chann)->msix_irq, ##args); \ +}) +#define MB_WARN_ONCE(chann, fmt, args...) 
\ +({ \ + typeof(chann) _chann = chann; \ + dev_warn_once((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \ + (_chann)->msix_irq, ##args); \ +}) + +#define MAGIC_VAL 0x1D000000U +#define MAGIC_VAL_MASK 0xFF000000 +#define MAX_MSG_ID_ENTRIES 256 +#define MSG_RX_TIMER 200 /* milliseconds */ +#define MAILBOX_NAME "xdna_mailbox" + +enum channel_res_type { + CHAN_RES_X2I, + CHAN_RES_I2X, + CHAN_RES_NUM +}; + +struct mailbox { + struct device *dev; + struct xdna_mailbox_res res; +}; + +struct mailbox_channel { + struct mailbox *mb; + struct xdna_mailbox_chann_res res[CHAN_RES_NUM]; + int msix_irq; + u32 iohub_int_addr; + struct xarray chan_xa; + u32 next_msgid; + u32 x2i_tail; + + /* Received msg related fields */ + struct workqueue_struct *work_q; + struct work_struct rx_work; + u32 i2x_head; + bool bad_state; +}; + +#define MSG_BODY_SZ GENMASK(10, 0) +#define MSG_PROTO_VER GENMASK(23, 16) +struct xdna_msg_header { + __u32 total_size; + __u32 sz_ver; + __u32 id; + __u32 opcode; +} __packed; + +static_assert(sizeof(struct xdna_msg_header) == 16); + +struct mailbox_pkg { + struct xdna_msg_header header; + __u32 payload[]; +}; + +/* The protocol version. */ +#define MSG_PROTOCOL_VERSION 0x1 +/* The tombstone value. */ +#define TOMBSTONE 0xDEADFACE + +struct mailbox_msg { + void *handle; + int (*notify_cb)(void *handle, const u32 *data, size_t size); + size_t pkg_size; /* package size in bytes */ + struct mailbox_pkg pkg; +}; + +static void mailbox_reg_write(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 data) +{ + struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; + void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; + + writel(data, ringbuf_addr); +} + +static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg) +{ + struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; + void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; + + return readl(ringbuf_addr); +} + +static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val) +{ + struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; + void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; + int ret, value; + + /* Poll till value is not zero */ + ret = readx_poll_timeout(readl, ringbuf_addr, value, + value, 1 /* us */, 100); + if (ret < 0) + return ret; + + *val = value; + return 0; +} + +static inline void +mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val) +{ + mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_head_ptr_reg, headptr_val); + mb_chann->i2x_head = headptr_val; +} + +static inline void +mailbox_set_tailptr(struct mailbox_channel *mb_chann, u32 tailptr_val) +{ + mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_X2I].mb_tail_ptr_reg, tailptr_val); + mb_chann->x2i_tail = tailptr_val; +} + +static inline u32 +mailbox_get_headptr(struct mailbox_channel *mb_chann, enum channel_res_type type) +{ + return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_head_ptr_reg); +} + +static inline u32 +mailbox_get_tailptr(struct mailbox_channel *mb_chann, enum channel_res_type type) +{ + return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_tail_ptr_reg); +} + +static inline u32 +mailbox_get_ringbuf_size(struct mailbox_channel *mb_chann, enum channel_res_type type) +{ + return mb_chann->res[type].rb_size; +} + +static inline int mailbox_validate_msgid(int msg_id) +{ + return (msg_id & MAGIC_VAL_MASK) == MAGIC_VAL; +} + +static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) +{ + u32 msg_id; + int ret; + + ret = 
xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg, + XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1), + &mb_chann->next_msgid, GFP_NOWAIT); + if (ret < 0) + return ret; + + /* + * Add MAGIC_VAL to the higher bits. + */ + msg_id |= MAGIC_VAL; + return msg_id; +} + +static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id) +{ + msg_id &= ~MAGIC_VAL_MASK; + xa_erase_irq(&mb_chann->chan_xa, msg_id); +} + +static void mailbox_release_msg(struct mailbox_channel *mb_chann, + struct mailbox_msg *mb_msg) +{ + MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x", + mb_msg->pkg.header.id, mb_msg->pkg.header.opcode); + mb_msg->notify_cb(mb_msg->handle, NULL, 0); + kfree(mb_msg); +} + +static int +mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) +{ + void __iomem *write_addr; + u32 ringbuf_size; + u32 head, tail; + u32 start_addr; + u32 tmp_tail; + + head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I); + tail = mb_chann->x2i_tail; + ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I); + start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr; + tmp_tail = tail + mb_msg->pkg_size; + + if (tail < head && tmp_tail >= head) + goto no_space; + + if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) && + mb_msg->pkg_size >= head)) + goto no_space; + + if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) { + write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail; + writel(TOMBSTONE, write_addr); + + /* tombstone is set. Write from the start of the ringbuf */ + tail = 0; + } + + write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail; + memcpy_toio(write_addr, &mb_msg->pkg, mb_msg->pkg_size); + mailbox_set_tailptr(mb_chann, tail + mb_msg->pkg_size); + + trace_mbox_set_tail(MAILBOX_NAME, mb_chann->msix_irq, + mb_msg->pkg.header.opcode, + mb_msg->pkg.header.id); + + return 0; + +no_space: + return -ENOSPC; +} + +static int +mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header, + void *data) +{ + struct mailbox_msg *mb_msg; + int msg_id; + int ret; + + msg_id = header->id; + if (!mailbox_validate_msgid(msg_id)) { + MB_ERR(mb_chann, "Bad message ID 0x%x", msg_id); + return -EINVAL; + } + + msg_id &= ~MAGIC_VAL_MASK; + mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id); + if (!mb_msg) { + MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id); + return -EINVAL; + } + + MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x", + header->opcode, header->total_size, header->id); + ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size); + if (unlikely(ret)) + MB_ERR(mb_chann, "Message callback ret %d", ret); + + kfree(mb_msg); + return ret; +} + +static int mailbox_get_msg(struct mailbox_channel *mb_chann) +{ + struct xdna_msg_header header; + void __iomem *read_addr; + u32 msg_size, rest; + u32 ringbuf_size; + u32 head, tail; + u32 start_addr; + int ret; + + if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail)) + return -EINVAL; + head = mb_chann->i2x_head; + ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X); + start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr; + + if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) { + MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail); + return -EINVAL; + } + + /* ringbuf empty */ + if (head == tail) + return -ENOENT; + + if (head == ringbuf_size) + head = 0; + + /* Peek size of the message or TOMBSTONE */ + read_addr = mb_chann->mb->res.ringbuf_base + start_addr + head; + header.total_size = readl(read_addr); + /* size 
is TOMBSTONE, set next read from 0 */ + if (header.total_size == TOMBSTONE) { + if (head < tail) { + MB_WARN_ONCE(mb_chann, "Tombstone, head 0x%x tail 0x%x", + head, tail); + return -EINVAL; + } + mailbox_set_headptr(mb_chann, 0); + return 0; + } + + if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) { + MB_WARN_ONCE(mb_chann, "Invalid total size 0x%x", header.total_size); + return -EINVAL; + } + msg_size = sizeof(header) + header.total_size; + + if (msg_size > ringbuf_size - head || msg_size > tail - head) { + MB_WARN_ONCE(mb_chann, "Invalid message size %d, tail %d, head %d", + msg_size, tail, head); + return -EINVAL; + } + + rest = sizeof(header) - sizeof(u32); + read_addr += sizeof(u32); + memcpy_fromio((u32 *)&header + 1, read_addr, rest); + read_addr += rest; + + ret = mailbox_get_resp(mb_chann, &header, (u32 *)read_addr); + + mailbox_set_headptr(mb_chann, head + msg_size); + /* After update head, it can equal to ringbuf_size. This is expected. */ + trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq, + header.opcode, header.id); + + return ret; +} + +static irqreturn_t mailbox_irq_handler(int irq, void *p) +{ + struct mailbox_channel *mb_chann = p; + + trace_mbox_irq_handle(MAILBOX_NAME, irq); + /* Schedule a rx_work to call the callback functions */ + queue_work(mb_chann->work_q, &mb_chann->rx_work); + /* Clear IOHUB register */ + mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); + + return IRQ_HANDLED; +} + +static void mailbox_rx_worker(struct work_struct *rx_work) +{ + struct mailbox_channel *mb_chann; + int ret; + + mb_chann = container_of(rx_work, struct mailbox_channel, rx_work); + + if (READ_ONCE(mb_chann->bad_state)) { + MB_ERR(mb_chann, "Channel in bad state, work aborted"); + return; + } + + while (1) { + /* + * If return is 0, keep consuming next message, until there is + * no messages or an error happened. + */ + ret = mailbox_get_msg(mb_chann); + if (ret == -ENOENT) + break; + + /* Other error means device doesn't look good, disable irq. */ + if (unlikely(ret)) { + MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret); + WRITE_ONCE(mb_chann->bad_state, true); + disable_irq(mb_chann->msix_irq); + break; + } + } +} + +int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann, + const struct xdna_mailbox_msg *msg, u64 tx_timeout) +{ + struct xdna_msg_header *header; + struct mailbox_msg *mb_msg; + size_t pkg_size; + int ret; + + pkg_size = sizeof(*header) + msg->send_size; + if (pkg_size > mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I)) { + MB_ERR(mb_chann, "Message size larger than ringbuf size"); + return -EINVAL; + } + + if (unlikely(!IS_ALIGNED(msg->send_size, 4))) { + MB_ERR(mb_chann, "Message must be 4 bytes align"); + return -EINVAL; + } + + /* The fist word in payload can NOT be TOMBSTONE */ + if (unlikely(((u32 *)msg->send_data)[0] == TOMBSTONE)) { + MB_ERR(mb_chann, "Tomb stone in data"); + return -EINVAL; + } + + if (READ_ONCE(mb_chann->bad_state)) { + MB_ERR(mb_chann, "Channel in bad state"); + return -EPIPE; + } + + mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL); + if (!mb_msg) + return -ENOMEM; + + mb_msg->handle = msg->handle; + mb_msg->notify_cb = msg->notify_cb; + mb_msg->pkg_size = pkg_size; + + header = &mb_msg->pkg.header; + /* + * Hardware use total_size and size to split huge message. + * We do not support it here. Thus the values are the same. 
+ */ + header->total_size = msg->send_size; + header->sz_ver = FIELD_PREP(MSG_BODY_SZ, msg->send_size) | + FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION); + header->opcode = msg->opcode; + memcpy(mb_msg->pkg.payload, msg->send_data, msg->send_size); + + ret = mailbox_acquire_msgid(mb_chann, mb_msg); + if (unlikely(ret < 0)) { + MB_ERR(mb_chann, "mailbox_acquire_msgid failed"); + goto msg_id_failed; + } + header->id = ret; + + MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x", + header->opcode, header->total_size, header->id); + + ret = mailbox_send_msg(mb_chann, mb_msg); + if (ret) { + MB_DBG(mb_chann, "Error in mailbox send msg, ret %d", ret); + goto release_id; + } + + return 0; + +release_id: + mailbox_release_msgid(mb_chann, header->id); +msg_id_failed: + kfree(mb_msg); + return ret; +} + +struct mailbox_channel * +xdna_mailbox_create_channel(struct mailbox *mb, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 iohub_int_addr, + int mb_irq) +{ + struct mailbox_channel *mb_chann; + int ret; + + if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { + pr_err("Ring buf size must be power of 2"); + return NULL; + } + + mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL); + if (!mb_chann) + return NULL; + + mb_chann->mb = mb; + mb_chann->msix_irq = mb_irq; + mb_chann->iohub_int_addr = iohub_int_addr; + memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i)); + memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x)); + + xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); + mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I); + mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X); + + INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); + mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME); + if (!mb_chann->work_q) { + MB_ERR(mb_chann, "Create workqueue failed"); + goto free_and_out; + } + + /* Everything look good. Time to enable irq handler */ + ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann); + if (ret) { + MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret); + goto destroy_wq; + } + + mb_chann->bad_state = false; + + MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); + return mb_chann; + +destroy_wq: + destroy_workqueue(mb_chann->work_q); +free_and_out: + kfree(mb_chann); + return NULL; +} + +int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) +{ + struct mailbox_msg *mb_msg; + unsigned long msg_id; + + MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); + free_irq(mb_chann->msix_irq, mb_chann); + destroy_workqueue(mb_chann->work_q); + /* We can clean up and release resources */ + + xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) + mailbox_release_msg(mb_chann, mb_msg); + + xa_destroy(&mb_chann->chan_xa); + + MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq); + kfree(mb_chann); + return 0; +} + +void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann) +{ + /* Disable an irq and wait. This might sleep. 
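Putting the channel API together, a hypothetical client would create a channel, send a message and tear the channel down roughly as sketched below. The resource values, IRQ number, opcode and my_resp_cb() are invented; real users go through the aie2 layer and the helpers in amdxdna_mailbox_helper.c.

static int my_resp_cb(void *handle, const u32 *data, size_t size)
{
        /* A NULL @data means the message is dropped at channel teardown. */
        return 0;
}

static int my_mailbox_roundtrip(struct mailbox *mb,
                                const struct xdna_mailbox_chann_res *x2i,
                                const struct xdna_mailbox_chann_res *i2x,
                                u32 iohub_int_addr, int irq)
{
        u32 payload[2] = { 0x1, 0x2 };          /* size must be 4-byte aligned */
        struct xdna_mailbox_msg msg = {
                .opcode = 0x42,                 /* hypothetical firmware opcode */
                .notify_cb = my_resp_cb,
                .send_data = (u8 *)payload,
                .send_size = sizeof(payload),
        };
        struct mailbox_channel *chann;
        int ret;

        chann = xdna_mailbox_create_channel(mb, x2i, i2x, iohub_int_addr, irq);
        if (!chann)
                return -ENOMEM;

        ret = xdna_mailbox_send_msg(chann, &msg, 1000 /* ms */);

        xdna_mailbox_stop_channel(chann);       /* disable IRQ, flush rx work */
        xdna_mailbox_destroy_channel(chann);    /* drops unanswered messages */

        return ret;
}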
*/ + disable_irq(mb_chann->msix_irq); + + /* Cancel RX work and wait for it to finish */ + cancel_work_sync(&mb_chann->rx_work); + MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); +} + +struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, + const struct xdna_mailbox_res *res) +{ + struct mailbox *mb; + + mb = drmm_kzalloc(ddev, sizeof(*mb), GFP_KERNEL); + if (!mb) + return NULL; + mb->dev = ddev->dev; + + /* mailbox and ring buf base and size information */ + memcpy(&mb->res, res, sizeof(*res)); + + return mb; +} diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h new file mode 100644 index 000000000000..57954c303bdd --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_mailbox.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AIE2_MAILBOX_H_ +#define _AIE2_MAILBOX_H_ + +struct mailbox; +struct mailbox_channel; + +/* + * xdna_mailbox_msg - message struct + * + * @opcode: opcode for firmware + * @handle: handle used for the notify callback + * @notify_cb: callback function to notify the sender when there is response + * @send_data: pointing to sending data + * @send_size: size of the sending data + * + * The mailbox will split the sending data in to multiple firmware message if + * the size of the data is too big. This is transparent to the sender. The + * sender will receive one notification. + */ +struct xdna_mailbox_msg { + u32 opcode; + void *handle; + int (*notify_cb)(void *handle, const u32 *data, size_t size); + u8 *send_data; + size_t send_size; +}; + +/* + * xdna_mailbox_res - mailbox hardware resource + * + * @ringbuf_base: ring buffer base address + * @ringbuf_size: ring buffer size + * @mbox_base: mailbox base address + * @mbox_size: mailbox size + */ +struct xdna_mailbox_res { + void __iomem *ringbuf_base; + size_t ringbuf_size; + void __iomem *mbox_base; + size_t mbox_size; + const char *name; +}; + +/* + * xdna_mailbox_chann_res - resources + * + * @rb_start_addr: ring buffer start address + * @rb_size: ring buffer size + * @mb_head_ptr_reg: mailbox head pointer register + * @mb_tail_ptr_reg: mailbox tail pointer register + */ +struct xdna_mailbox_chann_res { + u32 rb_start_addr; + u32 rb_size; + u32 mb_head_ptr_reg; + u32 mb_tail_ptr_reg; +}; + +/* + * xdna_mailbox_create() -- create mailbox subsystem and initialize + * + * @ddev: device pointer + * @res: SRAM and mailbox resources + * + * Return: If success, return a handle of mailbox subsystem. + * Otherwise, return NULL pointer. + */ +struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, + const struct xdna_mailbox_res *res); + +/* + * xdna_mailbox_create_channel() -- Create a mailbox channel instance + * + * @mailbox: the handle return from xdna_mailbox_create() + * @x2i: host to firmware mailbox resources + * @i2x: firmware to host mailbox resources + * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt + * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index + * + * Return: If success, return a handle of mailbox channel. Otherwise, return NULL. + */ +struct mailbox_channel * +xdna_mailbox_create_channel(struct mailbox *mailbox, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 xdna_mailbox_intr_reg, + int mb_irq); + +/* + * xdna_mailbox_destroy_channel() -- destroy mailbox channel + * + * @mailbox_chann: the handle return from xdna_mailbox_create_channel() + * + * Return: if success, return 0. 
+ */
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_stop_channel() -- stop mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Disable the channel interrupt and cancel any in-flight RX work.
+ */
+void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_send_msg() -- Send a message
+ *
+ * @mailbox_chann: Mailbox channel handle
+ * @msg: message struct for message information
+ * @tx_timeout: the timeout value for sending the message in ms.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+int xdna_mailbox_send_msg(struct mailbox_channel *mailbox_chann,
+ const struct xdna_mailbox_msg *msg, u64 tx_timeout);
+
+#endif /* _AIE2_MAILBOX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
new file mode 100644
index 000000000000..5139a9c96a91
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/completion.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+int xdna_msg_cb(void *handle, const u32 *data, size_t size)
+{
+ struct xdna_notify *cb_arg = handle;
+ int ret;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(cb_arg->size != size)) {
+ cb_arg->error = -EINVAL;
+ goto out;
+ }
+
+ print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
+ 16, 4, data, cb_arg->size, true);
+ memcpy(cb_arg->data, data, cb_arg->size);
+out:
+ ret = cb_arg->error;
+ complete(&cb_arg->comp);
+ return ret;
+}
+
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg)
+{
+ struct xdna_notify *hdl = msg->handle;
+ int ret;
+
+ ret = xdna_mailbox_send_msg(chann, msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = wait_for_completion_timeout(&hdl->comp,
+ msecs_to_jiffies(RX_TIMEOUT));
+ if (!ret) {
+ XDNA_ERR(xdna, "Wait for completion timeout");
+ return -ETIME;
+ }
+
+ return hdl->error;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
new file mode 100644
index 000000000000..23e1317b79fe
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_MAILBOX_HELPER_H
+#define _AMDXDNA_MAILBOX_HELPER_H
+
+#define TX_TIMEOUT 2000 /* milliseconds */
+#define RX_TIMEOUT 5000 /* milliseconds */
+
+struct amdxdna_dev;
+
+struct xdna_notify {
+ struct completion comp;
+ u32 *data;
+ size_t size;
+ int error;
+};
+
+#define DECLARE_XDNA_MSG_COMMON(name, op, status) \
+ struct name##_req req = { 0 }; \
+ struct name##_resp resp = { status }; \
+ struct xdna_notify hdl = { \
+ .error = 0, \
+ .data = (u32 *)&resp, \
+ .size = sizeof(resp), \
+ .comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ }; \
+ struct xdna_mailbox_msg msg = { \
+ .send_data = (u8 *)&req, \
+ .send_size = sizeof(req), \
+ .handle = &hdl, \
+ .opcode = op, \
+ .notify_cb = xdna_msg_cb, \
+ }
+
+int xdna_msg_cb(void *handle, const u32 *data, size_t size);
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+ struct xdna_mailbox_msg *msg);
+
+#endif /* _AMDXDNA_MAILBOX_HELPER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
new file mode 100644
index 000000000000..f5b8497cf5ad
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+
+/*
+ * Bind the driver based on the (vendor_id, device_id) pair and later use the
+ * (device_id, rev_id) pair as a key to select the device. Devices with the
+ * same device_id present a very similar interface to the host driver.
+ */ +static const struct pci_device_id pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) }, + {0} +}; + +MODULE_DEVICE_TABLE(pci, pci_ids); + +static const struct amdxdna_device_id amdxdna_ids[] = { + { 0x1502, 0x0, &dev_npu1_info }, + { 0x17f0, 0x0, &dev_npu2_info }, + { 0x17f0, 0x10, &dev_npu4_info }, + { 0x17f0, 0x11, &dev_npu5_info }, + { 0x17f0, 0x20, &dev_npu6_info }, + {0} +}; + +static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + struct amdxdna_client *client; + int ret; + + ret = pm_runtime_resume_and_get(ddev->dev); + if (ret) { + XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret); + return ret; + } + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) { + ret = -ENOMEM; + goto put_rpm; + } + + client->pid = pid_nr(rcu_access_pointer(filp->pid)); + client->xdna = xdna; + + client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm); + if (IS_ERR(client->sva)) { + ret = PTR_ERR(client->sva); + XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret); + goto failed; + } + client->pasid = iommu_sva_get_pasid(client->sva); + if (client->pasid == IOMMU_PASID_INVALID) { + XDNA_ERR(xdna, "SVA get pasid failed"); + ret = -ENODEV; + goto unbind_sva; + } + mutex_init(&client->hwctx_lock); + init_srcu_struct(&client->hwctx_srcu); + xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC); + mutex_init(&client->mm_lock); + + mutex_lock(&xdna->dev_lock); + list_add_tail(&client->node, &xdna->client_list); + mutex_unlock(&xdna->dev_lock); + + filp->driver_priv = client; + client->filp = filp; + + XDNA_DBG(xdna, "pid %d opened", client->pid); + return 0; + +unbind_sva: + iommu_sva_unbind_device(client->sva); +failed: + kfree(client); +put_rpm: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return ret; +} + +static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + + XDNA_DBG(xdna, "closing pid %d", client->pid); + + xa_destroy(&client->hwctx_xa); + cleanup_srcu_struct(&client->hwctx_srcu); + mutex_destroy(&client->hwctx_lock); + mutex_destroy(&client->mm_lock); + if (client->dev_heap) + drm_gem_object_put(to_gobj(client->dev_heap)); + + iommu_sva_unbind_device(client->sva); + + XDNA_DBG(xdna, "pid %d closed", client->pid); + kfree(client); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); +} + +static int amdxdna_flush(struct file *f, fl_owner_t id) +{ + struct drm_file *filp = f->private_data; + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = client->xdna; + int idx; + + XDNA_DBG(xdna, "PID %d flushing...", client->pid); + if (!drm_dev_enter(&xdna->ddev, &idx)) + return 0; + + mutex_lock(&xdna->dev_lock); + list_del_init(&client->node); + mutex_unlock(&xdna->dev_lock); + amdxdna_hwctx_remove_all(client); + + drm_dev_exit(idx); + return 0; +} + +static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_get_info *args = data; + int ret; + + if (!xdna->dev_info->ops->get_aie_info) + return -EOPNOTSUPP; + + XDNA_DBG(xdna, "Request parameter %u", args->param); + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->get_aie_info(client, args); + mutex_unlock(&xdna->dev_lock); + 
return ret; +} + +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_set_state *args = data; + int ret; + + if (!xdna->dev_info->ops->set_aie_state) + return -EOPNOTSUPP; + + XDNA_DBG(xdna, "Request parameter %u", args->param); + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->set_aie_state(client, args); + mutex_unlock(&xdna->dev_lock); + + return ret; +} + +static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { + /* Context */ + DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0), + /* BO */ + DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0), + /* Execution */ + DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), + /* AIE hardware */ + DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY), +}; + +static const struct file_operations amdxdna_fops = { + .owner = THIS_MODULE, + .open = accel_open, + .release = drm_release, + .flush = amdxdna_flush, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .poll = drm_poll, + .read = drm_read, + .llseek = noop_llseek, + .mmap = drm_gem_mmap, + .fop_flags = FOP_UNSIGNED_OFFSET, +}; + +const struct drm_driver amdxdna_drm_drv = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL | + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, + .fops = &amdxdna_fops, + .name = "amdxdna_accel_driver", + .desc = "AMD XDNA DRM implementation", + .open = amdxdna_drm_open, + .postclose = amdxdna_drm_close, + .ioctls = amdxdna_drm_ioctls, + .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls), + + .gem_create_object = amdxdna_gem_create_object_cb, +}; + +static const struct amdxdna_dev_info * +amdxdna_get_dev_info(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(amdxdna_ids); i++) { + if (pdev->device == amdxdna_ids[i].device && + pdev->revision == amdxdna_ids[i].revision) + return amdxdna_ids[i].dev_info; + } + return NULL; +} + +static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct amdxdna_dev *xdna; + int ret; + + xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev); + if (IS_ERR(xdna)) + return PTR_ERR(xdna); + + xdna->dev_info = amdxdna_get_dev_info(pdev); + if (!xdna->dev_info) + return -ENODEV; + + drmm_mutex_init(&xdna->ddev, &xdna->dev_lock); + init_rwsem(&xdna->notifier_lock); + INIT_LIST_HEAD(&xdna->client_list); + pci_set_drvdata(pdev, xdna); + + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&xdna->notifier_lock); + fs_reclaim_release(GFP_KERNEL); + } + + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->init(xdna); + mutex_unlock(&xdna->dev_lock); + if (ret) { + XDNA_ERR(xdna, "Hardware init failed, ret %d", ret); + return ret; + } + + ret = amdxdna_sysfs_init(xdna); + if (ret) { + XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret); + goto failed_dev_fini; + } + + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(dev); + 
pm_runtime_allow(dev); + + ret = drm_dev_register(&xdna->ddev, 0); + if (ret) { + XDNA_ERR(xdna, "DRM register failed, ret %d", ret); + pm_runtime_forbid(dev); + goto failed_sysfs_fini; + } + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + return 0; + +failed_sysfs_fini: + amdxdna_sysfs_fini(xdna); +failed_dev_fini: + mutex_lock(&xdna->dev_lock); + xdna->dev_info->ops->fini(xdna); + mutex_unlock(&xdna->dev_lock); + return ret; +} + +static void amdxdna_remove(struct pci_dev *pdev) +{ + struct amdxdna_dev *xdna = pci_get_drvdata(pdev); + struct device *dev = &pdev->dev; + struct amdxdna_client *client; + + pm_runtime_get_noresume(dev); + pm_runtime_forbid(dev); + + drm_dev_unplug(&xdna->ddev); + amdxdna_sysfs_fini(xdna); + + mutex_lock(&xdna->dev_lock); + client = list_first_entry_or_null(&xdna->client_list, + struct amdxdna_client, node); + while (client) { + list_del_init(&client->node); + mutex_unlock(&xdna->dev_lock); + + amdxdna_hwctx_remove_all(client); + + mutex_lock(&xdna->dev_lock); + client = list_first_entry_or_null(&xdna->client_list, + struct amdxdna_client, node); + } + + xdna->dev_info->ops->fini(xdna); + mutex_unlock(&xdna->dev_lock); +} + +static int amdxdna_dev_suspend_nolock(struct amdxdna_dev *xdna) +{ + if (xdna->dev_info->ops->suspend) + xdna->dev_info->ops->suspend(xdna); + + return 0; +} + +static int amdxdna_dev_resume_nolock(struct amdxdna_dev *xdna) +{ + if (xdna->dev_info->ops->resume) + return xdna->dev_info->ops->resume(xdna); + + return 0; +} + +static int amdxdna_pmops_suspend(struct device *dev) +{ + struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev)); + struct amdxdna_client *client; + + mutex_lock(&xdna->dev_lock); + list_for_each_entry(client, &xdna->client_list, node) + amdxdna_hwctx_suspend(client); + + amdxdna_dev_suspend_nolock(xdna); + mutex_unlock(&xdna->dev_lock); + + return 0; +} + +static int amdxdna_pmops_resume(struct device *dev) +{ + struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev)); + struct amdxdna_client *client; + int ret; + + XDNA_INFO(xdna, "firmware resuming..."); + mutex_lock(&xdna->dev_lock); + ret = amdxdna_dev_resume_nolock(xdna); + if (ret) { + XDNA_ERR(xdna, "resume NPU firmware failed"); + mutex_unlock(&xdna->dev_lock); + return ret; + } + + XDNA_INFO(xdna, "hardware context resuming..."); + list_for_each_entry(client, &xdna->client_list, node) + amdxdna_hwctx_resume(client); + mutex_unlock(&xdna->dev_lock); + + return 0; +} + +static int amdxdna_rpmops_suspend(struct device *dev) +{ + struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev)); + int ret; + + mutex_lock(&xdna->dev_lock); + ret = amdxdna_dev_suspend_nolock(xdna); + mutex_unlock(&xdna->dev_lock); + + XDNA_DBG(xdna, "Runtime suspend done ret: %d", ret); + return ret; +} + +static int amdxdna_rpmops_resume(struct device *dev) +{ + struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev)); + int ret; + + mutex_lock(&xdna->dev_lock); + ret = amdxdna_dev_resume_nolock(xdna); + mutex_unlock(&xdna->dev_lock); + + XDNA_DBG(xdna, "Runtime resume done ret: %d", ret); + return ret; +} + +static const struct dev_pm_ops amdxdna_pm_ops = { + SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume) + RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL) +}; + +static struct pci_driver amdxdna_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = pci_ids, + .probe = amdxdna_probe, + .remove = amdxdna_remove, + .driver.pm = &amdxdna_pm_ops, +}; + +module_pci_driver(amdxdna_pci_driver); + +MODULE_LICENSE("GPL"); 
+MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>"); +MODULE_DESCRIPTION("amdxdna driver"); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h new file mode 100644 index 000000000000..37848a8d8031 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_PCI_DRV_H_ +#define _AMDXDNA_PCI_DRV_H_ + +#include <linux/xarray.h> + +#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) +#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args) +#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args) +#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args) +#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args) + +#define XDNA_MBZ_DBG(xdna, ptr, sz) \ + ({ \ + int __i; \ + int __ret = 0; \ + u8 *__ptr = (u8 *)(ptr); \ + for (__i = 0; __i < (sz); __i++) { \ + if (__ptr[__i]) { \ + XDNA_DBG(xdna, "MBZ check failed"); \ + __ret = -EINVAL; \ + break; \ + } \ + } \ + __ret; \ + }) + +#define to_xdna_dev(drm_dev) \ + ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev)) + +extern const struct drm_driver amdxdna_drm_drv; + +struct amdxdna_client; +struct amdxdna_dev; +struct amdxdna_drm_get_info; +struct amdxdna_drm_set_state; +struct amdxdna_gem_obj; +struct amdxdna_hwctx; +struct amdxdna_sched_job; + +/* + * struct amdxdna_dev_ops - Device hardware operation callbacks + */ +struct amdxdna_dev_ops { + int (*init)(struct amdxdna_dev *xdna); + void (*fini)(struct amdxdna_dev *xdna); + int (*resume)(struct amdxdna_dev *xdna); + void (*suspend)(struct amdxdna_dev *xdna); + int (*hwctx_init)(struct amdxdna_hwctx *hwctx); + void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); + int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); + void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); + void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx); + void (*hwctx_resume)(struct amdxdna_hwctx *hwctx); + int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); + int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args); + int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args); +}; + +/* + * struct amdxdna_dev_info - Device hardware information + * Record device static information, like reg, mbox, PSP, SMU bar index + */ +struct amdxdna_dev_info { + int reg_bar; + int mbox_bar; + int sram_bar; + int psp_bar; + int smu_bar; + int device_type; + int first_col; + u32 dev_mem_buf_shift; + u64 dev_mem_base; + size_t dev_mem_size; + char *vbnv; + const struct amdxdna_dev_priv *dev_priv; + const struct amdxdna_dev_ops *ops; +}; + +struct amdxdna_fw_ver { + u32 major; + u32 minor; + u32 sub; + u32 build; +}; + +struct amdxdna_dev { + struct drm_device ddev; + struct amdxdna_dev_hdl *dev_handle; + const struct amdxdna_dev_info *dev_info; + void *xrs_hdl; + + struct mutex dev_lock; /* per device lock */ + struct list_head client_list; + struct amdxdna_fw_ver fw_ver; + struct rw_semaphore notifier_lock; /* for mmu notifier*/ +}; + +/* + * struct amdxdna_device_id - PCI device info + */ +struct amdxdna_device_id { + unsigned short device; + u8 revision; + const struct amdxdna_dev_info *dev_info; +}; + +/* + * struct amdxdna_client - amdxdna client + * A 
per fd data structure for managing context and other user process stuffs. + */ +struct amdxdna_client { + struct list_head node; + pid_t pid; + struct mutex hwctx_lock; /* protect hwctx */ + /* do NOT wait this srcu when hwctx_lock is held */ + struct srcu_struct hwctx_srcu; + struct xarray hwctx_xa; + u32 next_hwctxid; + struct amdxdna_dev *xdna; + struct drm_file *filp; + + struct mutex mm_lock; /* protect memory related */ + struct amdxdna_gem_obj *dev_heap; + + struct iommu_sva *sva; + int pasid; +}; + +#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \ + xa_for_each(&(client)->hwctx_xa, hwctx_id, entry) + +/* Add device info below */ +extern const struct amdxdna_dev_info dev_npu1_info; +extern const struct amdxdna_dev_info dev_npu2_info; +extern const struct amdxdna_dev_info dev_npu4_info; +extern const struct amdxdna_dev_info dev_npu5_info; +extern const struct amdxdna_dev_info dev_npu6_info; + +int amdxdna_sysfs_init(struct amdxdna_dev *xdna); +void amdxdna_sysfs_fini(struct amdxdna_dev *xdna); + +#endif /* _AMDXDNA_PCI_DRV_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c new file mode 100644 index 000000000000..f27e4ee960a0 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_sysfs.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/types.h> + +#include "amdxdna_gem.h" +#include "amdxdna_pci_drv.h" + +static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct amdxdna_dev *xdna = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", xdna->dev_info->vbnv); +} +static DEVICE_ATTR_RO(vbnv); + +static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct amdxdna_dev *xdna = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", xdna->dev_info->device_type); +} +static DEVICE_ATTR_RO(device_type); + +static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct amdxdna_dev *xdna = dev_get_drvdata(dev); + + return sprintf(buf, "%d.%d.%d.%d\n", xdna->fw_ver.major, + xdna->fw_ver.minor, xdna->fw_ver.sub, + xdna->fw_ver.build); +} +static DEVICE_ATTR_RO(fw_version); + +static struct attribute *amdxdna_attrs[] = { + &dev_attr_device_type.attr, + &dev_attr_vbnv.attr, + &dev_attr_fw_version.attr, + NULL, +}; + +static struct attribute_group amdxdna_attr_group = { + .attrs = amdxdna_attrs, +}; + +int amdxdna_sysfs_init(struct amdxdna_dev *xdna) +{ + int ret; + + ret = sysfs_create_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group); + if (ret) + XDNA_ERR(xdna, "Create attr group failed"); + + return ret; +} + +void amdxdna_sysfs_fini(struct amdxdna_dev *xdna) +{ + sysfs_remove_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group); +} diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c new file mode 100644 index 000000000000..e4f6dac7d00f --- /dev/null +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* Address definition from NPU1 docs */ +#define MPNPU_PUB_SEC_INTR 0x3010090 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010094 +#define MPNPU_PUB_SCRATCH2 0x30100A0 +#define MPNPU_PUB_SCRATCH3 0x30100A4 +#define MPNPU_PUB_SCRATCH4 0x30100A8 +#define MPNPU_PUB_SCRATCH5 0x30100AC +#define MPNPU_PUB_SCRATCH6 0x30100B0 +#define MPNPU_PUB_SCRATCH7 0x30100B4 +#define MPNPU_PUB_SCRATCH9 0x30100BC + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x30A0000 +#define MPNPU_SRAM_X2I_MAILBOX_1 0x30A2000 +#define MPNPU_SRAM_I2X_MAILBOX_15 0x30BF000 + +#define MPNPU_APERTURE0_BASE 0x3000000 +#define MPNPU_APERTURE1_BASE 0x3080000 +#define MPNPU_APERTURE2_BASE 0x30C0000 + +/* PCIe BAR Index for NPU1 */ +#define NPU1_REG_BAR_INDEX 0 +#define NPU1_MBOX_BAR_INDEX 4 +#define NPU1_PSP_BAR_INDEX 0 +#define NPU1_SMU_BAR_INDEX 0 +#define NPU1_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU1_REG_BAR_BASE MPNPU_APERTURE0_BASE +#define NPU1_MBOX_BAR_BASE MPNPU_APERTURE2_BASE +#define NPU1_PSP_BAR_BASE MPNPU_APERTURE0_BASE +#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE +#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE + +const struct rt_config npu1_default_rt_cfg[] = { + { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 0 }, +}; + +const struct dpm_clk_freq npu1_dpm_clk_table[] = { + {400, 800}, + {600, 1024}, + {600, 1024}, + {600, 1024}, + {600, 1024}, + {720, 1309}, + {720, 1309}, + {847, 1600}, + { 0 } +}; + +static const struct amdxdna_dev_priv npu1_dev_priv = { + .fw_path = "amdnpu/1502_00/npu.sbin", + .protocol_major = 0x5, + .protocol_minor = 0x7, + .rt_config = npu1_default_rt_cfg, + .dpm_clk_tbl = npu1_dpm_clk_table, + .col_align = COL_ALIGN_NONE, + .mbox_dev_addr = NPU1_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU1_SRAM_BAR_BASE, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU1_PSP, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU1_PSP, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), + }, + .hw_ops = { + .set_dpm = npu1_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu1_info = { + .reg_bar = NPU1_REG_BAR_INDEX, + .mbox_bar = NPU1_MBOX_BAR_INDEX, + .sram_bar = NPU1_SRAM_BAR_INDEX, + .psp_bar = NPU1_PSP_BAR_INDEX, + .smu_bar = NPU1_SMU_BAR_INDEX, + .first_col = 1, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu1", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu1_dev_priv, + 
.ops = &aie2_ops, +}; diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c new file mode 100644 index 000000000000..a081cac75ee0 --- /dev/null +++ b/drivers/accel/amdxdna/npu2_regs.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU2 */ +#define NPU2_REG_BAR_INDEX 0 +#define NPU2_MBOX_BAR_INDEX 0 +#define NPU2_PSP_BAR_INDEX 4 +#define NPU2_SMU_BAR_INDEX 5 +#define NPU2_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU2_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU2_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU2_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +static const struct amdxdna_dev_priv npu2_dev_priv = { + .fw_path = "amdnpu/17f0_00/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 0x6, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU2_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU2_SRAM_BAR_BASE, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, 
MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu2_info = { + .reg_bar = NPU2_REG_BAR_INDEX, + .mbox_bar = NPU2_MBOX_BAR_INDEX, + .sram_bar = NPU2_SRAM_BAR_INDEX, + .psp_bar = NPU2_PSP_BAR_INDEX, + .smu_bar = NPU2_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu2", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu2_dev_priv, + .ops = &aie2_ops, /* NPU2 can share NPU1's callback */ +}; diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c new file mode 100644 index 000000000000..9f2e33182ec6 --- /dev/null +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU4 */ +#define NPU4_REG_BAR_INDEX 0 +#define NPU4_MBOX_BAR_INDEX 0 +#define NPU4_PSP_BAR_INDEX 4 +#define NPU4_SMU_BAR_INDEX 5 +#define NPU4_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU4_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU4_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU4_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +const struct rt_config npu4_default_rt_cfg[] = { + { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 0 }, +}; + +const struct dpm_clk_freq npu4_dpm_clk_table[] = { + {396, 792}, + {600, 1056}, + {792, 1152}, + {975, 1267}, + {975, 1267}, + {1056, 1408}, + {1152, 1584}, + {1267, 1800}, + { 0 } +}; + +static const struct amdxdna_dev_priv npu4_dev_priv = { + .fw_path = "amdnpu/17f0_10/npu.sbin", + 
.protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU4_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU4_SRAM_BAR_BASE, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu4_info = { + .reg_bar = NPU4_REG_BAR_INDEX, + .mbox_bar = NPU4_MBOX_BAR_INDEX, + .sram_bar = NPU4_SRAM_BAR_INDEX, + .psp_bar = NPU4_PSP_BAR_INDEX, + .smu_bar = NPU4_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu4", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu4_dev_priv, + .ops = &aie2_ops, /* NPU4 can share NPU1's callback */ +}; diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c new file mode 100644 index 000000000000..5f1cf83461c4 --- /dev/null +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU5 */ +#define NPU5_REG_BAR_INDEX 0 +#define NPU5_MBOX_BAR_INDEX 0 +#define NPU5_PSP_BAR_INDEX 4 +#define NPU5_SMU_BAR_INDEX 5 +#define NPU5_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU5_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU5_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU5_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +static const struct amdxdna_dev_priv npu5_dev_priv = { + .fw_path = "amdnpu/17f0_11/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU5_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU5_SRAM_BAR_BASE, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU5_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU5_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU5_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU5_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu5_info = { + .reg_bar = NPU5_REG_BAR_INDEX, + .mbox_bar = NPU5_MBOX_BAR_INDEX, + .sram_bar = NPU5_SRAM_BAR_INDEX, + .psp_bar = NPU5_PSP_BAR_INDEX, + .smu_bar = 
NPU5_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu5", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu5_dev_priv, + .ops = &aie2_ops, +}; diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c new file mode 100644 index 000000000000..94a7005685a7 --- /dev/null +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU6 */ +#define NPU6_REG_BAR_INDEX 0 +#define NPU6_MBOX_BAR_INDEX 0 +#define NPU6_PSP_BAR_INDEX 4 +#define NPU6_SMU_BAR_INDEX 5 +#define NPU6_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +static const struct amdxdna_dev_priv npu6_dev_priv = { + .fw_path = "amdnpu/17f0_10/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU6_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU6_SRAM_BAR_BASE, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off 
= { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, + +}; + +const struct amdxdna_dev_info dev_npu6_info = { + .reg_bar = NPU6_REG_BAR_INDEX, + .mbox_bar = NPU6_MBOX_BAR_INDEX, + .sram_bar = NPU6_SRAM_BAR_INDEX, + .psp_bar = NPU6_PSP_BAR_INDEX, + .smu_bar = NPU6_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu6", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu6_dev_priv, + .ops = &aie2_ops, +}; diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c index 24cac4c0274b..aa826033b0ce 100644 --- a/drivers/accel/drm_accel.c +++ b/drivers/accel/drm_accel.c @@ -8,7 +8,7 @@ #include <linux/debugfs.h> #include <linux/device.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <drm/drm_accel.h> #include <drm/drm_auth.h> @@ -18,12 +18,11 @@ #include <drm/drm_ioctl.h> #include <drm/drm_print.h> -static DEFINE_SPINLOCK(accel_minor_lock); -static struct idr accel_minors_idr; +DEFINE_XARRAY_ALLOC(accel_minors_xa); static struct dentry *accel_debugfs_root; -static struct device_type accel_sysfs_device_minor = { +static const struct device_type accel_sysfs_device_minor = { .name = "accel_minor" }; @@ -118,99 +117,6 @@ void accel_set_device_instance_params(struct device *kdev, int index) } /** - * accel_minor_alloc() - Allocates a new accel minor - * - * This function access the accel minors idr and allocates from it - * a new id to represent a new accel minor - * - * Return: A new id on success or error code in case idr_alloc failed - */ -int accel_minor_alloc(void) -{ - unsigned long flags; - int r; - - spin_lock_irqsave(&accel_minor_lock, flags); - r = idr_alloc(&accel_minors_idr, NULL, 0, ACCEL_MAX_MINORS, GFP_NOWAIT); - spin_unlock_irqrestore(&accel_minor_lock, flags); - - return r; -} - -/** - * accel_minor_remove() - Remove an accel minor - * @index: The minor id to remove. - * - * This function access the accel minors idr and removes from - * it the member with the id that is passed to this function. - */ -void accel_minor_remove(int index) -{ - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - idr_remove(&accel_minors_idr, index); - spin_unlock_irqrestore(&accel_minor_lock, flags); -} - -/** - * accel_minor_replace() - Replace minor pointer in accel minors idr. - * @minor: Pointer to the new minor. - * @index: The minor id to replace. - * - * This function access the accel minors idr structure and replaces the pointer - * that is associated with an existing id. Because the minor pointer can be - * NULL, we need to explicitly pass the index. - * - * Return: 0 for success, negative value for error - */ -void accel_minor_replace(struct drm_minor *minor, int index) -{ - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - idr_replace(&accel_minors_idr, minor, index); - spin_unlock_irqrestore(&accel_minor_lock, flags); -} - -/* - * Looks up the given minor-ID and returns the respective DRM-minor object. The - * refence-count of the underlying device is increased so you must release this - * object with accel_minor_release(). - * - * The object can be only a drm_minor that represents an accel device. 
- * - * As long as you hold this minor, it is guaranteed that the object and the - * minor->dev pointer will stay valid! However, the device may get unplugged and - * unregistered while you hold the minor. - */ -static struct drm_minor *accel_minor_acquire(unsigned int minor_id) -{ - struct drm_minor *minor; - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - minor = idr_find(&accel_minors_idr, minor_id); - if (minor) - drm_dev_get(minor->dev); - spin_unlock_irqrestore(&accel_minor_lock, flags); - - if (!minor) { - return ERR_PTR(-ENODEV); - } else if (drm_dev_is_unplugged(minor->dev)) { - drm_dev_put(minor->dev); - return ERR_PTR(-ENODEV); - } - - return minor; -} - -static void accel_minor_release(struct drm_minor *minor) -{ - drm_dev_put(minor->dev); -} - -/** * accel_open - open method for ACCEL file * @inode: device inode * @filp: file pointer. @@ -227,7 +133,7 @@ int accel_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int retcode; - minor = accel_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&accel_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); @@ -246,7 +152,7 @@ int accel_open(struct inode *inode, struct file *filp) err_undo: atomic_dec(&dev->open_count); - accel_minor_release(minor); + drm_minor_release(minor); return retcode; } EXPORT_SYMBOL_GPL(accel_open); @@ -257,7 +163,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int err; - minor = accel_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&accel_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); @@ -274,7 +180,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp) err = 0; out: - accel_minor_release(minor); + drm_minor_release(minor); return err; } @@ -290,15 +196,13 @@ void accel_core_exit(void) unregister_chrdev(ACCEL_MAJOR, "accel"); debugfs_remove(accel_debugfs_root); accel_sysfs_destroy(); - idr_destroy(&accel_minors_idr); + WARN_ON(!xa_empty(&accel_minors_xa)); } int __init accel_core_init(void) { int ret; - idr_init(&accel_minors_idr); - ret = accel_sysfs_init(); if (ret < 0) { DRM_ERROR("Cannot create ACCEL class: %d\n", ret); diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 3aa6eeef443b..59823e3c3bf7 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -1360,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) return -EINVAL; } - if (!hl_device_operational(hdev, &status)) { + if (!hl_device_operational(hdev, &status)) return -EBUSY; - } if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && !hdev->supports_staged_submission) { @@ -3285,12 +3284,6 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx /* In case the node already registered, need to unregister first then re-use */ if (req_offset_record->ts_reg_info.in_use) { - dev_dbg(data->buf->mmg->dev, - "Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n", - req_offset_record, - req_offset_record->ts_reg_info.interrupt->interrupt_id, - req_offset_record->ts_reg_info.timestamp_kernel_addr, - data->interrupt->interrupt_id); /* * Since interrupt here can be different than the one the node currently registered * on, and we don't want to lock two lists while we're doing unregister, so @@ -3346,10 +3339,6 @@ static int 
_hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx goto put_cq_cb; } - dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n", - data->interrupt->interrupt_id, data->ts_handle, - data->ts_offset, data->cq_offset); - data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle); if (!data->buf) { rc = -EINVAL; @@ -3371,9 +3360,6 @@ static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx if (*pend->cq_kernel_addr >= data->target_value) { spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); - dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n", - pend, data->ts_offset, data->interrupt->interrupt_id); - pend->ts_reg_info.in_use = 0; *status = HL_WAIT_CS_STATUS_COMPLETED; *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c index b83141f58319..9f212b17611a 100644 --- a/drivers/accel/habanalabs/common/context.c +++ b/drivers/accel/habanalabs/common/context.c @@ -199,7 +199,6 @@ out_err: int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) { - char task_comm[TASK_COMM_LEN]; int rc = 0, i; ctx->hdev = hdev; @@ -272,7 +271,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) mutex_init(&ctx->ts_reg_lock); dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n", - get_task_comm(task_comm, current), ctx->asid); + current->comm, ctx->asid); } return 0; diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 01f071d52570..ca7677293a55 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -42,9 +42,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; pkt.i2c_len = i2c_len; - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, val); - if (rc) + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, val); + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); return rc; @@ -75,10 +74,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_len = i2c_len; pkt.value = cpu_to_le64(val); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); - - if (rc) + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); return rc; @@ -99,10 +96,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) pkt.led_index = cpu_to_le32(led); pkt.value = cpu_to_le64(state); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); - - if (rc) + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); } @@ -484,7 +479,7 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf, struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE]; + char kbuf[MMU_KBUF_SIZE] = {0}; char *c; ssize_t rc; @@ -546,7 +541,7 @@ static ssize_t mmu_ack_error_value_write(struct file *file, struct 
hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE]; + char kbuf[MMU_KBUF_SIZE] = {0}; ssize_t rc; if (count > sizeof(kbuf) - 1) @@ -1643,19 +1638,19 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent &hl_data64b_fops); debugfs_create_file("set_power_state", - 0200, + 0644, root, dev_entry, &hl_power_fops); debugfs_create_file("device", - 0200, + 0644, root, dev_entry, &hl_device_fops); debugfs_create_file("clk_gate", - 0200, + 0644, root, dev_entry, &hl_clk_gate_fops); @@ -1667,13 +1662,13 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent &hl_stop_on_err_fops); debugfs_create_file("dump_security_violations", - 0644, + 0400, root, dev_entry, &hl_security_violations_fops); debugfs_create_file("dump_razwi_events", - 0644, + 0400, root, dev_entry, &hl_razwi_check_fops); @@ -1706,7 +1701,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent &hdev->reset_info.skip_reset_on_timeout); debugfs_create_file("state_dump", - 0600, + 0644, root, dev_entry, &hl_state_dump_fops); @@ -1722,9 +1717,14 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent root, &hdev->device_release_watchdog_timeout_sec); + debugfs_create_u16("server_type", + 0444, + root, + &hdev->asic_prop.server_type); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, - 0444, + 0644, root, entry, &hl_debugfs_fops); diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index a73bd4be94b1..30277ae410d4 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -30,6 +30,8 @@ enum dma_alloc_type { #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788 +static void hl_device_heartbeat(struct work_struct *work); + /* * hl_set_dram_bar- sets the bar to allow later access to address * @@ -55,7 +57,8 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi if (is_power_of_2(prop->dram_pci_bar_size)) bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); else - bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * + bar_base_addr = region->region_base + + div64_u64((addr - region->region_base), prop->dram_pci_bar_size) * prop->dram_pci_bar_size; old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); @@ -129,8 +132,8 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t } if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) - trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, - caller); + trace_habanalabs_dma_alloc(&(hdev)->pdev->dev, (u64) (uintptr_t) ptr, *dma_handle, + size, caller); return ptr; } @@ -151,7 +154,7 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c break; } - trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); + trace_habanalabs_dma_free(&(hdev)->pdev->dev, store_cpu_addr, dma_handle, size, caller); } void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, @@ -203,15 +206,15 @@ int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, return 0; for_each_sgtable_dma_sg(sgt, sg, i) - trace_habanalabs_dma_map_page(hdev->dev, - page_to_phys(sg_page(sg)), - sg->dma_address - 
prop->device_dma_offset_for_host_access, + trace_habanalabs_dma_map_page(&(hdev)->pdev->dev, + page_to_phys(sg_page(sg)), + sg->dma_address - prop->device_dma_offset_for_host_access, #ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->dma_length, + sg->dma_length, #else - sg->length, + sg->length, #endif - dir, caller); + dir, caller); return 0; } @@ -246,7 +249,8 @@ void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, if (trace_habanalabs_dma_unmap_page_enabled()) { for_each_sgtable_dma_sg(sgt, sg, i) - trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)), + trace_habanalabs_dma_unmap_page(&(hdev)->pdev->dev, + page_to_phys(sg_page(sg)), sg->dma_address - prop->device_dma_offset_for_host_access, #ifdef CONFIG_NEED_SG_DMA_LENGTH sg->dma_length, @@ -438,16 +442,19 @@ static void print_idle_status_mask(struct hl_device *hdev, const char *message, u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE]) { if (idle_mask[3]) - dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n", - message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); + dev_err(hdev->dev, "%s %s (mask %#llx_%016llx_%016llx_%016llx)\n", + dev_name(&hdev->pdev->dev), message, + idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); else if (idle_mask[2]) - dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n", - message, idle_mask[2], idle_mask[1], idle_mask[0]); + dev_err(hdev->dev, "%s %s (mask %#llx_%016llx_%016llx)\n", + dev_name(&hdev->pdev->dev), message, + idle_mask[2], idle_mask[1], idle_mask[0]); else if (idle_mask[1]) - dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n", - message, idle_mask[1], idle_mask[0]); + dev_err(hdev->dev, "%s %s (mask %#llx_%016llx)\n", + dev_name(&hdev->pdev->dev), message, idle_mask[1], idle_mask[0]); else - dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]); + dev_err(hdev->dev, "%s %s (mask %#llx)\n", dev_name(&hdev->pdev->dev), message, + idle_mask[0]); } static void hpriv_release(struct kref *ref) @@ -544,7 +551,8 @@ int hl_hpriv_put(struct hl_fpriv *hpriv) return kref_put(&hpriv->refcount, hpriv_release); } -static void print_device_in_use_info(struct hl_device *hdev, const char *message) +static void print_device_in_use_info(struct hl_device *hdev, + struct hl_mem_mgr_fini_stats *mm_fini_stats, const char *message) { u32 active_cs_num, dmabuf_export_cnt; bool unknown_reason = true; @@ -568,6 +576,12 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message dmabuf_export_cnt); } + if (mm_fini_stats->n_busy_cb) { + unknown_reason = false; + offset += scnprintf(buf + offset, size - offset, " [%u live CB handles]", + mm_fini_stats->n_busy_cb); + } + if (unknown_reason) scnprintf(buf + offset, size - offset, " [unknown reason]"); @@ -585,6 +599,7 @@ void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv) { struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = to_hl_device(ddev); + struct hl_mem_mgr_fini_stats mm_fini_stats; if (!hdev) { pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); @@ -596,12 +611,13 @@ void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv) /* Memory buffers might be still in use at this point and thus the handles IDR destruction * is postponed to hpriv_release(). 
*/ - hl_mem_mgr_fini(&hpriv->mem_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr, &mm_fini_stats); hdev->compute_ctx_in_release = 1; if (!hl_hpriv_put(hpriv)) { - print_device_in_use_info(hdev, "User process closed FD but device still in use"); + print_device_in_use_info(hdev, &mm_fini_stats, + "User process closed FD but device still in use"); hl_device_reset(hdev, HL_DRV_RESET_HARD); } @@ -801,7 +817,7 @@ static void device_hard_reset_pending(struct work_struct *work) } queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work, - msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); + secs_to_jiffies(HL_PENDING_RESET_PER_SEC)); } } @@ -857,6 +873,10 @@ static int device_early_init(struct hl_device *hdev) gaudi2_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name)); break; + case ASIC_GAUDI2D: + gaudi2_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI2D", sizeof(hdev->asic_name)); + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); @@ -945,6 +965,8 @@ static int device_early_init(struct hl_device *hdev) goto free_cb_mgr; } + INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); + INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending); hdev->device_reset_work.hdev = hdev; hdev->device_fini_pending = 0; @@ -967,7 +989,7 @@ static int device_early_init(struct hl_device *hdev) return 0; free_cb_mgr: - hl_mem_mgr_fini(&hdev->kernel_mem_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr, NULL); hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr); free_chip_info: kfree(hdev->hl_chip_info); @@ -1011,7 +1033,7 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->clk_throttling.lock); - hl_mem_mgr_fini(&hdev->kernel_mem_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr, NULL); hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr); kfree(hdev->hl_chip_info); @@ -1034,31 +1056,65 @@ static void device_early_fini(struct hl_device *hdev) static bool is_pci_link_healthy(struct hl_device *hdev) { - u16 vendor_id; + u16 device_id; if (!hdev->pdev) return false; - pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id); + pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id); - return (vendor_id == PCI_VENDOR_ID_HABANALABS); + return (device_id == hdev->pdev->device); } -static int hl_device_eq_heartbeat_check(struct hl_device *hdev) +static void stringify_time_of_last_heartbeat(struct hl_device *hdev, char *time_str, size_t size, + bool is_pq_hb) { + time64_t seconds = is_pq_hb ? 
hdev->heartbeat_debug_info.last_pq_heartbeat_ts + : hdev->heartbeat_debug_info.last_eq_heartbeat_ts; + struct tm tm; + + if (!seconds) + return; + + time64_to_tm(seconds, 0, &tm); + + snprintf(time_str, size, "%ld-%02d-%02d %02d:%02d:%02d (UTC)", + tm.tm_year + 1900, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); +} + +static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) +{ + struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info; + u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1; struct asic_fixed_properties *prop = &hdev->asic_prop; + char pq_time_str[64] = "N/A", eq_time_str[64] = "N/A"; if (!prop->cpucp_info.eq_health_check_supported) - return 0; + return true; - if (hdev->eq_heartbeat_received) { - hdev->eq_heartbeat_received = false; - } else { + if (!hdev->eq_heartbeat_received) { dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); - return -EIO; + + stringify_time_of_last_heartbeat(hdev, pq_time_str, sizeof(pq_time_str), true); + stringify_time_of_last_heartbeat(hdev, eq_time_str, sizeof(eq_time_str), false); + dev_err(hdev->dev, + "EQ: {CI %u, HB counter %u, last HB time: %s}, PQ: {PI: %u, CI: %u (%u), last HB time: %s}\n", + hdev->event_queue.ci, + heartbeat_debug_info->heartbeat_event_counter, + eq_time_str, + hdev->kernel_queues[cpu_q_id].pi, + atomic_read(&hdev->kernel_queues[cpu_q_id].ci), + atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask, + pq_time_str); + + hl_eq_dump(hdev, &hdev->event_queue); + + return false; } - return 0; + hdev->eq_heartbeat_received = false; + + return true; } static void hl_device_heartbeat(struct work_struct *work) @@ -1077,7 +1133,7 @@ static void hl_device_heartbeat(struct work_struct *work) * in order to validate the eq is working. * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule. */ - if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev))) + if (hl_device_eq_heartbeat_received(hdev) && (!hdev->asic_funcs->send_heartbeat(hdev))) goto reschedule; if (hl_device_operational(hdev, NULL)) @@ -1131,21 +1187,6 @@ static int device_late_init(struct hl_device *hdev) } hdev->high_pll = hdev->asic_prop.high_pll; - - if (hdev->heartbeat) { - /* - * Before scheduling the heartbeat driver will check if eq event has received. - * for the first schedule we need to set the indication as true then for the next - * one this indication will be true only if eq event was sent by FW. 
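The handshake described in the comment above, and checked by the new hl_device_eq_heartbeat_received(), amounts to: the driver arms the flag when the heartbeat work is scheduled, the firmware's EQ heartbeat event re-arms it, and each periodic check consumes it; a check that finds the flag already clear means the firmware missed a beat. A minimal single-threaded model of that state machine (real driver state, work scheduling and locking are intentionally omitted):

#include <stdbool.h>
#include <stdio.h>

static bool eq_heartbeat_received;

/* Called when the heartbeat work is (re)scheduled */
static void heartbeat_schedule(void)
{
	/* Arm the flag so the very first check cannot fail spuriously */
	eq_heartbeat_received = true;
}

/* Called from the EQ path when FW sends the heartbeat event */
static void eq_heartbeat_event(void)
{
	eq_heartbeat_received = true;
}

/* Called by the periodic heartbeat worker */
static bool heartbeat_check(void)
{
	if (!eq_heartbeat_received)
		return false;		/* FW missed a beat -> reset path */

	eq_heartbeat_received = false;	/* consume; FW must set it again */
	return true;
}

int main(void)
{
	heartbeat_schedule();

	eq_heartbeat_event();
	printf("tick 1: %s\n", heartbeat_check() ? "alive" : "missed");

	/* no FW event before the next tick */
	printf("tick 2: %s\n", heartbeat_check() ? "alive" : "missed");
	return 0;
}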
- */ - hdev->eq_heartbeat_received = true; - - INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); - - schedule_delayed_work(&hdev->work_heartbeat, - usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); - } - hdev->late_init_done = true; return 0; @@ -1162,9 +1203,6 @@ static void device_late_fini(struct hl_device *hdev) if (!hdev->late_init_done) return; - if (hdev->heartbeat) - cancel_delayed_work_sync(&hdev->work_heartbeat); - if (hdev->asic_funcs->late_fini) hdev->asic_funcs->late_fini(hdev); @@ -1265,8 +1303,12 @@ static void hl_abort_waiting_for_completions(struct hl_device *hdev) static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, bool skip_wq_flush) { - if (hard_reset) + if (hard_reset) { + if (hdev->heartbeat) + cancel_delayed_work_sync(&hdev->work_heartbeat); + device_late_fini(hdev); + } /* * Halt the engines and disable interrupts so we won't get any more @@ -1494,15 +1536,14 @@ static void send_disable_pci_access(struct hl_device *hdev, u32 flags) * of heartbeat, the device CPU is marked as disable * so this message won't be sent */ - if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) { - dev_warn(hdev->dev, "Failed to disable FW's PCI access\n"); + if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) return; - } - /* verify that last EQs are handled before disabled is set */ + /* disable_irq also generates sync irq, this verifies that last EQs are handled + * before disabled is set. The IRQ will be enabled again in request_irq call. + */ if (hdev->cpu_queues_enable) - synchronize_irq(pci_irq_vector(hdev->pdev, - hdev->asic_prop.eq_interrupt_id)); + disable_irq(pci_irq_vector(hdev->pdev, hdev->asic_prop.eq_interrupt_id)); } } @@ -1546,6 +1587,31 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) } } +static void reset_heartbeat_debug_info(struct hl_device *hdev) +{ + hdev->heartbeat_debug_info.last_pq_heartbeat_ts = 0; + hdev->heartbeat_debug_info.last_eq_heartbeat_ts = 0; + hdev->heartbeat_debug_info.heartbeat_event_counter = 0; +} + +static inline void device_heartbeat_schedule(struct hl_device *hdev) +{ + if (!hdev->heartbeat) + return; + + reset_heartbeat_debug_info(hdev); + + /* + * Before scheduling the heartbeat driver will check if eq event has received. + * for the first schedule we need to set the indication as true then for the next + * one this indication will be true only if eq event was sent by FW. + */ + hdev->eq_heartbeat_received = true; + + schedule_delayed_work(&hdev->work_heartbeat, + usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); +} + /* * hl_device_reset - reset the device * @@ -1768,14 +1834,16 @@ kill_processes: hdev->device_cpu_disabled = false; hdev->reset_info.hard_reset_pending = false; + /* + * Put the device in an unusable state if there are 2 back to back resets due to + * fatal errors. 
+ */ if (hdev->reset_info.reset_trigger_repeated && - (hdev->reset_info.prev_reset_trigger == - HL_DRV_RESET_FW_FATAL_ERR)) { - /* if there 2 back to back resets from FW, - * ensure driver puts the driver in a unusable state - */ + (hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR || + hdev->reset_info.prev_reset_trigger == + HL_DRV_RESET_HEARTBEAT)) { dev_crit(hdev->dev, - "%s Consecutive FW fatal errors received, stopping hard reset\n", + "%s Consecutive fatal errors, stopping hard reset\n", dev_name(&(hdev)->pdev->dev)); rc = -EIO; goto out_err; @@ -1913,6 +1981,8 @@ kill_processes: if (hard_reset) { hdev->reset_info.hard_reset_cnt++; + device_heartbeat_schedule(hdev); + /* After reset is done, we are ready to receive events from * the F/W. We can't do it before because we will ignore events * and if those events are fatal, we won't know about it and @@ -2347,6 +2417,12 @@ int hl_device_init(struct hl_device *hdev) goto out_disabled; } + /* Scheduling the EQ heartbeat thread must come after driver is done with all + * initializations, as we want to make sure the FW gets enough time to be prepared + * to respond to heartbeat packets. + */ + device_heartbeat_schedule(hdev); + dev_notice(hdev->dev, "Successfully added device %s to habanalabs driver\n", dev_name(&(hdev)->pdev->dev)); @@ -2589,7 +2665,7 @@ inline u32 hl_rreg(struct hl_device *hdev, u32 reg) u32 val = readl(hdev->rmmio + reg); if (unlikely(trace_habanalabs_rreg32_enabled())) - trace_habanalabs_rreg32(hdev->dev, reg, val); + trace_habanalabs_rreg32(&(hdev)->pdev->dev, reg, val); return val; } @@ -2607,7 +2683,7 @@ inline u32 hl_rreg(struct hl_device *hdev, u32 reg) inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) { if (unlikely(trace_habanalabs_wreg32_enabled())) - trace_habanalabs_wreg32(hdev->dev, reg, val); + trace_habanalabs_wreg32(&(hdev)->pdev->dev, reg, val); writel(val, hdev->rmmio + reg); } @@ -2801,3 +2877,88 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info) atomic_set(&captured_err_info->cs_timeout.write_enable, 1); captured_err_info->undef_opcode.write_enable = true; } + +void hl_init_cpu_for_irq(struct hl_device *hdev) +{ +#ifdef CONFIG_NUMA + struct cpumask *available_mask = &hdev->irq_affinity_mask; + int numa_node = hdev->pdev->dev.numa_node, i; + static struct cpumask cpu_mask; + + if (numa_node < 0) + return; + + if (!cpumask_and(&cpu_mask, cpumask_of_node(numa_node), cpu_online_mask)) { + dev_err(hdev->dev, "No available affinities in current numa node\n"); + return; + } + + /* Remove HT siblings */ + for_each_cpu(i, &cpu_mask) + cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask); +#endif +} + +void hl_set_irq_affinity(struct hl_device *hdev, int irq) +{ + if (cpumask_empty(&hdev->irq_affinity_mask)) { + dev_dbg(hdev->dev, "affinity mask is empty\n"); + return; + } + + if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask)) + dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq); +} + +void hl_eq_heartbeat_event_handle(struct hl_device *hdev) +{ + hdev->heartbeat_debug_info.heartbeat_event_counter++; + hdev->heartbeat_debug_info.last_eq_heartbeat_ts = ktime_get_real_seconds(); + hdev->eq_heartbeat_received = true; +} + +void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask) +{ + struct hl_clk_throttle *clk_throttle = &hdev->clk_throttling; + ktime_t zero_time = ktime_set(0, 0); + + mutex_lock(&clk_throttle->lock); + + switch (event_type) { + case EQ_EVENT_POWER_EVT_START: + 
clk_throttle->current_reason |= HL_CLK_THROTTLE_POWER; + clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_POWER; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; + dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); + break; + + case EQ_EVENT_POWER_EVT_END: + clk_throttle->current_reason &= ~HL_CLK_THROTTLE_POWER; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); + dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); + break; + + case EQ_EVENT_THERMAL_EVT_START: + clk_throttle->current_reason |= HL_CLK_THROTTLE_THERMAL; + clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_THERMAL; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; + *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); + break; + + case EQ_EVENT_THERMAL_EVT_END: + clk_throttle->current_reason &= ~HL_CLK_THROTTLE_THERMAL; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); + *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); + break; + + default: + dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); + break; + } + + mutex_unlock(&clk_throttle->lock); +} diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index 3558a6a8e192..eeb6b2a80fc7 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -8,6 +8,7 @@ #include "habanalabs.h" #include <linux/habanalabs/hl_boot_if.h> +#include <linux/pci.h> #include <linux/firmware.h> #include <linux/crc32.h> #include <linux/slab.h> @@ -40,6 +41,31 @@ static char *comms_sts_str_arr[COMMS_STS_INVLD_LAST] = { [COMMS_STS_TIMEOUT_ERR] = __stringify(COMMS_STS_TIMEOUT_ERR), }; +/** + * hl_fw_version_cmp() - compares the FW version to a specific version + * + * @hdev: pointer to hl_device structure + * @major: major number of a reference version + * @minor: minor number of a reference version + * @subminor: sub-minor number of a reference version + * + * Return 1 if FW version greater than the reference version, -1 if it's + * smaller and 0 if versions are identical. + */ +int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor) +{ + if (hdev->fw_sw_major_ver != major) + return (hdev->fw_sw_major_ver > major) ? 1 : -1; + + if (hdev->fw_sw_minor_ver != minor) + return (hdev->fw_sw_minor_ver > minor) ? 1 : -1; + + if (hdev->fw_sw_sub_minor_ver != subminor) + return (hdev->fw_sw_sub_minor_ver > subminor) ? 
1 : -1; + + return 0; +} + static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; @@ -345,43 +371,63 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value) { struct cpucp_packet pkt = {}; + int rc; pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(value); - return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "Failed to disable FW's PCI access\n"); + + return rc; } +/** + * hl_fw_send_cpu_message() - send CPU message to the device. + * + * @hdev: pointer to hl_device structure. + * @hw_queue_id: HW queue ID + * @msg: raw data of the message/packet + * @size: size of @msg in bytes + * @timeout_us: timeout in usec to wait for CPU reply on the message + * @result: return code reported by FW + * + * send message to the device CPU. + * + * Return: 0 on success, non-zero for failure. + * -ENOMEM: memory allocation failure + * -EAGAIN: CPU is disabled (try again when enabled) + * -ETIMEDOUT: timeout waiting for FW response + * -EIO: protocol error + */ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, - u16 len, u32 timeout, u64 *result) + u16 size, u32 timeout_us, u64 *result) { struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id]; struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 tmp, expected_ack_val, pi, opcode; struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; struct hl_bd *sent_bd; - u32 tmp, expected_ack_val, pi, opcode; - int rc; + int rc = 0, fw_rc; - pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); + pkt = hl_cpu_accessible_dma_pool_alloc(hdev, size, &pkt_dma_addr); if (!pkt) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for packet to CPU\n"); + dev_err(hdev->dev, "Failed to allocate DMA memory for packet to CPU\n"); return -ENOMEM; } - memcpy(pkt, msg, len); + memcpy(pkt, msg, size); mutex_lock(&hdev->send_cpu_message_lock); /* CPU-CP messages can be sent during soft-reset */ - if (hdev->disabled && !hdev->reset_info.in_compute_reset) { - rc = 0; + if (hdev->disabled && !hdev->reset_info.in_compute_reset) goto out; - } if (hdev->device_cpu_disabled) { - rc = -EIO; + rc = -EAGAIN; goto out; } @@ -397,7 +443,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, * Which means that we don't need to lock the access to the entire H/W * queues module when submitting a JOB to the CPU queue. */ - hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), len, pkt_dma_addr); + hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), size, pkt_dma_addr); if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) expected_ack_val = queue->pi; @@ -406,7 +452,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, (tmp == expected_ack_val), 1000, - timeout, true); + timeout_us, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); @@ -414,19 +460,27 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, /* If FW performed reset just before sending it a packet, we will get a timeout. * This is expected behavior, hence no need for error message. 
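The hl_fw_version_cmp() helper added above orders versions field by field (major, then minor, then sub-minor), so callers can gate features with ordinary comparisons, e.g. hl_fw_version_cmp(hdev, 1, 13, 0) >= 0 for "firmware at least 1.13.0" (that threshold is arbitrary here). A standalone sketch of the same ordering with made-up version numbers:

#include <stdio.h>
#include <stdint.h>

/* Same field-by-field ordering as the driver helper, modelled in userspace */
static int fw_version_cmp(uint32_t cur_major, uint32_t cur_minor, uint32_t cur_sub,
			  uint32_t major, uint32_t minor, uint32_t subminor)
{
	if (cur_major != major)
		return cur_major > major ? 1 : -1;
	if (cur_minor != minor)
		return cur_minor > minor ? 1 : -1;
	if (cur_sub != subminor)
		return cur_sub > subminor ? 1 : -1;
	return 0;
}

int main(void)
{
	/* Hypothetical "currently loaded" FW version */
	uint32_t maj = 1, min = 12, sub = 5;

	printf("1.12.5 vs 1.13.0 -> %d\n", fw_version_cmp(maj, min, sub, 1, 13, 0)); /* -1 */
	printf("1.12.5 vs 1.12.5 -> %d\n", fw_version_cmp(maj, min, sub, 1, 12, 5)); /*  0 */
	printf("1.12.5 vs 1.11.9 -> %d\n", fw_version_cmp(maj, min, sub, 1, 11, 9)); /*  1 */
	return 0;
}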
*/ - if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) + if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) { dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n", tmp); - else - dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n", tmp); + } else { + struct hl_bd *bd = queue->kernel_address; + + bd += hl_pi_2_offset(pi); + + dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n" + "Pkt info[%u]: dma_addr: 0x%llx, kernel_addr: %p, len:0x%x, ctl: 0x%x, ptr:0x%llx, dram_bd:%u\n", + tmp, pi, pkt_dma_addr, (void *)pkt, bd->len, bd->ctl, bd->ptr, + queue->dram_bd); + } hdev->device_cpu_disabled = true; goto out; } tmp = le32_to_cpu(pkt->ctl); - rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; - if (rc) { + fw_rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; + if (fw_rc) { opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT; if (!prop->supports_advanced_cpucp_rc) { @@ -435,7 +489,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, goto scrub_descriptor; } - switch (rc) { + switch (fw_rc) { case cpucp_packet_invalid: dev_err(hdev->dev, "CPU packet %d is not supported by F/W\n", opcode); @@ -460,7 +514,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, /* propagate the return code from the f/w to the callers who want to check it */ if (result) - *result = rc; + *result = fw_rc; rc = -EIO; @@ -480,7 +534,7 @@ scrub_descriptor: out: mutex_unlock(&hdev->send_cpu_message_lock); - hl_cpu_accessible_dma_pool_free(hdev, len, pkt); + hl_cpu_accessible_dma_pool_free(hdev, size, pkt); return rc; } @@ -501,7 +555,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) 0, &result); if (rc) - dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); + dev_err(hdev->dev, "failed to unmask event %d", event_type); return rc; } @@ -540,7 +594,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, total_pkt_size, 0, &result); if (rc) - dev_err(hdev->dev, "failed to unmask IRQ array\n"); + dev_err(hdev->dev, "failed to unmask event array\n"); kfree(pkt); @@ -550,7 +604,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, int hl_fw_test_cpu_queue(struct hl_device *hdev) { struct cpucp_packet test_pkt = {}; - u64 result; + u64 result = 0; int rc; test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << @@ -623,16 +677,14 @@ int hl_fw_send_device_activity(struct hl_device *hdev, bool open) int hl_fw_send_heartbeat(struct hl_device *hdev) { struct cpucp_packet hb_pkt; - u64 result; + u64 result = 0; int rc; memset(&hb_pkt, 0, sizeof(hb_pkt)); - hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << - CPUCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << CPUCP_PKT_CTL_OPCODE_SHIFT); hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), 0, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, sizeof(hb_pkt), 0, &result); if ((rc) || (result != CPUCP_PACKET_FENCE_VAL)) return -EIO; @@ -643,6 +695,8 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) rc = -EIO; } + hdev->heartbeat_debug_info.last_pq_heartbeat_ts = ktime_get_real_seconds(); + return rc; } @@ -885,7 +939,7 @@ static int hl_fw_send_msi_info_msg(struct hl_device *hdev) { struct cpucp_array_data_packet *pkt; size_t total_pkt_size, data_size; - u64 result; + u64 result = 0; int rc; /* skip sending 
this info for unsupported ASICs */ @@ -976,11 +1030,10 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_EEPROM_TIMEOUT_USEC, &result); - if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP EEPROM packet, error %d\n", - rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP EEPROM packet, error %d\n", rc); goto out; } @@ -1021,7 +1074,9 @@ int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc); goto out; } @@ -1055,8 +1110,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->rx_throughput = result; @@ -1070,8 +1126,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->tx_throughput = result; @@ -1084,8 +1141,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->replay_cnt = (u32) result; @@ -1105,9 +1163,9 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CpuCP total energy pkt, error %d\n", - rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CpuCP total energy pkt, error %d\n", rc); return rc; } @@ -1183,7 +1241,8 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); return rc; } @@ -1210,7 +1269,8 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, "Failed to read power, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to read power, error %d\n", rc); return rc; } @@ -1247,8 +1307,9 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to 
handle CPU-CP replaced rows info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc); goto out; } @@ -1273,7 +1334,8 @@ int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); if (rc) { - dev_err(hdev->dev, + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to handle CPU-CP pending rows info pkt, error %d\n", rc); goto out; } @@ -1428,7 +1490,7 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) { struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; u32 status = 0, timeout; - int rc, tries = 1; + int rc, tries = 1, fw_err = 0; bool preboot_still_runs; /* Need to check two possible scenarios: @@ -1468,18 +1530,18 @@ retry: } } - if (rc) { + /* If we read all FF, then something is totally wrong, no point + * of reading specific errors + */ + if (status != -1) + fw_err = fw_read_errors(hdev, pre_fw_load->boot_err0_reg, + pre_fw_load->boot_err1_reg, + pre_fw_load->sts_boot_dev_sts0_reg, + pre_fw_load->sts_boot_dev_sts1_reg); + if (rc || fw_err) { detect_cpu_boot_status(hdev, status); - dev_err(hdev->dev, "CPU boot ready timeout (status = %d)\n", status); - - /* If we read all FF, then something is totally wrong, no point - * of reading specific errors - */ - if (status != -1) - fw_read_errors(hdev, pre_fw_load->boot_err0_reg, - pre_fw_load->boot_err1_reg, - pre_fw_load->sts_boot_dev_sts0_reg, - pre_fw_load->sts_boot_dev_sts1_reg); + dev_err(hdev->dev, "CPU boot %s (status = %d)\n", + fw_err ? "failed due to an error" : "ready timeout", status); return -EIO; } @@ -1750,7 +1812,7 @@ static void hl_fw_dynamic_send_cmd(struct hl_device *hdev, val = FIELD_PREP(COMMS_COMMAND_CMD_MASK, cmd); val |= FIELD_PREP(COMMS_COMMAND_SIZE_MASK, size); - trace_habanalabs_comms_send_cmd(hdev->dev, comms_cmd_str_arr[cmd]); + trace_habanalabs_comms_send_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]); WREG32(le32_to_cpu(dyn_regs->kmd_msg_to_cpu), val); } @@ -1808,7 +1870,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; - trace_habanalabs_comms_wait_status(hdev->dev, comms_sts_str_arr[expected_status]); + trace_habanalabs_comms_wait_status(&hdev->pdev->dev, comms_sts_str_arr[expected_status]); /* Wait for expected status */ rc = hl_poll_timeout( @@ -1825,7 +1887,8 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, return -EIO; } - trace_habanalabs_comms_wait_status_done(hdev->dev, comms_sts_str_arr[expected_status]); + trace_habanalabs_comms_wait_status_done(&hdev->pdev->dev, + comms_sts_str_arr[expected_status]); /* * skip storing FW response for NOOP to preserve the actual desired @@ -1899,7 +1962,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, { int rc; - trace_habanalabs_comms_protocol_cmd(hdev->dev, comms_cmd_str_arr[cmd]); + trace_habanalabs_comms_protocol_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]); /* first send clear command to clean former commands */ rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader); @@ -2038,7 +2101,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, * note that no alignment/stride address issues here as all structures * are 64 bit padded. 
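The validation code that follows this comment treats the firmware descriptor as a fixed header followed by a variable-size payload: the payload pointer is the descriptor base plus sizeof(header), the payload length comes from a little-endian size field in the header, and a CRC32 over that payload is compared against what the firmware reported. A minimal layout model is below; the struct fields and the checksum routine are illustrative stand-ins, not the driver's real comms_msg_header or CRC32 implementation:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Illustrative header - not the real comms_msg_header layout */
struct msg_header {
	uint32_t magic;
	uint16_t size;		/* payload size in bytes (le16 on the wire) */
	uint16_t pad;
	uint32_t crc32;		/* CRC over the payload that follows */
};

struct fw_desc {
	struct msg_header header;
	uint8_t payload[32];
};

/* Trivial stand-in for the driver's CRC32 helper */
static uint32_t toy_crc(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;

	while (len--)
		sum = sum * 31 + *p++;
	return sum;
}

int main(void)
{
	struct fw_desc desc = { 0 };
	const uint8_t *data_ptr;
	uint16_t data_size;

	/* Pretend the firmware filled in the descriptor */
	strcpy((char *)desc.payload, "comms payload");
	desc.header.size = 14;	/* strlen + NUL */
	desc.header.crc32 = toy_crc(desc.payload, desc.header.size);

	/* Same arithmetic as the driver: payload starts right after the header */
	data_ptr = (const uint8_t *)&desc + sizeof(struct msg_header);
	data_size = desc.header.size;	/* le16_to_cpu() in the driver */

	printf("payload CRC %s\n",
	       toy_crc(data_ptr, data_size) == desc.header.crc32 ? "matches" : "mismatch");
	return 0;
}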
*/ - data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header); + data_ptr = (u8 *)fw_desc + sizeof(struct comms_msg_header); data_size = le16_to_cpu(fw_desc->header.size); data_crc32 = hl_fw_compat_crc32(data_ptr, data_size); @@ -2192,11 +2255,11 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev, memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc)); fw_data_size = le16_to_cpu(fw_desc->header.size); - temp_fw_desc = vzalloc(sizeof(struct comms_desc_header) + fw_data_size); + temp_fw_desc = vzalloc(sizeof(struct comms_msg_header) + fw_data_size); if (!temp_fw_desc) return -ENOMEM; - memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_desc_header) + fw_data_size); + memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_msg_header) + fw_data_size); rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader, (struct lkd_fw_comms_desc *) temp_fw_desc); @@ -2718,18 +2781,20 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; } + rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms)); + if (rc) + goto protocol_err; + + if (hdev->asic_prop.support_dynamic_resereved_fw_size) + hdev->asic_prop.reserved_fw_mem_size = + le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M; + if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) { struct lkd_fw_binning_info *binning_info; - rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, - sizeof(struct lkd_msg_comms)); - if (rc) - goto protocol_err; - /* read preboot version */ rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT, fw_loader->dynamic_loader.comm_desc.cur_fw_ver); - if (rc) return rc; @@ -2756,11 +2821,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, hdev->decoder_binning, hdev->rotator_binning); } - if (hdev->asic_prop.support_dynamic_resereved_fw_size) { - hdev->asic_prop.reserved_fw_mem_size = - le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb); - } - return 0; } @@ -2795,7 +2855,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, hdev->asic_funcs->init_cpu_scrambler_dram(hdev); if (!(hdev->fw_components & FW_TYPE_LINUX)) { - dev_info(hdev->dev, "Skip loading Linux F/W\n"); + dev_dbg(hdev->dev, "Skip loading Linux F/W\n"); return 0; } @@ -3125,10 +3185,10 @@ long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) pkt.pll_index = cpu_to_le32((u32)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); - if (rc) { - dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", - used_pll_idx, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", + used_pll_idx, rc); return rc; } @@ -3152,8 +3212,7 @@ void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", used_pll_idx, rc); } @@ -3169,9 +3228,9 @@ long hl_fw_get_max_power(struct hl_device *hdev) pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); - if (rc) { - dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); return rc; } @@ -3193,8 +3252,7 @@ 
void hl_fw_set_max_power(struct hl_device *hdev) pkt.value = cpu_to_le64(hdev->max_power); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); } @@ -3220,11 +3278,11 @@ static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void pkt.data_max_size = cpu_to_le32(size); pkt.nonce = cpu_to_le32(nonce); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - timeout, NULL); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), timeout, NULL); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); goto out; } @@ -3266,10 +3324,12 @@ int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); - if (rc) - dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n"); - else + if (rc) { + if (rc != -EAGAIN) + dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n"); + } else { dev_dbg(hdev->dev, "generic pkt was successful, result: 0x%llx\n", result); + } *size = (u32)result; diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 2a900c9941fe..6f27ce4fa01b 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -71,7 +71,7 @@ struct hl_fpriv; #define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ -#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ +#define HL_HEARTBEAT_PER_USEC 10000000 /* 10 s */ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ @@ -443,18 +443,22 @@ enum hl_collective_mode { * a CB handle can be provided for jobs on this queue. * Otherwise, a CB address must be provided. * @collective_mode: collective mode of current queue + * @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM. * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. * @binned: True if the queue is binned out and should not be used * @supports_sync_stream: True if queue supports sync stream + * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram */ struct hw_queue_properties { enum hl_queue_type type; enum queue_cb_alloc_flags cb_alloc_flags; enum hl_collective_mode collective_mode; + u64 q_dram_bd_address; u8 driver_only; u8 binned; u8 supports_sync_stream; + u8 dram_bd; }; /** @@ -590,8 +594,6 @@ struct hl_hints_range { * we display to the user * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. - * @mmu_hop_table_size: MMU hop table size. - * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. * @dram_page_size: The DRAM physical page size. * @cfg_size: configuration space size on SRAM. * @sram_size: total size of SRAM. @@ -645,10 +647,12 @@ struct hl_hints_range { * @num_engine_cores: number of engine cpu cores. * @max_num_of_engines: maximum number of all engines in the ASIC. * @num_of_special_blocks: special_blocks array size. - * @glbl_err_cause_num: global err cause number. + * @glbl_err_max_cause_num: global err max cause number. * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is * not supported. 
- * @reserved_fw_mem_size: size in MB of dram memory reserved for FW. + * @reserved_fw_mem_size: size of dram memory reserved for FW. + * @fw_event_queue_size: queue size for events from CPU-CP. + * A value of 0 means using the default HL_EQ_SIZE_IN_BYTES value. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -743,8 +747,6 @@ struct asic_fixed_properties { u32 clk_pll_index; u32 mmu_pgt_size; u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; u32 dram_page_size; u32 cfg_size; u32 sram_size; @@ -779,9 +781,10 @@ struct asic_fixed_properties { u32 num_engine_cores; u32 max_num_of_engines; u32 num_of_special_blocks; - u32 glbl_err_cause_num; + u32 glbl_err_max_cause_num; u32 hbw_flush_reg; u32 reserved_fw_mem_size; + u32 fw_event_queue_size; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -902,6 +905,18 @@ struct hl_mem_mgr { }; /** + * struct hl_mem_mgr_fini_stats - describes statistics returned during memory manager teardown. + * @n_busy_cb: the amount of CB handles that could not be removed + * @n_busy_ts: the amount of TS handles that could not be removed + * @n_busy_other: the amount of any other type of handles that could not be removed + */ +struct hl_mem_mgr_fini_stats { + u32 n_busy_cb; + u32 n_busy_ts; + u32 n_busy_other; +}; + +/** * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior * @topic: string identifier used for logging * @mem_id: memory type identifier, embedded in the handle and used to identify @@ -1052,6 +1067,8 @@ struct hl_encaps_signals_mgr { * @collective_mode: collective mode of current queue * @kernel_address: holds the queue's kernel virtual address. * @bus_address: holds the queue's DMA address. + * @pq_dram_address: hold the dram address when the PQ is allocated, used when dram_bd is true in + * queue properites. * @pi: holds the queue's pi value. * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci). * @hw_queue_id: the id of the H/W queue. @@ -1061,6 +1078,7 @@ struct hl_encaps_signals_mgr { * @valid: is the queue valid (we have array of 32 queues, not all of them * exist). * @supports_sync_stream: True if queue supports sync stream + * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram */ struct hl_hw_queue { struct hl_cs_job **shadow_queue; @@ -1069,6 +1087,7 @@ struct hl_hw_queue { enum hl_collective_mode collective_mode; void *kernel_address; dma_addr_t bus_address; + u64 pq_dram_address; u32 pi; atomic_t ci; u32 hw_queue_id; @@ -1077,6 +1096,7 @@ struct hl_hw_queue { u16 int_queue_len; u8 valid; u8 supports_sync_stream; + u8 dram_bd; }; /** @@ -1224,6 +1244,7 @@ struct hl_user_pending_interrupt { * @hdev: pointer to the device structure * @kernel_address: holds the queue's kernel virtual address * @bus_address: holds the queue's DMA address + * @size: the event queue size * @ci: ci inside the queue * @prev_eqe_index: the index of the previous event queue entry. The index of * the current entry's index must be +1 of the previous one. @@ -1235,6 +1256,7 @@ struct hl_eq { struct hl_device *hdev; void *kernel_address; dma_addr_t bus_address; + u32 size; u32 ci; u32 prev_eqe_index; bool check_eqe_index; @@ -1263,15 +1285,18 @@ struct hl_dec { * @ASIC_GAUDI2: Gaudi2 device. * @ASIC_GAUDI2B: Gaudi2B device. 
* @ASIC_GAUDI2C: Gaudi2C device. + * @ASIC_GAUDI2D: Gaudi2D device. */ enum hl_asic_type { ASIC_INVALID, + ASIC_GOYA, ASIC_GAUDI, ASIC_GAUDI_SEC, ASIC_GAUDI2, ASIC_GAUDI2B, ASIC_GAUDI2C, + ASIC_GAUDI2D, }; struct hl_cs_parser; @@ -2547,7 +2572,7 @@ struct hl_state_dump_specs { * DEVICES */ -#define HL_STR_MAX 32 +#define HL_STR_MAX 64 #define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1) @@ -2704,11 +2729,16 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); * updated directly by the device. If false, the host memory being polled will * be updated by host CPU. Required so host knows whether or not the memory * might need to be byte-swapped before returning value to caller. + * + * On the first 4 polling iterations the macro goes to sleep for short period of + * time that gradually increases and reaches sleep_us on the fifth iteration. */ #define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ mem_written_by_device) \ ({ \ + u64 __sleep_step_us; \ ktime_t __timeout; \ + u8 __step = 8; \ \ __timeout = ktime_add_us(ktime_get(), timeout_us); \ might_sleep_if(sleep_us); \ @@ -2726,8 +2756,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); (val) = le32_to_cpu(*(__le32 *) &(val)); \ break; \ } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + __sleep_step_us = sleep_us >> __step; \ + if (__sleep_step_us) \ + usleep_range((__sleep_step_us >> 2) + 1, __sleep_step_us); \ + __step >>= 1; \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) @@ -3170,6 +3202,21 @@ struct hl_reset_info { }; /** + * struct eq_heartbeat_debug_info - stores debug info to be used upon heartbeat failure. + * @last_pq_heartbeat_ts: timestamp of the last test packet that was sent to FW. + * This packet is the trigger in FW to send the EQ heartbeat event. + * @last_eq_heartbeat_ts: timestamp of the last EQ heartbeat event that was received from FW. + * @heartbeat_event_counter: number of heartbeat events received. + * @cpu_queue_id: used to read the queue pi/ci + */ +struct eq_heartbeat_debug_info { + time64_t last_pq_heartbeat_ts; + time64_t last_eq_heartbeat_ts; + u32 heartbeat_event_counter; + u32 cpu_queue_id; +}; + +/** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pcie_bar_phys: array of available PCIe bars physical addresses. @@ -3257,6 +3304,8 @@ struct hl_reset_info { * @clk_throttling: holds information about current/previous clock throttling events * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. + * @heartbeat_debug_info: counters used to debug heartbeat failures. + * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling. * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_inner_major_ver: the major of current loaded preboot inner version. * @fw_inner_minor_ver: the minor of current loaded preboot inner version. 
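The reworked hl_poll_timeout_memory() macro above no longer sleeps for the full sleep_us from the first iteration: it starts with sleep_us >> 8 and halves the shift each pass, so the per-iteration sleep grows from sleep_us/256 through sleep_us/16, /4 and /2 before settling at the full sleep_us from the fifth iteration on, which keeps latency low for conditions that become true quickly. A small model of the step sequence, using 1000 us purely as an example value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t sleep_us = 1000;	/* example value only */
	unsigned int step = 8;
	int i;

	for (i = 1; i <= 6; i++) {
		uint64_t sleep_step_us = sleep_us >> step;

		/* in the macro a zero step means no sleep at all that iteration */
		printf("iteration %d: sleep for ~%llu us\n",
		       i, (unsigned long long)sleep_step_us);

		step >>= 1;
	}
	return 0;
}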
@@ -3446,6 +3495,10 @@ struct hl_device { struct hl_reset_info reset_info; + struct eq_heartbeat_debug_info heartbeat_debug_info; + + cpumask_t irq_affinity_mask; + u32 *stream_master_qid_arr; u32 fw_inner_major_ver; u32 fw_inner_minor_ver; @@ -3588,25 +3641,6 @@ struct hl_ioctl_desc { hl_ioctl_t *func; }; -static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major, u32 fw_sw_minor) -{ - if (hdev->fw_sw_major_ver < fw_sw_major) - return true; - if (hdev->fw_sw_major_ver > fw_sw_major) - return false; - if (hdev->fw_sw_minor_ver < fw_sw_minor) - return true; - return false; -} - -static inline bool hl_is_fw_sw_ver_equal_or_greater(struct hl_device *hdev, u32 fw_sw_major, - u32 fw_sw_minor) -{ - return (hdev->fw_sw_major_ver > fw_sw_major || - (hdev->fw_sw_major_ver == fw_sw_major && - hdev->fw_sw_minor_ver >= fw_sw_minor)); -} - /* * Kernel module functions that can be accessed by entire module */ @@ -3732,6 +3766,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); +void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); @@ -3886,6 +3921,7 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_ struct hl_hr_mmu_funcs *hr_func); int hl_mmu_if_set_funcs(struct hl_device *hdev); void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); +void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, @@ -3893,7 +3929,24 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr); u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr); bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); - +struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr); +void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr); +void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info); +u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx); +u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx); +void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val); +void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val); +void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr); +u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); +void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr); +int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr); +u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop); +u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx); +void hl_mmu_dr_flush(struct hl_ctx *ctx); +int hl_mmu_dr_init(struct hl_device *hdev); +void hl_mmu_dr_fini(struct hl_device *hdev); + +int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value); @@ -4008,7 +4061,7 @@ char *hl_format_as_binary(char 
*buf, size_t buf_len, u32 n); const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg); -void hl_mem_mgr_fini(struct hl_mem_mgr *mmg); +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats); void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg); int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, void *args); @@ -4032,6 +4085,10 @@ void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info); void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count); void hl_enable_err_info_capture(struct hl_error_info *captured_err_info); +void hl_init_cpu_for_irq(struct hl_device *hdev); +void hl_set_irq_affinity(struct hl_device *hdev, int irq); +void hl_eq_heartbeat_event_handle(struct hl_device *hdev); +void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c index e542fd40e16c..596c52e8aa26 100644 --- a/drivers/accel/habanalabs/common/habanalabs_drv.c +++ b/drivers/accel/habanalabs/common/habanalabs_drv.c @@ -101,7 +101,6 @@ static const struct drm_driver hl_driver = { .major = LINUX_VERSION_MAJOR, .minor = LINUX_VERSION_PATCHLEVEL, .patchlevel = LINUX_VERSION_SUBLEVEL, - .date = "20190505", .fops = &hl_fops, .open = hl_device_open, @@ -144,6 +143,9 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev) case REV_ID_C: asic_type = ASIC_GAUDI2C; break; + case REV_ID_D: + asic_type = ASIC_GAUDI2D; + break; default: break; } @@ -260,7 +262,7 @@ int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv) out_err: mutex_unlock(&hdev->fpriv_list_lock); - hl_mem_mgr_fini(&hpriv->mem_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr, NULL); hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); mutex_destroy(&hpriv->ctx_lock); @@ -359,8 +361,7 @@ static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) * a different default timeout for Gaudi */ if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) - hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * - MSEC_PER_SEC); + hdev->timeout_jiffies = secs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED); hdev->reset_upon_device_release = 0; break; diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index 1dd6e23172ca..8729a0c57d78 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -1279,13 +1279,10 @@ static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long ar retcode = -EFAULT; out_err: - if (retcode) { - char task_comm[TASK_COMM_LEN]; - + if (retcode) dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n", - task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr); - } + task_pid_nr(current), current->comm, cmd, nr); if (kdata != stack_kdata) kfree(kdata); @@ -1308,11 +1305,9 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) { ioctl = &hl_ioctls_control[nr - HL_COMMAND_START]; } else { - char task_comm[TASK_COMM_LEN]; - dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n", - 
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr); + task_pid_nr(current), current->comm, cmd, nr); return -ENOTTY; } diff --git a/drivers/accel/habanalabs/common/hw_queue.c b/drivers/accel/habanalabs/common/hw_queue.c index d0087c0ec48c..3d04a7507cce 100644 --- a/drivers/accel/habanalabs/common/hw_queue.c +++ b/drivers/accel/habanalabs/common/hw_queue.c @@ -84,6 +84,8 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; + u64 addr; + int i; bd = q->kernel_address; bd += hl_pi_2_offset(q->pi); @@ -91,7 +93,16 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, bd->len = cpu_to_le32(len); bd->ptr = cpu_to_le64(ptr); + if (q->dram_bd) + for (i = 0 ; i < 2 ; i++) { + addr = q->pq_dram_address + + ((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd)) + (i * sizeof(u64))); + hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, addr, + (u64 *)(bd) + i, DEBUGFS_WRITE64); + } + q->pi = hl_queue_inc_ptr(q->pi); + hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); } @@ -1087,12 +1098,18 @@ int hl_hw_queues_create(struct hl_device *hdev) q->supports_sync_stream = asic->hw_queues_props[i].supports_sync_stream; q->collective_mode = asic->hw_queues_props[i].collective_mode; + q->dram_bd = asic->hw_queues_props[i].dram_bd; + rc = queue_init(hdev, q, i); if (rc) { dev_err(hdev->dev, "failed to initialize queue %d\n", i); goto release_queues; } + + /* Set DRAM PQ address for the queue if it should be at DRAM */ + if (q->dram_bd) + q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address; } return 0; diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c index 1ee2ee07e9ed..52d1e6bf10dc 100644 --- a/drivers/accel/habanalabs/common/hwmon.c +++ b/drivers/accel/habanalabs/common/hwmon.c @@ -46,7 +46,7 @@ static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types break; default: - dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type); + dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type); flags = cpucp_flags; break; } @@ -134,7 +134,7 @@ static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types ty break; default: - dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type); + dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type); flags = cpucp_flags; break; } @@ -162,7 +162,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen break; if (type >= HWMON_NR_SENSOR_TYPES) { - dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type); + dev_err_ratelimited(hdev->dev, + "Got wrong sensor type %d from device\n", type); return -EINVAL; } @@ -584,9 +585,10 @@ int hl_get_temperature(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get temperature from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get temperature from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -609,9 +611,8 @@ int hl_set_temperature(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set temperature of sensor %d, error %d\n", sensor_index, rc); @@ -638,9 +639,10 @@ int hl_get_voltage(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get 
voltage from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get voltage from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -667,9 +669,10 @@ int hl_get_current(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get current from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get current from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -696,9 +699,10 @@ int hl_get_fan_speed(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get fan speed from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get fan speed from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -725,9 +729,10 @@ int hl_get_pwm_info(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get pwm info from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get pwm info from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -750,9 +755,8 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set pwm info to sensor %d, error %d\n", sensor_index, rc); } @@ -773,9 +777,8 @@ int hl_set_voltage(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set voltage of sensor %d, error %d\n", sensor_index, rc); @@ -796,11 +799,9 @@ int hl_set_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); - - if (rc) - dev_err(hdev->dev, + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set current of sensor %d, error %d\n", sensor_index, rc); @@ -829,9 +830,8 @@ int hl_set_power(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set power of sensor %d, error %d\n", sensor_index, rc); @@ -858,9 +858,10 @@ int hl_get_power(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get power of sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get power of sensor %d, error %d\n", + sensor_index, rc); *value = 0; } diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index 978b7f4d5eeb..7c9f2f6a2870 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -652,14 +652,16 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) */ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) { + u32 size = hdev->asic_prop.fw_event_queue_size ? 
: HL_EQ_SIZE_IN_BYTES; void *p; - p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_EQ_SIZE_IN_BYTES, &q->bus_address); + p = hl_cpu_accessible_dma_pool_alloc(hdev, size, &q->bus_address); if (!p) return -ENOMEM; q->hdev = hdev; q->kernel_address = p; + q->size = size; q->ci = 0; q->prev_eqe_index = 0; @@ -678,7 +680,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) { flush_workqueue(hdev->eq_wq); - hl_cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, q->kernel_address); + hl_cpu_accessible_dma_pool_free(hdev, q->size, q->kernel_address); } void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) @@ -693,5 +695,30 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) * when the device is operational again */ - memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); + memset(q->kernel_address, 0, q->size); +} + +void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q) +{ + u32 eq_length, eqe_size, ctl, ready, mode, type, index; + struct hl_eq_header *hdr; + u8 *ptr; + int i; + + eq_length = HL_EQ_LENGTH; + eqe_size = q->size / HL_EQ_LENGTH; + + dev_info(hdev->dev, "Contents of EQ entries headers:\n"); + + for (i = 0, ptr = q->kernel_address ; i < eq_length ; ++i, ptr += eqe_size) { + hdr = (struct hl_eq_header *) ptr; + ctl = le32_to_cpu(hdr->ctl); + ready = FIELD_GET(EQ_CTL_READY_MASK, ctl); + mode = FIELD_GET(EQ_CTL_EVENT_MODE_MASK, ctl); + type = FIELD_GET(EQ_CTL_EVENT_TYPE_MASK, ctl); + index = FIELD_GET(EQ_CTL_INDEX_MASK, ctl); + + dev_info(hdev->dev, "%02u: %#010x [ready: %u, mode %u, type %04u, index %05u]\n", + i, ctl, ready, mode, type, index); + } } diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 3348ad12c237..601fdbe70179 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -14,7 +14,7 @@ #include <linux/vmalloc.h> #include <linux/pci-p2pdma.h> -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); #define HL_MMU_DEBUG 0 diff --git a/drivers/accel/habanalabs/common/memory_mgr.c b/drivers/accel/habanalabs/common/memory_mgr.c index c4d84df355b0..99cd83139d46 100644 --- a/drivers/accel/habanalabs/common/memory_mgr.c +++ b/drivers/accel/habanalabs/common/memory_mgr.c @@ -318,28 +318,61 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg) idr_init(&mmg->handles); } +static void hl_mem_mgr_fini_stats_reset(struct hl_mem_mgr_fini_stats *stats) +{ + if (!stats) + return; + + memset(stats, 0, sizeof(*stats)); +} + +static void hl_mem_mgr_fini_stats_inc(u64 mem_id, struct hl_mem_mgr_fini_stats *stats) +{ + if (!stats) + return; + + switch (mem_id) { + case HL_MMAP_TYPE_CB: + ++stats->n_busy_cb; + break; + case HL_MMAP_TYPE_TS_BUFF: + ++stats->n_busy_ts; + break; + default: + /* we currently store only CB/TS so this shouldn't happen */ + ++stats->n_busy_other; + } +} + /** * hl_mem_mgr_fini - release unified memory manager * * @mmg: parent unified memory manager + * @stats: if non-NULL, will return some counters for handles that could not be removed. * * Release the unified memory manager. Shall be called from an interrupt context. 
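hl_eq_dump() added above walks the event queue and decodes each entry's 32-bit ctl word with FIELD_GET() against the ready/mode/type/index masks. The same decode can be modelled in plain C with a shift-and-mask helper; the bit positions below are invented for the illustration and are not the driver's real EQ_CTL_* layout:

#include <stdio.h>
#include <stdint.h>

/* Invented field layout, only for illustration */
#define EQ_READY_SHIFT	31
#define EQ_READY_MASK	(0x1u << EQ_READY_SHIFT)
#define EQ_MODE_SHIFT	30
#define EQ_MODE_MASK	(0x1u << EQ_MODE_SHIFT)
#define EQ_TYPE_SHIFT	16
#define EQ_TYPE_MASK	(0x3ffu << EQ_TYPE_SHIFT)
#define EQ_INDEX_SHIFT	0
#define EQ_INDEX_MASK	(0xffffu << EQ_INDEX_SHIFT)

/* Userspace stand-in for the kernel's FIELD_GET() */
#define FIELD_GET_U32(mask, shift, reg)	(((reg) & (mask)) >> (shift))

int main(void)
{
	uint32_t ctl = 0x80120005;	/* example entry header */

	printf("ready: %u, mode: %u, type: %u, index: %u\n",
	       FIELD_GET_U32(EQ_READY_MASK, EQ_READY_SHIFT, ctl),
	       FIELD_GET_U32(EQ_MODE_MASK, EQ_MODE_SHIFT, ctl),
	       FIELD_GET_U32(EQ_TYPE_MASK, EQ_TYPE_SHIFT, ctl),
	       FIELD_GET_U32(EQ_INDEX_MASK, EQ_INDEX_SHIFT, ctl));
	return 0;
}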
*/ -void hl_mem_mgr_fini(struct hl_mem_mgr *mmg) +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats) { struct hl_mmap_mem_buf *buf; struct idr *idp; const char *topic; + u64 mem_id; u32 id; + hl_mem_mgr_fini_stats_reset(stats); + idp = &mmg->handles; idr_for_each_entry(idp, buf, id) { topic = buf->behavior->topic; - if (hl_mmap_mem_buf_put(buf) != 1) + mem_id = buf->behavior->mem_id; + if (hl_mmap_mem_buf_put(buf) != 1) { dev_err(mmg->dev, "%s: Buff handle %u for CTX is still alive\n", topic, id); + hl_mem_mgr_fini_stats_inc(mem_id, stats); + } } } diff --git a/drivers/accel/habanalabs/common/mmu/Makefile b/drivers/accel/habanalabs/common/mmu/Makefile index 1806c524e04a..f4b815bf4f7d 100644 --- a/drivers/accel/habanalabs/common/mmu/Makefile +++ b/drivers/accel/habanalabs/common/mmu/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \ - common/mmu/mmu_v2_hr.o + common/mmu/mmu_v2.o common/mmu/mmu_v2_hr.o diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c index b654302a68fc..79823facce7f 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu.c +++ b/drivers/accel/habanalabs/common/mmu/mmu.c @@ -6,6 +6,7 @@ */ #include <linux/slab.h> +#include <linux/pci.h> #include "../habanalabs.h" @@ -262,7 +263,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu mmu_funcs->flush(ctx); if (trace_habanalabs_mmu_unmap_enabled() && !rc) - trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte); + trace_habanalabs_mmu_unmap(&hdev->pdev->dev, virt_addr, 0, page_size, flush_pte); return rc; } @@ -349,7 +350,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s if (flush_pte) mmu_funcs->flush(ctx); - trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte); + trace_habanalabs_mmu_map(&hdev->pdev->dev, virt_addr, phys_addr, page_size, flush_pte); return 0; @@ -585,6 +586,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, int hl_mmu_if_set_funcs(struct hl_device *hdev) { + struct asic_fixed_properties *prop = &hdev->asic_prop; + if (hdev->mmu_disable) return 0; @@ -597,8 +600,10 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) case ASIC_GAUDI2: case ASIC_GAUDI2B: case ASIC_GAUDI2C: - /* MMUs in Gaudi2 are always host resident */ - hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); + case ASIC_GAUDI2D: + hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); + if (prop->pmmu.host_resident) + hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -641,7 +646,8 @@ int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); if (rc) dev_err_ratelimited(hdev->dev, - "%s cache invalidation failed, rc=%d\n", + "%s: %s cache invalidation failed, rc=%d\n", + dev_name(&hdev->pdev->dev), flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc); return rc; @@ -656,8 +662,9 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, asid, va, size); if (rc) dev_err_ratelimited(hdev->dev, - "%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d", - flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc); + "%s: %s cache range invalidation failed: va=%#llx, size=%llu, rc=%d", + dev_name(&hdev->pdev->dev), flags == VM_TYPE_USERPTR ? 
"PMMU" : "HMMU", + va, size, rc); return rc; } @@ -1209,3 +1216,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_ return 0; } +struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = NULL; + + hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, + (unsigned long) hop_addr) + if (hop_addr == pgt_info->shadow_addr) + break; + + return pgt_info; +} + +void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr); + + hl_mmu_dr_free_pgt_node(ctx, pgt_info); +} + +void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info) +{ + struct hl_device *hdev = ctx->hdev; + + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, + hdev->asic_prop.dmmu.hop_table_size); + hash_del(&pgt_info->node); + kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); + kfree(pgt_info); +} + +u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx) +{ + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size); +} + +u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx) +{ + return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + + (ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size); +} + +u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) +{ + u64 page_mask = ctx->hdev->asic_prop.dmmu.hop_table_size - 1; + u64 shadow_hop_addr = shadow_addr & (~page_mask); + u64 pte_offset = shadow_addr & page_mask; + u64 phys_hop_addr; + + if (shadow_hop_addr != hl_mmu_dr_get_hop0_addr(ctx)) + phys_hop_addr = hl_mmu_dr_get_pgt_info(ctx, shadow_hop_addr)->phys_addr; + else + phys_hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx); + + return phys_hop_addr + pte_offset; +} + +void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val); + + ctx->hdev->asic_funcs->write_pte(ctx->hdev, hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), + phys_val); + + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), val); + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr) +{ + hl_mmu_dr_write_final_pte(ctx, pte_addr, 0); +} + +void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + hl_mmu_dr_get_pgt_info(ctx, hop_addr)->num_of_ptes++; +} + +int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr); + int num_of_ptes_left; + + pgt_info->num_of_ptes--; + + /* + * Need to save the number of ptes left because hl_mmu_free_hop might free + * the pgt_info + */ + num_of_ptes_left = pgt_info->num_of_ptes; + if (!num_of_ptes_left) + hl_mmu_dr_free_pgt_node(ctx, pgt_info); + + return num_of_ptes_left; +} + +u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pgt_info *pgt_info; + u64 phys_addr, shadow_addr; + + pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); + if (!pgt_info) + return ULLONG_MAX; + + phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, + prop->dmmu.hop_table_size); + if (!phys_addr) { + dev_err(hdev->dev, "failed to allocate page\n"); + goto pool_add_err; + } + + shadow_addr = (u64) (uintptr_t) kzalloc(prop->dmmu.hop_table_size, 
+ GFP_KERNEL); + if (!shadow_addr) + goto shadow_err; + + pgt_info->phys_addr = phys_addr; + pgt_info->shadow_addr = shadow_addr; + pgt_info->ctx = ctx; + pgt_info->num_of_ptes = 0; + hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); + + return shadow_addr; + +shadow_err: + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, + phys_addr, prop->dmmu.hop_table_size); +pool_add_err: + kfree(pgt_info); + + return ULLONG_MAX; +} + +u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop) +{ + u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); + + if (hop_addr == ULLONG_MAX) { + hop_addr = hl_mmu_dr_alloc_hop(ctx); + *is_new_hop = (hop_addr != ULLONG_MAX); + } + + return hop_addr; +} + +void hl_mmu_dr_flush(struct hl_ctx *ctx) +{ + /* flush all writes from all cores to reach PCI */ + mb(); + ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx)); +} + +int hl_mmu_dr_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + hdev->mmu_priv.dr.mmu_pgt_pool = + gen_pool_create(__ffs(prop->dmmu.hop_table_size), -1); + + if (!hdev->mmu_priv.dr.mmu_pgt_pool) { + dev_err(hdev->dev, "Failed to create page gen pool\n"); + return -ENOMEM; + } + + rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + + prop->dmmu.hop0_tables_total_size, + prop->dmmu.pgt_size - prop->dmmu.hop0_tables_total_size, + -1); + if (rc) { + dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); + goto err_pool_add; + } + + hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, + prop->dmmu.hop_table_size, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { + rc = -ENOMEM; + goto err_pool_add; + } + + /* MMU H/W init will be done in device hw_init() */ + + return 0; + +err_pool_add: + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + + return rc; +} + +void hl_mmu_dr_fini(struct hl_device *hdev) +{ + /* MMU H/W fini was already done in device hw_fini() */ + + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) + return; + + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + + /* Make sure that if we arrive here again without init was + * called we won't cause kernel panic. 
This can happen for + * example if we fail during hard reset code at certain points + */ + hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; +} diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v1.c b/drivers/accel/habanalabs/common/mmu/mmu_v1.c index d925dc4dd097..845d16aaa637 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/accel/habanalabs/common/mmu/mmu_v1.c @@ -12,166 +12,6 @@ #define MMU_V1_MAX_HOPS (MMU_HOP4 + 1) -static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); - -static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = NULL; - - hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, - (unsigned long) hop_addr) - if (hop_addr == pgt_info->shadow_addr) - break; - - return pgt_info; -} - -static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) -{ - struct hl_device *hdev = ctx->hdev; - - gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, - hdev->asic_prop.mmu_hop_table_size); - hash_del(&pgt_info->node); - kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); - kfree(pgt_info); -} - -static void free_hop(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - - _free_hop(ctx, pgt_info); -} - -static u64 alloc_hop(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct pgt_info *pgt_info; - u64 phys_addr, shadow_addr; - - pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); - if (!pgt_info) - return ULLONG_MAX; - - phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, - prop->mmu_hop_table_size); - if (!phys_addr) { - dev_err(hdev->dev, "failed to allocate page\n"); - goto pool_add_err; - } - - shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size, - GFP_KERNEL); - if (!shadow_addr) - goto shadow_err; - - pgt_info->phys_addr = phys_addr; - pgt_info->shadow_addr = shadow_addr; - pgt_info->ctx = ctx; - pgt_info->num_of_ptes = 0; - hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); - - return shadow_addr; - -shadow_err: - gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr, - prop->mmu_hop_table_size); -pool_add_err: - kfree(pgt_info); - - return ULLONG_MAX; -} - -static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static void flush(struct hl_ctx *ctx) -{ - /* flush all writes from all cores to reach PCI */ - mb(); - ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx)); -} - -/* transform the value to physical address when writing to H/W */ -static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) -{ - /* - * The value to write is actually the address of the next shadow hop + - * flags at the 12 LSBs. - * Hence in order to get the value to write to the physical PTE, we - * clear the 12 LSBs and translate the shadow hop to its associated - * physical hop, and add back the original 12 LSBs. 
- */ - u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | - (val & FLAGS_MASK); - - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - get_phys_addr(ctx, shadow_pte_addr), - phys_val); - - *(u64 *) (uintptr_t) shadow_pte_addr = val; -} - -/* do not transform the value to physical address when writing to H/W */ -static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, - u64 val) -{ - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - get_phys_addr(ctx, shadow_pte_addr), - val); - *(u64 *) (uintptr_t) shadow_pte_addr = val; -} - -/* clear the last and present bits */ -static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr) -{ - /* no need to transform the value to physical address */ - write_final_pte(ctx, pte_addr, 0); -} - -static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr) -{ - get_pgt_info(ctx, hop_addr)->num_of_ptes++; -} - -/* - * put_pte - decrement the num of ptes and free the hop if possible - * - * @ctx: pointer to the context structure - * @hop_addr: addr of the hop - * - * This function returns the number of ptes left on this hop. If the number is - * 0, it means the pte was freed. - */ -static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - int num_of_ptes_left; - - pgt_info->num_of_ptes--; - - /* - * Need to save the number of ptes left because free_hop might free - * the pgt_info - */ - num_of_ptes_left = pgt_info->num_of_ptes; - if (!num_of_ptes_left) - _free_hop(ctx, pgt_info); - - return num_of_ptes_left; -} - static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx) { @@ -183,35 +23,6 @@ static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); } -static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, - bool *is_new_hop) -{ - u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop_addr == ULLONG_MAX) { - hop_addr = alloc_hop(ctx); - *is_new_hop = (hop_addr != ULLONG_MAX); - } - - return hop_addr; -} - -/* translates shadow address inside hop to a physical address */ -static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) -{ - u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1); - u64 shadow_hop_addr = shadow_addr & ~page_mask; - u64 pte_offset = shadow_addr & page_mask; - u64 phys_hop_addr; - - if (shadow_hop_addr != get_hop0_addr(ctx)) - phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr; - else - phys_hop_addr = get_phys_hop0_addr(ctx); - - return phys_hop_addr + pte_offset; -} - static int dram_default_mapping_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -232,13 +43,13 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) /* add hop1 and hop2 */ total_hops = num_of_hop3 + 2; - ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL); + ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE, GFP_KERNEL); if (!ctx->dram_default_hops) return -ENOMEM; - hop0_addr = get_hop0_addr(ctx); + hop0_addr = hl_mmu_dr_get_hop0_addr(ctx); - hop1_addr = alloc_hop(ctx); + hop1_addr = hl_mmu_dr_alloc_hop(ctx); if (hop1_addr == ULLONG_MAX) { dev_err(hdev->dev, "failed to alloc hop 1\n"); rc = -ENOMEM; @@ -247,7 +58,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) ctx->dram_default_hops[total_hops - 1] = hop1_addr; - hop2_addr = alloc_hop(ctx); + hop2_addr = hl_mmu_dr_alloc_hop(ctx); 
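Note: the new hl_mmu_dr_* helpers keep a host-side (shadow) copy of every device-resident hop table and translate a shadow PTE address back to its device-physical counterpart before writing through asic_funcs->write_pte(). A minimal, self-contained model of that translation arithmetic, with made-up addresses and a 4 KB hop table size standing in for the driver's dmmu.hop_table_size:

#include <stdint.h>
#include <stdio.h>

/*
 * Model of hl_mmu_dr_get_phys_addr(): hop tables are hop_table_size bytes
 * and power-of-two sized, so the low bits of a shadow PTE address are the
 * PTE's offset inside its hop; the driver finds the owning hop (and its
 * device-physical base) via a hash keyed on shadow_addr & ~page_mask.
 */
static uint64_t shadow_to_phys(uint64_t shadow_pte_addr, uint64_t phys_hop_base,
                               uint64_t hop_table_size)
{
        uint64_t page_mask = hop_table_size - 1;

        return phys_hop_base + (shadow_pte_addr & page_mask);
}

int main(void)
{
        /* hypothetical addresses and a 4 KB hop table, for illustration only */
        uint64_t shadow_pte = 0x7f00aa001238ULL;
        uint64_t phys = shadow_to_phys(shadow_pte, 0x80000000ULL, 0x1000);

        printf("shadow PTE %#llx -> device PTE %#llx\n",
               (unsigned long long)shadow_pte, (unsigned long long)phys);
        return 0;
}

The map/unmap walks then read PTEs from the shadow copy (the curr_pte dereferences in the surrounding code) instead of reading them back from device memory.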
if (hop2_addr == ULLONG_MAX) { dev_err(hdev->dev, "failed to alloc hop 2\n"); rc = -ENOMEM; @@ -257,7 +68,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) ctx->dram_default_hops[total_hops - 2] = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - ctx->dram_default_hops[i] = alloc_hop(ctx); + ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx); if (ctx->dram_default_hops[i] == ULLONG_MAX) { dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i); rc = -ENOMEM; @@ -268,18 +79,18 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) /* need only pte 0 in hops 0 and 1 */ pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop0_addr, pte_val); + hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val); pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop1_addr, pte_val); - get_pte(ctx, hop1_addr); + hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val); + hl_mmu_dr_get_pte(ctx, hop1_addr); hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop2_pte_addr, pte_val); - get_pte(ctx, hop2_addr); + hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val); + hl_mmu_dr_get_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } @@ -289,23 +100,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) { - write_final_pte(ctx, hop3_pte_addr, pte_val); - get_pte(ctx, ctx->dram_default_hops[i]); + hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val); + hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; } } - flush(ctx); + hl_mmu_dr_flush(ctx); return 0; hop3_err: for (i = 0 ; i < hop3_allocated ; i++) - free_hop(ctx, ctx->dram_default_hops[i]); + hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]); - free_hop(ctx, hop2_addr); + hl_mmu_dr_free_hop(ctx, hop2_addr); hop2_err: - free_hop(ctx, hop1_addr); + hl_mmu_dr_free_hop(ctx, hop1_addr); hop1_err: kfree(ctx->dram_default_hops); @@ -329,7 +140,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) do_div(num_of_hop3, prop->dram_page_size); do_div(num_of_hop3, HOP_PTE_ENTRIES_512); - hop0_addr = get_hop0_addr(ctx); + hop0_addr = hl_mmu_dr_get_hop0_addr(ctx); /* add hop1 and hop2 */ total_hops = num_of_hop3 + 2; hop1_addr = ctx->dram_default_hops[total_hops - 1]; @@ -338,101 +149,26 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) { - clear_pte(ctx, hop3_pte_addr); - put_pte(ctx, ctx->dram_default_hops[i]); + hl_mmu_dr_clear_pte(ctx, hop3_pte_addr); + hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; } } hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - clear_pte(ctx, hop2_pte_addr); - put_pte(ctx, hop2_addr); + hl_mmu_dr_clear_pte(ctx, hop2_pte_addr); + hl_mmu_dr_put_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } - clear_pte(ctx, hop1_addr); - put_pte(ctx, hop1_addr); - clear_pte(ctx, hop0_addr); + hl_mmu_dr_clear_pte(ctx, hop1_addr); + hl_mmu_dr_put_pte(ctx, hop1_addr); + hl_mmu_dr_clear_pte(ctx, hop0_addr); kfree(ctx->dram_default_hops); - flush(ctx); -} - -/** - * hl_mmu_v1_init() - initialize the MMU module. - * @hdev: habanalabs device structure. - * - * This function does the following: - * - Create a pool of pages for pgt_infos. 
- * - Create a shadow table for pgt - * - * Return: 0 for success, non-zero for failure. - */ -static int hl_mmu_v1_init(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - int rc; - - hdev->mmu_priv.dr.mmu_pgt_pool = - gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - - if (!hdev->mmu_priv.dr.mmu_pgt_pool) { - dev_err(hdev->dev, "Failed to create page gen pool\n"); - return -ENOMEM; - } - - rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + - prop->mmu_hop0_tables_total_size, - prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, - -1); - if (rc) { - dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); - goto err_pool_add; - } - - hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size, - GFP_KERNEL); - if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { - rc = -ENOMEM; - goto err_pool_add; - } - - /* MMU H/W init will be done in device hw_init() */ - - return 0; - -err_pool_add: - gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); - - return rc; -} - -/** - * hl_mmu_v1_fini() - release the MMU module. - * @hdev: habanalabs device structure. - * - * This function does the following: - * - Disable MMU in H/W. - * - Free the pgt_infos pool. - * - * All contexts should be freed before calling this function. - */ -static void hl_mmu_v1_fini(struct hl_device *hdev) -{ - /* MMU H/W fini was already done in device hw_fini() */ - - if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { - kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); - - /* Make sure that if we arrive here again without init was - * called we won't cause kernel panic. This can happen for - * example if we fail during hard reset code at certain points - */ - hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; - } + hl_mmu_dr_flush(ctx); } /** @@ -476,7 +212,7 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) dev_err_ratelimited(hdev->dev, "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); - _free_hop(ctx, pgt_info); + hl_mmu_dr_free_pgt_node(ctx, pgt_info); } } @@ -495,7 +231,7 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx, for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) { if (hop_idx == MMU_HOP0) { - hop_addr[hop_idx] = get_hop0_addr(ctx); + hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx); } else { hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop_addr[hop_idx] == ULLONG_MAX) @@ -546,30 +282,30 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx, } hop_idx = MMU_HOP3; - write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); - put_pte(ctx, hop_addr[hop_idx]); + hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); + hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]); } else { if (!(curr_pte & PAGE_PRESENT_MASK)) goto not_mapped; if (hop_addr[MMU_HOP4]) - clear_pte(ctx, hop_pte_addr[MMU_HOP4]); + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]); else - clear_pte(ctx, hop_pte_addr[MMU_HOP3]); + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]); - if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4])) + if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4])) clear_hop3 = true; if (!clear_hop3) goto mapped; for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) { - clear_pte(ctx, hop_pte_addr[hop_idx]); + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]); if (hop_idx == MMU_HOP0) break; - if (put_pte(ctx, hop_addr[hop_idx])) + if (hl_mmu_dr_put_pte(ctx, 
hop_addr[hop_idx])) goto mapped; } } @@ -616,10 +352,10 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) { if (hop_idx == MMU_HOP0) { - hop_addr[hop_idx] = get_hop0_addr(ctx); + hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx); } else { hop_addr[hop_idx] = - get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]); + hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]); if (hop_addr[hop_idx] == ULLONG_MAX) goto err; } @@ -666,27 +402,27 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask | PAGE_PRESENT_MASK; - write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte); + hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte); for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) { prev_hop = hop_idx - 1; if (hop_new[hop_idx]) { curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop_pte_addr[prev_hop], curr_pte); + hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte); if (hop_idx != MMU_HOP1) - get_pte(ctx, hop_addr[prev_hop]); + hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]); } } - get_pte(ctx, hop_addr[num_hops - 1]); + hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]); return 0; err: for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) { if (hop_new[hop_idx]) - free_hop(ctx, hop_addr[hop_idx]); + hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]); } return rc; @@ -752,7 +488,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, if (is_huge) used_hops--; - hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx); + hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx); hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0, hops->hop_info[0].hop_addr, virt_addr); @@ -801,13 +537,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, */ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) { - mmu->init = hl_mmu_v1_init; - mmu->fini = hl_mmu_v1_fini; + mmu->init = hl_mmu_dr_init; + mmu->fini = hl_mmu_dr_fini; mmu->ctx_init = hl_mmu_v1_ctx_init; mmu->ctx_fini = hl_mmu_v1_ctx_fini; mmu->map = hl_mmu_v1_map; mmu->unmap = hl_mmu_v1_unmap; - mmu->flush = flush; + mmu->flush = hl_mmu_dr_flush; mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_in = hl_mmu_v1_swap_in; mmu->get_tlb_info = hl_mmu_v1_get_tlb_info; diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2.c b/drivers/accel/habanalabs/common/mmu/mmu_v2.c new file mode 100644 index 000000000000..4bc0268fff1c --- /dev/null +++ b/drivers/accel/habanalabs/common/mmu/mmu_v2.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2020 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "../habanalabs.h" +#include "../../include/hw_ip/mmu/mmu_general.h" +#include "../../include/hw_ip/mmu/mmu_v2_0.h" + +#include <linux/slab.h> + +/** + * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module. + * @ctx: pointer to the context structure to initialize. + * + * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all + * page tables hops related to this context. + * Return: 0 on success, non-zero otherwise. 
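Note: both the v2 map and unmap walks locate each hop's PTE by masking and shifting the scrambled virtual address with the per-hop hop_shifts/hop_masks properties (hl_mmu_get_hop_pte_phys_addr(), same arithmetic as get_hop_pte_addr() shown earlier). A self-contained sketch of that index computation, using invented shift values and hop bases rather than the Gaudi2 ones:

#include <stdint.h>
#include <stdio.h>

#define PTE_SIZE 8 /* bytes per PTE, mirrors HL_PTE_SIZE */

/*
 * Each hop decodes a different slice of the virtual address; the slice,
 * shifted down, is the PTE index within that hop's table.
 */
static uint64_t hop_pte_addr(uint64_t hop_base, uint64_t virt_addr,
                             unsigned int shift, uint64_t mask)
{
        return hop_base + PTE_SIZE * ((virt_addr & mask) >> shift);
}

int main(void)
{
        /* hypothetical 4-hop layout: 9 address bits per hop above a 12-bit page */
        unsigned int shifts[4] = { 39, 30, 21, 12 };
        uint64_t hop_base[4] = { 0x100000, 0x101000, 0x102000, 0x103000 };
        uint64_t virt = 0x0000123456789000ULL;
        int i;

        for (i = 0; i < 4; i++) {
                uint64_t mask = 0x1ffULL << shifts[i];

                printf("hop%d PTE at %#llx\n", i,
                       (unsigned long long)hop_pte_addr(hop_base[i], virt,
                                                        shifts[i], mask));
        }
        return 0;
}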
+ */ +static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx) +{ + hash_init(ctx->mmu_shadow_hash); + + return 0; +} + +/* + * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module + * + * @ctx: pointer to the context structure + * + * This function does the following: + * - Free any pgts which were not freed yet + * - Free the mutex + * - Free DRAM default page mapping hops + */ +static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct pgt_info *pgt_info; + struct hlist_node *tmp; + int i; + + if (!hash_empty(ctx->mmu_shadow_hash)) + dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", + ctx->asid); + + hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { + dev_err_ratelimited(hdev->dev, + "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", + pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); + hl_mmu_dr_free_pgt_node(ctx, pgt_info); + } +} + +static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) +{ + u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte, + scrambled_virt_addr; + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; + struct hl_device *hdev = ctx->hdev; + struct hl_mmu_properties *mmu_prop; + bool is_huge = false; + int i, hop_last; + + /* device resident in V2 are allowed only for HMMU */ + if (!is_dram_addr) + return -EINVAL; + + mmu_prop = &prop->dmmu; + + hop_last = mmu_prop->num_hops - 1; + + scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr); + + hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx); + hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0, + hop_addr[0], scrambled_virt_addr); + if (hop_pte_addr[0] == U64_MAX) + return -EFAULT; + + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0]; + + for (i = 1 ; i < mmu_prop->num_hops ; i++) { + hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[i] == ULLONG_MAX) + goto not_mapped; + + hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i, + hop_addr[i], scrambled_virt_addr); + if (hop_pte_addr[i] == U64_MAX) + return -EFAULT; + + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i]; + + if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) { + hop_last = i; + is_huge = true; + break; + } + } + + if (is_dram_addr && !is_huge) { + dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n"); + return -EFAULT; + } + + if (!(curr_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + + for (i = hop_last ; i > 0 ; i--) { + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]); + if (hl_mmu_dr_put_pte(ctx, hop_addr[i])) + goto mapped; + } + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]); + +mapped: + return 0; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + + return -EINVAL; +} + +static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool is_dram_addr) +{ + u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, + curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr; + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; + bool hop_new[MMU_ARCH_6_HOPS] = { false }; + struct hl_device *hdev = ctx->hdev; + struct hl_mmu_properties *mmu_prop; + int rc, i, hop_last; + + /* device resident in V2 are allowed only for HMMU */ + if (!is_dram_addr) + return -EINVAL; + + mmu_prop = &prop->dmmu; + + hop_last = mmu_prop->num_hops - 1; + + scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr); + 
scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr); + + /* First hop is preallocated therefore it is treated differently */ + hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx); + hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0, + hop_addr[0], scrambled_virt_addr); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0]; + + /* Handle hop1 to hop_last */ + for (i = 1 ; i <= hop_last ; i++) { + hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]); + if (hop_addr[i] == ULLONG_MAX) { + rc = -ENOMEM; + goto err; + } + + hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i, + hop_addr[i], scrambled_virt_addr); + if (hop_pte_addr[i] == U64_MAX) { + rc = -EINVAL; + goto err; + } + + if (!hop_pte_addr[i]) { + rc = -EINVAL; + goto err; + } + + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i]; + } + + if (curr_pte & PAGE_PRESENT_MASK) { + dev_err(hdev->dev, + "mapping already exists for virt_addr 0x%llx\n", + virt_addr); + + for (i = 0 ; i <= hop_last ; i++) + dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", + i, *(u64 *) (uintptr_t) hop_pte_addr[i], + hop_pte_addr[i]); + + rc = -EINVAL; + goto err; + } + + curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK) + | mmu_prop->last_mask | PAGE_PRESENT_MASK; + + /* Write the PTEs */ + hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte); + + /* for each new hop, add its address to the table of previous-hop */ + for (i = 1 ; i <= hop_last ; i++) { + if (hop_new[i]) { + curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte); + + if (i - 1) + hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]); + } + } + hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]); + + return 0; + +err: + for (i = 1 ; i <= hop_last ; i++) + if (hop_new[i] && (hop_addr[i] != U64_MAX)) + hl_mmu_dr_free_hop(ctx, hop_addr[i]); + + return rc; +} + +/* + * hl_mmu_v2_swap_out - marks all mapping of the given ctx as swapped out + * + * @ctx: pointer to the context structure + * + */ +static void hl_mmu_v2_swap_out(struct hl_ctx *ctx) +{ + +} + +/* + * hl_mmu_v2_swap_in - marks all mapping of the given ctx as swapped in + * + * @ctx: pointer to the context structure + * + */ +static void hl_mmu_v2_swap_in(struct hl_ctx *ctx) +{ + +} + +static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops) +{ + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; + struct hl_device *hdev = ctx->hdev; + struct hl_mmu_properties *mmu_prop; + bool is_dram_addr; + int i; + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* device resident in V2 are allowed only for HMMU */ + if (!is_dram_addr) + return -EINVAL; + + mmu_prop = &prop->dmmu; + hops->range_type = HL_VA_RANGE_TYPE_DRAM; + + hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr); + + hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx); + hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0, + hops->hop_info[0].hop_addr, + hops->scrambled_vaddr); + if (hops->hop_info[0].hop_pte_addr == U64_MAX) + return -EFAULT; + + hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev, + hops->hop_info[0].hop_pte_addr); + if (hops->hop_info[0].hop_pte_val == U64_MAX) + return -EFAULT; + + for (i = 1 ; i < mmu_prop->num_hops ; i++) { + hops->hop_info[i].hop_addr = + hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val); + if 
(hops->hop_info[i].hop_addr == ULLONG_MAX) + return -EFAULT; + + hops->hop_info[i].hop_pte_addr = + hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i, + hops->hop_info[i].hop_addr, + hops->scrambled_vaddr); + if (hops->hop_info[i].hop_pte_addr == U64_MAX) + return -EFAULT; + + hops->hop_info[i].hop_pte_val = + hdev->asic_funcs->read_pte(hdev, + hops->hop_info[i].hop_pte_addr); + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask) + break; + } + + /* if passed over all hops then no last hop was found */ + if (i == mmu_prop->num_hops) + return -EFAULT; + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + if (hops->scrambled_vaddr != virt_addr) + hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr + (hdev, hops->hop_info[i].hop_pte_val); + else + hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val; + + hops->used_hops = i + 1; + + return 0; +} + +/* + * hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2 + * + * @hdev: pointer to the device structure + * @mmu_if: pointer to the mmu interface structure + */ +void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) +{ + mmu->init = hl_mmu_dr_init; + mmu->fini = hl_mmu_dr_fini; + mmu->ctx_init = hl_mmu_v2_ctx_init; + mmu->ctx_fini = hl_mmu_v2_ctx_fini; + mmu->map = hl_mmu_v2_map; + mmu->unmap = hl_mmu_v2_unmap; + mmu->flush = hl_mmu_dr_flush; + mmu->swap_out = hl_mmu_v2_swap_out; + mmu->swap_in = hl_mmu_v2_swap_in; + mmu->get_tlb_info = hl_mmu_v2_get_tlb_info; +} diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c index afe7ef964f82..31507b2a431b 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c +++ b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c @@ -47,7 +47,7 @@ static inline int hl_mmu_v2_hr_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size, + return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size, prop->mmu_pgt_size); } @@ -65,7 +65,7 @@ static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size); + hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size); } /** @@ -108,7 +108,7 @@ static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx) "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr, - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); } } @@ -150,7 +150,7 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx, curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) { hop_last = i; @@ -169,14 +169,14 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx, for (i = hop_last ; i > 0 ; i--) { hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr, - ctx->hdev->asic_prop.mmu_hop_table_size)) + ctx->hdev->asic_prop.pmmu.hop_table_size)) 
goto mapped; } hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); mapped: return 0; @@ -255,7 +255,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, scrambled_virt_addr); curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); } if (curr_pte & PAGE_PRESENT_MASK) { @@ -268,7 +268,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size), + ctx->hdev->asic_prop.pmmu.hop_table_size), hop_pte_phys_addr[i]); rc = -EINVAL; goto err; @@ -279,7 +279,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, /* Write the PTEs */ hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte, - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); /* for each new hop, add its address to the table of previous-hop */ for (i = 1 ; i <= hop_last ; i++) { @@ -287,7 +287,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1], - curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size); + curr_pte, ctx->hdev->asic_prop.pmmu.hop_table_size); if (i - 1) hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs, hops_pgt_info[i - 1]->phys_addr); @@ -303,7 +303,7 @@ err: for (i = 1 ; i <= hop_last ; i++) if (hop_new[i] && hops_pgt_info[i]) hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr, - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); return rc; } diff --git a/drivers/accel/habanalabs/common/pci/pci.c b/drivers/accel/habanalabs/common/pci/pci.c index 191e0e3cf3a5..81cbd8697d4c 100644 --- a/drivers/accel/habanalabs/common/pci/pci.c +++ b/drivers/accel/habanalabs/common/pci/pci.c @@ -123,7 +123,7 @@ int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data) pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); if (unlikely(trace_habanalabs_elbi_read_enabled())) - trace_habanalabs_elbi_read(hdev->dev, (u32) addr, val); + trace_habanalabs_elbi_read(&hdev->pdev->dev, (u32) addr, val); return 0; } @@ -186,7 +186,7 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) { if (unlikely(trace_habanalabs_elbi_write_enabled())) - trace_habanalabs_elbi_write(hdev->dev, (u32) addr, val); + trace_habanalabs_elbi_write(&hdev->pdev->dev, (u32) addr, val); return 0; } diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c index fe913965dbad..5402a3cd0491 100644 --- a/drivers/accel/habanalabs/common/security.c +++ b/drivers/accel/habanalabs/common/security.c @@ -7,15 +7,31 @@ #include "habanalabs.h" -static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = { +static const char * const hl_glbl_error_cause[] = { "Error due to un-priv read", "Error due to un-secure read", "Error due to read from unmapped reg", "Error due to un-priv write", "Error due to un-secure write", "Error due to write to unmapped reg", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", "External I/F write sec violation", 
"External I/F write to un-mapped reg", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", "Read to write only", "Write to read only" }; @@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev, static int hl_read_glbl_errors(struct hl_device *hdev, u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data) { - struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_special_block_info *special_blocks = prop->special_blocks; struct hl_special_block_info *current_block = &special_blocks[blk_idx]; u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base, - base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address); + base = current_block->base_addr - lower_32_bits(prop->cfg_base_address); int i; block_base = base + major * current_block->major_offset + @@ -689,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev, glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET; addr_val = RREG32(glbl_err_addr); - for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) { + for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) { if (cause_val & BIT(i)) dev_err_ratelimited(hdev->dev, - "%s, addr %#llx\n", - hl_glbl_error_cause[i], - hdev->asic_prop.cfg_base_address + block_base + - FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val)); + "%s, addr %#llx\n", + hl_glbl_error_cause[i], + prop->cfg_base_address + block_base + + FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val)); } WREG32(glbl_err_cause, cause_val); diff --git a/drivers/accel/habanalabs/common/security.h b/drivers/accel/habanalabs/common/security.h index d7a3b3e82ea4..476f70687c09 100644 --- a/drivers/accel/habanalabs/common/security.h +++ b/drivers/accel/habanalabs/common/security.h @@ -13,8 +13,7 @@ struct hl_device; /* special blocks */ -#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10 -#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0) +#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0) /* GLBL_ERR_ADDR register offset from the start of the block */ #define HL_GLBL_ERR_ADDR_OFFSET 0xF44 /* GLBL_ERR_CAUSE register offset from the start of the block */ diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index 8a9f98832157..e9f8ccc0bbf9 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -142,8 +142,9 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "0x%08x\n", - le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); + return sprintf(buf, "0x%08x%08x\n", + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_timestamp), + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); } static ssize_t cpucp_kernel_ver_show(struct device *dev, @@ -270,6 +271,9 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI2C: str = "GAUDI2C"; break; + case ASIC_GAUDI2D: + str = "GAUDI2D"; + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index 53292d4c15c8..fa893a9b826e 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -614,8 +614,6 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) else prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; - 
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = false; @@ -637,8 +635,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; /* TODO: will be duplicated until implementing per-MMU props */ - prop->pmmu.hop_table_size = prop->mmu_hop_table_size; - prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -649,6 +647,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); prop->dmmu.end_addr = VA_HOST_SPACE_END; prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->dmmu.pgt_size = prop->mmu_pgt_size; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; @@ -1640,10 +1639,8 @@ static int gaudi_late_init(struct hl_device *hdev) } rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); - if (rc) { - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + if (rc) return rc; - } /* Scrub both SRAM and DRAM */ rc = hdev->asic_funcs->scrub_device_mem(hdev); @@ -3652,7 +3649,7 @@ static int gaudi_mmu_init(struct hl_device *hdev) for (i = 0 ; i < prop->max_asid ; i++) { hop0_addr = prop->mmu_pgt_addr + - (i * prop->mmu_hop_table_size); + (i * prop->dmmu.hop_table_size); rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); if (rc) { @@ -4155,13 +4152,7 @@ skip_reset: static int gaudi_suspend(struct hl_device *hdev) { - int rc; - - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); - if (rc) - dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); - - return rc; + return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); } static int gaudi_resume(struct hl_device *hdev) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index e0e5615ef9b0..a38b88baadf2 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -158,11 +158,13 @@ #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \ (RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh)) -#define PSOC_RAZWI_ENG_STR_SIZE 128 -#define PSOC_RAZWI_MAX_ENG_PER_RTR 5 +#define PSOC_RAZWI_ENG_STR_SIZE 128 +#define PSOC_RAZWI_MAX_ENG_PER_RTR 5 /* HW scrambles only bits 0-25 */ -#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26) +#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26) + +#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17 struct gaudi2_razwi_info { u32 axuser_xy; @@ -2308,11 +2310,26 @@ static int set_number_of_functional_hbms(struct hl_device *hdev) return 0; } +static bool gaudi2_is_edma_queue_id(u32 queue_id) +{ + + switch (queue_id) { + case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: + case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: + case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: + case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: + return true; + default: + return false; + } +} + static int gaudi2_set_dram_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 basic_hbm_page_size; 
- int rc; + u64 hbm_drv_base_offset = 0, edma_pq_base_addr; + u32 basic_hbm_page_size, edma_idx = 0; + int rc, i; rc = set_number_of_functional_hbms(hdev); if (rc) @@ -2356,9 +2373,35 @@ static int gaudi2_set_dram_properties(struct hl_device *hdev) prop->dmmu.start_addr = prop->dram_base_address + (prop->dram_page_size * DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size)); - prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size * div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size); + /* + * Driver can't share an (48MB) HBM page with the F/W in order to prevent FW to block + * the driver part by range register, so it must start at the next (48MB) page + */ + hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M); + + /* + * The NIC driver section size and the HMMU page tables section in the HBM needs + * to be the remaining size in the first dram page after taking into + * account the F/W image size + */ + + /* Reserve region in HBM for HMMU page tables */ + prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset + + ((prop->dram_page_size - hbm_drv_base_offset) - + (HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE)); + + /* Set EDMA PQs HBM addresses */ + edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE; + + for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) { + if (gaudi2_is_edma_queue_id(i)) { + prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr + + (edma_idx * HL_QUEUE_SIZE_IN_BYTES); + edma_idx++; + } + } return 0; } @@ -2368,7 +2411,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; struct hw_queue_properties *q_props; u32 num_sync_stream_queues = 0; - int i; + int i, rc; prop->max_queues = GAUDI2_QUEUE_ID_SIZE; prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties), @@ -2391,6 +2434,9 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) } q_props[i].cb_alloc_flags = CB_ALLOC_USER; + + if (gaudi2_is_edma_queue_id(i)) + q_props[i].dram_bd = 1; } q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU; @@ -2419,46 +2465,43 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1; - if (hdev->pldm) - prop->mmu_pgt_size = 0x800000; /* 8MB */ - else - prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE; + prop->max_asid = 2; + prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; - prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT; prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT; prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT; prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT; - prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT; prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK; prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK; prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK; prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK; - prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK; prop->dmmu.page_size = PAGE_SIZE_1GB; - prop->dmmu.num_hops = MMU_ARCH_6_HOPS; + prop->dmmu.num_hops = MMU_ARCH_4_HOPS; prop->dmmu.last_mask = LAST_MASK; - prop->dmmu.host_resident = 1; - prop->dmmu.hop_table_size = prop->mmu_hop_table_size; - prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->dmmu.host_resident = 0; + prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->dmmu.hop0_tables_total_size = 
HOP_TABLE_SIZE_512_PTE * prop->max_asid; - /* - * this is done in order to be able to validate FW descriptor (i.e. validating that - * the addresses and allocated space for FW image does not cross memory bounds). - * for this reason we set the DRAM size to the minimum possible and later it will - * be modified according to what reported in the cpucp info packet + /* As we need to set the pgt address in dram for HMMU init so we cannot + * wait to the fw cpucp info to set the dram props as mmu init comes before + * hw init */ - prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; + rc = hdev->asic_funcs->set_dram_properties(hdev); + if (rc) + goto free_qprops; + + prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE; + prop->pmmu.pgt_size = prop->mmu_pgt_size; hdev->pmmu_huge_range = true; prop->pmmu.host_resident = 1; prop->pmmu.num_hops = MMU_ARCH_6_HOPS; prop->pmmu.last_mask = LAST_MASK; - prop->pmmu.hop_table_size = prop->mmu_hop_table_size; - prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid; prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; @@ -2516,7 +2559,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE; prop->num_engine_cores = CPU_ID_MAX; prop->cfg_size = CFG_SIZE; - prop->max_asid = MAX_ASID; prop->num_of_events = GAUDI2_EVENT_SIZE; prop->supports_engine_modes = true; @@ -2559,7 +2601,13 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; + prop->supports_advanced_cpucp_rc = true; + return 0; + +free_qprops: + kfree(prop->hw_queues_props); + return rc; } static int gaudi2_pci_bars_map(struct hl_device *hdev) @@ -3033,6 +3081,25 @@ static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) return 0; } +static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) + return 0; + + if (prop->dmmu.host_resident) + return 0; + + rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0); + if (rc) + dev_err(hdev->dev, "Failed to clear mmu pgt"); + + return rc; +} + static int gaudi2_early_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -3243,14 +3310,10 @@ static int gaudi2_late_init(struct hl_device *hdev) struct gaudi2_device *gaudi2 = hdev->asic_specific; int rc; - hdev->asic_prop.supports_advanced_cpucp_rc = true; - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, gaudi2->virt_msix_db_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + if (rc) return rc; - } rc = gaudi2_fetch_psoc_frequency(hdev); if (rc) { @@ -3258,6 +3321,12 @@ static int gaudi2_late_init(struct hl_device *hdev) goto disable_pci_access; } + rc = gaudi2_mmu_clear_pgt_range(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); + goto disable_pci_access; + } + gaudi2_init_arcs(hdev); rc = gaudi2_scrub_arcs_dccm(hdev); @@ -3518,7 +3587,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev) int i, rc; /* Configure Special blocks */ - prop->glbl_err_cause_num = 
GAUDI2_NUM_OF_GLBL_ERR_CAUSE; + prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM; prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, sizeof(*prop->special_blocks), GFP_KERNEL); @@ -3697,13 +3766,7 @@ static int gaudi2_sw_init(struct hl_device *hdev) spin_lock_init(&gaudi2->hw_queues_lock); - gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, - &gaudi2->scratchpad_bus_address, - GFP_KERNEL | __GFP_ZERO); - if (!gaudi2->scratchpad_kernel_address) { - rc = -ENOMEM; - goto free_virt_msix_db_mem; - } + gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE; gaudi2_user_mapped_blocks_init(hdev); @@ -3718,7 +3781,7 @@ static int gaudi2_sw_init(struct hl_device *hdev) prop->supports_compute_reset = true; /* Event queue sanity check added in FW version 1.11 */ - if (hl_is_fw_sw_ver_below(hdev, 1, 11)) + if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0) hdev->event_queue.check_eqe_index = false; else hdev->event_queue.check_eqe_index = true; @@ -3727,19 +3790,18 @@ static int gaudi2_sw_init(struct hl_device *hdev) rc = gaudi2_special_blocks_iterator_config(hdev); if (rc) - goto free_scratchpad_mem; + goto free_virt_msix_db_mem; rc = gaudi2_test_queues_msgs_alloc(hdev); if (rc) goto special_blocks_free; + hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ; + return 0; special_blocks_free: gaudi2_special_blocks_iterator_free(hdev); -free_scratchpad_mem: - hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, - gaudi2->scratchpad_bus_address); free_virt_msix_db_mem: hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); free_cpu_accessible_dma_pool: @@ -3770,9 +3832,6 @@ static int gaudi2_sw_fini(struct hl_device *hdev) hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, hdev->cpu_accessible_dma_address); - hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, - gaudi2->scratchpad_bus_address); - dma_pool_destroy(hdev->dma_pool); kfree(gaudi2); @@ -4254,6 +4313,8 @@ static int gaudi2_enable_msix(struct hl_device *hdev) if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) return 0; + hl_init_cpu_for_irq(hdev); + rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, PCI_IRQ_MSIX); if (rc < 0) { @@ -4307,6 +4368,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev) i++, j++, user_irq_init_cnt++) { irq = pci_irq_vector(hdev->pdev, i); + hl_set_irq_affinity(hdev, irq); rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]); if (rc) { @@ -4333,6 +4395,7 @@ free_user_irq: i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { irq = pci_irq_vector(hdev->pdev, i); + irq_set_affinity_and_hint(irq, NULL); free_irq(irq, &hdev->user_interrupt[j]); } irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); @@ -4413,6 +4476,7 @@ static void gaudi2_disable_msix(struct hl_device *hdev) k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { irq = pci_irq_vector(hdev->pdev, i); + irq_set_affinity_and_hint(irq, NULL); free_irq(irq, &hdev->user_interrupt[j]); } @@ -4957,10 +5021,17 @@ static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, q = &hdev->kernel_queues[queue_id_base + pq_id]; pq_offset = pq_id * 4; - WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, - lower_32_bits(q->bus_address)); - WREG32(reg_base + 
QM_PQ_BASE_HI_0_OFFSET + pq_offset, - upper_32_bits(q->bus_address)); + if (q->dram_bd) { + WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, + lower_32_bits(q->pq_dram_address)); + WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, + upper_32_bits(q->pq_dram_address)); + } else { + WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, + lower_32_bits(q->bus_address)); + WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, + upper_32_bits(q->bus_address)); + } WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); @@ -5847,7 +5918,8 @@ static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_har return rc; } -static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) +static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base, + bool host_resident_pgt) { struct asic_fixed_properties *prop = &hdev->asic_prop; u64 hop0_addr; @@ -5859,7 +5931,11 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) max_asid = min((u32) 8, max_asid); for (asid = 0 ; asid < max_asid ; asid++) { - hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; + if (host_resident_pgt) + hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; + else + hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size); + rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); if (rc) { dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); @@ -5870,7 +5946,8 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) return 0; } -static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) +static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base, + bool host_resident_pgt) { u32 status, timeout_usec; int rc; @@ -5893,7 +5970,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb if (rc) dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); - rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); + rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt); if (rc) return rc; @@ -5917,6 +5994,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb static int gaudi2_pci_mmu_init(struct hl_device *hdev) { + struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi2_device *gaudi2 = hdev->asic_specific; u32 mmu_base, stlb_base; int rc; @@ -5956,7 +6034,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev) WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); - rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident); if (rc) return rc; @@ -6008,7 +6086,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); - rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident); if (rc) return rc; @@ -6236,26 +6314,6 @@ static void gaudi2_execute_hard_reset(struct hl_device *hdev) WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1); } -static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us) -{ - int i, rc = 0; - u32 reg_val; - - for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) - rc = hl_poll_timeout( - 
hdev, - mmCPU_RST_STATUS_TO_HOST, - reg_val, - reg_val == CPU_RST_STATUS_SOFT_RST_DONE, - 1000, - poll_timeout_us); - - if (rc) - dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n", - reg_val); - return rc; -} - /** * gaudi2_execute_soft_reset - execute soft reset by driver/FW * @@ -6268,23 +6326,8 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_ static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset, u32 poll_timeout_us) { - int rc; - - if (!driver_performs_reset) { - if (hl_is_fw_sw_ver_below(hdev, 1, 10)) { - /* set SP to indicate reset request sent to FW */ - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); - - WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG, - gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); - - /* wait for f/w response */ - rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); - } else { - rc = hl_fw_send_soft_reset(hdev); - } - return rc; - } + if (!driver_performs_reset) + return hl_fw_send_soft_reset(hdev); /* Block access to engines, QMANs and SM during reset, these * RRs will be reconfigured after soft reset. @@ -6424,13 +6467,7 @@ skip_reset: static int gaudi2_suspend(struct hl_device *hdev) { - int rc; - - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); - if (rc) - dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); - - return rc; + return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); } static int gaudi2_resume(struct hl_device *hdev) @@ -7046,7 +7083,7 @@ static int gaudi2_test_queues(struct hl_device *hdev) /* send test message on all enabled Qs */ for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { - if (!gaudi2_is_queue_enabled(hdev, i)) + if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i)) continue; msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0]; @@ -7063,7 +7100,7 @@ static int gaudi2_test_queues(struct hl_device *hdev) /* verify that all messages were processed */ for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { - if (!gaudi2_is_queue_enabled(hdev, i)) + if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i)) continue; rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val); @@ -7836,7 +7873,7 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, bool has_block_id = false; u16 block_id; - if (!hl_is_fw_sw_ver_below(hdev, 1, 12)) + if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0) has_block_id = true; ecc_address = le64_to_cpu(ecc_data->ecc_address); @@ -8087,13 +8124,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, } hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; - - if (hl_is_fw_sw_ver_below(hdev, 1, 9) && - !hdev->asic_prop.fw_security_enabled && - ((module_idx == 0) || (module_idx == 1))) - lbw_rtr_id = DCORE0_RTR0; - else - lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; + lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; break; case RAZWI_MME: sprintf(initiator_name, "MME_%u", module_idx); @@ -8907,9 +8938,6 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ u32 error_count = 0; int i; - gaudi2_print_event(hdev, event_type, true, - "intr_cause_data: %#llx", intr_cause_data); - for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { if (!(intr_cause_data & BIT_ULL(i))) continue; @@ -8918,15 +8946,16 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device 
*hdev, u16 event_typ "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); error_count++; - /* - * Always check for LBW and HBW additional info as the indication itself is - * sometimes missing - */ + switch (intr_cause_data & BIT_ULL(i)) { + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: + hl_check_for_glbl_errors(hdev); + break; + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); + break; + } } - hl_check_for_glbl_errors(hdev); - gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); - return error_count; } @@ -8983,7 +9012,6 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool if (is_pmmu) { dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr); } else { - addr = gaudi2_mmu_descramble_addr(hdev, addr); addr &= HW_UNSCRAMBLED_BITS_MASK; dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n", @@ -9235,8 +9263,8 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) { + bool require_hard_reset = false; u32 addr, beat, beat_shift; - bool rc = false; dev_err_ratelimited(hdev->dev, "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", @@ -9268,7 +9296,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, beat, le32_to_cpu(rd_err_data->dbg_rd_err_dm), le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); - rc |= true; + require_hard_reset = true; } beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT; @@ -9281,7 +9309,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >> (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift)); - rc |= true; + require_hard_reset = true; } dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat); @@ -9291,7 +9319,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1])); } - return rc; + return require_hard_reset; } static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev, @@ -9514,25 +9542,17 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) static int gaudi2_handle_pcie_drain(struct hl_device *hdev, struct hl_eq_pcie_drain_ind_data *drain_data) { - u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; + u64 cause, error_count = 0; cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); - lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); - lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); - hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); - hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); if (cause & BIT_ULL(0)) { - dev_err_ratelimited(hdev->dev, - "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", - !!lbw_rd, !!lbw_wr); + dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n"); error_count++; } if (cause & BIT_ULL(1)) { - dev_err_ratelimited(hdev->dev, - "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", - hbw_rd, hbw_wr); + dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n"); error_count++; } @@ -9757,11 +9777,6 @@ static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type) return U16_MAX; } -static void hl_eq_heartbeat_event_handle(struct hl_device *hdev) -{ - hdev->eq_heartbeat_received = true; -} - static void 
gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi2_device *gaudi2 = hdev->asic_specific; @@ -9983,6 +9998,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; reset_required = true; + is_critical = eq_entry->sei_data.hdr.is_critical; } error_count++; break; @@ -10003,7 +10019,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; - if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13)) + if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0) is_critical = true; break; @@ -10214,8 +10230,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_print_event(hdev, event_type, true, "No error cause for H/W event %u", event_type); - if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || - reset_required) { + if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) { if (reset_required || (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD)) reset_flags |= HL_DRV_RESET_HARD; @@ -10250,11 +10265,11 @@ reset_device: } static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, - struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, - u32 hw_queue_id, u32 size, u64 addr, u32 val) + struct packet_lin_dma *lin_dma_pkt, + u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val) { u32 ctl, pkt_size; - int rc = 0; + int rc = 0, i; ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); @@ -10268,9 +10283,20 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, pkt_size = sizeof(struct packet_lin_dma); - rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); + for (i = 0; i < 3; i++) { + rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, + phys_addr + (i * sizeof(u64)), + ((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64); + if (rc) { + dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n", + phys_addr); + return rc; + } + } + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr); if (rc) - dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n", + dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n", hw_queue_id); return rc; @@ -10283,12 +10309,11 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0}; u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val, - old_mmubp, mmubp, num_of_pkts, busy, pkt_size; + old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len; u64 comp_addr, cur_addr = addr, end_addr = addr + size; struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc = 0, dma_num = 0, i; void *lin_dma_pkts_arr; - dma_addr_t pkt_dma_addr; - int rc = 0, dma_num = 0; if (prop->edma_enabled_mask == 0) { dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n"); @@ -10306,9 +10331,19 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz /* Calculate how many lin dma pkts we'll need */ num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G); pkt_size = sizeof(struct packet_lin_dma); 
+ cb_len = pkt_size * num_of_pkts; - lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts, - &pkt_dma_addr, GFP_KERNEL); + /* + * if we're not scrubing HMMU or NIC reserved sections in hbm, + * then it the scrubing of the user section, as we use the start of the user section + * to store the CB of the EDMA QM, so shift the start address of the scrubbing accordingly + * and scrub the CB section before leaving this function. + */ + if ((addr >= prop->dram_user_base_address) && + (addr < prop->dram_user_base_address + cb_len)) + cur_addr += (prop->dram_user_base_address + cb_len) - addr; + + lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL); if (!lin_dma_pkts_arr) return -ENOMEM; @@ -10354,7 +10389,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, - pkt_dma_addr + dma_num * pkt_size, + prop->dram_user_base_address + (dma_num * pkt_size), edma_queues_id[dcore] + edma_idx * 4, chunk_size, cur_addr, val); if (rc) @@ -10363,14 +10398,16 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz dma_num++; cur_addr += chunk_size; if (cur_addr == end_addr) - break; + goto edma_wait; } } } +edma_wait: rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); if (rc) { - dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); + dev_err(hdev->dev, "DMA Timeout during HBM scrubbing(sob: 0x%x, dma_num: 0x%x)\n", + busy, dma_num); goto end; } end: @@ -10391,8 +10428,16 @@ end: } } + memset(lin_dma_pkts_arr, 0, sizeof(u64)); + + /* Zero the HBM area where we copied the CB */ + for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64)) + rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, + prop->dram_user_base_address + i, + (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64); WREG32(sob_addr, 0); - hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); + + kfree(lin_dma_pkts_arr); return rc; } @@ -11450,7 +11495,7 @@ static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_p return 0; page_size_err: - dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", + dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't map\n", page_size, mmu_prop->page_size >> 10); return -EFAULT; } @@ -11470,6 +11515,29 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open) return hl_fw_send_device_activity(hdev, open); } +static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u64 val; + + if (hdev->reset_info.hard_reset_pending) + return U64_MAX; + + val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr)); + + return val; +} + +static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + + if (hdev->reset_info.hard_reset_pending) + return; + + writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr)); +} + static const struct hl_asic_funcs gaudi2_funcs = { .early_init = gaudi2_early_init, .early_fini = gaudi2_early_fini, @@ -11506,8 +11574,8 @@ static const struct hl_asic_funcs gaudi2_funcs = { .add_device_attr = gaudi2_add_device_attr, .handle_eqe = gaudi2_handle_eqe, .get_events_stat = gaudi2_get_events_stat, - .read_pte = NULL, - .write_pte = NULL, + .read_pte = gaudi2_read_pte, + .write_pte = gaudi2_write_pte, .mmu_invalidate_cache = 
gaudi2_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range, .mmu_prefetch_cache_range = NULL, diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h index 9b9eef0d97d6..05117272cac7 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h @@ -19,8 +19,6 @@ #define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb" #define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb" -#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */ - #define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */ #define NUMBER_OF_PDMA_QUEUES 2 @@ -109,13 +107,11 @@ /* DRAM Memory Map */ #define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */ - -/* This define should be used only when working in a debug mode without dram. - * When working with dram, the driver size will be calculated dynamically. - */ -#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */ - #define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE +#define PMMU_PAGE_TABLES_SIZE 0x10000000 /* 256MB */ +#define EDMA_PQS_SIZE SZ_2M +#define EDMA_SCRATCHPAD_SIZE SZ_1M +#define HMMU_PAGE_TABLES_SIZE SZ_1M #define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES @@ -241,9 +237,8 @@ #define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \ FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1)) -#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0) +#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0) -#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8 enum gaudi2_reserved_sob_id { GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST, @@ -389,7 +384,7 @@ enum gaudi2_edma_id { /* User interrupt count is aligned with HW CQ count. * We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode */ -#define GAUDI2_NUM_USER_INTERRUPTS 255 +#define GAUDI2_NUM_USER_INTERRUPTS 64 #define GAUDI2_NUM_RESERVED_INTERRUPTS 1 #define GAUDI2_TOTAL_USER_INTERRUPTS (GAUDI2_NUM_USER_INTERRUPTS + GAUDI2_NUM_RESERVED_INTERRUPTS) @@ -421,11 +416,11 @@ enum gaudi2_irq_num { GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1), GAUDI2_IRQ_NUM_TPC_ASSERT, GAUDI2_IRQ_NUM_EQ_ERROR, + GAUDI2_IRQ_NUM_USER_FIRST, + GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1), GAUDI2_IRQ_NUM_RESERVED_FIRST, - GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1), + GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_RESERVED_INTERRUPTS - 1), GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT, - GAUDI2_IRQ_NUM_USER_FIRST = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR + 1, - GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1), GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1) }; diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index 34bf80c5a44b..307ccb912ccd 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -479,6 +479,7 @@ static const u32 gaudi2_pb_dcr0_edma0_unsecured_regs[] = { mmDCORE0_EDMA0_CORE_CTX_TE_NUMROWS, mmDCORE0_EDMA0_CORE_CTX_IDX, mmDCORE0_EDMA0_CORE_CTX_IDX_INC, + mmDCORE0_EDMA0_CORE_WR_COMP_MAX_OUTSTAND, mmDCORE0_EDMA0_CORE_RD_LBW_RATE_LIM_CFG, mmDCORE0_EDMA0_QM_CQ_CFG0_0, mmDCORE0_EDMA0_QM_CQ_CFG0_1, diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c index 
1322cb330c57..84768e306269 100644 --- a/drivers/accel/habanalabs/goya/goya.c +++ b/drivers/accel/habanalabs/goya/goya.c @@ -413,8 +413,6 @@ int goya_set_fixed_properties(struct hl_device *hdev) else prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; - prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = true; @@ -435,8 +433,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->dmmu.num_hops = MMU_ARCH_5_HOPS; prop->dmmu.last_mask = LAST_MASK; /* TODO: will be duplicated until implementing per-MMU props */ - prop->dmmu.hop_table_size = prop->mmu_hop_table_size; - prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->dmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); @@ -446,8 +444,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; /* TODO: will be duplicated until implementing per-MMU props */ - prop->pmmu.hop_table_size = prop->mmu_hop_table_size; - prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -895,11 +893,8 @@ int goya_late_init(struct hl_device *hdev) WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size)); rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); - if (rc) { - dev_err(hdev->dev, - "Failed to enable PCI access from CPU %d\n", rc); + if (rc) return rc; - } /* force setting to low frequency */ goya->curr_pll_profile = PLL_LOW; @@ -2678,7 +2673,7 @@ int goya_mmu_init(struct hl_device *hdev) for (i = 0 ; i < prop->max_asid ; i++) { hop0_addr = prop->mmu_pgt_addr + - (i * prop->mmu_hop_table_size); + (i * prop->dmmu.hop_table_size); rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); if (rc) { @@ -2866,13 +2861,7 @@ static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) int goya_suspend(struct hl_device *hdev) { - int rc; - - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); - if (rc) - dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); - - return rc; + return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); } int goya_resume(struct hl_device *hdev) diff --git a/drivers/accel/habanalabs/goya/goya_coresight.c b/drivers/accel/habanalabs/goya/goya_coresight.c index 41cae5fd843b..3827ea4c02f7 100644 --- a/drivers/accel/habanalabs/goya/goya_coresight.c +++ b/drivers/accel/habanalabs/goya/goya_coresight.c @@ -576,7 +576,6 @@ static int goya_config_spmu(struct hl_device *hdev, struct hl_debug_params *params) { u64 base_reg; - struct hl_debug_params_spmu *input = params->input; u64 *output; u32 output_arr_len; u32 events_num; @@ -592,7 +591,7 @@ static int goya_config_spmu(struct hl_device *hdev, base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE; if (params->enable) { - input = params->input; + struct hl_debug_params_spmu *input = params->input; if (!input) return -EINVAL; diff --git 
a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h index 0231d6c55b4a..753d46a2836b 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h @@ -63,9 +63,9 @@ #define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START 0xFFF0F80000000000ull #define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END 0xFFF0FFFFFFFFFFFFull -#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 256 +#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 127 -#define GAUDI2_MSIX_ENTRIES 512 +#define GAUDI2_MSIX_ENTRIES 128 #define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h index b2dbe1f64430..82d639990cca 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h @@ -330,9 +330,9 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { { .fc_id = 149, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA7_ECC_SERR" }, { .fc_id = 150, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, - .name = "HDMA4_ECC_SERR" }, + .name = "EDMA4_ECC_SERR" }, { .fc_id = 151, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, - .name = "HDMA5_ECC_SERR" }, + .name = "EDMA5_ECC_SERR" }, { .fc_id = 152, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "EDMA2_ECC_DERR" }, { .fc_id = 153, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, @@ -856,55 +856,55 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { { .fc_id = 412, .cpu_id = 84, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PCIE_ADDR_DEC_ERR" }, { .fc_id = 413, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC0_AXI_ERR_RSP" }, + .name = "DCORE0_TPC0_AXI_ERR_RSP" }, { .fc_id = 414, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC1_AXI_ERR_RSP" }, + .name = "DCORE0_TPC1_AXI_ERR_RSP" }, { .fc_id = 415, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC2_AXI_ERR_RSP" }, + .name = "DCORE0_TPC2_AXI_ERR_RSP" }, { .fc_id = 416, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC3_AXI_ERR_RSP" }, + .name = "DCORE0_TPC3_AXI_ERR_RSP" }, { .fc_id = 417, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC4_AXI_ERR_RSP" }, + .name = "DCORE0_TPC4_AXI_ERR_RSP" }, { .fc_id = 418, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC5_AXI_ERR_RSP" }, + .name = "DCORE0_TPC5_AXI_ERR_RSP" }, { .fc_id = 419, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC6_AXI_ERR_RSP" }, + .name = "DCORE1_TPC0_AXI_ERR_RSP" }, { .fc_id = 420, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC7_AXI_ERR_RSP" }, + .name = "DCORE1_TPC1_AXI_ERR_RSP" }, { .fc_id = 421, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC8_AXI_ERR_RSP" }, + .name = "DCORE1_TPC2_AXI_ERR_RSP" }, { .fc_id = 422, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC9_AXI_ERR_RSP" }, + .name = "DCORE1_TPC3_AXI_ERR_RSP" }, { .fc_id = 423, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 
EVENT_RESET_TYPE_COMPUTE, - .name = "TPC10_AXI_ERR_RSP" }, + .name = "DCORE1_TPC4_AXI_ERR_RSP" }, { .fc_id = 424, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC11_AXI_ERR_RSP" }, + .name = "DCORE1_TPC5_AXI_ERR_RSP" }, { .fc_id = 425, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC12_AXI_ERR_RSP" }, + .name = "DCORE2_TPC0_AXI_ERR_RSP" }, { .fc_id = 426, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC13_AXI_ERR_RSP" }, + .name = "DCORE2_TPC1_AXI_ERR_RSP" }, { .fc_id = 427, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC14_AXI_ERR_RSP" }, + .name = "DCORE2_TPC2_AXI_ERR_RSP" }, { .fc_id = 428, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC15_AXI_ERR_RSP" }, + .name = "DCORE2_TPC3_AXI_ERR_RSP" }, { .fc_id = 429, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC16_AXI_ERR_RSP" }, + .name = "DCORE2_TPC4_AXI_ERR_RSP" }, { .fc_id = 430, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC17_AXI_ERR_RSP" }, + .name = "DCORE2_TPC5_AXI_ERR_RSP" }, { .fc_id = 431, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC18_AXI_ERR_RSP" }, + .name = "DCORE3_TPC0_AXI_ERR_RSP" }, { .fc_id = 432, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC19_AXI_ERR_RSP" }, + .name = "DCORE3_TPC1_AXI_ERR_RSP" }, { .fc_id = 433, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC20_AXI_ERR_RSP" }, + .name = "DCORE3_TPC2_AXI_ERR_RSP" }, { .fc_id = 434, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC21_AXI_ERR_RSP" }, + .name = "DCORE3_TPC3_AXI_ERR_RSP" }, { .fc_id = 435, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC22_AXI_ERR_RSP" }, + .name = "DCORE3_TPC4_AXI_ERR_RSP" }, { .fc_id = 436, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC23_AXI_ERR_RSP" }, + .name = "DCORE3_TPC5_AXI_ERR_RSP" }, { .fc_id = 437, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC24_AXI_ERR_RSP" }, + .name = "DCORE4_TPC0_AXI_ERR_RSP" }, { .fc_id = 438, .cpu_id = 86, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "AXI_ECC" }, { .fc_id = 439, .cpu_id = 87, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, @@ -965,73 +965,73 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { .name = "MME3_CTRL_AXI_ERROR_RESPONSE" }, { .fc_id = 467, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_QMAN_SW_ERROR" }, - { .fc_id = 468, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 468, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PSOC_MME_PLL_LOCK_ERR" }, - { .fc_id = 469, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 469, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PSOC_CPU_PLL_LOCK_ERR" }, - { .fc_id = 470, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 470, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_TPC_PLL_LOCK_ERR" }, - { .fc_id = 471, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 471, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 
EVENT_RESET_TYPE_HARD, .name = "DCORE3_NIC_PLL_LOCK_ERR" }, - { .fc_id = 472, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 472, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_MMU_PLL_LOCK_ERR" }, - { .fc_id = 473, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 473, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_DMA_PLL_LOCK_ERR" }, - { .fc_id = 474, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 474, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_IF_PLL_LOCK_ERR" }, - { .fc_id = 475, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 475, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_BANK_PLL_LOCK_ERR" }, - { .fc_id = 476, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 476, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_MMU_PLL_LOCK_ERR" }, - { .fc_id = 477, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 477, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_DMA_PLL_LOCK_ERR" }, - { .fc_id = 478, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 478, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_IF_PLL_LOCK_ERR" }, - { .fc_id = 479, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 479, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_MESH_PLL_LOCK_ERR" }, - { .fc_id = 480, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 480, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_TPC_PLL_LOCK_ERR" }, - { .fc_id = 481, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 481, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_NIC_PLL_LOCK_ERR" }, - { .fc_id = 482, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 482, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PMMU_MME_PLL_LOCK_ERR" }, - { .fc_id = 483, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 483, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_TPC_PLL_LOCK_ERR" }, - { .fc_id = 484, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 484, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_PCI_PLL_LOCK_ERR" }, - { .fc_id = 485, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 485, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_MMU_PLL_LOCK_ERR" }, - { .fc_id = 486, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 486, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_DMA_PLL_LOCK_ERR" }, - { .fc_id = 487, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 487, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_IF_PLL_LOCK_ERR" }, - { .fc_id = 488, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 
488, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_MESH_PLL_LOCK_ERR" }, - { .fc_id = 489, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 489, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_MMU_PLL_LOCK_ERR" }, - { .fc_id = 490, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 490, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_DMA_PLL_LOCK_ERR" }, - { .fc_id = 491, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 491, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_IF_PLL_LOCK_ERR" }, - { .fc_id = 492, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 492, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_BANK_PLL_LOCK_ERR" }, - { .fc_id = 493, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 493, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_TPC_PLL_LOCK_ERR" }, - { .fc_id = 494, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 494, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PSOC_VID_PLL_LOCK_ERR" }, - { .fc_id = 495, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 495, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PMMU_VID_PLL_LOCK_ERR" }, - { .fc_id = 496, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 496, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_HBM_PLL_LOCK_ERR" }, - { .fc_id = 497, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 497, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_HBM_PLL_LOCK_ERR" }, - { .fc_id = 498, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 498, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_HBM_PLL_LOCK_ERR" }, - { .fc_id = 499, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 499, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_HBM_PLL_LOCK_ERR" }, - { .fc_id = 500, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 500, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_HBM_PLL_LOCK_ERR" }, - { .fc_id = 501, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 501, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_HBM_PLL_LOCK_ERR" }, { .fc_id = 502, .cpu_id = 93, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "CPU_AXI_ERR_RSP" }, @@ -1298,103 +1298,103 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { { .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC0_BMON_SPMU" }, { .fc_id = 634, .cpu_id = 131, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC0_KERNEL_ERR" }, + .name = "DCORE0_TPC0_KERNEL_ERR" }, { .fc_id = 635, .cpu_id = 132, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC1_BMON_SPMU" }, { .fc_id = 636, .cpu_id = 133, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = 
"TPC1_KERNEL_ERR" }, + .name = "DCORE0_TPC1_KERNEL_ERR" }, { .fc_id = 637, .cpu_id = 134, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC2_BMON_SPMU" }, { .fc_id = 638, .cpu_id = 135, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC2_KERNEL_ERR" }, + .name = "DCORE0_TPC2_KERNEL_ERR" }, { .fc_id = 639, .cpu_id = 136, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC3_BMON_SPMU" }, { .fc_id = 640, .cpu_id = 137, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC3_KERNEL_ERR" }, + .name = "DCORE0_TPC3_KERNEL_ERR" }, { .fc_id = 641, .cpu_id = 138, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC4_BMON_SPMU" }, { .fc_id = 642, .cpu_id = 139, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC4_KERNEL_ERR" }, + .name = "DCORE0_TPC4_KERNEL_ERR" }, { .fc_id = 643, .cpu_id = 140, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC5_BMON_SPMU" }, { .fc_id = 644, .cpu_id = 141, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC5_KERNEL_ERR" }, + .name = "DCORE0_TPC5_KERNEL_ERR" }, { .fc_id = 645, .cpu_id = 150, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC6_BMON_SPMU" }, { .fc_id = 646, .cpu_id = 151, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC6_KERNEL_ERR" }, + .name = "DCORE1_TPC0_KERNEL_ERR" }, { .fc_id = 647, .cpu_id = 152, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC7_BMON_SPMU" }, { .fc_id = 648, .cpu_id = 153, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC7_KERNEL_ERR" }, + .name = "DCORE1_TPC1_KERNEL_ERR" }, { .fc_id = 649, .cpu_id = 146, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC8_BMON_SPMU" }, { .fc_id = 650, .cpu_id = 147, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC8_KERNEL_ERR" }, + .name = "DCORE1_TPC2_KERNEL_ERR" }, { .fc_id = 651, .cpu_id = 148, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC9_BMON_SPMU" }, { .fc_id = 652, .cpu_id = 149, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC9_KERNEL_ERR" }, + .name = "DCORE1_TPC3_KERNEL_ERR" }, { .fc_id = 653, .cpu_id = 142, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC10_BMON_SPMU" }, { .fc_id = 654, .cpu_id = 143, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC10_KERNEL_ERR" }, + .name = "DCORE1_TPC4_KERNEL_ERR" }, { .fc_id = 655, .cpu_id = 144, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC11_BMON_SPMU" }, { .fc_id = 656, .cpu_id = 145, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC11_KERNEL_ERR" }, + .name = "DCORE1_TPC5_KERNEL_ERR" }, { .fc_id = 657, .cpu_id = 162, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC12_BMON_SPMU" }, { .fc_id = 658, .cpu_id = 163, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC12_KERNEL_ERR" }, + .name = "DCORE2_TPC0_KERNEL_ERR" }, { .fc_id = 659, .cpu_id = 164, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC13_BMON_SPMU" }, { .fc_id = 660, .cpu_id = 165, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC13_KERNEL_ERR" }, + .name = "DCORE2_TPC1_KERNEL_ERR" }, { .fc_id = 661, .cpu_id = 158, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC14_BMON_SPMU" }, { .fc_id = 662, .cpu_id = 159, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC14_KERNEL_ERR" }, + .name = 
"DCORE2_TPC2_KERNEL_ERR" }, { .fc_id = 663, .cpu_id = 160, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC15_BMON_SPMU" }, { .fc_id = 664, .cpu_id = 161, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC15_KERNEL_ERR" }, + .name = "DCORE2_TPC3_KERNEL_ERR" }, { .fc_id = 665, .cpu_id = 154, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC16_BMON_SPMU" }, { .fc_id = 666, .cpu_id = 155, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC16_KERNEL_ERR" }, + .name = "DCORE2_TPC4_KERNEL_ERR" }, { .fc_id = 667, .cpu_id = 156, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC17_BMON_SPMU" }, { .fc_id = 668, .cpu_id = 157, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC17_KERNEL_ERR" }, + .name = "DCORE2_TPC5_KERNEL_ERR" }, { .fc_id = 669, .cpu_id = 166, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC18_BMON_SPMU" }, { .fc_id = 670, .cpu_id = 167, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC18_KERNEL_ERR" }, + .name = "DCORE3_TPC0_KERNEL_ERR" }, { .fc_id = 671, .cpu_id = 168, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC19_BMON_SPMU" }, { .fc_id = 672, .cpu_id = 169, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC19_KERNEL_ERR" }, + .name = "DCORE3_TPC1_KERNEL_ERR" }, { .fc_id = 673, .cpu_id = 170, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC20_BMON_SPMU" }, { .fc_id = 674, .cpu_id = 171, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC20_KERNEL_ERR" }, + .name = "DCORE3_TPC2_KERNEL_ERR" }, { .fc_id = 675, .cpu_id = 172, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC21_BMON_SPMU" }, { .fc_id = 676, .cpu_id = 173, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC21_KERNEL_ERR" }, + .name = "DCORE3_TPC3_KERNEL_ERR" }, { .fc_id = 677, .cpu_id = 174, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC22_BMON_SPMU" }, { .fc_id = 678, .cpu_id = 175, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC22_KERNEL_ERR" }, + .name = "DCORE3_TPC4_KERNEL_ERR" }, { .fc_id = 679, .cpu_id = 176, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC23_BMON_SPMU" }, { .fc_id = 680, .cpu_id = 177, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC23_KERNEL_ERR" }, + .name = "DCORE3_TPC5_KERNEL_ERR" }, { .fc_id = 681, .cpu_id = 178, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC24_BMON_SPMU" }, { .fc_id = 682, .cpu_id = 179, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC24_KERNEL_ERR" }, + .name = "DCORE4_TPC0_KERNEL_ERR" }, { .fc_id = 683, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" }, { .fc_id = 684, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, @@ -1827,8 +1827,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { .name = "DEC0_BMON_SPMU" }, { .fc_id = 898, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DEC1_SPI" }, - { .fc_id = 899, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "DEC1_SPI" }, + { .fc_id = 899, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "DEC1_BMON_SPMU" }, { .fc_id = 900, .cpu_id = 331, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DEC2_SPI" }, { .fc_id = 901, .cpu_id = 331, .valid = 1, .msg = 0, 
.reset = EVENT_RESET_TYPE_NONE, @@ -2377,8 +2377,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { .name = "" }, { .fc_id = 1173, .cpu_id = 479, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" }, - { .fc_id = 1174, .cpu_id = 480, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, - .name = "" }, + { .fc_id = 1174, .cpu_id = 480, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PSOC_DMA_QM" }, { .fc_id = 1175, .cpu_id = 481, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" }, { .fc_id = 1176, .cpu_id = 482, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, @@ -2442,55 +2442,55 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { { .fc_id = 1205, .cpu_id = 511, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" }, { .fc_id = 1206, .cpu_id = 512, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC0_QM" }, + .name = "DCORE0_TPC0_QM" }, { .fc_id = 1207, .cpu_id = 513, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC1_QM" }, + .name = "DCORE0_TPC1_QM" }, { .fc_id = 1208, .cpu_id = 514, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC2_QM" }, + .name = "DCORE0_TPC2_QM" }, { .fc_id = 1209, .cpu_id = 515, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC3_QM" }, + .name = "DCORE0_TPC3_QM" }, { .fc_id = 1210, .cpu_id = 516, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC4_QM" }, + .name = "DCORE0_TPC4_QM" }, { .fc_id = 1211, .cpu_id = 517, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC5_QM" }, + .name = "DCORE0_TPC5_QM" }, { .fc_id = 1212, .cpu_id = 518, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC6_QM" }, + .name = "DCORE1_TPC0_QM" }, { .fc_id = 1213, .cpu_id = 519, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC7_QM" }, + .name = "DCORE1_TPC1_QM" }, { .fc_id = 1214, .cpu_id = 520, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC8_QM" }, + .name = "DCORE1_TPC2_QM" }, { .fc_id = 1215, .cpu_id = 521, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC9_QM" }, + .name = "DCORE1_TPC3_QM" }, { .fc_id = 1216, .cpu_id = 522, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC10_QM" }, + .name = "DCORE1_TPC4_QM" }, { .fc_id = 1217, .cpu_id = 523, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC11_QM" }, + .name = "DCORE1_TPC5_QM" }, { .fc_id = 1218, .cpu_id = 524, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC12_QM" }, + .name = "DCORE2_TPC0_QM" }, { .fc_id = 1219, .cpu_id = 525, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC13_QM" }, + .name = "DCORE2_TPC1_QM" }, { .fc_id = 1220, .cpu_id = 526, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC14_QM" }, + .name = "DCORE2_TPC2_QM" }, { .fc_id = 1221, .cpu_id = 527, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC15_QM" }, + .name = "DCORE2_TPC3_QM" }, { .fc_id = 1222, .cpu_id = 528, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC16_QM" }, + .name = "DCORE2_TPC4_QM" }, { .fc_id = 1223, .cpu_id = 529, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC17_QM" }, + .name = "DCORE2_TPC5_QM" }, { .fc_id = 1224, .cpu_id = 530, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC18_QM" }, + .name = "DCORE3_TPC0_QM" }, { .fc_id = 
1225, .cpu_id = 531, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC19_QM" }, + .name = "DCORE3_TPC1_QM" }, { .fc_id = 1226, .cpu_id = 532, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC20_QM" }, + .name = "DCORE3_TPC2_QM" }, { .fc_id = 1227, .cpu_id = 533, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC21_QM" }, + .name = "DCORE3_TPC3_QM" }, { .fc_id = 1228, .cpu_id = 534, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC22_QM" }, + .name = "DCORE3_TPC4_QM" }, { .fc_id = 1229, .cpu_id = 535, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC23_QM" }, + .name = "DCORE3_TPC5_QM" }, { .fc_id = 1230, .cpu_id = 536, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, - .name = "TPC24_QM" }, + .name = "DCORE4_TPC0_QM" }, { .fc_id = 1231, .cpu_id = 537, .valid = 0, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, .name = "" }, { .fc_id = 1232, .cpu_id = 538, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, @@ -2674,19 +2674,19 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { { .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, .name = "DEV_RESET_REQ" }, { .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_PWR_BRK_ENTRY" }, + .name = "PWR_BRK_ENTRY" }, { .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_PWR_BRK_EXT" }, + .name = "PWR_BRK_EXT" }, { .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_PWR_RD_MODE0" }, + .name = "PWR_RD_MODE0" }, { .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_PWR_RD_MODE1" }, + .name = "PWR_RD_MODE1" }, { .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_PWR_RD_MODE2" }, + .name = "PWR_RD_MODE2" }, { .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_PWR_RD_MODE3" }, + .name = "PWR_RD_MODE3" }, { .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "ARC_EQ_HEARTBEAT" }, + .name = "EQ_HEARTBEAT" }, }; #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */ diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h index 18ca147b1c86..6ea936c9594e 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h @@ -45,6 +45,13 @@ #define GAUDI2_ARM_RX_MB_OFFSET (GAUDI2_ARM_RX_MB_ADDR - \ GAUDI2_SP_SRAM_BASE_ADDR) +#define POWER_MODE_LEVELS { \ + 150000, /* 00 */ \ + 250000, /* 01 */ \ + 400000, /* 10 */ \ + /* 11: Normal mode */ \ +} + enum gaudi2_fw_status { GAUDI2_PID_STATUS_UP = 0x1, /* PID on ARC0 is up */ GAUDI2_ARM_STATUS_UP = 0x2, /* ARM Linux Boot complete */ @@ -52,26 +59,6 @@ enum gaudi2_fw_status { GAUDI2_STATUS_LAST = 0xFF }; -struct gaudi2_cold_rst_data { - union { - struct { - u32 recovery_flag: 1; - u32 validation_flag: 1; - u32 efuse_read_flag: 1; - u32 spsram_init_done : 1; - u32 fake_security_enable : 1; - u32 fake_sig_validation_en : 1; - u32 bist_skip_enable : 1; - u32 reserved1 : 1; - u32 fake_bis_compliant : 1; - u32 wd_rst_cause_arm : 1; - u32 wd_rst_cause_arcpid : 1; - u32 reserved : 21; - }; - __le32 data; - }; -}; - enum gaudi2_rst_src { HL_COLD_RST = 1, HL_MANUAL_RST = 2, diff --git 
a/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h index f3eaeb6d9b7e..1e9c056e437d 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h @@ -58,4 +58,12 @@ #define mmWD_GPIO_DATAOUT_REG mmPSOC_GPIO3_DATAOUT #define mmSTM_PROFILER_SPE_REG mmPSOC_STM_STMSPER +/* Registers below are used to pass the boot_if data between ARM and ARC1 */ +#define mmARM_MSG_BOOT_ERR_SET mmCPU_IF_SPECIAL_GLBL_SPARE_0 +#define mmARM_MSG_BOOT_ERR_CLR mmCPU_IF_SPECIAL_GLBL_SPARE_1 +#define mmARM_MSG_BOOT_DEV_STS_SET mmCPU_IF_SPECIAL_GLBL_SPARE_2 +#define mmARM_MSG_BOOT_DEV_STS_CLR mmCPU_IF_SPECIAL_GLBL_SPARE_3 +#define mmMGMT_MSG_BOOT_ERR mmCPU_MSTR_IF_SPECIAL_GLBL_SPARE_0 +#define mmMGMT_MSG_BOOT_DEV_STS mmCPU_MSTR_IF_SPECIAL_GLBL_SPARE_1 + #endif /* GAUDI2_REG_MAP_H_ */ diff --git a/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h index d408feecd483..b4a5e95be354 100644 --- a/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -26,6 +26,8 @@ #define LAST_MASK 0x0000000000800ull #define FLAGS_MASK 0x0000000000FFFull +#define MMU_ARCH_3_HOPS 3 +#define MMU_ARCH_4_HOPS 4 #define MMU_ARCH_5_HOPS 5 #define MMU_ARCH_6_HOPS 6 diff --git a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h index 4f951cada077..a75faa00197f 100644 --- a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h +++ b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h @@ -25,7 +25,8 @@ enum hl_revision_id { REV_ID_INVALID = 0x00, REV_ID_A = 0x01, REV_ID_B = 0x02, - REV_ID_C = 0x03 + REV_ID_C = 0x03, + REV_ID_D = 0x04 }; #endif /* INCLUDE_PCI_GENERAL_H_ */ diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig index 682c53245286..9e055b5ce03d 100644 --- a/drivers/accel/ivpu/Kconfig +++ b/drivers/accel/ivpu/Kconfig @@ -8,6 +8,7 @@ config DRM_ACCEL_IVPU select FW_LOADER select DRM_GEM_SHMEM_HELPER select GENERIC_ALLOCATOR + select WANT_DEV_COREDUMP help Choose this option if you have a system with an 14th generation Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU) @@ -15,3 +16,12 @@ config DRM_ACCEL_IVPU and Deep Learning applications. If "M" is selected, the module will be called intel_vpu. + +config DRM_ACCEL_IVPU_DEBUG + bool "Intel NPU debug mode" + depends on DRM_ACCEL_IVPU + help + Choose this option to enable additional + debug features for the Intel NPU driver: + - Always print debug messages regardless of dyndbg config, + - Enable unsafe module params. 
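The new DRM_ACCEL_IVPU_DEBUG option pairs with the Makefile change below, which adds -DDEBUG to the compiler flags for the whole driver when the option is enabled. The sketch that follows only illustrates the general pattern such a switch typically enables; the macro name accel_dbg, the dbg_mask parameter and the dev_info fallback are assumptions for illustration, not the driver's actual ivpu_dbg implementation: with DEBUG defined the messages are printed unconditionally, otherwise they still go through dev_dbg and dynamic debug.

/*
 * Illustrative sketch only: accel_dbg, dbg_mask and the dev_info fallback are
 * assumed names/behavior, not copied from ivpu_drv.h.
 */
#include <linux/bits.h>
#include <linux/dev_printk.h>
#include <linux/module.h>

static unsigned int dbg_mask;	/* runtime filter, analogous to the dbg_mask module param */
module_param(dbg_mask, uint, 0644);

#ifdef DEBUG	/* defined via subdir-ccflags when CONFIG_DRM_ACCEL_IVPU_DEBUG=y */
#define accel_dbg(dev, bit, fmt, ...)					\
	do {								\
		if (dbg_mask & BIT(bit))				\
			dev_info(dev, fmt, ##__VA_ARGS__); /* always printed */ \
	} while (0)
#else
#define accel_dbg(dev, bit, fmt, ...)					\
	do {								\
		if (dbg_mask & BIT(bit))				\
			dev_dbg(dev, fmt, ##__VA_ARGS__); /* gated by dynamic debug */ \
	} while (0)
#endif

Either way the per-feature dbg_mask bit still filters at runtime, so selecting the Kconfig option mainly removes the need to configure dynamic debug before the prints appear.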
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 95ff7ad16338..1029e0bab061 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -1,20 +1,29 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation intel_vpu-y := \ ivpu_drv.o \ ivpu_fw.o \ ivpu_fw_log.o \ ivpu_gem.o \ - ivpu_hw_37xx.o \ - ivpu_hw_40xx.o \ + ivpu_hw.o \ + ivpu_hw_btrs.o \ + ivpu_hw_ip.o \ ivpu_ipc.o \ ivpu_job.o \ ivpu_jsm_msg.o \ ivpu_mmu.o \ ivpu_mmu_context.o \ - ivpu_pm.o + ivpu_ms.o \ + ivpu_pm.o \ + ivpu_sysfs.o \ + ivpu_trace_points.o intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o +intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o + +subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG + +CFLAGS_ivpu_trace_points.o = -I$(src) diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c new file mode 100644 index 000000000000..16ad0c30818c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_coredump.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <linux/devcoredump.h> +#include <linux/firmware.h> + +#include "ivpu_coredump.h" +#include "ivpu_fw.h" +#include "ivpu_gem.h" +#include "vpu_boot_api.h" + +#define CRASH_DUMP_HEADER "Intel NPU crash dump" +#define CRASH_DUMP_HEADERS_SIZE SZ_4K + +void ivpu_dev_coredump(struct ivpu_device *vdev) +{ + struct drm_print_iterator pi = {}; + struct drm_printer p; + size_t coredump_size; + char *coredump; + + coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE + + ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb); + coredump = vmalloc(coredump_size); + if (!coredump) + return; + + pi.data = coredump; + pi.remain = coredump_size; + p = drm_coredump_printer(&pi); + + drm_printf(&p, "%s\n", CRASH_DUMP_HEADER); + drm_printf(&p, "FW version: %s\n", vdev->fw->version); + ivpu_fw_log_print(vdev, false, &p); + + dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL); +} diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h new file mode 100644 index 000000000000..8efb09d02441 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_coredump.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_COREDUMP_H__ +#define __IVPU_COREDUMP_H__ + +#include <drm/drm_print.h> + +#include "ivpu_drv.h" +#include "ivpu_fw_log.h" + +#ifdef CONFIG_DEV_COREDUMP +void ivpu_dev_coredump(struct ivpu_device *vdev); +#else +static inline void ivpu_dev_coredump(struct ivpu_device *vdev) +{ + struct drm_printer p = drm_info_printer(vdev->drm.dev); + + ivpu_fw_log_print(vdev, false, &p); +} +#endif + +#endif /* __IVPU_COREDUMP_H__ */ diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 7cb962e21453..8180b95ed69d 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ +#include <linux/debugfs.h> + #include <drm/drm_debugfs.h> #include <drm/drm_file.h> #include <drm/drm_print.h> @@ -43,6 +45,14 @@ static int fw_name_show(struct seq_file *s, void *v) return 0; } +static int fw_version_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%s\n", 
vdev->fw->version); + return 0; +} + static int fw_trace_capability_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = seq_to_ivpu(s); @@ -106,41 +116,66 @@ static int reset_pending_show(struct seq_file *s, void *v) return 0; } +static int firewall_irq_counter_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter)); + return 0; +} + static const struct drm_debugfs_info vdev_debugfs_list[] = { {"bo_list", bo_list_show, 0}, {"fw_name", fw_name_show, 0}, + {"fw_version", fw_version_show, 0}, {"fw_trace_capability", fw_trace_capability_show, 0}, {"fw_trace_config", fw_trace_config_show, 0}, {"last_bootmode", last_bootmode_show, 0}, {"reset_counter", reset_counter_show, 0}, {"reset_pending", reset_pending_show, 0}, + {"firewall_irq_counter", firewall_irq_counter_show, 0}, }; +static int dvfs_mode_get(void *data, u64 *dvfs_mode) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + *dvfs_mode = vdev->fw->dvfs_mode; + return 0; +} + +static int dvfs_mode_set(void *data, u64 dvfs_mode) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + vdev->fw->dvfs_mode = (u32)dvfs_mode; + return pci_try_reset_function(to_pci_dev(vdev->drm.dev)); +} + +DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n"); + static ssize_t -dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { struct ivpu_device *vdev = file->private_data; - struct ivpu_fw_info *fw = vdev->fw; - u32 dvfs_mode; + char buffer[VPU_DYNDBG_CMD_MAX_LEN] = {}; int ret; - ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode); - if (ret < 0) - return ret; - - fw->dvfs_mode = dvfs_mode; + if (size >= VPU_DYNDBG_CMD_MAX_LEN) + return -EINVAL; - ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); - if (ret) + ret = strncpy_from_user(buffer, user_buf, size); + if (ret < 0) return ret; + ivpu_jsm_dyndbg_control(vdev, buffer, size); return size; } -static const struct file_operations dvfs_mode_fops = { +static const struct file_operations fw_dyndbg_fops = { .owner = THIS_MODULE, .open = simple_open, - .write = dvfs_mode_fops_write, + .write = fw_dyndbg_fops_write, }; static int fw_log_show(struct seq_file *s, void *v) @@ -166,7 +201,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l if (!size) return -EINVAL; - ivpu_fw_log_clear(vdev); + ivpu_fw_log_mark_read(vdev); return size; } @@ -287,22 +322,6 @@ static const struct file_operations fw_trace_level_fops = { }; static ssize_t -ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) -{ - struct ivpu_device *vdev = file->private_data; - - if (!size) - return -EINVAL; - - if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE)) - return -ENODEV; - if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY)) - return -ENODEV; - - return size; -} - -static ssize_t ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { struct ivpu_device *vdev = file->private_data; @@ -327,11 +346,56 @@ static const struct file_operations ivpu_force_recovery_fops = { .write = ivpu_force_recovery_fn, }; -static const struct file_operations ivpu_reset_engine_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = ivpu_reset_engine_fn, -}; +static int ivpu_reset_engine_fn(void *data, u64 val) +{ + struct 
ivpu_device *vdev = (struct ivpu_device *)data; + + return ivpu_jsm_reset_engine(vdev, (u32)val); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, ivpu_reset_engine_fn, "0x%02llx\n"); + +static int ivpu_resume_engine_fn(void *data, u64 val) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + return ivpu_jsm_hws_resume_engine(vdev, (u32)val); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n"); + +static int dct_active_get(void *data, u64 *active_percent) +{ + struct ivpu_device *vdev = data; + + *active_percent = vdev->pm->dct_active_percent; + + return 0; +} + +static int dct_active_set(void *data, u64 active_percent) +{ + struct ivpu_device *vdev = data; + int ret; + + if (active_percent > 100) + return -EINVAL; + + ret = ivpu_rpm_get(vdev); + if (ret) + return ret; + + if (active_percent) + ret = ivpu_pm_dct_enable(vdev, active_percent); + else + ret = ivpu_pm_dct_disable(vdev); + + ivpu_rpm_put(vdev); + + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n"); void ivpu_debugfs_init(struct ivpu_device *vdev) { @@ -342,9 +406,11 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_create_file("force_recovery", 0200, debugfs_root, vdev, &ivpu_force_recovery_fops); - debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev, + debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev, &dvfs_mode_fops); + debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev, + &fw_dyndbg_fops); debugfs_create_file("fw_log", 0644, debugfs_root, vdev, &fw_log_fops); debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev, @@ -356,8 +422,12 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_create_file("reset_engine", 0200, debugfs_root, vdev, &ivpu_reset_engine_fops); + debugfs_create_file("resume_engine", 0200, debugfs_root, vdev, + &ivpu_resume_engine_fops); - if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX) + if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX) { debugfs_create_file("fw_profiling_freq_drive", 0200, debugfs_root, vdev, &fw_profiling_freq_fops); + debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops); + } } diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 4b0640226986..38cf1c342c72 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/firmware.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/pm_runtime.h> +#include <generated/utsrelease.h> #include <drm/drm_accel.h> #include <drm/drm_file.h> @@ -14,7 +15,7 @@ #include <drm/drm_ioctl.h> #include <drm/drm_prime.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" #include "ivpu_debugfs.h" #include "ivpu_drv.h" #include "ivpu_fw.h" @@ -26,11 +27,13 @@ #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" +#include "ivpu_sysfs.h" +#include "vpu_boot_api.h" #ifndef DRIVER_VERSION_STR -#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ - __stringify(DRM_IVPU_DRIVER_MINOR) "." +#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE #endif static struct lock_class_key submitted_jobs_xa_lock_class_key; @@ -40,21 +43,31 @@ module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. 
See IVPU_DBG_* macros."); int ivpu_test_mode; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros."); +#endif u8 ivpu_pll_min_ratio; module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); -MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency"); +MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set NPU frequency"); u8 ivpu_pll_max_ratio = U8_MAX; module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); -MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); +MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency"); + +int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO; +module_param_named(sched_mode, ivpu_sched_mode, int, 0444); +MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler"); bool ivpu_disable_mmu_cont_pages; -module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0644); +module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444); MODULE_PARM_DESC(disable_mmu_cont_pages, "Disable MMU contiguous pages optimization"); +bool ivpu_force_snoop; +module_param_named(force_snoop, ivpu_force_snoop, bool, 0444); +MODULE_PARM_DESC(force_snoop, "Force snooping for NPU host memory access"); + struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) { struct ivpu_device *vdev = file_priv->vdev; @@ -74,9 +87,8 @@ static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *fi ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id); ivpu_cmdq_release_all_locked(file_priv); - ivpu_jsm_context_release(vdev, file_priv->ctx.id); ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx); - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); + ivpu_mmu_context_fini(vdev, &file_priv->ctx); file_priv->bound = false; drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id)); } @@ -94,9 +106,12 @@ static void file_priv_release(struct kref *ref) pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&vdev->context_list_lock); file_priv_unbind(vdev, file_priv); + drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa)); + xa_destroy(&file_priv->cmdq_xa); mutex_unlock(&vdev->context_list_lock); pm_runtime_put_autosuspend(vdev->drm.dev); + mutex_destroy(&file_priv->ms_lock); mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -106,8 +121,6 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) struct ivpu_file_priv *file_priv = *link; struct ivpu_device *vdev = file_priv->vdev; - drm_WARN_ON(&vdev->drm, !file_priv); - ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", file_priv->ctx.id, kref_read(&file_priv->ref)); @@ -119,7 +132,7 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param { switch (args->index) { case DRM_IVPU_CAP_METRIC_STREAMER: - args->value = 0; + args->value = 1; break; case DRM_IVPU_CAP_DMA_MEMORY_RANGE: args->value = 1; @@ -131,22 +144,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param return 0; } -static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate) -{ - int ret; - - ret = ivpu_rpm_get_if_active(vdev); - if (ret < 0) - return ret; - - *clk_rate = ret ? 
ivpu_hw_reg_pll_freq_get(vdev) : 0; - - if (ret) - ivpu_rpm_put(vdev); - - return 0; -} - static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -170,7 +167,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - ret = ivpu_get_core_clock_rate(vdev, &args->value); + args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); @@ -244,10 +241,13 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) goto err_dev_exit; } + INIT_LIST_HEAD(&file_priv->ms_instance_list); + file_priv->vdev = vdev; file_priv->bound = true; kref_init(&file_priv->ref); mutex_init(&file_priv->lock); + mutex_init(&file_priv->ms_lock); mutex_lock(&vdev->context_list_lock); @@ -258,9 +258,14 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) goto err_unlock; } - ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); - if (ret) - goto err_xa_erase; + ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id); + + file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK; + + xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1); + file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID; + file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID; mutex_unlock(&vdev->context_list_lock); drm_dev_exit(idx); @@ -272,10 +277,9 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) return 0; -err_xa_erase: - xa_erase_irq(&vdev->context_xa, ctx_id); err_unlock: mutex_unlock(&vdev->context_list_lock); + mutex_destroy(&file_priv->ms_lock); mutex_destroy(&file_priv->lock); kfree(file_priv); err_dev_exit: @@ -291,6 +295,7 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n", file_priv->ctx.id, current->comm, task_pid_nr(current)); + ivpu_ms_cleanup(file_priv); ivpu_file_priv_put(&file_priv); } @@ -301,6 +306,10 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_START, ivpu_ms_start_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) @@ -317,7 +326,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot); while (1) { - ivpu_ipc_irq_handler(vdev, NULL); + ivpu_ipc_irq_handler(vdev); ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0); if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout)) break; @@ -328,13 +337,28 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) ivpu_ipc_consumer_del(vdev, &cons); if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) { - ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n", + ivpu_err(vdev, "Invalid NPU ready message: 0x%x\n", ipc_hdr.data_addr); return -EIO; } if (!ret) - ivpu_dbg(vdev, PM, "VPU ready message received successfully\n"); + 
ivpu_dbg(vdev, PM, "NPU ready message received successfully\n"); + + return ret; +} + +static int ivpu_hw_sched_init(struct ivpu_device *vdev) +{ + int ret = 0; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_setup_priority_bands(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret); + return ret; + } + } return ret; } @@ -362,17 +386,35 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); - ivpu_hw_diagnose_failure(vdev); - ivpu_mmu_evtq_dump(vdev); - ivpu_fw_log_dump(vdev); - return ret; + goto err_diagnose_failure; } ivpu_hw_irq_clear(vdev); enable_irq(vdev->irq); ivpu_hw_irq_enable(vdev); ivpu_ipc_enable(vdev); + + if (ivpu_fw_is_cold_boot(vdev)) { + ret = ivpu_pm_dct_init(vdev); + if (ret) + goto err_disable_ipc; + + ret = ivpu_hw_sched_init(vdev); + if (ret) + goto err_disable_ipc; + } + return 0; + +err_disable_ipc: + ivpu_ipc_disable(vdev); + ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); +err_diagnose_failure: + ivpu_hw_diagnose_failure(vdev); + ivpu_mmu_evtq_dump(vdev); + ivpu_dev_coredump(vdev); + return ret; } void ivpu_prepare_for_reset(struct ivpu_device *vdev) @@ -387,12 +429,15 @@ int ivpu_shutdown(struct ivpu_device *vdev) { int ret; - ivpu_prepare_for_reset(vdev); + /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ + pci_save_state(to_pci_dev(vdev->drm.dev)); ret = ivpu_hw_power_down(vdev); if (ret) ivpu_warn(vdev, "Failed to power down HW: %d\n", ret); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + return ret; } @@ -416,16 +461,56 @@ static const struct drm_driver driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, - .major = DRM_IVPU_DRIVER_MAJOR, - .minor = DRM_IVPU_DRIVER_MINOR, + + .major = 1, }; +static void ivpu_context_abort_invalid(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + if (!file_priv->has_mmu_faults || file_priv->aborted) + continue; + + mutex_lock(&file_priv->lock); + ivpu_context_abort_locked(file_priv); + file_priv->aborted = true; + mutex_unlock(&file_priv->lock); + } + + mutex_unlock(&vdev->context_list_lock); +} + static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg) { struct ivpu_device *vdev = arg; + u8 irq_src; - return ivpu_ipc_irq_thread_handler(vdev); + if (kfifo_is_empty(&vdev->hw->irq.fifo)) + return IRQ_NONE; + + while (kfifo_get(&vdev->hw->irq.fifo, &irq_src)) { + switch (irq_src) { + case IVPU_HW_IRQ_SRC_IPC: + ivpu_ipc_irq_thread_handler(vdev); + break; + case IVPU_HW_IRQ_SRC_MMU_EVTQ: + ivpu_context_abort_invalid(vdev); + break; + case IVPU_HW_IRQ_SRC_DCT: + ivpu_pm_dct_irq_thread_handler(vdev); + break; + default: + ivpu_err_ratelimited(vdev, "Unknown IRQ source: %u\n", irq_src); + break; + } + } + + return IRQ_HANDLED; } static int ivpu_irq_init(struct ivpu_device *vdev) @@ -439,9 +524,11 @@ static int ivpu_irq_init(struct ivpu_device *vdev) return ret; } + ivpu_irq_handlers_init(vdev); + vdev->irq = pci_irq_vector(pdev, 0); - ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, + ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler, ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev); if (ret) ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); @@ -518,23 +605,24 @@ static int 
ivpu_dev_init(struct ivpu_device *vdev) if (!vdev->pm) return -ENOMEM; - if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX) { - vdev->hw->ops = &ivpu_hw_40xx_ops; + if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX) vdev->hw->dma_bits = 48; - } else { - vdev->hw->ops = &ivpu_hw_37xx_ops; + else vdev->hw->dma_bits = 38; - } vdev->platform = IVPU_PLATFORM_INVALID; vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); - xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list); + vdev->db_limit.min = IVPU_MIN_DB; + vdev->db_limit.max = IVPU_MAX_DB; + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); if (ret) goto err_xa_destroy; @@ -552,18 +640,16 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_xa_destroy; /* Init basic HW info based on buttress registers which are accessible before power up */ - ret = ivpu_hw_info_init(vdev); + ret = ivpu_hw_init(vdev); if (ret) goto err_xa_destroy; /* Power up early so the rest of init code can access VPU registers */ ret = ivpu_hw_power_up(vdev); if (ret) - goto err_power_down; + goto err_shutdown; - ret = ivpu_mmu_global_context_init(vdev); - if (ret) - goto err_power_down; + ivpu_mmu_global_context_init(vdev); ret = ivpu_mmu_init(vdev); if (ret) @@ -600,11 +686,10 @@ err_mmu_rctx_fini: ivpu_mmu_reserved_context_fini(vdev); err_mmu_gctx_fini: ivpu_mmu_global_context_fini(vdev); -err_power_down: - ivpu_hw_power_down(vdev); - if (IVPU_WA(d3hot_after_power_off)) - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); +err_shutdown: + ivpu_shutdown(vdev); err_xa_destroy: + xa_destroy(&vdev->db_xa); xa_destroy(&vdev->submitted_jobs_xa); xa_destroy(&vdev->context_xa); return ret; @@ -625,14 +710,14 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { + ivpu_jobs_abort_all(vdev); + ivpu_pm_cancel_recovery(vdev); ivpu_pm_disable(vdev); + ivpu_prepare_for_reset(vdev); ivpu_shutdown(vdev); - if (IVPU_WA(d3hot_after_power_off)) - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); - ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); ivpu_job_done_consumer_fini(vdev); - ivpu_pm_cancel_recovery(vdev); ivpu_bo_unbind_all_user_contexts(vdev); ivpu_ipc_fini(vdev); @@ -640,6 +725,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) ivpu_mmu_reserved_context_fini(vdev); ivpu_mmu_global_context_fini(vdev); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->db_xa)); + xa_destroy(&vdev->db_xa); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); xa_destroy(&vdev->submitted_jobs_xa); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); @@ -650,6 +737,7 @@ static struct pci_device_id ivpu_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) }, { } }; MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); @@ -670,6 +758,7 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; ivpu_debugfs_init(vdev); + ivpu_sysfs_init(vdev); ret = drm_dev_register(&vdev->drm, 0); if (ret) { diff --git 
a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 069ace4adb2d..3fdff3f6cffd 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_DRV_H__ @@ -21,14 +21,21 @@ #define DRIVER_NAME "intel_vpu" #define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)" -#define DRIVER_DATE "20230117" -#define PCI_DEVICE_ID_MTL 0x7d1d -#define PCI_DEVICE_ID_ARL 0xad1d -#define PCI_DEVICE_ID_LNL 0x643e +#define PCI_DEVICE_ID_MTL 0x7d1d +#define PCI_DEVICE_ID_ARL 0xad1d +#define PCI_DEVICE_ID_LNL 0x643e +#define PCI_DEVICE_ID_PTL_P 0xb03e -#define IVPU_HW_37XX 37 -#define IVPU_HW_40XX 40 +#define IVPU_HW_IP_37XX 37 +#define IVPU_HW_IP_40XX 40 +#define IVPU_HW_IP_50XX 50 +#define IVPU_HW_IP_60XX 60 + +#define IVPU_HW_IP_REV_LNL_B0 4 + +#define IVPU_HW_BTRS_MTL 1 +#define IVPU_HW_BTRS_LNL 2 #define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 /* SSID 1 is used by the VPU to represent reserved context */ @@ -36,13 +43,25 @@ #define IVPU_USER_CONTEXT_MIN_SSID 2 #define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63) -#define IVPU_NUM_ENGINES 2 +#define IVPU_MIN_DB 1 +#define IVPU_MAX_DB 255 + +#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0) +#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8) + +#define IVPU_NUM_PRIORITIES 4 +#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES) + +#define IVPU_CMDQ_MIN_ID 1 +#define IVPU_CMDQ_MAX_ID 255 #define IVPU_PLATFORM_SILICON 0 #define IVPU_PLATFORM_SIMICS 2 #define IVPU_PLATFORM_FPGA 3 #define IVPU_PLATFORM_INVALID 8 +#define IVPU_SCHED_MODE_AUTO -1 + #define IVPU_DBG_REG BIT(0) #define IVPU_DBG_IRQ BIT(1) #define IVPU_DBG_MMU BIT(2) @@ -87,10 +106,10 @@ struct ivpu_wa_table { bool punit_disabled; bool clear_runtime_mem; - bool d3hot_after_power_off; bool interrupt_clear_with_0; bool disable_clock_relinquish; bool disable_d0i3_msg; + bool wp0_during_power_up; }; struct ivpu_hw_info; @@ -119,6 +138,10 @@ struct ivpu_device { struct xarray context_xa; struct xa_limit context_xa_limit; + struct xarray db_xa; + struct xa_limit db_limit; + u32 db_next; + struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list; @@ -127,13 +150,16 @@ struct ivpu_device { atomic64_t unique_id_counter; + ktime_t busy_start_ts; + ktime_t busy_time; + struct { int boot; int jsm; int tdr; - int reschedule_suspend; int autosuspend; int d0i3_entry_msg; + int state_dump_msg; } timeout; }; @@ -145,22 +171,35 @@ struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; struct mutex lock; /* Protects cmdq */ - struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; + struct xarray cmdq_xa; struct ivpu_mmu_context ctx; + struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */ + struct list_head ms_instance_list; + struct ivpu_bo *ms_info_bo; + struct xa_limit job_limit; + u32 job_id_next; + struct xa_limit cmdq_limit; + u32 cmdq_id_next; bool has_mmu_faults; bool bound; + bool aborted; }; extern int ivpu_dbg_mask; extern u8 ivpu_pll_min_ratio; extern u8 ivpu_pll_max_ratio; +extern int ivpu_sched_mode; extern bool ivpu_disable_mmu_cont_pages; +extern bool ivpu_force_snoop; #define IVPU_TEST_MODE_FW_TEST BIT(0) #define IVPU_TEST_MODE_NULL_HW BIT(1) #define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2) #define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4) #define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5) +#define IVPU_TEST_MODE_MIP_DISABLE BIT(6) +#define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8) 
+#define IVPU_TEST_MODE_TURBO BIT(9) extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); @@ -180,16 +219,35 @@ static inline u16 ivpu_device_id(struct ivpu_device *vdev) return to_pci_dev(vdev->drm.dev)->device; } -static inline int ivpu_hw_gen(struct ivpu_device *vdev) +static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev) { switch (ivpu_device_id(vdev)) { case PCI_DEVICE_ID_MTL: case PCI_DEVICE_ID_ARL: - return IVPU_HW_37XX; + return IVPU_HW_IP_37XX; case PCI_DEVICE_ID_LNL: - return IVPU_HW_40XX; + return IVPU_HW_IP_40XX; + case PCI_DEVICE_ID_PTL_P: + return IVPU_HW_IP_50XX; default: - ivpu_err(vdev, "Unknown VPU device\n"); + dump_stack(); + ivpu_err(vdev, "Unknown NPU IP generation\n"); + return 0; + } +} + +static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev) +{ + switch (ivpu_device_id(vdev)) { + case PCI_DEVICE_ID_MTL: + case PCI_DEVICE_ID_ARL: + return IVPU_HW_BTRS_MTL; + case PCI_DEVICE_ID_LNL: + case PCI_DEVICE_ID_PTL_P: + return IVPU_HW_BTRS_LNL; + default: + dump_stack(); + ivpu_err(vdev, "Unknown buttress generation\n"); return 0; } } @@ -227,4 +285,9 @@ static inline bool ivpu_is_fpga(struct ivpu_device *vdev) return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA; } +static inline bool ivpu_is_force_snoop_enabled(struct ivpu_device *vdev) +{ + return ivpu_force_snoop; +} + #endif /* __IVPU_DRV_H__ */ diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 5fa8bd4603d5..6037ec0b3096 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/firmware.h> @@ -25,7 +25,6 @@ #define FW_SHAVE_NN_MAX_SIZE SZ_2M #define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START) #define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE) -#define FW_VERSION_HEADER_SIZE SZ_4K #define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE) #define WATCHDOG_MSS_REDIRECT 32 @@ -44,22 +43,31 @@ #define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \ ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor) +#define IVPU_FOCUS_PRESENT_TIMER_MS 1000 + static char *ivpu_firmware; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); -MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/.."); +MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/.."); +#endif -/* TODO: Remove mtl_vpu.bin from names after transition to generation based FW names */ static struct { int gen; const char *name; } fw_names[] = { - { IVPU_HW_37XX, "vpu_37xx.bin" }, - { IVPU_HW_37XX, "mtl_vpu.bin" }, - { IVPU_HW_37XX, "intel/vpu/vpu_37xx_v0.0.bin" }, - { IVPU_HW_40XX, "vpu_40xx.bin" }, - { IVPU_HW_40XX, "intel/vpu/vpu_40xx_v0.0.bin" }, + { IVPU_HW_IP_37XX, "vpu_37xx.bin" }, + { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" }, + { IVPU_HW_IP_40XX, "vpu_40xx.bin" }, + { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" }, + { IVPU_HW_IP_50XX, "vpu_50xx.bin" }, + { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" }, }; +/* Production fw_names from the table above */ +MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin"); + static int ivpu_fw_request(struct ivpu_device *vdev) { int ret = -ENOENT; @@ -73,7 +81,7 @@ static int 
ivpu_fw_request(struct ivpu_device *vdev) } for (i = 0; i < ARRAY_SIZE(fw_names); i++) { - if (fw_names[i].gen != ivpu_hw_gen(vdev)) + if (fw_names[i].gen != ivpu_hw_ip_gen(vdev)) continue; ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i].name, vdev->drm.dev); @@ -123,6 +131,23 @@ ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_hea return false; } +static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range_size) +{ + if (addr < range_start || addr + size > range_start + range_size) + return false; + + return true; +} + +static u32 +ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr) +{ + if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO) + return ivpu_sched_mode; + + return VPU_SCHEDULING_MODE_OS; +} + static int ivpu_fw_parse(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; @@ -179,8 +204,10 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", fw_hdr->header_version, fw_hdr->image_format); - ivpu_info(vdev, "Firmware: %s, version: %s", fw->name, - (const char *)fw_hdr + VPU_FW_HEADER_SIZE); + if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE)) + ivpu_warn(vdev, "Missing firmware version\n"); + + ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version); if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3)) return -EINVAL; @@ -196,16 +223,35 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->cold_boot_entry_point = fw_hdr->entry_point; fw->entry_point = fw->cold_boot_entry_point; - fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL); + fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL); fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING; fw->trace_hw_component_mask = -1; fw->dvfs_mode = 0; + fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? 
"HW" : "OS"); + + if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, + fw_hdr->ro_section_size, + fw_hdr->image_load_address, + fw_hdr->image_size)) { + ivpu_err(vdev, "Invalid read-only section: start address 0x%llx, size %u\n", + fw_hdr->ro_section_start_address, fw_hdr->ro_section_size); + return -EINVAL; + } + + fw->read_only_addr = fw_hdr->ro_section_start_address; + fw->read_only_size = fw_hdr->ro_section_size; + ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n", fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size); ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n", fw->runtime_addr, image_load_addr, fw->entry_point); + ivpu_dbg(vdev, FW_BOOT, "Read-only section: address 0x%llx, size %u\n", + fw->read_only_addr, fw->read_only_size); return 0; } @@ -243,13 +289,14 @@ static int ivpu_fw_update_global_range(struct ivpu_device *vdev) return -EINVAL; } - ivpu_hw_init_range(&vdev->hw->ranges.global, start, size); + ivpu_hw_range_init(&vdev->hw->ranges.global, start, size); return 0; } static int ivpu_fw_mem_init(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; + struct ivpu_addr_range fw_range; int log_verb_size; int ret; @@ -257,37 +304,48 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) if (ret) return ret; - fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC); + fw_range.start = fw->runtime_addr; + fw_range.end = fw->runtime_addr + fw->runtime_size; + fw->mem = ivpu_bo_create(vdev, &vdev->gctx, &fw_range, fw->runtime_size, + DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!fw->mem) { - ivpu_err(vdev, "Failed to allocate firmware runtime memory\n"); + ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n"); return -ENOMEM; } - fw->mem_log_crit = ivpu_bo_alloc_internal(vdev, 0, IVPU_FW_CRITICAL_BUFFER_SIZE, - DRM_IVPU_BO_CACHED); + ret = ivpu_mmu_context_set_pages_ro(vdev, &vdev->gctx, fw->read_only_addr, + fw->read_only_size); + if (ret) { + ivpu_err(vdev, "Failed to set firmware image read-only\n"); + goto err_free_fw_mem; + } + + fw->mem_log_crit = ivpu_bo_create_global(vdev, IVPU_FW_CRITICAL_BUFFER_SIZE, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); if (!fw->mem_log_crit) { - ivpu_err(vdev, "Failed to allocate critical log buffer\n"); + ivpu_err(vdev, "Failed to create critical log buffer\n"); ret = -ENOMEM; goto err_free_fw_mem; } - if (ivpu_log_level <= IVPU_FW_LOG_INFO) + if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO) log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE; else log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE; - fw->mem_log_verb = ivpu_bo_alloc_internal(vdev, 0, log_verb_size, DRM_IVPU_BO_CACHED); + fw->mem_log_verb = ivpu_bo_create_global(vdev, log_verb_size, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); if (!fw->mem_log_verb) { - ivpu_err(vdev, "Failed to allocate verbose log buffer\n"); + ivpu_err(vdev, "Failed to create verbose log buffer\n"); ret = -ENOMEM; goto err_free_log_crit; } if (fw->shave_nn_size) { - fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start, - fw->shave_nn_size, DRM_IVPU_BO_WC); + fw->mem_shave_nn = ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.shave, + fw->shave_nn_size, DRM_IVPU_BO_WC); if (!fw->mem_shave_nn) { - ivpu_err(vdev, "Failed to allocate shavenn buffer\n"); + ivpu_err(vdev, "Failed to create shavenn buffer\n"); ret = -ENOMEM; goto err_free_log_verb; } @@ -296,11 +354,11 @@ static int ivpu_fw_mem_init(struct ivpu_device 
*vdev) return 0; err_free_log_verb: - ivpu_bo_free_internal(fw->mem_log_verb); + ivpu_bo_free(fw->mem_log_verb); err_free_log_crit: - ivpu_bo_free_internal(fw->mem_log_crit); + ivpu_bo_free(fw->mem_log_crit); err_free_fw_mem: - ivpu_bo_free_internal(fw->mem); + ivpu_bo_free(fw->mem); return ret; } @@ -309,13 +367,13 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev) struct ivpu_fw_info *fw = vdev->fw; if (fw->mem_shave_nn) { - ivpu_bo_free_internal(fw->mem_shave_nn); + ivpu_bo_free(fw->mem_shave_nn); fw->mem_shave_nn = NULL; } - ivpu_bo_free_internal(fw->mem_log_verb); - ivpu_bo_free_internal(fw->mem_log_crit); - ivpu_bo_free_internal(fw->mem); + ivpu_bo_free(fw->mem_log_verb); + ivpu_bo_free(fw->mem_log_crit); + ivpu_bo_free(fw->mem); fw->mem_log_verb = NULL; fw->mem_log_crit = NULL; @@ -461,6 +519,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->punit_telemetry_sram_size); ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n", boot_params->vpu_telemetry_enable); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_scheduling_mode = 0x%x\n", + boot_params->vpu_scheduling_mode); ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n", boot_params->dvfs_mode); ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n", @@ -469,6 +529,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_residency_time_us); ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n", boot_params->d0i3_entry_vpu_ts); + ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", + boot_params->system_time_us); } void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) @@ -480,11 +542,14 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts); boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts; + boot_params->system_time_us = ktime_to_us(ktime_get_real()); ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n", boot_params->d0i3_residency_time_us); ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n", boot_params->d0i3_entry_vpu_ts); + ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", + boot_params->system_time_us); boot_params->save_restore_ret_address = 0; vdev->pm->is_warmboot = true; @@ -496,7 +561,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; - boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev); + boot_params->frequency = ivpu_hw_pll_freq_get(vdev); /* * This param is a debug firmware feature. 
It switches default clock @@ -519,8 +584,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2; boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2; - boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; - boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; + boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); + } /* Allow configuration for L2C_PAGE_TABLE with boot param value */ boot_params->autoconfig = 1; @@ -553,15 +620,19 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr; boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb); - boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev); - boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); - boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); + boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev); + boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev); + boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev); + boot_params->vpu_scheduling_mode = vdev->fw->sched_mode; + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS; boot_params->dvfs_mode = vdev->fw->dvfs_mode; if (!IVPU_WA(disable_d0i3_msg)) boot_params->d0i3_delayed_entry = 1; boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; + boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ ivpu_fw_boot_params_print(vdev, boot_params); diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 66b60fa161b5..1d0b2bd9d65c 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -1,11 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_FW_H__ #define __IVPU_FW_H__ +#include "vpu_jsm_api.h" + +#define FW_VERSION_HEADER_SIZE SZ_4K +#define FW_VERSION_STR_SIZE SZ_256 + struct ivpu_device; struct ivpu_bo; struct vpu_boot_params; @@ -13,6 +18,7 @@ struct vpu_boot_params; struct ivpu_fw_info { const struct firmware *file; const char *name; + char version[FW_VERSION_STR_SIZE]; struct ivpu_bo *mem; struct ivpu_bo *mem_shave_nn; struct ivpu_bo *mem_log_crit; @@ -28,6 +34,11 @@ struct ivpu_fw_info { u32 trace_destination_mask; u64 trace_hw_component_mask; u32 dvfs_mode; + u32 primary_preempt_buf_size; + u32 secondary_preempt_buf_size; + u64 read_only_addr; + u32 read_only_size; + u32 sched_mode; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c index f6770f5e82a2..337c906b0210 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.c +++ b/drivers/accel/ivpu/ivpu_fw_log.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/ctype.h> @@ -15,19 +15,19 @@ #include "ivpu_fw_log.h" #include "ivpu_gem.h" -#define 
IVPU_FW_LOG_LINE_LENGTH 256 +#define IVPU_FW_LOG_LINE_LENGTH 256 -unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR; -module_param(ivpu_log_level, uint, 0444); -MODULE_PARM_DESC(ivpu_log_level, - "VPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG) +unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR; +module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444); +MODULE_PARM_DESC(fw_log_level, + "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG) " info=" __stringify(IVPU_FW_LOG_INFO) " warn=" __stringify(IVPU_FW_LOG_WARN) " error=" __stringify(IVPU_FW_LOG_ERROR) " fatal=" __stringify(IVPU_FW_LOG_FATAL)); -static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, - struct vpu_tracing_buffer_header **log_header) +static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, + struct vpu_tracing_buffer_header **out_log) { struct vpu_tracing_buffer_header *log; @@ -48,7 +48,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, return -EINVAL; } - *log_header = log; + *out_log = log; *offset += log->size; ivpu_dbg(vdev, FW_BOOT, @@ -59,7 +59,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, return 0; } -static void buffer_print(char *buffer, u32 size, struct drm_printer *p) +static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p) { char line[IVPU_FW_LOG_LINE_LENGTH]; u32 index = 0; @@ -87,56 +87,89 @@ static void buffer_print(char *buffer, u32 size, struct drm_printer *p) } line[index] = 0; if (index != 0) - drm_printf(p, "%s\n", line); + drm_printf(p, "%s", line); } -static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log, - const char *prefix, bool only_new_msgs, struct drm_printer *p) +static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix, + bool only_new_msgs, struct drm_printer *p) { - char *log_buffer = (void *)log + log->header_size; - u32 log_size = log->size - log->header_size; - u32 log_start = log->read_index; - u32 log_end = log->write_index; - - if (!(log->write_index || log->wrap_count) || - (log->write_index == log->read_index && only_new_msgs)) { - drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name); - return; + char *log_data = (void *)log + log->header_size; + u32 data_size = log->size - log->header_size; + u32 log_start = only_new_msgs ? 
READ_ONCE(log->read_index) : 0; + u32 log_end = READ_ONCE(log->write_index); + + if (log->wrap_count == log->read_wrap_count) { + if (log_end <= log_start) { + drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name); + return; + } + } else if (log->wrap_count == log->read_wrap_count + 1) { + if (log_end > log_start) + log_start = log_end; + } else { + log_start = log_end; } drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name); - if (log->write_index > log->read_index) { - buffer_print(log_buffer + log_start, log_end - log_start, p); + if (log_end > log_start) { + fw_log_print_lines(log_data + log_start, log_end - log_start, p); } else { - buffer_print(log_buffer + log_end, log_size - log_end, p); - buffer_print(log_buffer, log_end, p); + fw_log_print_lines(log_data + log_start, data_size - log_start, p); + fw_log_print_lines(log_data, log_end, p); } - drm_printf(p, "\x1b[0m"); + drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */ drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name); } -void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p) +static void +fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name, + struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p) { - struct vpu_tracing_buffer_header *log_header; + struct vpu_tracing_buffer_header *log; u32 next = 0; - while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0) - fw_log_print_buffer(vdev, log_header, "VPU critical", only_new_msgs, p); + while (fw_log_from_bo(vdev, bo, &next, &log) == 0) + fw_log_print_buffer(log, name, only_new_msgs, p); +} + +void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p) +{ + fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p); + fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p); +} + +void ivpu_fw_log_mark_read(struct ivpu_device *vdev) +{ + struct vpu_tracing_buffer_header *log; + u32 next; + + next = 0; + while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) { + log->read_index = READ_ONCE(log->write_index); + log->read_wrap_count = READ_ONCE(log->wrap_count); + } next = 0; - while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0) - fw_log_print_buffer(vdev, log_header, "VPU verbose", only_new_msgs, p); + while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) { + log->read_index = READ_ONCE(log->write_index); + log->read_wrap_count = READ_ONCE(log->wrap_count); + } } -void ivpu_fw_log_clear(struct ivpu_device *vdev) +void ivpu_fw_log_reset(struct ivpu_device *vdev) { - struct vpu_tracing_buffer_header *log_header; - u32 next = 0; + struct vpu_tracing_buffer_header *log; + u32 next; - while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0) - log_header->read_index = log_header->write_index; + next = 0; + while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) { + log->read_index = 0; + log->read_wrap_count = 0; + } next = 0; - while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0) - log_header->read_index = log_header->write_index; + while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) { + log->read_index = 0; + log->read_wrap_count = 0; + } } diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h index 0b2573f6f315..8bb528a73cb7 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.h +++ b/drivers/accel/ivpu/ivpu_fw_log.h @@ -1,6 
+1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_FW_LOG_H__ @@ -8,8 +8,6 @@ #include <linux/types.h> -#include <drm/drm_print.h> - #include "ivpu_drv.h" #define IVPU_FW_LOG_DEFAULT 0 @@ -19,20 +17,15 @@ #define IVPU_FW_LOG_ERROR 4 #define IVPU_FW_LOG_FATAL 5 -extern unsigned int ivpu_log_level; - #define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M #define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M #define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K -void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); -void ivpu_fw_log_clear(struct ivpu_device *vdev); +extern unsigned int ivpu_fw_log_level; -static inline void ivpu_fw_log_dump(struct ivpu_device *vdev) -{ - struct drm_printer p = drm_info_printer(vdev->drm.dev); +void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); +void ivpu_fw_log_mark_read(struct ivpu_device *vdev); +void ivpu_fw_log_reset(struct ivpu_device *vdev); - ivpu_fw_log_print(vdev, false, &p); -} #endif /* __IVPU_FW_LOG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index e9ddbe9f50eb..16178054e629 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -172,8 +172,7 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz return &bo->base.base; } -static struct ivpu_bo * -ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) +static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags) { struct drm_gem_shmem_object *shmem; struct ivpu_bo *bo; @@ -201,7 +200,7 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) return bo; } -static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) +static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; @@ -224,7 +223,7 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range); } -static void ivpu_bo_free(struct drm_gem_object *obj) +static void ivpu_gem_bo_free(struct drm_gem_object *obj) { struct ivpu_device *vdev = to_ivpu_device(obj->dev); struct ivpu_bo *bo = to_ivpu_bo(obj); @@ -245,8 +244,8 @@ static void ivpu_bo_free(struct drm_gem_object *obj) } static const struct drm_gem_object_funcs ivpu_gem_funcs = { - .free = ivpu_bo_free, - .open = ivpu_bo_open, + .free = ivpu_gem_bo_free, + .open = ivpu_gem_bo_open, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, @@ -272,9 +271,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi if (size == 0) return -EINVAL; - bo = ivpu_bo_create(vdev, size, args->flags); + bo = ivpu_bo_alloc(vdev, size, args->flags); if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)", + ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)", bo, file_priv->ctx.id, args->size, args->flags); return PTR_ERR(bo); } @@ -289,33 +288,28 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi } struct ivpu_bo * -ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags) +ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + struct ivpu_addr_range *range, u64 size, u32 flags) { - 
const struct ivpu_addr_range *range; - struct ivpu_addr_range fixed_range; struct iosys_map map; struct ivpu_bo *bo; int ret; - drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr)); - drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); + if (drm_WARN_ON(&vdev->drm, !range)) + return NULL; - if (vpu_addr) { - fixed_range.start = vpu_addr; - fixed_range.end = vpu_addr + size; - range = &fixed_range; - } else { - range = &vdev->hw->ranges.global; - } + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->start)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); - bo = ivpu_bo_create(vdev, size, flags); + bo = ivpu_bo_alloc(vdev, size, flags); if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", - bo, vpu_addr, size, flags); + ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", + bo, range->start, size, flags); return NULL; } - ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range); + ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range); if (ret) goto err_put; @@ -323,11 +317,14 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla if (ret) goto err_put; - dma_resv_lock(bo->base.base.resv, NULL); - ret = drm_gem_shmem_vmap(&bo->base, &map); - dma_resv_unlock(bo->base.base.resv); - if (ret) - goto err_put; + if (flags & DRM_IVPU_BO_MAPPABLE) { + dma_resv_lock(bo->base.base.resv, NULL); + ret = drm_gem_shmem_vmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); + + if (ret) + goto err_put; + } return bo; @@ -336,13 +333,20 @@ err_put: return NULL; } -void ivpu_bo_free_internal(struct ivpu_bo *bo) +struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags) +{ + return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags); +} + +void ivpu_bo_free(struct ivpu_bo *bo) { struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); - dma_resv_lock(bo->base.base.resv, NULL); - drm_gem_shmem_vunmap(&bo->base, &map); - dma_resv_unlock(bo->base.base.resv); + if (bo->flags & DRM_IVPU_BO_MAPPABLE) { + dma_resv_lock(bo->base.base.resv, NULL); + drm_gem_shmem_vunmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); + } drm_gem_object_put(&bo->base.base); } @@ -380,6 +384,9 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */ + timeout += 1; + obj = drm_gem_object_lookup(file, args->handle); if (!obj) return -EINVAL; @@ -402,7 +409,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) mutex_lock(&bo->lock); drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", - bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size, + bo, bo->ctx ? 
bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size, bo->flags, kref_read(&bo->base.base.refcount)); if (bo->base.pages) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index a8559211c70d..d975000abd78 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -28,8 +28,10 @@ int ivpu_bo_pin(struct ivpu_bo *bo); void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); -struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); -void ivpu_bo_free_internal(struct ivpu_bo *bo); +struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + struct ivpu_addr_range *range, u64 size, u32 flags); +struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags); +void ivpu_bo_free(struct ivpu_bo *bo); int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -58,14 +60,17 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) return bo->flags & DRM_IVPU_BO_CACHE_MASK; } -static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) +static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) { - return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; + return to_ivpu_device(bo->base.base.dev); } -static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) +static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) { - return to_ivpu_device(bo->base.base.dev); + if (ivpu_is_force_snoop_enabled(ivpu_bo_to_vdev(bo))) + return true; + + return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; } static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c new file mode 100644 index 000000000000..4e1054f3466e --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - 2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" + +#include <linux/dmi.h> + +static char *platform_to_str(u32 platform) +{ + switch (platform) { + case IVPU_PLATFORM_SILICON: + return "SILICON"; + case IVPU_PLATFORM_SIMICS: + return "SIMICS"; + case IVPU_PLATFORM_FPGA: + return "FPGA"; + default: + return "Invalid platform"; + } +} + +static const struct dmi_system_id dmi_platform_simulation[] = { + { + .ident = "Intel Simics", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"), + DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_BOARD_SERIAL, "123456789"), + }, + }, + { + .ident = "Intel Simics", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "Simics"), + }, + }, + { } +}; + +static void platform_init(struct ivpu_device *vdev) +{ + if (dmi_check_system(dmi_platform_simulation)) + vdev->platform = IVPU_PLATFORM_SIMICS; + else + vdev->platform = IVPU_PLATFORM_SILICON; + + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", + platform_to_str(vdev->platform), vdev->platform); +} + +static void wa_init(struct ivpu_device *vdev) +{ + vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.clear_runtime_mem = false; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev); + + if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && + ivpu_revision(vdev) < 
IVPU_HW_IP_REV_LNL_B0) + vdev->wa.disable_clock_relinquish = true; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->wa.wp0_during_power_up = true; + + IVPU_PRINT_WA(punit_disabled); + IVPU_PRINT_WA(clear_runtime_mem); + IVPU_PRINT_WA(interrupt_clear_with_0); + IVPU_PRINT_WA(disable_clock_relinquish); + IVPU_PRINT_WA(wp0_during_power_up); +} + +static void timeouts_init(struct ivpu_device *vdev) +{ + if (ivpu_test_mode & IVPU_TEST_MODE_DISABLE_TIMEOUTS) { + vdev->timeout.boot = -1; + vdev->timeout.jsm = -1; + vdev->timeout.tdr = -1; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = -1; + } else if (ivpu_is_fpga(vdev)) { + vdev->timeout.boot = 100000; + vdev->timeout.jsm = 50000; + vdev->timeout.tdr = 2000000; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 500; + vdev->timeout.state_dump_msg = 10; + } else if (ivpu_is_simics(vdev)) { + vdev->timeout.boot = 50; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 10000; + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 100; + vdev->timeout.state_dump_msg = 10; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->timeout.autosuspend = 10; + else + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 5; + vdev->timeout.state_dump_msg = 10; + } +} + +static void memory_ranges_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M); + ivpu_hw_range_init(&vdev->hw->ranges.user, 0x88000000, 511 * SZ_1M); + ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x180000000, SZ_2G); + ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_128G); + } else { + ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M); + ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000, SZ_2G); + ivpu_hw_range_init(&vdev->hw->ranges.user, 0x100000000, SZ_256G); + vdev->hw->ranges.dma = vdev->hw->ranges.user; + } +} + +static int wp_enable(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_wp_drive(vdev, true); +} + +static int wp_disable(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_wp_drive(vdev, false); +} + +int ivpu_hw_power_up(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(wp0_during_power_up)) { + /* WP requests may fail when powering down, so issue WP 0 here */ + ret = wp_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable workpoint: %d\n", ret); + } + + ret = ivpu_hw_btrs_d0i3_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); + + ret = wp_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable workpoint: %d\n", ret); + return ret; + } + + if (ivpu_hw_btrs_gen(vdev) >= IVPU_HW_BTRS_LNL) { + if (IVPU_WA(disable_clock_relinquish)) + ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev); + ivpu_hw_btrs_profiling_freq_reg_set_lnl(vdev); + ivpu_hw_btrs_ats_print_lnl(vdev); + } + + ret = ivpu_hw_ip_host_ss_configure(vdev); + if (ret) { + ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); + return ret; + } + + ivpu_hw_ip_idle_gen_disable(vdev); + + ret = ivpu_hw_btrs_wait_for_clock_res_own_ack(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for clock resource own ACK\n"); + return ret; + } + + ret = ivpu_hw_ip_pwr_domain_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); + return ret; + } + + ret = ivpu_hw_ip_host_ss_axi_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable AXI: %d\n", 
ret); + return ret; + } + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_LNL) + ivpu_hw_btrs_set_port_arbitration_weights_lnl(vdev); + + ret = ivpu_hw_ip_top_noc_enable(vdev); + if (ret) + ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); + + return ret; +} + +static void save_d0i3_entry_timestamp(struct ivpu_device *vdev) +{ + vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); + vdev->hw->d0i3_entry_vpu_ts = ivpu_hw_ip_read_perf_timer_counter(vdev); +} + +int ivpu_hw_reset(struct ivpu_device *vdev) +{ + int ret = 0; + + if (ivpu_hw_btrs_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU IP\n"); + ret = -EIO; + } + + if (wp_disable(vdev)) { + ivpu_err(vdev, "Failed to disable workpoint\n"); + ret = -EIO; + } + + return ret; +} + +int ivpu_hw_power_down(struct ivpu_device *vdev) +{ + int ret = 0; + + save_d0i3_entry_timestamp(vdev); + + if (!ivpu_hw_is_idle(vdev)) + ivpu_warn(vdev, "NPU not idle during power down\n"); + + if (ivpu_hw_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU\n"); + ret = -EIO; + } + + if (ivpu_hw_btrs_d0i3_enable(vdev)) { + ivpu_err(vdev, "Failed to enter D0I3\n"); + ret = -EIO; + } + + return ret; +} + +int ivpu_hw_init(struct ivpu_device *vdev) +{ + ivpu_hw_btrs_info_init(vdev); + ivpu_hw_btrs_freq_ratios_init(vdev); + memory_ranges_init(vdev); + platform_init(vdev); + wa_init(vdev); + timeouts_init(vdev); + atomic_set(&vdev->hw->firewall_irq_counter, 0); + + return 0; +} + +int ivpu_hw_boot_fw(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_ip_snoop_disable(vdev); + ivpu_hw_ip_tbu_mmu_enable(vdev); + ret = ivpu_hw_ip_soc_cpu_boot(vdev); + if (ret) + ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret); + + return ret; +} + +void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; + return; + } + + if (enable) + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH; + else + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; +} + +void ivpu_irq_handlers_init(struct ivpu_device *vdev) +{ + INIT_KFIFO(vdev->hw->irq.fifo); + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_37xx; + else + vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_40xx; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_mtl; + else + vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_lnl; +} + +void ivpu_hw_irq_enable(struct ivpu_device *vdev) +{ + kfifo_reset(&vdev->hw->irq.fifo); + ivpu_hw_ip_irq_enable(vdev); + ivpu_hw_btrs_irq_enable(vdev); +} + +void ivpu_hw_irq_disable(struct ivpu_device *vdev) +{ + ivpu_hw_btrs_irq_disable(vdev); + ivpu_hw_ip_irq_disable(vdev); +} + +irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr) +{ + struct ivpu_device *vdev = ptr; + bool ip_handled, btrs_handled; + + ivpu_hw_btrs_global_int_disable(vdev); + + btrs_handled = ivpu_hw_btrs_irq_handler(vdev, irq); + if (!ivpu_hw_is_idle((vdev)) || !btrs_handled) + ip_handled = ivpu_hw_ip_irq_handler(vdev, irq); + else + ip_handled = false; + + /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ + ivpu_hw_btrs_global_int_enable(vdev); + + if (!kfifo_is_empty(&vdev->hw->irq.fifo)) + return IRQ_WAKE_THREAD; + if (ip_handled || btrs_handled) + return IRQ_HANDLED; + return IRQ_NONE; +} diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index b2909168a0a6..fc4dbfc980c8 100644 --- 
a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,38 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_HW_H__ #define __IVPU_HW_H__ +#include <linux/kfifo.h> + #include "ivpu_drv.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" -struct ivpu_hw_ops { - int (*info_init)(struct ivpu_device *vdev); - int (*power_up)(struct ivpu_device *vdev); - int (*boot_fw)(struct ivpu_device *vdev); - int (*power_down)(struct ivpu_device *vdev); - int (*reset)(struct ivpu_device *vdev); - bool (*is_idle)(struct ivpu_device *vdev); - int (*wait_for_idle)(struct ivpu_device *vdev); - void (*wdt_disable)(struct ivpu_device *vdev); - void (*diagnose_failure)(struct ivpu_device *vdev); - u32 (*profiling_freq_get)(struct ivpu_device *vdev); - void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable); - u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); - void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id); - u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev); - u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev); - void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr); - void (*irq_clear)(struct ivpu_device *vdev); - void (*irq_enable)(struct ivpu_device *vdev); - void (*irq_disable)(struct ivpu_device *vdev); - irqreturn_t (*irq_handler)(int irq, void *ptr); -}; +#define IVPU_HW_IRQ_FIFO_LENGTH 1024 + +#define IVPU_HW_IRQ_SRC_IPC 1 +#define IVPU_HW_IRQ_SRC_MMU_EVTQ 2 +#define IVPU_HW_IRQ_SRC_DCT 3 struct ivpu_addr_range { resource_size_t start; @@ -40,7 +24,11 @@ struct ivpu_addr_range { }; struct ivpu_hw_info { - const struct ivpu_hw_ops *ops; + struct { + bool (*btrs_irq_handler)(struct ivpu_device *vdev, int irq); + bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq); + DECLARE_KFIFO(fifo, u8, IVPU_HW_IRQ_FIFO_LENGTH); + } irq; struct { struct ivpu_addr_range global; struct ivpu_addr_range user; @@ -63,137 +51,110 @@ struct ivpu_hw_info { int dma_bits; ktime_t d0i3_entry_host_ts; u64 d0i3_entry_vpu_ts; + atomic_t firewall_irq_counter; }; -extern const struct ivpu_hw_ops ivpu_hw_37xx_ops; -extern const struct ivpu_hw_ops ivpu_hw_40xx_ops; +int ivpu_hw_init(struct ivpu_device *vdev); +int ivpu_hw_power_up(struct ivpu_device *vdev); +int ivpu_hw_power_down(struct ivpu_device *vdev); +int ivpu_hw_reset(struct ivpu_device *vdev); +int ivpu_hw_boot_fw(struct ivpu_device *vdev); +void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable); +void ivpu_irq_handlers_init(struct ivpu_device *vdev); +void ivpu_hw_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_irq_disable(struct ivpu_device *vdev); +irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr); -static inline int ivpu_hw_info_init(struct ivpu_device *vdev) +static inline u32 ivpu_hw_btrs_irq_handler(struct ivpu_device *vdev, int irq) { - return vdev->hw->ops->info_init(vdev); -}; - -static inline int ivpu_hw_power_up(struct ivpu_device *vdev) -{ - ivpu_dbg(vdev, PM, "HW power up\n"); - - return vdev->hw->ops->power_up(vdev); -}; + return vdev->hw->irq.btrs_irq_handler(vdev, irq); +} -static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ip_irq_handler(struct ivpu_device *vdev, int irq) { - return vdev->hw->ops->boot_fw(vdev); -}; + return 
vdev->hw->irq.ip_irq_handler(vdev, irq); +} -static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) +static inline void ivpu_hw_range_init(struct ivpu_addr_range *range, u64 start, u64 size) { - return vdev->hw->ops->is_idle(vdev); -}; + range->start = start; + range->end = start + size; +} -static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev) +static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) { - return vdev->hw->ops->wait_for_idle(vdev); -}; + return range->end - range->start; +} -static inline int ivpu_hw_power_down(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) { - ivpu_dbg(vdev, PM, "HW power down\n"); - - return vdev->hw->ops->power_down(vdev); -}; + return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); +} -static inline int ivpu_hw_reset(struct ivpu_device *vdev) +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { - ivpu_dbg(vdev, PM, "HW reset\n"); - - return vdev->hw->ops->reset(vdev); -}; + ivpu_hw_ip_irq_clear(vdev); +} -static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) +static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) { - vdev->hw->ops->wdt_disable(vdev); -}; + return ivpu_hw_btrs_pll_freq_get(vdev); +} static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) { - return vdev->hw->ops->profiling_freq_get(vdev); -}; - -static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) -{ - return vdev->hw->ops->profiling_freq_drive(vdev, enable); -}; - -/* Register indirect accesses */ -static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_pll_freq_get(vdev); -}; - -static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_telemetry_offset_get(vdev); -}; - -static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_telemetry_size_get(vdev); -}; - -static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_telemetry_enable_get(vdev); -}; + return vdev->hw->pll.profiling_freq; +} -static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id) +static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) { - vdev->hw->ops->reg_db_set(vdev, db_id); -}; + ivpu_hw_ip_diagnose_failure(vdev); + ivpu_hw_btrs_diagnose_failure(vdev); +} -static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_telemetry_offset_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_ipc_rx_addr_get(vdev); -}; + return ivpu_hw_btrs_telemetry_offset_get(vdev); +} -static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_telemetry_size_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_ipc_rx_count_get(vdev); -}; + return ivpu_hw_btrs_telemetry_size_get(vdev); +} -static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +static inline u32 ivpu_hw_telemetry_enable_get(struct ivpu_device *vdev) { - vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr); -}; + return ivpu_hw_btrs_telemetry_enable_get(vdev); +} -static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) { - vdev->hw->ops->irq_clear(vdev); -}; + return ivpu_hw_btrs_is_idle(vdev); +} -static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev) +static inline int ivpu_hw_wait_for_idle(struct 
ivpu_device *vdev) { - vdev->hw->ops->irq_enable(vdev); -}; + return ivpu_hw_btrs_wait_for_idle(vdev); +} -static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev) +static inline void ivpu_hw_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) { - vdev->hw->ops->irq_disable(vdev); -}; + ivpu_hw_ip_ipc_tx_set(vdev, vpu_addr); +} -static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size) +static inline void ivpu_hw_db_set(struct ivpu_device *vdev, u32 db_id) { - range->start = start; - range->end = start + size; + ivpu_hw_ip_db_set(vdev, db_id); } -static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) +static inline u32 ivpu_hw_ipc_rx_addr_get(struct ivpu_device *vdev) { - return range->end - range->start; + return ivpu_hw_ip_ipc_rx_addr_get(vdev); } -static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ipc_rx_count_get(struct ivpu_device *vdev) { - vdev->hw->ops->diagnose_failure(vdev); + return ivpu_hw_ip_ipc_rx_count_get(vdev); } #endif /* __IVPU_HW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c deleted file mode 100644 index 89af1006df55..000000000000 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ /dev/null @@ -1,1066 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020-2023 Intel Corporation - */ - -#include "ivpu_drv.h" -#include "ivpu_fw.h" -#include "ivpu_hw_37xx_reg.h" -#include "ivpu_hw_reg_io.h" -#include "ivpu_hw.h" -#include "ivpu_ipc.h" -#include "ivpu_mmu.h" -#include "ivpu_pm.h" - -#define TILE_FUSE_ENABLE_BOTH 0x0 -#define TILE_SKU_BOTH_MTL 0x3630 - -/* Work point configuration values */ -#define CONFIG_1_TILE 0x01 -#define CONFIG_2_TILE 0x02 -#define PLL_RATIO_5_3 0x01 -#define PLL_RATIO_4_3 0x02 -#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio)) -#define WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_5_3) -#define WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_4_3) -#define WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_5_3) -#define WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_4_3) -#define WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) - -#define PLL_REF_CLK_FREQ (50 * 1000000) -#define PLL_SIMULATION_FREQ (10 * 1000000) -#define PLL_PROF_CLK_FREQ (38400 * 1000) -#define PLL_DEFAULT_EPP_VALUE 0x80 - -#define TIM_SAFE_ENABLE 0xf1d0dead -#define TIM_WATCHDOG_RESET_VALUE 0xffffffff - -#define TIMEOUT_US (150 * USEC_PER_MSEC) -#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) -#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) -#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) - -#define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) - -#define ICB_1_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) - -#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) - -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ - 
(REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) - -#define BUTTRESS_ALL_IRQ_MASK (BUTTRESS_IRQ_MASK | \ - (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE))) - -#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) -#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) - -#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) - -static void ivpu_hw_wa_init(struct ivpu_device *vdev) -{ - vdev->wa.punit_disabled = false; - vdev->wa.clear_runtime_mem = false; - vdev->wa.d3hot_after_power_off = true; - - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK); - if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) { - /* Writing 1s does not clear the interrupt status register */ - vdev->wa.interrupt_clear_with_0 = true; - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); - } - - IVPU_PRINT_WA(punit_disabled); - IVPU_PRINT_WA(clear_runtime_mem); - IVPU_PRINT_WA(d3hot_after_power_off); - IVPU_PRINT_WA(interrupt_clear_with_0); -} - -static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) -{ - vdev->timeout.boot = 1000; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 2000; - vdev->timeout.reschedule_suspend = 10; - vdev->timeout.autosuspend = 10; - vdev->timeout.d0i3_entry_msg = 5; -} - -static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); -} - -/* Send KMD initiated workpoint change */ -static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, - u16 target_ratio, u16 config) -{ - int ret; - u32 val; - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) { - ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, val); - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, val); - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2, val); - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_CMD); - val = REG_SET_FLD(VPU_37XX_BUTTRESS_WP_REQ_CMD, SEND, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_CMD, val); - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) - ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); - - return ret; -} - -static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable) -{ - u32 exp_val = enable ? 
0x1 : 0x0; - - if (IVPU_WA(punit_disabled)) - return 0; - - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); -} - -static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) -{ - if (IVPU_WA(punit_disabled)) - return 0; - - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); -} - -static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u8 fuse_min_ratio, fuse_max_ratio, fuse_pn_ratio; - u32 fmin_fuse, fmax_fuse; - - fmin_fuse = REGB_RD32(VPU_37XX_BUTTRESS_FMIN_FUSE); - fuse_min_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); - fuse_pn_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); - - fmax_fuse = REGB_RD32(VPU_37XX_BUTTRESS_FMAX_FUSE); - fuse_max_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); - - hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); - hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); - hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); -} - -static int ivpu_hw_37xx_wait_for_vpuip_bar(struct ivpu_device *vdev) -{ - return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100); -} - -static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) -{ - struct ivpu_hw_info *hw = vdev->hw; - u16 target_ratio; - u16 config; - int ret; - - if (IVPU_WA(punit_disabled)) { - ivpu_dbg(vdev, PM, "Skipping PLL request\n"); - return 0; - } - - if (enable) { - target_ratio = hw->pll.pn_ratio; - config = hw->config; - } else { - target_ratio = 0; - config = 0; - } - - ivpu_dbg(vdev, PM, "PLL workpoint request: config 0x%04x pll ratio 0x%x\n", - config, target_ratio); - - ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config); - if (ret) { - ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); - return ret; - } - - ret = ivpu_pll_wait_for_lock(vdev, enable); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL lock\n"); - return ret; - } - - if (enable) { - ret = ivpu_pll_wait_for_status_ready(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); - return ret; - } - - ret = ivpu_hw_37xx_wait_for_vpuip_bar(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for VPUIP bar\n"); - return ret; - } - } - - return 0; -} - -static int ivpu_pll_enable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, true); -} - -static int ivpu_pll_disable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, false); -} - -static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev) -{ - u32 val = 0; - - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val); - - REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val); -} - -static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET); - - if (enable) { - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); - } - - 
REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val); -} - -static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET); - - if (enable) { - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); - } - - REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val); -} - -static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev) -{ - ivpu_boot_host_ss_rst_clr_assert(vdev); - - return ivpu_boot_noc_qreqn_check(vdev, 0x0); -} - -static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev) -{ - REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, 0x0); -} - -static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val); - - ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 
0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, true); -} - -static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); - if (enable) { - val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } else { - val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } - REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val); - - ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, true); -} - -static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); -} - -static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val); -} - -static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) -{ - return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, - exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); -} - -static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val); -} - -static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val); -} - -static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) -{ - int ret; - - ivpu_boot_pwr_island_trickle_drive(vdev, true); - ivpu_boot_pwr_island_drive(vdev, true); - - ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); - if (ret) { - ivpu_err(vdev, "Timed out waiting for power island status\n"); - return ret; - } - - ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qrenqn check %d\n", ret); - return ret; - } - - ivpu_boot_host_ss_clk_drive(vdev, true); - ivpu_boot_pwr_island_isolation_drive(vdev, false); - 
ivpu_boot_host_ss_rst_drive(vdev, true); - ivpu_boot_dpu_active_drive(vdev, true); - - return ret; -} - -static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); - - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); - val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); - - REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); -} - -static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV); - - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); - - REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val); -} - -static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) -{ - u32 val; - - val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); - val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); - - val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); - REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = vdev->fw->entry_point >> 9; - REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); - - val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val); - REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); - - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? 
"cold boot" : "resume"); -} - -static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL); - if (enable) - val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, I3, val); - else - val = REG_CLR_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, I3, val); - REGB_WR32(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, val); - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); - - return ret; -} - -static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - - hw->tile_fuse = TILE_FUSE_ENABLE_BOTH; - hw->sku = TILE_SKU_BOTH_MTL; - hw->config = WP_CONFIG_2_TILE_4_3_RATIO; - - ivpu_pll_init_frequency_ratios(vdev); - - ivpu_hw_init_range(&hw->ranges.global, 0x80000000, SZ_512M); - ivpu_hw_init_range(&hw->ranges.user, 0xc0000000, 255 * SZ_1M); - ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G); - ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G); - - vdev->platform = IVPU_PLATFORM_SILICON; - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - - return 0; -} - -static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev) -{ - int ret; - u32 val; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); - return ret; - } - - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET); - val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val); - REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val); - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); - - return ret; -} - -static int ivpu_hw_37xx_reset(struct ivpu_device *vdev) -{ - int ret = 0; - - if (ivpu_hw_37xx_ip_reset(vdev)) { - ivpu_err(vdev, "Failed to reset NPU\n"); - ret = -EIO; - } - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - return ret; -} - -static int ivpu_hw_37xx_d0i3_enable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_d0i3_drive(vdev, true); - if (ret) - ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); - - udelay(5); /* VPU requires 5 us to complete the transition */ - - return ret; -} - -static int ivpu_hw_37xx_d0i3_disable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_d0i3_drive(vdev, false); - if (ret) - ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); - - return ret; -} - -static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) -{ - int ret; - - /* PLL requests may fail when powering down, so issue WP 0 here */ - ret = ivpu_pll_disable(vdev); - if (ret) - ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret); - - ret = ivpu_hw_37xx_d0i3_disable(vdev); - if (ret) - ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); - - ret = ivpu_pll_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_configure(vdev); - if (ret) { - ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); - return ret; - } - - /* - * The control circuitry for vpu_idle indication logic powers up active. 
- * To ensure unnecessary low power mode signal from LRT during bring up, - * KMD disables the circuitry prior to bringing up the Main Power island. - */ - ivpu_boot_vpu_idle_gen_disable(vdev); - - ret = ivpu_boot_pwr_domain_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_axi_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_top_noc_enable(vdev); - if (ret) - ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); - - return ret; -} - -static int ivpu_hw_37xx_boot_fw(struct ivpu_device *vdev) -{ - ivpu_boot_no_snoop_enable(vdev); - ivpu_boot_tbu_mmu_enable(vdev); - ivpu_boot_soc_cpu_boot(vdev); - - return 0; -} - -static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev) -{ - u32 val; - - if (IVPU_WA(punit_disabled)) - return true; - - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_STATUS); - return REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, READY, val) && - REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val); -} - -static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); -} - -static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev) -{ - vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); - vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT); -} - -static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev) -{ - int ret = 0; - - ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev); - - if (!ivpu_hw_37xx_is_idle(vdev)) - ivpu_warn(vdev, "VPU not idle during power down\n"); - - if (ivpu_hw_37xx_reset(vdev)) { - ivpu_err(vdev, "Failed to reset VPU\n"); - ret = -EIO; - } - - if (ivpu_hw_37xx_d0i3_enable(vdev)) { - ivpu_err(vdev, "Failed to enter D0I3\n"); - ret = -EIO; - } - - return ret; -} - -static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev) -{ - u32 val; - - /* Enable writing and set non-zero WDT value */ - REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); - - /* Enable writing and disable watchdog timer */ - REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0); - - /* Now clear the timeout interrupt */ - val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG); - val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); - REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); -} - -static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev) -{ - return PLL_PROF_CLK_FREQ; -} - -static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable) -{ - /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. 
*/ -} - -static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config) -{ - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; - u32 cpu_clock; - - if ((config & 0xff) == PLL_RATIO_4_3) - cpu_clock = pll_clock * 2 / 4; - else - cpu_clock = pll_clock * 2 / 5; - - return cpu_clock; -} - -/* Register indirect accesses */ -static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL); - pll_curr_ratio &= VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config); -} - -static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET); -} - -static u32 ivpu_hw_37xx_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE); -} - -static u32 ivpu_hw_37xx_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE); -} - -static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id) -{ - u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0; - u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET); - - REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); -} - -static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev) -{ - return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM); -} - -static u32 ivpu_hw_37xx_reg_ipc_rx_count_get(struct ivpu_device *vdev) -{ - u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); - - return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); -} - -static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) -{ - REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); -} - -static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev) -{ - REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); -} - -static void ivpu_hw_37xx_irq_enable(struct ivpu_device *vdev) -{ - REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); - REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); - REGB_WR32(VPU_37XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); -} - -static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev) -{ - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - REGB_WR32(VPU_37XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); - REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull); - REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0); -} - -static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev) -{ - ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); -} - -static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev) -{ - ivpu_hw_wdt_disable(vdev); - ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); -} - -static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev) -{ - ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); -} - -/* Handler for IRQs from VPU core (irqV) */ -static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread) -{ - u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - - if (!status) - return false; - - REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) - ivpu_mmu_irq_evtq_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, 
status)) - ivpu_ipc_irq_handler(vdev, wake_thread); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) - ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) - ivpu_mmu_irq_gerr_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) - ivpu_hw_37xx_irq_wdt_mss_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) - ivpu_hw_37xx_irq_wdt_nce_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) - ivpu_hw_37xx_irq_noc_firewall_handler(vdev); - - return true; -} - -/* Handler for IRQs from Buttress core (irqB) */ -static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) -{ - u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - bool schedule_recovery = false; - - if (!status) - return false; - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", - REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL)); - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { - ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_37XX_BUTTRESS_ATS_ERR_LOG_0)); - REGB_WR32(VPU_37XX_BUTTRESS_ATS_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) { - u32 ufi_log = REGB_RD32(VPU_37XX_BUTTRESS_UFI_ERR_LOG); - - ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", - ufi_log, REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); - REGB_WR32(VPU_37XX_BUTTRESS_UFI_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - /* This must be done after interrupts are cleared at the source. */ - if (IVPU_WA(interrupt_clear_with_0)) - /* - * Writing 1 triggers an interrupt, so we can't perform read update write. - * Clear local interrupt status by writing 0 to all bits. 
- */ - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); - else - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status); - - if (schedule_recovery) - ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); - - return true; -} - -static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr) -{ - struct ivpu_device *vdev = ptr; - bool irqv_handled, irqb_handled, wake_thread = false; - - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - - irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread); - irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq); - - /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - - if (wake_thread) - return IRQ_WAKE_THREAD; - if (irqv_handled || irqb_handled) - return IRQ_HANDLED; - return IRQ_NONE; -} - -static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev) -{ - u32 irqv = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - u32 irqb = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (ivpu_hw_37xx_reg_ipc_rx_count_get(vdev)) - ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) - ivpu_err(vdev, "WDT MSS timeout detected\n"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) - ivpu_err(vdev, "WDT NCE timeout detected\n"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) - ivpu_err(vdev, "NOC Firewall irq detected\n"); - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) - ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_37XX_BUTTRESS_ATS_ERR_LOG_0)); - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) { - u32 ufi_log = REGB_RD32(VPU_37XX_BUTTRESS_UFI_ERR_LOG); - - ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", - ufi_log, REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); - } -} - -const struct ivpu_hw_ops ivpu_hw_37xx_ops = { - .info_init = ivpu_hw_37xx_info_init, - .power_up = ivpu_hw_37xx_power_up, - .is_idle = ivpu_hw_37xx_is_idle, - .wait_for_idle = ivpu_hw_37xx_wait_for_idle, - .power_down = ivpu_hw_37xx_power_down, - .reset = ivpu_hw_37xx_reset, - .boot_fw = ivpu_hw_37xx_boot_fw, - .wdt_disable = ivpu_hw_37xx_wdt_disable, - .diagnose_failure = ivpu_hw_37xx_diagnose_failure, - .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get, - .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive, - .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get, - .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get, - .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get, - .reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get, - .reg_db_set = ivpu_hw_37xx_reg_db_set, - .reg_ipc_rx_addr_get = ivpu_hw_37xx_reg_ipc_rx_addr_get, - .reg_ipc_rx_count_get = ivpu_hw_37xx_reg_ipc_rx_count_get, - .reg_ipc_tx_set = ivpu_hw_37xx_reg_ipc_tx_set, - .irq_clear = ivpu_hw_37xx_irq_clear, - .irq_enable = ivpu_hw_37xx_irq_enable, - .irq_disable = ivpu_hw_37xx_irq_disable, - .irq_handler = ivpu_hw_37xx_irq_handler, -}; diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h index f6fec1919202..cf5e2f01049c 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h @@ -8,78 +8,6 @@ #include <linux/bits.h> 
-#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE 0x00000000u - -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT 0x00000004u -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) - -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) - -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) - -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) - -#define VPU_37XX_BUTTRESS_WP_REQ_CMD 0x00000014u -#define VPU_37XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) - -#define VPU_37XX_BUTTRESS_WP_DOWNLOAD 0x00000018u -#define VPU_37XX_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) - -#define VPU_37XX_BUTTRESS_CURRENT_PLL 0x0000001cu -#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) - -#define VPU_37XX_BUTTRESS_PLL_ENABLE 0x00000020u - -#define VPU_37XX_BUTTRESS_FMIN_FUSE 0x00000024u -#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) -#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) - -#define VPU_37XX_BUTTRESS_FMAX_FUSE 0x00000028u -#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) - -#define VPU_37XX_BUTTRESS_TILE_FUSE 0x0000002cu -#define VPU_37XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) - -#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK 0x00000030u -#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK 0x00000034u - -#define VPU_37XX_BUTTRESS_PLL_STATUS 0x00000040u -#define VPU_37XX_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1) - -#define VPU_37XX_BUTTRESS_VPU_STATUS 0x00000044u -#define VPU_37XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) - -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) - -#define VPU_37XX_BUTTRESS_VPU_IP_RESET 0x00000050u -#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) - -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u - -#define VPU_37XX_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u -#define VPU_37XX_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u -#define VPU_37XX_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u - -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG 0x000000b0u -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) - -#define VPU_37XX_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u - #define VPU_37XX_HOST_SS_CPR_CLK_SET 0x00000084u #define VPU_37XX_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1) #define VPU_37XX_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c deleted file mode 100644 index a1523d0b1ef3..000000000000 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ /dev/null @@ -1,1244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020-2023 Intel 
Corporation - */ - -#include "ivpu_drv.h" -#include "ivpu_fw.h" -#include "ivpu_hw.h" -#include "ivpu_hw_40xx_reg.h" -#include "ivpu_hw_reg_io.h" -#include "ivpu_ipc.h" -#include "ivpu_mmu.h" -#include "ivpu_pm.h" - -#include <linux/dmi.h> - -#define TILE_MAX_NUM 6 -#define TILE_MAX_MASK 0x3f - -#define LNL_HW_ID 0x4040 - -#define SKU_TILE_SHIFT 0u -#define SKU_TILE_MASK 0x0000ffffu -#define SKU_HW_ID_SHIFT 16u -#define SKU_HW_ID_MASK 0xffff0000u - -#define PLL_CONFIG_DEFAULT 0x0 -#define PLL_CDYN_DEFAULT 0x80 -#define PLL_EPP_DEFAULT 0x80 -#define PLL_REF_CLK_FREQ (50 * 1000000) -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) - -#define PLL_PROFILING_FREQ_DEFAULT 38400000 -#define PLL_PROFILING_FREQ_HIGH 400000000 - -#define TIM_SAFE_ENABLE 0xf1d0dead -#define TIM_WATCHDOG_RESET_VALUE 0xffffffff - -#define TIMEOUT_US (150 * USEC_PER_MSEC) -#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) -#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) -#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) - -#define WEIGHTS_DEFAULT 0xf711f711u -#define WEIGHTS_ATS_DEFAULT 0x0000f711u - -#define ICB_0_IRQ_MASK ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) - -#define ICB_1_IRQ_MASK ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) - -#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) - -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR))) - -#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) -#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) - -#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) - -static char *ivpu_platform_to_str(u32 platform) -{ - switch (platform) { - case IVPU_PLATFORM_SILICON: - return "IVPU_PLATFORM_SILICON"; - case IVPU_PLATFORM_SIMICS: - return "IVPU_PLATFORM_SIMICS"; - case IVPU_PLATFORM_FPGA: - return "IVPU_PLATFORM_FPGA"; - default: - return "Invalid platform"; - } -} - -static const struct dmi_system_id ivpu_dmi_platform_simulation[] = { - { - .ident = "Intel Simics", - .matches = { - DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), - DMI_MATCH(DMI_BOARD_SERIAL, "123456789"), - }, - }, - { - .ident = "Intel Simics", - .matches = { - DMI_MATCH(DMI_BOARD_NAME, "Simics"), - }, - }, - { } -}; - -static void ivpu_hw_read_platform(struct ivpu_device *vdev) -{ - if 
(dmi_check_system(ivpu_dmi_platform_simulation)) - vdev->platform = IVPU_PLATFORM_SIMICS; - else - vdev->platform = IVPU_PLATFORM_SILICON; - - ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", - ivpu_platform_to_str(vdev->platform), vdev->platform); -} - -static void ivpu_hw_wa_init(struct ivpu_device *vdev) -{ - vdev->wa.punit_disabled = ivpu_is_fpga(vdev); - vdev->wa.clear_runtime_mem = false; - - if (ivpu_hw_gen(vdev) == IVPU_HW_40XX) - vdev->wa.disable_clock_relinquish = true; - - IVPU_PRINT_WA(punit_disabled); - IVPU_PRINT_WA(clear_runtime_mem); - IVPU_PRINT_WA(disable_clock_relinquish); -} - -static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) -{ - if (ivpu_is_fpga(vdev)) { - vdev->timeout.boot = 100000; - vdev->timeout.jsm = 50000; - vdev->timeout.tdr = 2000000; - vdev->timeout.reschedule_suspend = 1000; - vdev->timeout.autosuspend = -1; - vdev->timeout.d0i3_entry_msg = 500; - } else if (ivpu_is_simics(vdev)) { - vdev->timeout.boot = 50; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 10000; - vdev->timeout.reschedule_suspend = 10; - vdev->timeout.autosuspend = -1; - vdev->timeout.d0i3_entry_msg = 100; - } else { - vdev->timeout.boot = 1000; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 2000; - vdev->timeout.reschedule_suspend = 10; - vdev->timeout.autosuspend = 10; - vdev->timeout.d0i3_entry_msg = 5; - } -} - -static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); -} - -static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, - u16 target_ratio, u16 epp, u16 config, u16 cdyn) -{ - int ret; - u32 val; - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) { - ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, val); - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, EPP, epp, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, val); - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, CDYN, cdyn, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, val); - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_CMD); - val = REG_SET_FLD(VPU_40XX_BUTTRESS_WP_REQ_CMD, SEND, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_CMD, val); - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) - ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); - - return ret; -} - -static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); -} - -static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev) -{ - if (ivpu_is_simics(vdev)) - return 0; - - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); -} - -static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u8 fuse_min_ratio, fuse_pn_ratio, fuse_max_ratio; - u32 fmin_fuse, fmax_fuse; - - fmin_fuse = REGB_RD32(VPU_40XX_BUTTRESS_FMIN_FUSE); 
- fuse_min_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); - fuse_pn_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); - - fmax_fuse = REGB_RD32(VPU_40XX_BUTTRESS_FMAX_FUSE); - fuse_max_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); - - hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); - hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); - hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); -} - -static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) -{ - u16 config = enable ? PLL_CONFIG_DEFAULT : 0; - u16 cdyn = enable ? PLL_CDYN_DEFAULT : 0; - u16 epp = enable ? PLL_EPP_DEFAULT : 0; - struct ivpu_hw_info *hw = vdev->hw; - u16 target_ratio = hw->pll.pn_ratio; - int ret; - - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, epp: 0x%x, config: 0x%x, cdyn: 0x%x\n", - PLL_RATIO_TO_FREQ(target_ratio), epp, config, cdyn); - - ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, - target_ratio, epp, config, cdyn); - if (ret) { - ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); - return ret; - } - - if (enable) { - ret = ivpu_pll_wait_for_status_ready(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); - return ret; - } - } - - return 0; -} - -static int ivpu_pll_enable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, true); -} - -static int ivpu_pll_disable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, false); -} - -static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN); - - if (enable) { - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); - } - - REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val); -} - -static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN); - - if (enable) { - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); - } - - REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val); -} - -static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - 
return 0; -} - -static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static void ivpu_boot_idle_gen_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val); -} - -static int ivpu_boot_host_ss_check(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_noc_qreqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qreqn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qacceptn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val); - - ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - return ret; - } - - if (enable) { - REGB_WR32(VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT); - REGB_WR32(VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT); - } - - return ret; -} - -static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, true); -} - -static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); - if (enable) { - val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } else { - val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } - REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val); - - ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 
0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, true); -} - -static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); - - if (enable) - ndelay(500); -} - -static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val); - - if (!enable) - ndelay(500); -} - -static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) -{ - if (ivpu_is_fpga(vdev)) - return 0; - - return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU, - exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); -} - -static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val); -} - -static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); - - val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); - val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); - - REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); -} - -static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV); - - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); - - REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val); -} - -static int ivpu_boot_cpu_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_cpu_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_wait_for_clock_own_resource_ack(vdev); - if (ret) { - 
ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n"); - return ret; - } - - ivpu_boot_pwr_island_trickle_drive(vdev, true); - ivpu_boot_pwr_island_drive(vdev, true); - - ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); - if (ret) { - ivpu_err(vdev, "Timed out waiting for power island status\n"); - return ret; - } - - ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qrenqn check %d\n", ret); - return ret; - } - - ivpu_boot_host_ss_clk_drive(vdev, true); - ivpu_boot_host_ss_rst_drive(vdev, true); - ivpu_boot_pwr_island_isolation_drive(vdev, false); - - return ret; -} - -static int ivpu_boot_soc_cpu_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN); - if (enable) - val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); - else - val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); - REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val); - - ret = ivpu_boot_cpu_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_cpu_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_soc_cpu_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_soc_cpu_drive(vdev, true); -} - -static int ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) -{ - int ret; - u32 val; - u64 val64; - - ret = ivpu_boot_soc_cpu_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret); - return ret; - } - - val64 = vdev->fw->entry_point; - val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; - REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); - - val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); - val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); - REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); - - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - ivpu_fw_is_cold_boot(vdev) ? 
"cold boot" : "resume"); - - return 0; -} - -static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_40XX_BUTTRESS_D0I3_CONTROL); - if (enable) - val = REG_SET_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, I3, val); - else - val = REG_CLR_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, I3, val); - REGB_WR32(VPU_40XX_BUTTRESS_D0I3_CONTROL, val); - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); - return ret; - } - - return 0; -} - -static bool ivpu_tile_disable_check(u32 config) -{ - /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */ - if (config == 0) - return true; - - if (config > BIT(TILE_MAX_NUM - 1)) - return false; - - if ((config & (config - 1)) == 0) - return true; - - return false; -} - -static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u32 tile_disable; - u32 fuse; - - fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE); - if (!REG_TEST_FLD(VPU_40XX_BUTTRESS_TILE_FUSE, VALID, fuse)) { - ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse); - return -EIO; - } - - tile_disable = REG_GET_FLD(VPU_40XX_BUTTRESS_TILE_FUSE, CONFIG, fuse); - if (!ivpu_tile_disable_check(tile_disable)) { - ivpu_err(vdev, "Fuse: Invalid tile disable config (0x%x)\n", tile_disable); - return -EIO; - } - - if (tile_disable) - ivpu_dbg(vdev, MISC, "Fuse: %d tiles enabled. Tile number %d disabled\n", - TILE_MAX_NUM - 1, ffs(tile_disable) - 1); - else - ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM); - - hw->tile_fuse = tile_disable; - hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; - - ivpu_pll_init_frequency_ratios(vdev); - - ivpu_hw_init_range(&vdev->hw->ranges.global, 0x80000000, SZ_512M); - ivpu_hw_init_range(&vdev->hw->ranges.user, 0x80000000, SZ_256M); - ivpu_hw_init_range(&vdev->hw->ranges.shave, 0x80000000 + SZ_256M, SZ_2G - SZ_256M); - ivpu_hw_init_range(&vdev->hw->ranges.dma, 0x200000000, SZ_8G); - - ivpu_hw_read_platform(vdev); - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - - return 0; -} - -static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Wait for *_TRIGGER timed out\n"); - return ret; - } - - val = REGB_RD32(VPU_40XX_BUTTRESS_IP_RESET); - val = REG_SET_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, val); - REGB_WR32(VPU_40XX_BUTTRESS_IP_RESET, val); - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); - - return ret; -} - -static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) -{ - int ret = 0; - - if (ivpu_hw_40xx_ip_reset(vdev)) { - ivpu_err(vdev, "Failed to reset VPU IP\n"); - ret = -EIO; - } - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - return ret; -} - -static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev) -{ - int ret; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = ivpu_boot_d0i3_drive(vdev, true); - if (ret) - ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); - - udelay(5); /* VPU requires 5 us to complete the transition */ - - return ret; -} - -static int 
ivpu_hw_40xx_d0i3_disable(struct ivpu_device *vdev) -{ - int ret; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = ivpu_boot_d0i3_drive(vdev, false); - if (ret) - ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); - - return ret; -} - -static void ivpu_hw_40xx_profiling_freq_reg_set(struct ivpu_device *vdev) -{ - u32 val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS); - - if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) - val = REG_CLR_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, PERF_CLK, val); - else - val = REG_SET_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, PERF_CLK, val); - - REGB_WR32(VPU_40XX_BUTTRESS_VPU_STATUS, val); -} - -static void ivpu_hw_40xx_ats_print(struct ivpu_device *vdev) -{ - ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n", - REGB_RD32(VPU_40XX_BUTTRESS_HM_ATS) ? "Enable" : "Disable"); -} - -static void ivpu_hw_40xx_clock_relinquish_disable(struct ivpu_device *vdev) -{ - u32 val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS); - - val = REG_SET_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, DISABLE_CLK_RELINQUISH, val); - REGB_WR32(VPU_40XX_BUTTRESS_VPU_STATUS, val); -} - -static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_hw_40xx_d0i3_disable(vdev); - if (ret) - ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); - - ret = ivpu_pll_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); - return ret; - } - - if (IVPU_WA(disable_clock_relinquish)) - ivpu_hw_40xx_clock_relinquish_disable(vdev); - ivpu_hw_40xx_profiling_freq_reg_set(vdev); - ivpu_hw_40xx_ats_print(vdev); - - ret = ivpu_boot_host_ss_check(vdev); - if (ret) { - ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); - return ret; - } - - ivpu_boot_idle_gen_drive(vdev, false); - - ret = ivpu_boot_pwr_domain_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_axi_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_top_noc_enable(vdev); - if (ret) - ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); - - return ret; -} - -static int ivpu_hw_40xx_boot_fw(struct ivpu_device *vdev) -{ - int ret; - - ivpu_boot_no_snoop_enable(vdev); - ivpu_boot_tbu_mmu_enable(vdev); - - ret = ivpu_boot_soc_cpu_boot(vdev); - if (ret) - ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret); - - return ret; -} - -static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev) -{ - u32 val; - - if (IVPU_WA(punit_disabled)) - return true; - - val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS); - return REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, val) && - REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val); -} - -static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); -} - -static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev) -{ - vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); - vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT); -} - -static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev) -{ - int ret = 0; - - ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev); - - if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev)) - ivpu_warn(vdev, "Failed to reset the VPU\n"); - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - if (ivpu_hw_40xx_d0i3_enable(vdev)) { - ivpu_err(vdev, "Failed to enter D0I3\n"); - ret = -EIO; - } - - return ret; -} - 
-static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev) -{ - u32 val; - - REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); - - REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0); - - val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG); - val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); - REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val); -} - -static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev) -{ - return vdev->hw->pll.profiling_freq; -} - -static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable) -{ - if (enable) - vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH; - else - vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; -} - -/* Register indirect accesses */ -static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_40XX_BUTTRESS_PLL_FREQ); - pll_curr_ratio &= VPU_40XX_BUTTRESS_PLL_FREQ_RATIO_MASK; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET); -} - -static u32 ivpu_hw_40xx_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_SIZE); -} - -static u32 ivpu_hw_40xx_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_ENABLE); -} - -static void ivpu_hw_40xx_reg_db_set(struct ivpu_device *vdev, u32 db_id) -{ - u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0; - u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET); - - REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); -} - -static u32 ivpu_hw_40xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev) -{ - return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM); -} - -static u32 ivpu_hw_40xx_reg_ipc_rx_count_get(struct ivpu_device *vdev) -{ - u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); - - return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); -} - -static void ivpu_hw_40xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) -{ - REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); -} - -static void ivpu_hw_40xx_irq_clear(struct ivpu_device *vdev) -{ - REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); -} - -static void ivpu_hw_40xx_irq_enable(struct ivpu_device *vdev) -{ - REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); - REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); - REGB_WR32(VPU_40XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); -} - -static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev) -{ - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - REGB_WR32(VPU_40XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); - REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull); - REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul); -} - -static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev) -{ - /* TODO: For LNN hang consider engine reset instead of full recovery */ - ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); -} - -static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev) -{ - ivpu_hw_wdt_disable(vdev); - ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); -} - -static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev) 
-{ - ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); -} - -/* Handler for IRQs from VPU core (irqV) */ -static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread) -{ - u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - - if (!status) - return false; - - REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) - ivpu_mmu_irq_evtq_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) - ivpu_ipc_irq_handler(vdev, wake_thread); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) - ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) - ivpu_mmu_irq_gerr_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) - ivpu_hw_40xx_irq_wdt_mss_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) - ivpu_hw_40xx_irq_wdt_nce_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) - ivpu_hw_40xx_irq_noc_firewall_handler(vdev); - - return true; -} - -/* Handler for IRQs from Buttress core (irqB) */ -static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) -{ - bool schedule_recovery = false; - u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (!status) - return false; - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE"); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { - ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG1), - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG2)); - REGB_WR32(VPU_40XX_BUTTRESS_ATS_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR, status)) { - ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_40XX_BUTTRESS_CFI0_ERR_LOG)); - REGB_WR32(VPU_40XX_BUTTRESS_CFI0_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR, status)) { - ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_40XX_BUTTRESS_CFI1_ERR_LOG)); - REGB_WR32(VPU_40XX_BUTTRESS_CFI1_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR, status)) { - ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH)); - REGB_WR32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR, status)) { - ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH)); - REGB_WR32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR, status)) { - ivpu_err(vdev, "Survivability error detected\n"); - schedule_recovery = true; - } - - /* This must be done after interrupts are cleared at the source. 
*/ - REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); - - if (schedule_recovery) - ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); - - return true; -} - -static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr) -{ - bool irqv_handled, irqb_handled, wake_thread = false; - struct ivpu_device *vdev = ptr; - - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - - irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread); - irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq); - - /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - - if (wake_thread) - return IRQ_WAKE_THREAD; - if (irqv_handled || irqb_handled) - return IRQ_HANDLED; - return IRQ_NONE; -} - -static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev) -{ - u32 irqv = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - u32 irqb = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (ivpu_hw_40xx_reg_ipc_rx_count_get(vdev)) - ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) - ivpu_err(vdev, "WDT MSS timeout detected\n"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) - ivpu_err(vdev, "WDT NCE timeout detected\n"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) - ivpu_err(vdev, "NOC Firewall irq detected\n"); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) { - ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG1), - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG2)); - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR, irqb)) - ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_40XX_BUTTRESS_CFI0_ERR_LOG)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR, irqb)) - ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_40XX_BUTTRESS_CFI1_ERR_LOG)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR, irqb)) - ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR, irqb)) - ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR, irqb)) - ivpu_err(vdev, "Survivability error detected\n"); -} - -const struct ivpu_hw_ops ivpu_hw_40xx_ops = { - .info_init = ivpu_hw_40xx_info_init, - .power_up = ivpu_hw_40xx_power_up, - .is_idle = ivpu_hw_40xx_is_idle, - .wait_for_idle = ivpu_hw_40xx_wait_for_idle, - .power_down = ivpu_hw_40xx_power_down, - .reset = ivpu_hw_40xx_reset, - .boot_fw = ivpu_hw_40xx_boot_fw, - .wdt_disable = ivpu_hw_40xx_wdt_disable, - .diagnose_failure = ivpu_hw_40xx_diagnose_failure, - .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get, - .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive, - .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get, - .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get, - .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get, - .reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get, - .reg_db_set = ivpu_hw_40xx_reg_db_set, - .reg_ipc_rx_addr_get = ivpu_hw_40xx_reg_ipc_rx_addr_get, - .reg_ipc_rx_count_get = 
ivpu_hw_40xx_reg_ipc_rx_count_get, - .reg_ipc_tx_set = ivpu_hw_40xx_reg_ipc_tx_set, - .irq_clear = ivpu_hw_40xx_irq_clear, - .irq_enable = ivpu_hw_40xx_irq_enable, - .irq_disable = ivpu_hw_40xx_irq_disable, - .irq_handler = ivpu_hw_40xx_irq_handler, -}; diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index ff4a5d4f5821..fc0ee8d637f9 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -8,91 +8,6 @@ #include <linux/bits.h> -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT 0x00000000u -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6) - -#define VPU_40XX_BUTTRESS_LOCAL_INT_MASK 0x00000004u -#define VPU_40XX_BUTTRESS_GLOBAL_INT_MASK 0x00000008u - -#define VPU_40XX_BUTTRESS_HM_ATS 0x0000000cu - -#define VPU_40XX_BUTTRESS_ATS_ERR_LOG1 0x00000010u -#define VPU_40XX_BUTTRESS_ATS_ERR_LOG2 0x00000014u -#define VPU_40XX_BUTTRESS_ATS_ERR_CLEAR 0x00000018u - -#define VPU_40XX_BUTTRESS_CFI0_ERR_LOG 0x0000001cu -#define VPU_40XX_BUTTRESS_CFI0_ERR_CLEAR 0x00000020u - -#define VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u - -#define VPU_40XX_BUTTRESS_CFI1_ERR_LOG 0x00000040u -#define VPU_40XX_BUTTRESS_CFI1_ERR_CLEAR 0x00000044u - -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW 0x00000048u -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH 0x0000004cu -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_CLEAR 0x00000050u - -#define VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS 0x00000054u - -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW 0x00000058u -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH 0x0000005cu -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_CLEAR 0x00000060u - -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000130u -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) - -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1 0x00000134u -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) - -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000138u -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16) - -#define VPU_40XX_BUTTRESS_WP_REQ_CMD 0x0000013cu -#define VPU_40XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) - -#define VPU_40XX_BUTTRESS_PLL_FREQ 0x00000148u -#define VPU_40XX_BUTTRESS_PLL_FREQ_RATIO_MASK GENMASK(15, 0) - -#define VPU_40XX_BUTTRESS_TILE_FUSE 0x00000150u -#define VPU_40XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_TILE_FUSE_CONFIG_MASK GENMASK(6, 1) - -#define VPU_40XX_BUTTRESS_VPU_STATUS 0x00000154u -#define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) -#define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2) -#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6) -#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) -#define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) -#define 
VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) - -#define VPU_40XX_BUTTRESS_IP_RESET 0x00000160u -#define VPU_40XX_BUTTRESS_IP_RESET_TRIGGER_MASK BIT_MASK(0) - -#define VPU_40XX_BUTTRESS_D0I3_CONTROL 0x00000164u -#define VPU_40XX_BUTTRESS_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_D0I3_CONTROL_I3_MASK BIT_MASK(2) - -#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000168u -#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x0000016cu -#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000170u - -#define VPU_40XX_BUTTRESS_FMIN_FUSE 0x00000174u -#define VPU_40XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) -#define VPU_40XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) - -#define VPU_40XX_BUTTRESS_FMAX_FUSE 0x00000178u -#define VPU_40XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) - #define VPU_40XX_HOST_SS_CPR_CLK_EN 0x00000080u #define VPU_40XX_HOST_SS_CPR_CLK_EN_TOP_NOC_MASK BIT_MASK(1) #define VPU_40XX_HOST_SS_CPR_CLK_EN_DSS_MAS_MASK BIT_MASK(10) @@ -198,6 +113,14 @@ #define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu #define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0_CSS_CPU_MASK BIT_MASK(3) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 8) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16) + +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) + #define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u #define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1) @@ -205,6 +128,9 @@ #define VPU_40XX_HOST_SS_AON_DPU_ACTIVE 0x00030204u #define VPU_40XX_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) +#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE 0x00030210u +#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE_REQ_OVERRIDE_MASK BIT_MASK(0) + #define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO 0x00040040u #define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_DONE_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c new file mode 100644 index 000000000000..3212c99f3682 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -0,0 +1,890 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_btrs_lnl_reg.h" +#include "ivpu_hw_btrs_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_pm.h" + +#define BTRS_MTL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR))) + +#define BTRS_LNL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR))) + +#define BTRS_MTL_ALL_IRQ_MASK (BTRS_MTL_IRQ_MASK | (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, \ + FREQ_CHANGE))) + +#define BTRS_IRQ_DISABLE_MASK ((u32)-1) + +#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) + +#define 
BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) +#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) +#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) +#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) +#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) + +#define PLL_CDYN_DEFAULT 0x80 +#define PLL_EPP_DEFAULT 0x80 +#define PLL_CONFIG_DEFAULT 0x0 +#define PLL_SIMULATION_FREQ 10000000 +#define PLL_REF_CLK_FREQ 50000000 +#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) +#define TIMEOUT_US (150 * USEC_PER_MSEC) + +/* Work point configuration values */ +#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio)) +#define MTL_CONFIG_1_TILE 0x01 +#define MTL_CONFIG_2_TILE 0x02 +#define MTL_PLL_RATIO_5_3 0x01 +#define MTL_PLL_RATIO_4_3 0x02 +#define BTRS_MTL_TILE_FUSE_ENABLE_BOTH 0x0 +#define BTRS_MTL_TILE_SKU_BOTH 0x3630 + +#define BTRS_LNL_TILE_MAX_NUM 6 +#define BTRS_LNL_TILE_MAX_MASK 0x3f + +#define WEIGHTS_DEFAULT 0xf711f711u +#define WEIGHTS_ATS_DEFAULT 0x0000f711u + +#define DCT_REQ 0x2 +#define DCT_ENABLE 0x1 +#define DCT_DISABLE 0x0 + +int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) +{ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); + if (REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) == BTRS_MTL_ALL_IRQ_MASK) { + /* Writing 1s does not clear the interrupt status register */ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0); + return true; + } + + return false; +} + +static void freq_ratios_init_mtl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMIN_FUSE); + hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, MIN_RATIO, fmin_fuse); + hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMAX_FUSE); + hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMAX_FUSE, MAX_RATIO, fmax_fuse); +} + +static void freq_ratios_init_lnl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMIN_FUSE); + hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, MIN_RATIO, fmin_fuse); + hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMAX_FUSE); + hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMAX_FUSE, MAX_RATIO, fmax_fuse); +} + +void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + freq_ratios_init_mtl(vdev); + else + freq_ratios_init_lnl(vdev); + + hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, hw->pll.min_ratio, hw->pll.max_ratio); + hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, hw->pll.max_ratio); + hw->pll.pn_ratio = clamp_t(u8, hw->pll.pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); +} + +static bool tile_disable_check(u32 config) +{ + /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */ + if (config == 0) + return true; + + if (config > BIT(BTRS_LNL_TILE_MAX_NUM - 1)) + return false; + + if ((config & (config - 1)) == 0) + return true; + + return false; +} + +static int read_tile_config_fuse(struct ivpu_device *vdev, u32 *tile_fuse_config) +{ + u32 fuse; + u32 config; + + fuse = 
REGB_RD32(VPU_HW_BTRS_LNL_TILE_FUSE); + if (!REG_TEST_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, VALID, fuse)) { + ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse); + return -EIO; + } + + config = REG_GET_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, CONFIG, fuse); + if (!tile_disable_check(config)) + ivpu_warn(vdev, "More than 1 tile disabled, tile fuse config mask: 0x%x\n", config); + + ivpu_dbg(vdev, MISC, "Tile disable config mask: 0x%x\n", config); + + *tile_fuse_config = config; + return 0; +} + +static int info_init_mtl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + + hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; + hw->sku = BTRS_MTL_TILE_SKU_BOTH; + hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + + return 0; +} + +static int info_init_lnl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 tile_fuse_config; + int ret; + + ret = read_tile_config_fuse(vdev, &tile_fuse_config); + if (ret) + return ret; + + hw->tile_fuse = tile_fuse_config; + hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; + + return 0; +} + +int ivpu_hw_btrs_info_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return info_init_mtl(vdev); + else + return info_init_lnl(vdev); +} + +static int wp_request_sync(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); +} + +static int wait_for_status_ready(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US); +} + +struct wp_request { + u16 min; + u16 max; + u16 target; + u16 cfg; + u16 epp; + u16 cdyn; +}; + +static void wp_request_mtl(struct ivpu_device *vdev, struct wp_request *wp) +{ + u32 val; + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, EPP, PLL_EPP_DEFAULT, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_CMD); + val = REG_SET_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_CMD, val); +} + +static void wp_request_lnl(struct ivpu_device *vdev, struct wp_request *wp) +{ + u32 val; + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, EPP, wp->epp, 
val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CDYN, wp->cdyn, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_CMD); + val = REG_SET_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_CMD, val); +} + +static void wp_request(struct ivpu_device *vdev, struct wp_request *wp) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + wp_request_mtl(vdev, wp); + else + wp_request_lnl(vdev, wp); +} + +static int wp_request_send(struct ivpu_device *vdev, struct wp_request *wp) +{ + int ret; + + ret = wp_request_sync(vdev); + if (ret) { + ivpu_err(vdev, "Failed to sync before workpoint request: %d\n", ret); + return ret; + } + + wp_request(vdev, wp); + + ret = wp_request_sync(vdev); + if (ret) + ivpu_err(vdev, "Failed to sync after workpoint request: %d\n", ret); + + return ret; +} + +static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp, bool enable) +{ + struct ivpu_hw_info *hw = vdev->hw; + + wp->min = hw->pll.min_ratio; + wp->max = hw->pll.max_ratio; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + wp->target = enable ? hw->pll.pn_ratio : 0; + wp->cfg = enable ? hw->config : 0; + wp->cdyn = 0; + wp->epp = 0; + } else { + wp->target = hw->pll.pn_ratio; + wp->cfg = enable ? PLL_CONFIG_DEFAULT : 0; + wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0; + wp->epp = enable ? PLL_EPP_DEFAULT : 0; + } +} + +static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (ivpu_hw_btrs_gen(vdev) != IVPU_HW_BTRS_MTL) + return 0; + + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); +} + +int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) +{ + struct wp_request wp; + int ret; + + if (IVPU_WA(punit_disabled)) { + ivpu_dbg(vdev, PM, "Skipping workpoint request\n"); + return 0; + } + + prepare_wp_request(vdev, &wp, enable); + + ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); + + ret = wp_request_send(vdev, &wp); + if (ret) { + ivpu_err(vdev, "Failed to send workpoint request: %d\n", ret); + return ret; + } + + ret = wait_for_pll_lock(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL lock\n"); + return ret; + } + + ret = wait_for_status_ready(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for NPU ready status\n"); + return ret; + } + + return 0; +} + +static int d0i3_drive_mtl(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val); + REGB_WR32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + + return ret; +} + +static int d0i3_drive_lnl(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 
val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_LNL_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val); + REGB_WR32(VPU_HW_BTRS_LNL_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + return ret; + } + + return 0; +} + +static int d0i3_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return d0i3_drive_mtl(vdev, enable); + else + return d0i3_drive_lnl(vdev, enable); +} + +int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = d0i3_drive(vdev, true); + if (ret) + ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); + + udelay(5); /* VPU requires 5 us to complete the transition */ + + return ret; +} + +int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = d0i3_drive(vdev, false); + if (ret) + ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); + + return ret; +} + +int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); +} + +void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev) +{ + REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT); + REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT); +} + +static int ip_reset_mtl(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_IP_RESET); + val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_HW_BTRS_MTL_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +static int ip_reset_lnl(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Wait for *_TRIGGER timed out\n"); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_LNL_IP_RESET); + val = REG_SET_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_HW_BTRS_LNL_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return ip_reset_mtl(vdev); + else + return ip_reset_lnl(vdev); +} + +void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) + val = REG_CLR_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val); + else + val = 
REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val); + + REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val); +} + +void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n", + REGB_RD32(VPU_HW_BTRS_LNL_HM_ATS) ? "Enable" : "Disable"); +} + +void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + val = REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, DISABLE_CLK_RELINQUISH, val); + REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val); +} + +bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev) +{ + u32 val; + + if (IVPU_WA(punit_disabled)) + return true; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_STATUS); + + return REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, val) && + REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, val); + } else { + val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + return REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, val) && + REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, val); + } +} + +int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); +} + +/* Handler for IRQs from Buttress core (irqB) */ +bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK; + bool schedule_recovery = false; + + if (!status) + return false; + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", + REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); + REGB_WR32(VPU_HW_BTRS_MTL_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, status)) { + u32 ufi_log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, ufi_log)); + REGB_WR32(VPU_HW_BTRS_MTL_UFI_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + /* This must be done after interrupts are cleared at the source. */ + if (IVPU_WA(interrupt_clear_with_0)) + /* + * Writing 1 triggers an interrupt, so we can't perform read update write. + * Clear local interrupt status by writing 0 to all bits. 
+ */ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0); + else + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, status); + + if (schedule_recovery) + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); + + return true; +} + +/* Handler for IRQs from Buttress core (irqB) */ +bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK; + bool schedule_recovery = false; + + if (!status) + return false; + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) { + ivpu_dbg(vdev, IRQ, "Survivability IRQ\n"); + if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_DCT)) + ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1), + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2)); + REGB_WR32(VPU_HW_BTRS_LNL_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, status)) { + ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG)); + REGB_WR32(VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, status)) { + ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG)); + REGB_WR32(VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, status)) { + ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH)); + REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, status)) { + ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH)); + REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR, 0x1); + schedule_recovery = true; + } + + /* This must be done after interrupts are cleared at the source. */ + REGB_WR32(VPU_HW_BTRS_LNL_INTERRUPT_STAT, status); + + if (schedule_recovery) + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); + + return true; +} + +int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW); + u32 cmd = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, CMD, val); + u32 param1 = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, PARAM1, val); + + if (cmd != DCT_REQ) { + ivpu_err_ratelimited(vdev, "Unsupported PCODE command: 0x%x\n", cmd); + return -EBADR; + } + + switch (param1) { + case DCT_ENABLE: + *enable = true; + return 0; + case DCT_DISABLE: + *enable = false; + return 0; + default: + ivpu_err_ratelimited(vdev, "Invalid PARAM1 value: %u\n", param1); + return -EINVAL; + } +} + +void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 active_percent) +{ + u32 val = 0; + u32 cmd = enable ? 
DCT_ENABLE : DCT_DISABLE; + + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, CMD, DCT_REQ, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM1, cmd, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); + + REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); +} + +static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) +{ + u32 pll_clock = PLL_REF_CLK_FREQ * ratio; + u32 cpu_clock; + + if ((config & 0xff) == MTL_PLL_RATIO_4_3) + cpu_clock = pll_clock * 2 / 4; + else + cpu_clock = pll_clock * 2 / 5; + + return cpu_clock; +} + +u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +{ + struct ivpu_hw_info *hw = vdev->hw; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_freq_mtl(ratio, hw->config); + else + return PLL_RATIO_TO_FREQ(ratio); +} + +static u32 pll_freq_get_mtl(struct ivpu_device *vdev) +{ + u32 pll_curr_ratio; + + pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); + pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; + + if (!ivpu_is_silicon(vdev)) + return PLL_SIMULATION_FREQ; + + return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); +} + +static u32 pll_freq_get_lnl(struct ivpu_device *vdev) +{ + u32 pll_curr_ratio; + + pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); + pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; + + return PLL_RATIO_TO_FREQ(pll_curr_ratio); +} + +u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_freq_get_mtl(vdev); + else + return pll_freq_get_lnl(vdev); +} + +u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET); +} + +u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE); +} + +u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE); +} + +void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1); + else + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1); +} + +void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0); + else + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0); +} + +void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, (u32)(~BTRS_MTL_IRQ_MASK)); + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0); + } else { + REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, (u32)(~BTRS_LNL_IRQ_MASK)); + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0); + } +} + +void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1); + REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, BTRS_IRQ_DISABLE_MASK); + } else { + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1); + REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, 
BTRS_IRQ_DISABLE_MASK); + } +} + +static void diagnose_failure_mtl(struct ivpu_device *vdev) +{ + u32 reg = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK; + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, reg)) + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, reg)) { + u32 log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, log)); + } +} + +static void diagnose_failure_lnl(struct ivpu_device *vdev) +{ + u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK; + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, reg)) { + ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1), + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2)); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, reg)) + ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, reg)) + ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, reg)) + ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, reg)) + ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, reg)) + ivpu_err(vdev, "Survivability IRQ\n"); +} + +void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return diagnose_failure_mtl(vdev); + else + return diagnose_failure_lnl(vdev); +} diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h new file mode 100644 index 000000000000..04f14f50fed6 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_H__ +#define __IVPU_HW_BTRS_H__ + +#include "ivpu_drv.h" +#include "ivpu_hw_37xx_reg.h" +#include "ivpu_hw_40xx_reg.h" +#include "ivpu_hw_reg_io.h" + +#define PLL_PROFILING_FREQ_DEFAULT 38400000 +#define PLL_PROFILING_FREQ_HIGH 400000000 +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) + +#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_PERIOD_US 35300u + +int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); +void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev); +int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev); +int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable); +int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev); +int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev); +int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev); +bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev); +int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev); +int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev); +void
ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); +void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); +void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); +bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); +int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); +void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); +u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); +u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); +void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev); +void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev); + +#endif /* __IVPU_HW_BTRS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h new file mode 100644 index 000000000000..fc51f3098f97 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_LNL_REG_H__ +#define __IVPU_HW_BTRS_LNL_REG_H__ + +#include <linux/bits.h> + +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT 0x00000000u +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6) + +#define VPU_HW_BTRS_LNL_LOCAL_INT_MASK 0x00000004u +#define VPU_HW_BTRS_LNL_GLOBAL_INT_MASK 0x00000008u + +#define VPU_HW_BTRS_LNL_HM_ATS 0x0000000cu + +#define VPU_HW_BTRS_LNL_ATS_ERR_LOG1 0x00000010u +#define VPU_HW_BTRS_LNL_ATS_ERR_LOG2 0x00000014u +#define VPU_HW_BTRS_LNL_ATS_ERR_CLEAR 0x00000018u + +#define VPU_HW_BTRS_LNL_CFI0_ERR_LOG 0x0000001cu +#define VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR 0x00000020u + +#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u + +#define VPU_HW_BTRS_LNL_CFI1_ERR_LOG 0x00000040u +#define VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR 0x00000044u + +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW 0x00000048u +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH 0x0000004cu +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR 0x00000050u + +#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS 0x00000054u + +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW 0x00000058u +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH 0x0000005cu +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR 0x00000060u + +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS 0x00000070u +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_CMD_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM1_MASK GENMASK(15, 8) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM2_MASK GENMASK(23, 16) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM3_MASK GENMASK(31, 24) + +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW 0x00000074u +#define 
VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_CMD_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM1_MASK GENMASK(15, 8) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM2_MASK GENMASK(23, 16) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM3_MASK GENMASK(31, 24) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0 0x00000130u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1 0x00000134u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2 0x00000138u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_CMD 0x0000013cu +#define VPU_HW_BTRS_LNL_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_LNL_PLL_FREQ 0x00000148u +#define VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_LNL_TILE_FUSE 0x00000150u +#define VPU_HW_BTRS_LNL_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_TILE_FUSE_CONFIG_MASK GENMASK(6, 1) + +#define VPU_HW_BTRS_LNL_VPU_STATUS 0x00000154u +#define VPU_HW_BTRS_LNL_VPU_STATUS_READY_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_VPU_STATUS_IDLE_MASK BIT_MASK(1) +#define VPU_HW_BTRS_LNL_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2) +#define VPU_HW_BTRS_LNL_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6) +#define VPU_HW_BTRS_LNL_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) +#define VPU_HW_BTRS_LNL_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) +#define VPU_HW_BTRS_LNL_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) + +#define VPU_HW_BTRS_LNL_IP_RESET 0x00000160u +#define VPU_HW_BTRS_LNL_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_LNL_D0I3_CONTROL 0x00000164u +#define VPU_HW_BTRS_LNL_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET 0x00000168u +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE 0x0000016cu +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE 0x00000170u + +#define VPU_HW_BTRS_LNL_FMIN_FUSE 0x00000174u +#define VPU_HW_BTRS_LNL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define VPU_HW_BTRS_LNL_FMAX_FUSE 0x00000178u +#define VPU_HW_BTRS_LNL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#endif /* __IVPU_HW_BTRS_LNL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h new file mode 100644 index 000000000000..e93d539e066f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_MTL_REG_H__ +#define __IVPU_HW_BTRS_MTL_REG_H__ + +#include <linux/bits.h> + +#define VPU_HW_BTRS_MTL_INTERRUPT_TYPE 0x00000000u + +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT 0x00000004u +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0 0x00000008u +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1 0x0000000cu 
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2 0x00000010u +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_WP_REQ_CMD 0x00000014u +#define VPU_HW_BTRS_MTL_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_MTL_WP_DOWNLOAD 0x00000018u +#define VPU_HW_BTRS_MTL_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_CURRENT_PLL 0x0000001cu +#define VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_PLL_ENABLE 0x00000020u + +#define VPU_HW_BTRS_MTL_FMIN_FUSE 0x00000024u +#define VPU_HW_BTRS_MTL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_MTL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define VPU_HW_BTRS_MTL_FMAX_FUSE 0x00000028u +#define VPU_HW_BTRS_MTL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#define VPU_HW_BTRS_MTL_TILE_FUSE 0x0000002cu +#define VPU_HW_BTRS_MTL_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_TILE_FUSE_SKU_MASK GENMASK(3, 2) + +#define VPU_HW_BTRS_MTL_LOCAL_INT_MASK 0x00000030u +#define VPU_HW_BTRS_MTL_GLOBAL_INT_MASK 0x00000034u + +#define VPU_HW_BTRS_MTL_PLL_STATUS 0x00000040u +#define VPU_HW_BTRS_MTL_PLL_STATUS_LOCK_MASK BIT_MASK(1) + +#define VPU_HW_BTRS_MTL_VPU_STATUS 0x00000044u +#define VPU_HW_BTRS_MTL_VPU_STATUS_READY_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_VPU_STATUS_IDLE_MASK BIT_MASK(1) + +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL 0x00000060u +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_MTL_VPU_IP_RESET 0x00000050u +#define VPU_HW_BTRS_MTL_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET 0x00000080u +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE 0x00000084u +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE 0x00000088u + +#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_0 0x000000a0u +#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_1 0x000000a4u +#define VPU_HW_BTRS_MTL_ATS_ERR_CLEAR 0x000000a8u + +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG 0x000000b0u +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) + +#define VPU_HW_BTRS_MTL_UFI_ERR_CLEAR 0x000000b4u + +#endif /* __IVPU_HW_BTRS_MTL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c new file mode 100644 index 000000000000..029dd065614b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -0,0 +1,1188 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_hw.h" +#include "ivpu_hw_37xx_reg.h" +#include "ivpu_hw_40xx_reg.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_mmu.h" +#include "ivpu_pm.h" + +#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) + +#define TIM_SAFE_ENABLE 0xf1d0dead +#define TIM_WATCHDOG_RESET_VALUE 0xffffffff + +#define ICB_0_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, 
CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK_37XX ((((u64)ICB_1_IRQ_MASK_37XX) << 32) | ICB_0_IRQ_MASK_37XX) + +#define ICB_0_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK_40XX ((((u64)ICB_1_IRQ_MASK_40XX) << 32) | ICB_0_IRQ_MASK_40XX) + +#define ITF_FIREWALL_VIOLATION_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +#define ITF_FIREWALL_VIOLATION_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +static int wait_for_ip_bar(struct ivpu_device *vdev) +{ + return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100); +} + +static void host_ss_rst_clr(struct ivpu_device *vdev) +{ + u32 val = 0; + + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val); + + REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val); +} + +static int host_ss_noc_qreqn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qreqn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qreqn_check_37xx(vdev, exp_val); + else + return host_ss_noc_qreqn_check_40xx(vdev, exp_val); +} + +static int host_ss_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, 
exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qacceptn_check_37xx(vdev, exp_val); + else + return host_ss_noc_qacceptn_check_40xx(vdev, exp_val); +} + +static int host_ss_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qdeny_check_37xx(vdev, exp_val); + else + return host_ss_noc_qdeny_check_40xx(vdev, exp_val); +} + +static int top_noc_qrenqn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qrenqn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qrenqn_check_37xx(vdev, exp_val); + else + return top_noc_qrenqn_check_40xx(vdev, exp_val); +} + +int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev) +{ + int ret; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + ret = wait_for_ip_bar(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for NPU IP bar\n"); + return ret; + } + host_ss_rst_clr(vdev); + } + + ret = host_ss_noc_qreqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qreqn check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qacceptn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check %d\n", ret); + + return ret; +} + +static void idle_gen_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, val); +} + +static void idle_gen_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val); +} + +void ivpu_hw_ip_idle_gen_enable(struct 
ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + idle_gen_drive_37xx(vdev, true); + else + idle_gen_drive_40xx(vdev, true); +} + +void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + idle_gen_drive_37xx(vdev, false); + else + idle_gen_drive_40xx(vdev, false); +} + +static void +pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status) +{ + u32 val; + + val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val); + REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val); + + val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, STATUS_DLY, status, val); + REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, val); +} + +static void pwr_island_trickle_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); +} + +static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); +} + +static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val); +} + +static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val); +} + +static void pwr_island_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + pwr_island_trickle_drive_37xx(vdev, true); + ndelay(500); + pwr_island_drive_37xx(vdev, true); + } else { + pwr_island_trickle_drive_40xx(vdev, true); + ndelay(500); + pwr_island_drive_40xx(vdev, true); + } +} + +static int wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, exp_val, + PWR_ISLAND_STATUS_TIMEOUT_US); + else + return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU, exp_val, + PWR_ISLAND_STATUS_TIMEOUT_US); +} + +static void pwr_island_isolation_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0); + + if (enable) + val =
REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val); +} + +static void pwr_island_isolation_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val); +} + +static void pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + pwr_island_isolation_drive_37xx(vdev, enable); + else + pwr_island_isolation_drive_40xx(vdev, enable); +} + +static void pwr_island_isolation_disable(struct ivpu_device *vdev) +{ + pwr_island_isolation_drive(vdev, false); +} + +static void host_ss_clk_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } + + REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val); +} + +static void host_ss_clk_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); + } + + REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val); +} + +static void host_ss_clk_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_clk_drive_37xx(vdev, enable); + else + host_ss_clk_drive_40xx(vdev, enable); +} + +static void host_ss_clk_enable(struct ivpu_device *vdev) +{ + host_ss_clk_drive(vdev, true); +} + +static void host_ss_rst_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } + + REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val); +} + +static void host_ss_rst_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); + val = 
REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); + } + + REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val); +} + +static void host_ss_rst_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_rst_drive_37xx(vdev, enable); + else + host_ss_rst_drive_40xx(vdev, enable); +} + +static void host_ss_rst_enable(struct ivpu_device *vdev) +{ + host_ss_rst_drive(vdev, true); +} + +static void host_ss_noc_qreqn_top_socmmio_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val); +} + +static void host_ss_noc_qreqn_top_socmmio_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val); +} + +static void host_ss_noc_qreqn_top_socmmio_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_noc_qreqn_top_socmmio_drive_37xx(vdev, enable); + else + host_ss_noc_qreqn_top_socmmio_drive_40xx(vdev, enable); +} + +static int host_ss_axi_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + + host_ss_noc_qreqn_top_socmmio_drive(vdev, enable); + + ret = host_ss_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed HOST SS NOC QACCEPTN check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed HOST SS NOC QDENY check: %d\n", ret); + + return ret; +} + +static void top_noc_qreqn_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + + REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val); +} + +static void top_noc_qreqn_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + + REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val); +} + +static void top_noc_qreqn_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + top_noc_qreqn_drive_37xx(vdev, enable); + else + top_noc_qreqn_drive_40xx(vdev, enable); +} + +int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev) +{ + return host_ss_axi_drive(vdev, true); +} + +static int top_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN); + + if 
(!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qacceptn_check_37xx(vdev, exp_val); + else + return top_noc_qacceptn_check_40xx(vdev, exp_val); +} + +static int top_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qdeny_check_37xx(vdev, exp_val); + else + return top_noc_qdeny_check_40xx(vdev, exp_val); +} + +static int top_noc_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + + top_noc_qreqn_drive(vdev, enable); + + ret = top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed TOP NOC QACCEPTN check: %d\n", ret); + return ret; + } + + ret = top_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed TOP NOC QDENY check: %d\n", ret); + + return ret; +} + +int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev) +{ + return top_noc_drive(vdev, true); +} + +static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val); +} + +static void pwr_island_delay_set(struct ivpu_device *vdev) +{ + bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH; + u32 post, post1, post2, status; + + if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX) + return; + + switch (ivpu_device_id(vdev)) { + case PCI_DEVICE_ID_PTL_P: + post = high ? 18 : 0; + post1 = 0; + post2 = 0; + status = high ? 
46 : 3; + break; + + default: + dump_stack(); + ivpu_err(vdev, "Unknown device ID\n"); + return; + } + + pwr_island_delay_set_50xx(vdev, post, post1, post2, status); +} + +int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev) +{ + int ret; + + pwr_island_delay_set(vdev); + pwr_island_enable(vdev); + + ret = wait_for_pwr_island_status(vdev, 0x1); + if (ret) { + ivpu_err(vdev, "Timed out waiting for power island status\n"); + return ret; + } + + ret = top_noc_qreqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed TOP NOC QREQN check %d\n", ret); + return ret; + } + + host_ss_clk_enable(vdev); + pwr_island_isolation_disable(vdev); + host_ss_rst_enable(vdev); + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + dpu_active_drive_37xx(vdev, true); + + return ret; +} + +u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT); + else + return REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT); +} + +static void ivpu_hw_ip_snoop_disable_37xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + + if (ivpu_is_force_snoop_enabled(vdev)) + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + else + val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + + REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +static void ivpu_hw_ip_snoop_disable_40xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); + + if (ivpu_is_force_snoop_enabled(vdev)) + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); + + REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ivpu_hw_ip_snoop_disable_37xx(vdev); + else + return ivpu_hw_ip_snoop_disable_40xx(vdev); +} + +static void ivpu_hw_ip_tbu_mmu_enable_37xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV); + + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + + REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val); +} + +static void ivpu_hw_ip_tbu_mmu_enable_40xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV); + + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + + REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val); +} + +void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev) +{ + 
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ivpu_hw_ip_tbu_mmu_enable_37xx(vdev); + else + return ivpu_hw_ip_tbu_mmu_enable_40xx(vdev); +} + +static int soc_cpu_boot_37xx(struct ivpu_device *vdev) +{ + u32 val; + + val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); + + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = vdev->fw->entry_point >> 9; + REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); + + val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume"); + + return 0; +} + +static int cpu_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int cpu_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static void cpu_noc_top_mmio_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); + else + val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); + REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val); +} + +static int soc_cpu_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + int ret; + + cpu_noc_top_mmio_drive_40xx(vdev, enable); + + ret = cpu_noc_qacceptn_check_40xx(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = cpu_noc_qdeny_check_40xx(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int soc_cpu_enable(struct ivpu_device *vdev) +{ + return soc_cpu_drive_40xx(vdev, true); +} + +static int soc_cpu_boot_40xx(struct ivpu_device *vdev) +{ + int ret; + u32 val; + u64 val64; + + ret = soc_cpu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret); + return ret; + } + + val64 = vdev->fw->entry_point; + val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; + REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); + + val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); + val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + ivpu_fw_is_cold_boot(vdev) ? 
"cold boot" : "resume"); + + return 0; +} + +int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return soc_cpu_boot_37xx(vdev); + else + return soc_cpu_boot_40xx(vdev); +} + +static void wdt_disable_37xx(struct ivpu_device *vdev) +{ + u32 val; + + /* Enable writing and set non-zero WDT value */ + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + /* Enable writing and disable watchdog timer */ + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0); + + /* Now clear the timeout interrupt */ + val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); +} + +static void wdt_disable_40xx(struct ivpu_device *vdev) +{ + u32 val; + + REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0); + + val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val); +} + +void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return wdt_disable_37xx(vdev); + else + return wdt_disable_40xx(vdev); +} + +static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev) +{ + u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev) +{ + u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ipc_rx_count_get_37xx(vdev); + else + return ipc_rx_count_get_40xx(vdev); +} + +void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_37XX); + REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_37XX); + } else { + REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_40XX); + REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_40XX); + } +} + +void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0); + } else { + REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul); + } +} + +static void diagnose_failure_37xx(struct ivpu_device *vdev) +{ + u32 reg = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX; + + if (ipc_rx_count_get_37xx(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); +} + +static void 
diagnose_failure_40xx(struct ivpu_device *vdev) +{ + u32 reg = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX; + + if (ipc_rx_count_get_40xx(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); +} + +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + diagnose_failure_37xx(vdev); + else + diagnose_failure_40xx(vdev); +} + +void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_37XX); + else + REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_40XX); +} + +static void irq_wdt_nce_handler(struct ivpu_device *vdev) +{ + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); +} + +static void irq_wdt_mss_handler(struct ivpu_device *vdev) +{ + ivpu_hw_ip_wdt_disable(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); +} + +static void irq_noc_firewall_handler(struct ivpu_device *vdev) +{ + atomic_inc(&vdev->hw->firewall_irq_counter); + + ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n", + atomic_read(&vdev->hw->firewall_irq_counter)); +} + +/* Handler for IRQs from NPU core */ +bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX; + + if (!status) + return false; + + REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) + irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + irq_noc_firewall_handler(vdev); + + return true; +} + +/* Handler for IRQs from NPU core */ +bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX; + + if (!status) + return false; + + REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, 
CPU_INT_REDIRECT_1_INT, status)) + irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + irq_noc_firewall_handler(vdev); + + return true; +} + +static void db_set_37xx(struct ivpu_device *vdev, u32 db_id) +{ + u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET); + + REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); +} + +static void db_set_40xx(struct ivpu_device *vdev, u32 db_id) +{ + u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET); + + REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); +} + +void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + db_set_37xx(vdev, db_id); + else + db_set_40xx(vdev, db_id); +} + +u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM); + else + return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM); +} + +void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); + else + REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); +} diff --git a/drivers/accel/ivpu/ivpu_hw_ip.h b/drivers/accel/ivpu/ivpu_hw_ip.h new file mode 100644 index 000000000000..5b1b391aa577 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_ip.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_HW_IP_H__ +#define __IVPU_HW_IP_H__ + +#include "ivpu_drv.h" + +int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev); +void ivpu_hw_ip_idle_gen_enable(struct ivpu_device *vdev); +void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev); +int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev); +int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev); +int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev); +u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev); +void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev); +void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev); +int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev); +void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev); +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev); +u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev); +void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev); +bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq); +bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq); +void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id); +u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev); +void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr); +void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev); +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev); +void ivpu_hw_ip_fabric_req_override_enable_50xx(struct ivpu_device *vdev); +void ivpu_hw_ip_fabric_req_override_disable_50xx(struct ivpu_device *vdev); + +#endif /* __IVPU_HW_IP_H__ */ diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index fa66c39b57ec..01ebf88fe6ef 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 
2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/genalloc.h> @@ -15,6 +15,7 @@ #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" #define IPC_MAX_RX_MSG 128 @@ -58,8 +59,8 @@ static void ivpu_ipc_mem_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - ivpu_bo_free_internal(ipc->mem_rx); - ivpu_bo_free_internal(ipc->mem_tx); + ivpu_bo_free(ipc->mem_rx); + ivpu_bo_free(ipc->mem_tx); } static int @@ -129,7 +130,7 @@ static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr) static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr) { - ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr); + ivpu_hw_ipc_tx_set(vdev, vpu_addr); } static void @@ -210,8 +211,7 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr); } -static int -ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) +int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) { struct ivpu_ipc_info *ipc = vdev->ipc; int ret; @@ -228,6 +228,7 @@ ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct v goto unlock; ivpu_ipc_tx(vdev, cons->tx_vpu_addr); + trace_jsm("[tx]", req); unlock: mutex_unlock(&ipc->lock); @@ -279,12 +280,13 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg)); if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { - ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); ret = -EBADMSG; } if (jsm_msg) memcpy(jsm_msg, rx_msg->jsm_msg, size); + trace_jsm("[rx]", rx_msg->jsm_msg); } ivpu_ipc_rx_msg_del(vdev, rx_msg); @@ -292,15 +294,16 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, return ret; } -static int +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms) + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms) { struct ivpu_ipc_consumer cons; int ret; + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); ret = ivpu_ipc_send(vdev, &cons, req); @@ -326,19 +329,21 @@ consumer_del: return ret; } -int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms) +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms) { struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; struct vpu_jsm_msg hb_resp; int ret, hb_ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms); if (ret != -ETIMEDOUT) - return ret; + goto rpm_put; hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, @@ -346,21 +351,33 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r if (hb_ret 
== -ETIMEDOUT) ivpu_pm_trigger_recovery(vdev, "IPC timeout"); +rpm_put: + ivpu_rpm_put(vdev); return ret; } -int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms) +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms) { + struct ivpu_ipc_consumer cons; int ret; ret = ivpu_rpm_get(vdev); if (ret < 0) return ret; - ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); + + ret = ivpu_ipc_send(vdev, &cons, req); + if (ret) { + ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); + goto consumer_del; + } + + msleep(timeout_ms); +consumer_del: + ivpu_ipc_consumer_del(vdev, &cons); ivpu_rpm_put(vdev); return ret; } @@ -378,7 +395,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons return false; } -void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) +void ivpu_ipc_irq_handler(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_consumer *cons; @@ -392,8 +409,8 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) * Driver needs to purge all messages from IPC FIFO to clear IPC interrupt. * Without purge IPC FIFO to 0 next IPC interrupts won't be generated. */ - while (ivpu_hw_reg_ipc_rx_count_get(vdev)) { - vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); + while (ivpu_hw_ipc_rx_count_get(vdev)) { + vpu_addr = ivpu_hw_ipc_rx_addr_get(vdev); if (vpu_addr == REG_IO_ERROR) { ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n"); return; @@ -442,11 +459,12 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) } } - if (wake_thread) - *wake_thread = !list_empty(&ipc->cb_msg_list); + if (!list_empty(&ipc->cb_msg_list)) + if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_IPC)) + ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); } -irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) +void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg, *r; @@ -462,8 +480,6 @@ irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); ivpu_ipc_rx_msg_del(vdev, rx_msg); } - - return IRQ_HANDLED; } int ivpu_ipc_init(struct ivpu_device *vdev) @@ -471,13 +487,13 @@ int ivpu_ipc_init(struct ivpu_device *vdev) struct ivpu_ipc_info *ipc = vdev->ipc; int ret; - ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + ipc->mem_tx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!ipc->mem_tx) { ivpu_err(vdev, "Failed to allocate mem_tx\n"); return -ENOMEM; } - ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + ipc->mem_rx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!ipc->mem_rx) { ivpu_err(vdev, "Failed to allocate mem_rx\n"); ret = -ENOMEM; @@ -501,14 +517,18 @@ int ivpu_ipc_init(struct ivpu_device *vdev) spin_lock_init(&ipc->cons_lock); INIT_LIST_HEAD(&ipc->cons_list); INIT_LIST_HEAD(&ipc->cb_msg_list); - drmm_mutex_init(&vdev->drm, &ipc->lock); + ret = drmm_mutex_init(&vdev->drm, &ipc->lock); + if (ret) { + ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret); + goto err_free_rx; + } ivpu_ipc_reset(vdev); return 0; err_free_rx: - 
ivpu_bo_free_internal(ipc->mem_rx); + ivpu_bo_free(ipc->mem_rx); err_free_tx: - ivpu_bo_free_internal(ipc->mem_tx); + ivpu_bo_free(ipc->mem_tx); return ret; } @@ -516,7 +536,6 @@ void ivpu_ipc_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - drm_WARN_ON(&vdev->drm, ipc->on); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list)); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index 40ca3cc4e61f..b4dfb504679b 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_IPC_H__ @@ -89,22 +89,25 @@ void ivpu_ipc_enable(struct ivpu_device *vdev); void ivpu_ipc_disable(struct ivpu_device *vdev); void ivpu_ipc_reset(struct ivpu_device *vdev); -void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread); -irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev); void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel, ivpu_ipc_rx_callback_t callback); void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons); +int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct vpu_jsm_msg *req); int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms); - -int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms); #endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index e70cfb859339..7149312f16e1 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <drm/drm_file.h> @@ -12,50 +12,106 @@ #include <uapi/drm/ivpu_accel.h> #include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" #define CMD_BUF_IDX 0 -#define JOB_ID_JOB_MASK GENMASK(7, 0) -#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535 static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) { - ivpu_hw_reg_db_set(vdev, cmdq->db_id); + ivpu_hw_db_set(vdev, cmdq->db_id); } -static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_preemption_buffers_create(struct ivpu_device 
*vdev, + struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE); + u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE); + + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW || + ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE) + return 0; + + cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user, + primary_size, DRM_IVPU_BO_WC); + if (!cmdq->primary_preempt_buf) { + ivpu_err(vdev, "Failed to create primary preemption buffer\n"); + return -ENOMEM; + } + + cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma, + secondary_size, DRM_IVPU_BO_WC); + if (!cmdq->secondary_preempt_buf) { + ivpu_err(vdev, "Failed to create secondary preemption buffer\n"); + goto err_free_primary; + } + + return 0; + +err_free_primary: + ivpu_bo_free(cmdq->primary_preempt_buf); + cmdq->primary_preempt_buf = NULL; + return -ENOMEM; +} + +static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, + struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + return; + + if (cmdq->primary_preempt_buf) + ivpu_bo_free(cmdq->primary_preempt_buf); + if (cmdq->secondary_preempt_buf) + ivpu_bo_free(cmdq->secondary_preempt_buf); +} + +static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { struct ivpu_device *vdev = file_priv->vdev; - struct vpu_job_queue_header *jobq_header; struct ivpu_cmdq *cmdq; + int ret; cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); if (!cmdq) return NULL; - cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); - if (!cmdq->mem) - goto cmdq_free; + ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, + GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); + goto err_free_cmdq; + } - cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); - cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) / - sizeof(struct vpu_job_queue_entry)); + ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, + &file_priv->cmdq_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret); + goto err_erase_db_xa; + } - cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); - jobq_header = &cmdq->jobq->header; - jobq_header->engine_idx = engine; - jobq_header->head = 0; - jobq_header->tail = 0; - wmb(); /* Flush WC buffer for jobq->header */ + cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); + if (!cmdq->mem) + goto err_erase_cmdq_xa; + + ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq); + if (ret) + ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n"); return cmdq; -cmdq_free: +err_erase_cmdq_xa: + xa_erase(&file_priv->cmdq_xa, cmdq->id); +err_erase_db_xa: + xa_erase(&vdev->db_xa, cmdq->db_id); +err_free_cmdq: kfree(cmdq); return NULL; } @@ -65,91 +121,173 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c if (!cmdq) return; - ivpu_bo_free_internal(cmdq->mem); + ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq); + ivpu_bo_free(cmdq->mem); + xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); kfree(cmdq); } -static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_hws_cmdq_init(struct 
ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, + u8 priority) +{ + struct ivpu_device *vdev = file_priv->vdev; + int ret; + + ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id, + task_pid_nr(current), engine, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + if (ret) + return ret; + + ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id, + priority); + if (ret) + return ret; + + return 0; +} + +static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { struct ivpu_device *vdev = file_priv->vdev; - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + int ret; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + else + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n", + cmdq->db_id, cmdq->id, file_priv->ctx.id); + + return ret; +} + +static int +ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct vpu_job_queue_header *jobq_header; int ret; lockdep_assert_held(&file_priv->lock); - if (!cmdq) { - cmdq = ivpu_cmdq_alloc(file_priv, engine); - if (!cmdq) - return NULL; - file_priv->cmdq[engine] = cmdq; + if (cmdq->db_registered) + return 0; + + cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) / + sizeof(struct vpu_job_queue_entry)); + + cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); + jobq_header = &cmdq->jobq->header; + jobq_header->engine_idx = VPU_ENGINE_COMPUTE; + jobq_header->head = 0; + jobq_header->tail = 0; + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { + ivpu_dbg(vdev, JOB, "Turbo mode enabled"); + jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; } - if (cmdq->db_registered) - return cmdq; + wmb(); /* Flush WC buffer for jobq->header */ - ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, - cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority); + if (ret) + return ret; + } + + ret = ivpu_register_db(file_priv, cmdq); if (ret) - return NULL; + return ret; cmdq->db_registered = true; - return cmdq; + return 0; } -static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + struct ivpu_device *vdev = file_priv->vdev; + int ret; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - file_priv->cmdq[engine] = NULL; - if (cmdq->db_registered) - ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); + if (!cmdq->db_registered) + return 0; - ivpu_cmdq_free(file_priv, cmdq); + cmdq->db_registered = false; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); + if (!ret) + ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id); } + + ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + + return 0; } -void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct 
ivpu_file_priv *file_priv, u8 priority) { - int i; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; + int ret; lockdep_assert_held(&file_priv->lock); - for (i = 0; i < IVPU_NUM_ENGINES; i++) - ivpu_cmdq_release_locked(file_priv, i); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + if (cmdq->priority == priority) + break; + + if (!cmdq) { + cmdq = ivpu_cmdq_alloc(file_priv); + if (!cmdq) + return NULL; + cmdq->priority = priority; + } + + ret = ivpu_cmdq_init(file_priv, cmdq, priority); + if (ret) + return NULL; + + return cmdq; } -/* - * Mark the doorbell as unregistered and reset job queue pointers. - * This function needs to be called when the VPU hardware is restarted - * and FW loses job queue state. The next time job queue is used it - * will be registered again. - */ -static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) { - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - cmdq->db_registered = false; - cmdq->jobq->header.head = 0; - cmdq->jobq->header.tail = 0; - wmb(); /* Flush WC buffer for jobq header */ + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { + xa_erase(&file_priv->cmdq_xa, cmdq_id); + ivpu_cmdq_fini(file_priv, cmdq); + ivpu_cmdq_free(file_priv, cmdq); } } -static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) +/* + * Mark the doorbell as unregistered + * This function needs to be called when the VPU hardware is restarted + * and FW loses job queue state. The next time job queue is used it + * will be registered again. + */ +static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv) { - int i; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; mutex_lock(&file_priv->lock); - for (i = 0; i < IVPU_NUM_ENGINES; i++) - ivpu_cmdq_reset_locked(file_priv, i); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + cmdq->db_registered = false; mutex_unlock(&file_priv->lock); } @@ -162,10 +300,30 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) mutex_lock(&vdev->context_list_lock); xa_for_each(&vdev->context_xa, ctx_id, file_priv) - ivpu_cmdq_reset_all(file_priv); + ivpu_cmdq_reset(file_priv); mutex_unlock(&vdev->context_list_lock); +} + +static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv) +{ + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_fini(file_priv, cmdq); +} + +void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) +{ + struct ivpu_device *vdev = file_priv->vdev; + + lockdep_assert_held(&file_priv->lock); + + ivpu_cmdq_fini_all(file_priv); + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) + ivpu_jsm_context_release(vdev, file_priv->ctx.id); } static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) @@ -178,17 +336,31 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) /* Check if there is space left in job queue */ if (next_entry == header->head) { - ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", - job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n", + job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail); return -EBUSY; } - entry = &cmdq->jobq->job[tail]; + entry = &cmdq->jobq->slot[tail].job; entry->batch_buf_addr = job->cmd_buf_vpu_addr; entry->job_id = 
job->job_id; entry->flags = 0; if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION)) entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + if (cmdq->primary_preempt_buf) { + entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr; + entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf); + } + + if (cmdq->secondary_preempt_buf) { + entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr; + entry->secondary_preempt_buf_size = + ivpu_bo_size(cmdq->secondary_preempt_buf); + } + } + wmb(); /* Ensure that tail is updated after filling entry */ header->tail = next_entry; wmb(); /* Flush WC buffer for jobq header */ @@ -277,6 +449,7 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) job->file_priv = ivpu_file_priv_get(file_priv); + trace_job("create", job); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); return job; @@ -285,11 +458,28 @@ err_free_job: return NULL; } +static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id) +{ + struct ivpu_job *job; + + xa_lock(&vdev->submitted_jobs_xa); + job = __xa_erase(&vdev->submitted_jobs_xa, job_id); + + if (xa_empty(&vdev->submitted_jobs_xa) && job) { + vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts), + vdev->busy_time); + } + + xa_unlock(&vdev->submitted_jobs_xa); + + return job; +} + static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) { struct ivpu_job *job; - job = xa_erase(&vdev->submitted_jobs_xa, job_id); + job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); if (!job) return -ENOENT; @@ -299,6 +489,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job->bos[CMD_BUF_IDX]->job_status = job_status; dma_fence_signal(job->done_fence); + trace_job("done", job); ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); @@ -318,12 +509,12 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); } -static int ivpu_job_submit(struct ivpu_job *job) +static int ivpu_job_submit(struct ivpu_job *job, u8 priority) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; - struct xa_limit job_id_range; struct ivpu_cmdq *cmdq; + bool is_first_job; int ret; ret = ivpu_rpm_get(vdev); @@ -332,20 +523,19 @@ static int ivpu_job_submit(struct ivpu_job *job) mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); + cmdq = ivpu_cmdq_acquire(file_priv, priority); if (!cmdq) { - ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n", - file_priv->ctx.id, job->engine_idx); + ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", + file_priv->ctx.id, job->engine_idx, priority); ret = -EINVAL; goto err_unlock_file_priv; } - job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); - job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; - xa_lock(&vdev->submitted_jobs_xa); - ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); - if (ret) { + is_first_job = xa_empty(&vdev->submitted_jobs_xa); + ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); + if (ret < 
0) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; @@ -363,10 +553,13 @@ static int ivpu_job_submit(struct ivpu_job *job) wmb(); /* Flush WC buffer for jobq header */ } else { ivpu_cmdq_ring_db(vdev, cmdq); + if (is_first_job) + vdev->busy_start_ts = ktime_get(); } - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, + trace_job("submit", job); + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, job->engine_idx, priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); xa_unlock(&vdev->submitted_jobs_xa); @@ -454,6 +647,14 @@ unlock_reservations: return ret; } +static inline u8 ivpu_job_to_hws_priority(struct ivpu_file_priv *file_priv, u8 priority) +{ + if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT) + return DRM_IVPU_JOB_PRIORITY_NORMAL; + + return priority - 1; +} + int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -462,8 +663,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct ivpu_job *job; u32 *buf_handles; int idx, ret; + u8 priority; - if (params->engine > DRM_IVPU_ENGINE_COPY) + if (params->engine != DRM_IVPU_ENGINE_COMPUTE) return -EINVAL; if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) @@ -515,8 +717,10 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_destroy_job; } + priority = ivpu_job_to_hws_priority(file_priv, params->priority); + down_read(&vdev->pm->reset_lock); - ret = ivpu_job_submit(job); + ret = ivpu_job_submit(job, priority); up_read(&vdev->pm->reset_lock); if (ret) goto err_signal_fence; diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index ca4984071cc7..8b19e3f8b4cf 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_JOB_H__ @@ -24,10 +24,14 @@ struct ivpu_file_priv; */ struct ivpu_cmdq { struct vpu_job_queue *jobq; + struct ivpu_bo *primary_preempt_buf; + struct ivpu_bo *secondary_preempt_buf; struct ivpu_bo *mem; u32 entry_count; + u32 id; u32 db_id; bool db_registered; + u8 priority; }; /** @@ -55,6 +59,8 @@ struct ivpu_job { int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv); + void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 8cea0dd731b9..30a40be76930 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include "ivpu_drv.h" @@ -48,9 +48,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); - IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); 
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); @@ -103,14 +104,10 @@ int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) { - ivpu_err_ratelimited(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); - return ret; - } - - ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret); - return 0; + return ret; } int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) @@ -123,14 +120,10 @@ int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) { - ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); - return ret; - } - - ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %u: %d\n", db_id, ret); - return 0; + return ret; } int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) @@ -139,7 +132,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.query_engine_hb.engine_idx = engine; @@ -162,7 +155,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_reset.engine_idx = engine; @@ -181,7 +174,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_preempt.engine_idx = engine; @@ -204,7 +197,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN); ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, - VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm); if (ret) ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); @@ -255,11 +248,16 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid) { struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE }; struct vpu_jsm_msg resp; + int ret; req.payload.ssid_release.host_ssid = host_ssid; - return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, - VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to release context: %d\n", ret); + + return ret; } int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) @@ -273,11 +271,294 @@ int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) req.payload.pwr_d0i3_enter.send_response = 1; - ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, - &resp, VPU_IPC_CHAN_GEN_CMD, - 
vdev->timeout.d0i3_entry_msg); + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, &resp, + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.d0i3_entry_msg); if (ret) return ret; return ivpu_hw_wait_for_idle(vdev); } + +int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id, + u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_CREATE_CMD_QUEUE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_create_cmdq.host_ssid = ctx_id; + req.payload.hws_create_cmdq.process_id = pid; + req.payload.hws_create_cmdq.engine_idx = engine; + req.payload.hws_create_cmdq.cmdq_group = cmdq_group; + req.payload.hws_create_cmdq.cmdq_id = cmdq_id; + req.payload.hws_create_cmdq.cmdq_base = cmdq_base; + req.payload.hws_create_cmdq.cmdq_size = cmdq_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to create command queue: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DESTROY_CMD_QUEUE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_destroy_cmdq.host_ssid = ctx_id; + req.payload.hws_destroy_cmdq.cmdq_id = cmdq_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to destroy command queue: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id, + u64 cmdq_base, u32 cmdq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_REGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.hws_register_db.db_id = db_id; + req.payload.hws_register_db.host_ssid = ctx_id; + req.payload.hws_register_db.cmdq_id = cmdq_id; + req.payload.hws_register_db.cmdq_base = cmdq_base; + req.payload.hws_register_db.cmdq_size = cmdq_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret); + + return ret; +} + +int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_ENGINE_RESUME }; + struct vpu_jsm_msg resp; + int ret; + + if (engine != VPU_ENGINE_COMPUTE) + return -EINVAL; + + req.payload.hws_resume_engine.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret); + + return ret; +} + +int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, + u32 priority) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_set_context_sched_properties.host_ssid = ctx_id; + req.payload.hws_set_context_sched_properties.cmdq_id = cmdq_id; + req.payload.hws_set_context_sched_properties.priority_band = priority; + req.payload.hws_set_context_sched_properties.realtime_priority_level = 0; + req.payload.hws_set_context_sched_properties.in_process_priority = 0; + 
req.payload.hws_set_context_sched_properties.context_quantum = 20000; + req.payload.hws_set_context_sched_properties.grace_period_same_priority = 10000; + req.payload.hws_set_context_sched_properties.grace_period_lower_priority = 0; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set context sched properties: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, + u64 vpu_log_buffer_va) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_set_scheduling_log.engine_idx = engine_idx; + req.payload.hws_set_scheduling_log.host_ssid = host_ssid; + req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va; + req.payload.hws_set_scheduling_log.notify_index = 0; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set scheduling log: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP }; + struct vpu_jsm_msg resp; + int ret; + + /* Idle */ + req.payload.hws_priority_band_setup.grace_period[0] = 0; + req.payload.hws_priority_band_setup.process_grace_period[0] = 50000; + req.payload.hws_priority_band_setup.process_quantum[0] = 160000; + /* Normal */ + req.payload.hws_priority_band_setup.grace_period[1] = 50000; + req.payload.hws_priority_band_setup.process_grace_period[1] = 50000; + req.payload.hws_priority_band_setup.process_quantum[1] = 300000; + /* Focus */ + req.payload.hws_priority_band_setup.grace_period[2] = 50000; + req.payload.hws_priority_band_setup.process_grace_period[2] = 50000; + req.payload.hws_priority_band_setup.process_quantum[2] = 200000; + /* Realtime */ + req.payload.hws_priority_band_setup.grace_period[3] = 0; + req.payload.hws_priority_band_setup.process_grace_period[3] = 50000; + req.payload.hws_priority_band_setup.process_quantum[3] = 200000; + + req.payload.hws_priority_band_setup.normal_band_percentage = 10; + + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP, + &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set priority bands: %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_START }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.sampling_rate = sampling_rate; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_START_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to start metric streamer: ret %d\n", ret); + return ret; + } + + return ret; +} + +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_STOP }; + struct 
vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_stop.metric_group_mask = metric_group_mask; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to stop metric streamer: ret %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_UPDATE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_update.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_update.buffer_addr = buffer_addr; + req.payload.metric_streamer_update.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to update metric streamer: ret %d\n", ret); + return ret; + } + + if (buffer_size && resp.payload.metric_streamer_done.bytes_written > buffer_size) { + ivpu_warn_ratelimited(vdev, "MS buffer overflow: bytes_written %#llx > buffer_size %#llx\n", + resp.payload.metric_streamer_done.bytes_written, buffer_size); + return -EOVERFLOW; + } + + *bytes_written = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_INFO }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to get metric streamer info: ret %d\n", ret); + return ret; + } + + if (!resp.payload.metric_streamer_done.sample_size) { + ivpu_warn_ratelimited(vdev, "Invalid sample size\n"); + return -EBADMSG; + } + + if (sample_size) + *sample_size = resp.payload.metric_streamer_done.sample_size; + if (info_size) + *info_size = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_ENABLE }; + struct vpu_jsm_msg resp; + + req.payload.pwr_dct_control.dct_active_us = active_us; + req.payload.pwr_dct_control.dct_inactive_us = inactive_us; + + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_dct_disable(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_DISABLE }; + struct vpu_jsm_msg resp; + + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_state_dump(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP }; + + return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD, + vdev->timeout.state_dump_msg); +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index ae75e5dbcc41..9e84d3526a14 100644 --- 
a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_JSM_MSG_H__ @@ -23,4 +23,26 @@ int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 tra u64 trace_hw_component_mask); int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid); int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev); +int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id, + u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size); +int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id); +int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id, + u64 cmdq_base, u32 cmdq_size); +int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine); +int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, + u32 priority); +int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, + u64 vpu_log_buffer_va); +int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev); +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size); +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask); +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written); +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size); +int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us); +int ivpu_jsm_dct_disable(struct ivpu_device *vdev); +int ivpu_jsm_state_dump(struct ivpu_device *vdev); + #endif diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 91bd640655ab..26ef52fbb93e 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/circ_buf.h> @@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd) case IVPU_MMU_EVT_F_VMS_FETCH: return "Fetch of VMS caused external abort"; default: - return "Unknown CMDQ command"; + return "Unknown event"; } } @@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err) { switch (err) { case IVPU_MMU_CERROR_NONE: - return "No CMDQ Error"; + return "No error"; case IVPU_MMU_CERROR_ILL: return "Illegal command"; case IVPU_MMU_CERROR_ABT: - return "External abort on CMDQ read"; + return "External abort on command queue read"; case IVPU_MMU_CERROR_ATC_INV_SYNC: return "Sync failed to complete ATS invalidation"; default: - return "Unknown CMDQ Error"; + return "Unknown error"; } } @@ -519,7 +519,8 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) if (ret) return ret; - clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod); ret = ivpu_mmu_cmdq_wait_for_cons(vdev); @@ -567,7 +568,8 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) int ret; memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE); - clflush_cache_range(mmu->cmdq.base, 
IVPU_MMU_CMDQ_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); mmu->cmdq.prod = 0; mmu->cmdq.cons = 0; @@ -661,7 +663,8 @@ static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) WRITE_ONCE(entry[1], str[1]); WRITE_ONCE(entry[0], str[0]); - clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]); } @@ -693,7 +696,7 @@ unlock: return ret; } -static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) +static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid) { struct ivpu_mmu_info *mmu = vdev->mmu; struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; @@ -705,40 +708,40 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) return -EINVAL; entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); - - if (cd_dma != 0) { - cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | - FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | - IVPU_MMU_CD_0_TCR_EPD1 | - IVPU_MMU_CD_0_AA64 | - IVPU_MMU_CD_0_R | - IVPU_MMU_CD_0_ASET | - IVPU_MMU_CD_0_V; - cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; - cd[2] = 0; - cd[3] = 0x0000000000007444; - - /* For global context generate memory fault on VPU */ - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - cd[0] |= IVPU_MMU_CD_0_A; - } else { - memset(cd, 0, sizeof(cd)); - } + drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid); + + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | + IVPU_MMU_CD_0_TCR_EPD1 | + IVPU_MMU_CD_0_AA64 | + IVPU_MMU_CD_0_R | + IVPU_MMU_CD_0_ASET; + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; + cd[2] = 0; + cd[3] = 0x0000000000007444; + + /* For global context generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + cd[0] |= IVPU_MMU_CD_0_A; + + if (valid) + cd[0] |= IVPU_MMU_CD_0_V; WRITE_ONCE(entry[1], cd[1]); WRITE_ONCE(entry[2], cd[2]); WRITE_ONCE(entry[3], cd[3]); WRITE_ONCE(entry[0], cd[0]); - clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); - ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", - cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); + ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + valid ? 
"valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); mutex_lock(&mmu->lock); if (!mmu->on) @@ -746,38 +749,18 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); if (ret) - goto unlock; + goto err_invalidate; ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + goto err_invalidate; unlock: mutex_unlock(&mmu->lock); - return ret; -} - -static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); - if (ret) - ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); - - return ret; -} - -static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) -{ - int ret; - - if (ssid == 0) { - ivpu_err(vdev, "Invalid SSID: %u\n", ssid); - return -EINVAL; - } - - ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); - if (ret) - ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); + return 0; +err_invalidate: + WRITE_ONCE(entry[0], 0); + mutex_unlock(&mmu->lock); return ret; } @@ -804,12 +787,6 @@ int ivpu_mmu_init(struct ivpu_device *vdev) return ret; } - ret = ivpu_mmu_cd_add_gbl(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); - return ret; - } - ret = ivpu_mmu_enable(vdev); if (ret) { ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); @@ -874,8 +851,9 @@ static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event) u64 in_addr = ((u64)event[5]) << 32 | event[4]; u32 sid = event[1]; - ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", - op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr); + ivpu_err_ratelimited(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", + op, ivpu_mmu_event_to_str(op), ssid, sid, + event[2], event[3], in_addr, fetch_addr); } static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) @@ -911,6 +889,9 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_mmu_user_context_mark_invalid(vdev, ssid); REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); } + + if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ)) + ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); } void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) @@ -958,12 +939,12 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val); } -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) { - return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); + return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true); } -void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid) { - ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ + ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false); } diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index 6fa35c240710..7afea9cd8731 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -40,8 +40,8 @@ struct ivpu_mmu_info { int ivpu_mmu_init(struct ivpu_device *vdev); void ivpu_mmu_disable(struct ivpu_device *vdev); int ivpu_mmu_enable(struct ivpu_device *vdev); -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); -void ivpu_mmu_clear_pgtable(struct 
ivpu_device *vdev, int ssid); +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid); int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index fe6161299236..0af614dfb6f9 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -6,6 +6,7 @@ #include <linux/bitfield.h> #include <linux/highmem.h> #include <linux/set_memory.h> +#include <linux/vmalloc.h> #include <drm/drm_cache.h> @@ -23,6 +24,7 @@ #define IVPU_MMU_ENTRY_FLAG_CONT BIT(52) #define IVPU_MMU_ENTRY_FLAG_NG BIT(11) #define IVPU_MMU_ENTRY_FLAG_AF BIT(10) +#define IVPU_MMU_ENTRY_FLAG_RO BIT(7) #define IVPU_MMU_ENTRY_FLAG_USER BIT(6) #define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2) #define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1) @@ -88,19 +90,6 @@ static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_ } } -static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) -{ - dma_addr_t pgd_dma; - - pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); - if (!pgtable->pgd_dma_ptr) - return -ENOMEM; - - pgtable->pgd_dma = pgd_dma; - - return 0; -} - static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) { int pgd_idx, pud_idx, pmd_idx; @@ -138,6 +127,27 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt } ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma); + pgtable->pgd_dma_ptr = NULL; + pgtable->pgd_dma = 0; +} + +static u64* +ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + u64 *pgd_dma_ptr = pgtable->pgd_dma_ptr; + dma_addr_t pgd_dma; + + if (pgd_dma_ptr) + return pgd_dma_ptr; + + pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); + if (!pgd_dma_ptr) + return NULL; + + pgtable->pgd_dma_ptr = pgd_dma_ptr; + pgtable->pgd_dma = pgd_dma; + + return pgd_dma_ptr; } static u64* @@ -235,6 +245,12 @@ ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID); + + /* Allocate PGD - first level page table if needed */ + if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable)) + return -ENOMEM; + /* Allocate PUD - second level page table if needed */ if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx)) return -ENOMEM; @@ -318,6 +334,91 @@ ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ct return 0; } +static void ivpu_mmu_context_set_page_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] |= IVPU_MMU_ENTRY_FLAG_RO; +} + +static void ivpu_mmu_context_split_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = 
FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] &= ~IVPU_MMU_ENTRY_FLAG_CONT; +} + +static void ivpu_mmu_context_split_64k_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + u64 start = ALIGN_DOWN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE); + u64 end = ALIGN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE); + u64 offset = 0; + + ivpu_dbg(vdev, MMU_MAP, "Split 64K page ctx: %u vpu_addr: 0x%llx\n", ctx->id, vpu_addr); + + while (start + offset < end) { + ivpu_mmu_context_split_page(vdev, ctx, start + offset); + offset += IVPU_MMU_PAGE_SIZE; + } +} + +int +ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, + size_t size) +{ + u64 end = vpu_addr + size; + size_t size_left = size; + int ret; + + if (size == 0) + return 0; + + if (drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr | size, IVPU_MMU_PAGE_SIZE))) + return -EINVAL; + + mutex_lock(&ctx->lock); + + ivpu_dbg(vdev, MMU_MAP, "Set read-only pages ctx: %u vpu_addr: 0x%llx size: %lu\n", + ctx->id, vpu_addr, size); + + if (!ivpu_disable_mmu_cont_pages) { + /* Split 64K contiguous page at the beginning if needed */ + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE)) + ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr); + + /* Split 64K contiguous page at the end if needed */ + if (!IS_ALIGNED(vpu_addr + size, IVPU_MMU_CONT_PAGES_SIZE)) + ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr + size); + } + + while (size_left) { + if (vpu_addr < end) + ivpu_mmu_context_set_page_ro(vdev, ctx, vpu_addr); + + vpu_addr += IVPU_MMU_PAGE_SIZE; + size_left -= IVPU_MMU_PAGE_SIZE; + } + + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); + + mutex_unlock(&ctx->lock); + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + + return 0; +} + static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) { while (size) { @@ -331,6 +432,7 @@ int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) { + size_t start_vpu_addr = vpu_addr; struct scatterlist *sg; int ret; u64 prot; @@ -361,20 +463,36 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); - mutex_unlock(&ctx->lock); - return ret; + goto err_unmap_pages; } vpu_addr += size; } + if (!ctx->is_cd_valid) { + ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n", + ctx->id, ret); + goto err_unmap_pages; + } + ctx->is_cd_valid = true; + } + /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); - mutex_unlock(&ctx->lock); - ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + goto err_unmap_pages; + } + + mutex_unlock(&ctx->lock); + return 0; + +err_unmap_pages: + ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr); + mutex_unlock(&ctx->lock); return ret; } @@ -443,65 +561,79 @@ ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *n mutex_unlock(&ctx->lock); } -static int 
-ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) { u64 start, end; - int ret; mutex_init(&ctx->lock); - ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); - if (ret) { - ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret); - return ret; - } - if (!context_id) { start = vdev->hw->ranges.global.start; end = vdev->hw->ranges.shave.end; } else { - start = vdev->hw->ranges.user.start; - end = vdev->hw->ranges.dma.end; + start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start); + end = max_t(u64, vdev->hw->ranges.user.end, vdev->hw->ranges.dma.end); } drm_mm_init(&ctx->mm, start, end - start); ctx->id = context_id; - - return 0; } -static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { - if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr)) - return; + if (ctx->is_cd_valid) { + ivpu_mmu_cd_clear(vdev, ctx->id); + ctx->is_cd_valid = false; + } mutex_destroy(&ctx->lock); ivpu_mmu_pgtables_free(vdev, &ctx->pgtable); drm_mm_takedown(&ctx->mm); - - ctx->pgtable.pgd_dma_ptr = NULL; - ctx->pgtable.pgd_dma = 0; } -int ivpu_mmu_global_context_init(struct ivpu_device *vdev) +void ivpu_mmu_global_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); + ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); } void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_context_fini(vdev, &vdev->gctx); + ivpu_mmu_context_fini(vdev, &vdev->gctx); } int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); + int ret; + + ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); + + mutex_lock(&vdev->rctx.lock); + + if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) { + ivpu_err(vdev, "Failed to allocate root page table for reserved context\n"); + ret = -ENOMEM; + goto err_ctx_fini; + } + + ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for reserved context\n"); + goto err_ctx_fini; + } + + mutex_unlock(&vdev->rctx.lock); + return ret; + +err_ctx_fini: + mutex_unlock(&vdev->rctx.lock); + ivpu_mmu_context_fini(vdev, &vdev->rctx); + return ret; } void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_user_context_fini(vdev, &vdev->rctx); + ivpu_mmu_cd_clear(vdev, vdev->rctx.id); + ivpu_mmu_context_fini(vdev, &vdev->rctx); } void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) @@ -516,36 +648,3 @@ void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) xa_unlock(&vdev->context_xa); } - -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) -{ - int ret; - - drm_WARN_ON(&vdev->drm, !ctx_id); - - ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); - if (ret) { - ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret); - return ret; - } - - ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); - if (ret) { - ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret); - goto err_context_fini; - } - - return 0; - -err_context_fini: - ivpu_mmu_context_fini(vdev, ctx); - 
return ret; -} - -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) -{ - drm_WARN_ON(&vdev->drm, !ctx->id); - - ivpu_mmu_clear_pgtable(vdev, ctx->id); - ivpu_mmu_context_fini(vdev, ctx); -} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index 535db3a1fc74..8042fc067062 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -23,19 +23,20 @@ struct ivpu_mmu_pgtable { }; struct ivpu_mmu_context { - struct mutex lock; /* Protects: mm, pgtable */ + struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */ struct drm_mm mm; struct ivpu_mmu_pgtable pgtable; + bool is_cd_valid; u32 id; }; -int ivpu_mmu_global_context_init(struct ivpu_device *vdev); +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id); +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_mmu_global_context_init(struct ivpu_device *vdev); void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev); void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev); -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, @@ -46,5 +47,7 @@ int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context * u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt); +int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, size_t size); #endif /* __IVPU_MMU_CONTEXT_H__ */ diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c new file mode 100644 index 000000000000..ffe7b10f8a76 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <drm/drm_file.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ms.h" +#include "ivpu_pm.h" + +#define MS_INFO_BUFFER_SIZE SZ_64K +#define MS_NUM_BUFFERS 2 +#define MS_READ_PERIOD_MULTIPLIER 2 +#define MS_MIN_SAMPLE_PERIOD_NS 1000000 + +static struct ivpu_ms_instance * +get_instance_by_mask(struct ivpu_file_priv *file_priv, u64 metric_mask) +{ + struct ivpu_ms_instance *ms; + + lockdep_assert_held(&file_priv->ms_lock); + + list_for_each_entry(ms, &file_priv->ms_instance_list, ms_instance_node) + if (ms->mask == metric_mask) + return ms; + + return NULL; +} + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_start *args = data; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 single_buff_size; + u32 sample_size; + int ret; + + if (!args->metric_group_mask || !args->read_period_samples || + args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + if (get_instance_by_mask(file_priv, args->metric_group_mask)) { + ivpu_err(vdev, "Instance already exists (mask 
%#llx)\n", args->metric_group_mask); + ret = -EALREADY; + goto unlock; + } + + ms = kzalloc(sizeof(*ms), GFP_KERNEL); + if (!ms) { + ret = -ENOMEM; + goto unlock; + } + + ms->mask = args->metric_group_mask; + + ret = ivpu_jsm_metric_streamer_info(vdev, ms->mask, 0, 0, &sample_size, NULL); + if (ret) + goto err_free_ms; + + single_buff_size = sample_size * + ((u64)args->read_period_samples * MS_READ_PERIOD_MULTIPLIER); + ms->bo = ivpu_bo_create_global(vdev, PAGE_ALIGN(single_buff_size * MS_NUM_BUFFERS), + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + if (!ms->bo) { + ivpu_err(vdev, "Failed to allocate MS buffer (size %llu)\n", single_buff_size); + ret = -ENOMEM; + goto err_free_ms; + } + + ms->buff_size = ivpu_bo_size(ms->bo) / MS_NUM_BUFFERS; + ms->active_buff_vpu_addr = ms->bo->vpu_addr; + ms->inactive_buff_vpu_addr = ms->bo->vpu_addr + ms->buff_size; + ms->active_buff_ptr = ivpu_bo_vaddr(ms->bo); + ms->inactive_buff_ptr = ivpu_bo_vaddr(ms->bo) + ms->buff_size; + + ret = ivpu_jsm_metric_streamer_start(vdev, ms->mask, args->sampling_period_ns, + ms->active_buff_vpu_addr, ms->buff_size); + if (ret) + goto err_free_bo; + + args->sample_size = sample_size; + args->max_data_size = ivpu_bo_size(ms->bo); + list_add_tail(&ms->ms_instance_node, &file_priv->ms_instance_list); + goto unlock; + +err_free_bo: + ivpu_bo_free(ms->bo); +err_free_ms: + kfree(ms); +unlock: + mutex_unlock(&file_priv->ms_lock); + return ret; +} + +static int +copy_leftover_bytes(struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 copy_bytes; + + if (ms->leftover_bytes) { + copy_bytes = min(user_size - *user_bytes_copied, ms->leftover_bytes); + if (copy_to_user(user_ptr + *user_bytes_copied, ms->leftover_addr, copy_bytes)) + return -EFAULT; + + ms->leftover_bytes -= copy_bytes; + ms->leftover_addr += copy_bytes; + *user_bytes_copied += copy_bytes; + } + + return 0; +} + +static int +copy_samples_to_user(struct ivpu_device *vdev, struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 bytes_written; + int ret; + + *user_bytes_copied = 0; + + ret = copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); + if (ret) + return ret; + + if (*user_bytes_copied == user_size) + return 0; + + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, ms->inactive_buff_vpu_addr, + ms->buff_size, &bytes_written); + if (ret) + return ret; + + swap(ms->active_buff_vpu_addr, ms->inactive_buff_vpu_addr); + swap(ms->active_buff_ptr, ms->inactive_buff_ptr); + + ms->leftover_bytes = bytes_written; + ms->leftover_addr = ms->inactive_buff_ptr; + + return copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); +} + +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 bytes_written; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (!ms) { + ivpu_err(vdev, "Instance doesn't exist for mask: %#llx\n", args->metric_group_mask); + ret = -EINVAL; + goto unlock; + } + + if (!args->buffer_size) { + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, 0, 0, &bytes_written); + if (ret) + goto unlock; + args->data_size = bytes_written + ms->leftover_bytes; + goto unlock; + } + + if (!args->buffer_ptr) { 
+ ret = -EINVAL; + goto unlock; + } + + ret = copy_samples_to_user(vdev, ms, u64_to_user_ptr(args->buffer_ptr), + args->buffer_size, &args->data_size); +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +static void free_instance(struct ivpu_file_priv *file_priv, struct ivpu_ms_instance *ms) +{ + lockdep_assert_held(&file_priv->ms_lock); + + list_del(&ms->ms_instance_node); + ivpu_jsm_metric_streamer_stop(file_priv->vdev, ms->mask); + ivpu_bo_free(ms->bo); + kfree(ms); +} + +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_ms_instance *ms; + + if (!args->metric_group_mask) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (ms) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); + + return ms ? 0 : -EINVAL; +} + +static inline struct ivpu_bo *get_ms_info_bo(struct ivpu_file_priv *file_priv) +{ + lockdep_assert_held(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) + return file_priv->ms_info_bo; + + file_priv->ms_info_bo = ivpu_bo_create_global(file_priv->vdev, MS_INFO_BUFFER_SIZE, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + return file_priv->ms_info_bo; +} + +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_bo *bo; + u64 info_size; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + if (!args->buffer_size) + return ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, + 0, 0, NULL, &args->data_size); + if (!args->buffer_ptr) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + bo = get_ms_info_bo(file_priv); + if (!bo) { + ret = -ENOMEM; + goto unlock; + } + + ret = ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, bo->vpu_addr, + ivpu_bo_size(bo), NULL, &info_size); + if (ret) + goto unlock; + + if (args->buffer_size < info_size) { + ret = -ENOSPC; + goto unlock; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer_ptr), ivpu_bo_vaddr(bo), info_size)) + ret = -EFAULT; + + args->data_size = info_size; +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) +{ + struct ivpu_ms_instance *ms, *tmp; + + mutex_lock(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) { + ivpu_bo_free(file_priv->ms_info_bo); + file_priv->ms_info_bo = NULL; + } + + list_for_each_entry_safe(ms, tmp, &file_priv->ms_instance_list, ms_instance_node) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); +} + +void ivpu_ms_cleanup_all(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + ivpu_ms_cleanup(file_priv); + + mutex_unlock(&vdev->context_list_lock); +} diff --git a/drivers/accel/ivpu/ivpu_ms.h b/drivers/accel/ivpu/ivpu_ms.h new file mode 100644 index 000000000000..fbd5ebebc3d9 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ +#ifndef __IVPU_MS_H__ +#define __IVPU_MS_H__ + +#include <linux/list.h> + +struct drm_device; +struct drm_file; +struct ivpu_bo; 
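(A quick worked sizing example for the metric-streamer buffers allocated in ivpu_ms_start_ioctl() above; the sample_size and read_period_samples values below are hypothetical, not taken from the patch.)

/*
 * Assuming sample_size = 64 bytes and args->read_period_samples = 1000:
 *   single_buff_size = 64 * (1000 * MS_READ_PERIOD_MULTIPLIER(2)) = 128000 bytes
 *   BO size          = PAGE_ALIGN(128000 * MS_NUM_BUFFERS(2))     = 258048 bytes (63 x 4 KiB pages)
 *   ms->buff_size    = 258048 / 2                                 = 129024 bytes per half
 * The two halves act as the active/inactive ping-pong buffers that
 * copy_samples_to_user() swaps on every read before copying leftover bytes.
 */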
+struct ivpu_device; +struct ivpu_file_priv; + +struct ivpu_ms_instance { + struct ivpu_bo *bo; + struct list_head ms_instance_node; + u64 mask; + u64 buff_size; + u64 active_buff_vpu_addr; + u64 inactive_buff_vpu_addr; + void *active_buff_ptr; + void *inactive_buff_ptr; + u64 leftover_bytes; + void *leftover_addr; +}; + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv); +void ivpu_ms_cleanup_all(struct ivpu_device *vdev); + +#endif /* __IVPU_MS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 5f73854234ba..5060c5dd40d1 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/highmem.h> @@ -9,20 +9,25 @@ #include <linux/pm_runtime.h> #include <linux/reboot.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" #include "ivpu_drv.h" -#include "ivpu_hw.h" #include "ivpu_fw.h" #include "ivpu_fw_log.h" +#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" static bool ivpu_disable_recovery; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); -MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected"); +MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected"); +#endif static unsigned long ivpu_tdr_timeout_ms; module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); @@ -36,6 +41,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_cmdq_reset_all_contexts(vdev); ivpu_ipc_reset(vdev); + ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; } @@ -58,14 +64,11 @@ static int ivpu_suspend(struct ivpu_device *vdev) { int ret; - /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ - pci_save_state(to_pci_dev(vdev->drm.dev)); + ivpu_prepare_for_reset(vdev); ret = ivpu_shutdown(vdev); if (ret) - ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); - - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret); return ret; } @@ -74,10 +77,10 @@ static int ivpu_resume(struct ivpu_device *vdev) { int ret; +retry: pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); pci_restore_state(to_pci_dev(vdev->drm.dev)); -retry: ret = ivpu_hw_power_up(vdev); if (ret) { ivpu_err(vdev, "Failed to power up HW: %d\n", ret); @@ -100,6 +103,7 @@ err_mmu_disable: ivpu_mmu_disable(vdev); err_power_down: ivpu_hw_power_down(vdev); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); if (!ivpu_fw_is_cold_boot(vdev)) { ivpu_pm_prepare_cold_boot(vdev); @@ -111,39 +115,57 @@ err_power_down: return ret; } -static void ivpu_pm_recovery_work(struct work_struct *work) +static void ivpu_pm_reset_begin(struct ivpu_device *vdev) { - struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device 
*vdev = pm->vdev; - char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; - int ret; - - ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); - - ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (ret) - ivpu_err(vdev, "Failed to resume VPU: %d\n", ret); - - ivpu_fw_log_dump(vdev); + pm_runtime_disable(vdev->drm.dev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); down_write(&vdev->pm->reset_lock); +} + +static void ivpu_pm_reset_complete(struct ivpu_device *vdev) +{ + int ret; - ivpu_suspend(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); ret = ivpu_resume(vdev); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } else { + pm_runtime_set_active(vdev->drm.dev); + } up_write(&vdev->pm->reset_lock); atomic_set(&vdev->pm->reset_pending, 0); - kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + pm_runtime_enable(vdev->drm.dev); +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + + ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + ivpu_suspend(vdev); + } + + ivpu_pm_reset_complete(vdev); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); } void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) @@ -195,6 +217,7 @@ int ivpu_pm_suspend_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); unsigned long timeout; + trace_pm("suspend"); ivpu_dbg(vdev, PM, "Suspend..\n"); timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); @@ -212,6 +235,7 @@ int ivpu_pm_suspend_cb(struct device *dev) ivpu_pm_prepare_warm_boot(vdev); ivpu_dbg(vdev, PM, "Suspend done.\n"); + trace_pm("suspend done"); return 0; } @@ -222,6 +246,7 @@ int ivpu_pm_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("resume"); ivpu_dbg(vdev, PM, "Resume..\n"); ret = ivpu_resume(vdev); @@ -229,6 +254,7 @@ int ivpu_pm_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to resume: %d\n", ret); ivpu_dbg(vdev, PM, "Resume done.\n"); + trace_pm("resume done"); return ret; } @@ -237,42 +263,40 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); - bool hw_is_idle = true; - int ret; + int ret, ret_d0i3; + bool is_idle; drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); + trace_pm("runtime suspend"); ivpu_dbg(vdev, PM, "Runtime suspend..\n"); - if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { - ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", - vdev->pm->suspend_reschedule_counter); - pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); - vdev->pm->suspend_reschedule_counter--; - return -EAGAIN; - } + ivpu_mmu_disable(vdev); + + is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent; + if (!is_idle) + ivpu_err(vdev, "NPU is not idle before 
autosuspend\n"); - if (!vdev->pm->suspend_reschedule_counter) - hw_is_idle = false; - else if (ivpu_jsm_pwr_d0i3_enter(vdev)) - hw_is_idle = false; + ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev); + if (ret_d0i3) + ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3); ret = ivpu_suspend(vdev); if (ret) - ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); + ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret); - if (!hw_is_idle) { - ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n"); - ivpu_fw_log_dump(vdev); + if (!is_idle || ret_d0i3) { + ivpu_err(vdev, "Forcing cold boot due to previous errors\n"); + atomic_inc(&vdev->pm->reset_counter); + ivpu_dev_coredump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); } - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); + trace_pm("runtime suspend done"); return 0; } @@ -283,6 +307,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("runtime resume"); ivpu_dbg(vdev, PM, "Runtime resume..\n"); ret = ivpu_resume(vdev); @@ -290,6 +315,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); ivpu_dbg(vdev, PM, "Runtime resume done.\n"); + trace_pm("runtime resume done"); return ret; } @@ -299,18 +325,10 @@ int ivpu_rpm_get(struct ivpu_device *vdev) int ret; ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (!drm_WARN_ON(&vdev->drm, ret < 0)) - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - - return ret; -} - -int ivpu_rpm_get_if_active(struct ivpu_device *vdev) -{ - int ret; - - ret = pm_runtime_get_if_active(vdev->drm.dev, false); - drm_WARN_ON(&vdev->drm, ret < 0); + if (ret < 0) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } return ret; } @@ -326,33 +344,26 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) struct ivpu_device *vdev = pci_get_drvdata(pdev); ivpu_dbg(vdev, PM, "Pre-reset..\n"); - atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->reset_pending, 1); - pm_runtime_get_sync(vdev->drm.dev); - down_write(&vdev->pm->reset_lock); - ivpu_prepare_for_reset(vdev); - ivpu_hw_reset(vdev); - ivpu_pm_prepare_cold_boot(vdev); - ivpu_jobs_abort_all(vdev); + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); + } + ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } void ivpu_pm_reset_done_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - int ret; ivpu_dbg(vdev, PM, "Post-reset..\n"); - ret = ivpu_resume(vdev); - if (ret) - ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - up_write(&vdev->pm->reset_lock); - atomic_set(&vdev->pm->reset_pending, 0); - ivpu_dbg(vdev, PM, "Post-reset done.\n"); - pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + ivpu_pm_reset_complete(vdev); + + ivpu_dbg(vdev, PM, "Post-reset done.\n"); } void ivpu_pm_init(struct ivpu_device *vdev) @@ -362,7 +373,6 @@ void ivpu_pm_init(struct ivpu_device *vdev) int delay; pm->vdev = vdev; - pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; init_rwsem(&pm->reset_lock); atomic_set(&pm->reset_pending, 0); @@ -378,6 +388,7 @@ void ivpu_pm_init(struct ivpu_device *vdev) pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, delay); + pm_runtime_set_active(dev); ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", 
delay); } @@ -392,7 +403,6 @@ void ivpu_pm_enable(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; - pm_runtime_set_active(dev); pm_runtime_allow(dev); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); @@ -403,3 +413,68 @@ void ivpu_pm_disable(struct ivpu_device *vdev) pm_runtime_get_noresume(vdev->drm.dev); pm_runtime_forbid(vdev->drm.dev); } + +int ivpu_pm_dct_init(struct ivpu_device *vdev) +{ + if (vdev->pm->dct_active_percent) + return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent); + + return 0; +} + +int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) +{ + u32 active_us, inactive_us; + int ret; + + if (active_percent == 0 || active_percent > 100) + return -EINVAL; + + active_us = (DCT_PERIOD_US * active_percent) / 100; + inactive_us = DCT_PERIOD_US - active_us; + + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); + return ret; + } + + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + return 0; +} + +int ivpu_pm_dct_disable(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_jsm_dct_disable(vdev); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); + return ret; + } + + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT disabled\n"); + return 0; +} + +void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev) +{ + bool enable; + int ret; + + if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) + return; + + if (vdev->pm->dct_active_percent) + ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); + else + ret = ivpu_pm_dct_disable(vdev); + + if (!ret) + ivpu_hw_btrs_dct_set_status(vdev, enable, vdev->pm->dct_active_percent); +} diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index ec60fbeefefc..b70efe6c36e4 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_PM_H__ @@ -19,7 +19,7 @@ struct ivpu_pm_info { atomic_t reset_counter; atomic_t reset_pending; bool is_warmboot; - u32 suspend_reschedule_counter; + u8 dct_active_percent; }; void ivpu_pm_init(struct ivpu_device *vdev); @@ -36,11 +36,15 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev); void ivpu_pm_reset_done_cb(struct pci_dev *pdev); int __must_check ivpu_rpm_get(struct ivpu_device *vdev); -int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason); void ivpu_start_job_timeout_detection(struct ivpu_device *vdev); void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); +int ivpu_pm_dct_init(struct ivpu_device *vdev); +int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent); +int ivpu_pm_dct_disable(struct ivpu_device *vdev); +void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev); + #endif /* __IVPU_PM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c new file mode 100644 index 000000000000..616477fc17fa --- /dev/null +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Intel Corporation + */ + +#include <linux/device.h> +#include <linux/err.h> + +#include 
"ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_hw.h" +#include "ivpu_sysfs.h" + +/* + * npu_busy_time_us is the time that the device spent executing jobs. + * The time is counted when and only when there are jobs submitted to firmware. + * + * This time can be used to measure the utilization of NPU, either by calculating + * npu_busy_time_us difference between two timepoints (i.e. measuring the time + * that the NPU was active during some workload) or monitoring utilization percentage + * by reading npu_busy_time_us periodically. + * + * When reading the value periodically, it shouldn't be read too often as it may have + * an impact on job submission performance. Recommended period is 1 second. + */ +static ssize_t +npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + ktime_t total, now = 0; + + xa_lock(&vdev->submitted_jobs_xa); + total = vdev->busy_time; + if (!xa_empty(&vdev->submitted_jobs_xa)) + now = ktime_sub(ktime_get(), vdev->busy_start_ts); + xa_unlock(&vdev->submitted_jobs_xa); + + return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now))); +} + +static DEVICE_ATTR_RO(npu_busy_time_us); + +/** + * DOC: sched_mode + * + * The sched_mode is used to report current NPU scheduling mode. + * + * It returns following strings: + * - "HW" - Hardware Scheduler mode + * - "OS" - Operating System Scheduler mode + * + */ +static ssize_t +sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + + return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS"); +} + +static DEVICE_ATTR_RO(sched_mode); + +static struct attribute *ivpu_dev_attrs[] = { + &dev_attr_npu_busy_time_us.attr, + &dev_attr_sched_mode.attr, + NULL, +}; + +static struct attribute_group ivpu_dev_attr_group = { + .attrs = ivpu_dev_attrs, +}; + +void ivpu_sysfs_init(struct ivpu_device *vdev) +{ + int ret; + + ret = devm_device_add_group(vdev->drm.dev, &ivpu_dev_attr_group); + if (ret) + ivpu_warn(vdev, "Failed to add group to device, ret %d", ret); +} diff --git a/drivers/accel/ivpu/ivpu_sysfs.h b/drivers/accel/ivpu/ivpu_sysfs.h new file mode 100644 index 000000000000..9836f09b35a3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Intel Corporation + */ + +#ifndef __IVPU_SYSFS_H__ +#define __IVPU_SYSFS_H__ + +#include "ivpu_drv.h" + +void ivpu_sysfs_init(struct ivpu_device *vdev); + +#endif /* __IVPU_SYSFS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h new file mode 100644 index 000000000000..eb792038e701 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __IVPU_TRACE_H__ + +#include <linux/tracepoint.h> +#include "ivpu_drv.h" +#include "ivpu_job.h" +#include "vpu_jsm_api.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ipc.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vpu +#define TRACE_INCLUDE_FILE ivpu_trace + +TRACE_EVENT(pm, + TP_PROTO(const char *event), + TP_ARGS(event), + TP_STRUCT__entry(__field(const char *, event)), + TP_fast_assign(__entry->event = event;), + TP_printk("%s", __entry->event) +); + +TRACE_EVENT(job, + 
TP_PROTO(const char *event, struct ivpu_job *job), + TP_ARGS(event, job), + TP_STRUCT__entry(__field(const char *, event) + __field(u32, ctx_id) + __field(u32, engine_id) + __field(u32, job_id) + ), + TP_fast_assign(__entry->event = event; + __entry->ctx_id = job->file_priv->ctx.id; + __entry->engine_id = job->engine_idx; + __entry->job_id = job->job_id;), + TP_printk("%s context:%d engine:%d job:%d", + __entry->event, + __entry->ctx_id, + __entry->engine_id, + __entry->job_id) +); + +TRACE_EVENT(jsm, + TP_PROTO(const char *event, struct vpu_jsm_msg *msg), + TP_ARGS(event, msg), + TP_STRUCT__entry(__field(const char *, event) + __field(const char *, type) + __field(enum vpu_ipc_msg_status, status) + __field(u32, request_id) + __field(u32, result) + ), + TP_fast_assign(__entry->event = event; + __entry->type = ivpu_jsm_msg_type_to_str(msg->type); + __entry->status = msg->status; + __entry->request_id = msg->request_id; + __entry->result = msg->result;), + TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x", + __entry->event, + __entry->type, + __entry->status, + __entry->request_id, + __entry->result) +); + +#endif /* __IVPU_TRACE_H__ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c new file mode 100644 index 000000000000..f8fb99de0de3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "ivpu_trace.h" +#endif diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 04c954258563..908e68ea1c39 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -1,14 +1,13 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (c) 2020-2024, Intel Corporation. */ #ifndef VPU_BOOT_API_H #define VPU_BOOT_API_H /* - * =========== FW API version information beginning ================ - * The bellow values will be used to construct the version info this way: + * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes @@ -27,21 +26,20 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 20 +#define VPU_BOOT_API_VER_MINOR 26 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 4 +#define VPU_BOOT_API_VER_PATCH 3 /* * Index in the API version table * Must be unique for each API */ #define VPU_BOOT_API_VER_INDEX 0 -/* ------------ FW API version information end ---------------------*/ -#pragma pack(push, 1) +#pragma pack(push, 4) /* * Firmware image header format @@ -66,12 +64,25 @@ struct vpu_firmware_header { /* Size of memory require for firmware execution */ u32 runtime_size; u32 shave_nn_fw_size; - /* Size of primary preemption buffer. */ + /* + * Size of primary preemption buffer, assuming a 2-job submission queue. + * NOTE: host driver is expected to adapt size accordingly to actual + * submission queue size and device capabilities. 
+ */ u32 preemption_buffer_1_size; - /* Size of secondary preemption buffer. */ + /* + * Size of secondary preemption buffer, assuming a 2-job submission queue. + * NOTE: host driver is expected to adapt size accordingly to actual + * submission queue size and device capabilities. + */ u32 preemption_buffer_2_size; /* Space reserved for future preemption-related fields. */ u32 preemption_reserved[6]; + /* FW image read only section start address, 4KB aligned */ + u64 ro_section_start_address; + /* FW image read only section size, 4KB aligned */ + u32 ro_section_size; + u32 reserved; }; /* @@ -151,8 +162,6 @@ enum vpu_trace_destination { /* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ #define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT #define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 -#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST -#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST struct vpu_boot_l2_cache_config { u8 use; @@ -181,10 +190,21 @@ struct vpu_warm_boot_section { #define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000 /** - * Macros to enable various operation modes within the VPU. + * Macros to enable various power profiles within the NPU. * To be defined as part of 32 bit mask. */ -#define VPU_OP_MODE_SURVIVABILITY 0x1 +#define POWER_PROFILE_SURVIVABILITY 0x1 + +/** + * Enum for dvfs_mode boot param. + */ +enum vpu_governor { + VPU_GOV_DEFAULT = 0, /* Default Governor for the system */ + VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */ + VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */ + VPU_GOV_POWER_SAVE = 3, /* Power save governor */ + VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */ +}; struct vpu_boot_params { u32 magic; @@ -288,7 +308,14 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */ + /** + * DVFS Mode: + * 0 - Default, DVFS mode selected by the firmware + * 1 - Max Performance + * 2 - On Demand + * 3 - Power Save + * 4 - On Demand Priority Aware + */ u32 dvfs_mode; /** * Depending on DVFS Mode: @@ -317,7 +344,22 @@ struct vpu_boot_params { u64 d0i3_residency_time_us; /* Value of VPU perf counter at the time of entering D0i3 state . */ u64 d0i3_entry_vpu_ts; - u32 pad4[20]; + /* + * The system time of the host operating system in microseconds. + * E.g the number of microseconds since 1st of January 1970, or whatever + * date the host operating system uses to maintain system time. + * This value will be used to track system time on the VPU. + * The KMD is required to update this value on every VPU reset. + */ + u64 system_time_us; + u32 pad4[2]; + /* + * The delta between device monotonic time and the current value of the + * HW timestamp register, in ticks. Written by the firmware during boot. + * Can be used by the KMD to calculate device time. 
+ */ + u64 device_time_delta_ticks; + u32 pad7[14]; /* Warm boot information: 0x400 - 0x43F */ u32 warm_boot_sections_count; u32 warm_boot_start_address_reference; @@ -344,16 +386,17 @@ struct vpu_boot_params { u32 vpu_focus_present_timer_ms; /* VPU ECC Signaling */ u32 vpu_uses_ecc_mca_signal; - /* Values defined by VPU_OP_MODE* macros */ - u32 vpu_operation_mode; - /* Unused/reserved: 0x480 - 0xFFF */ - u32 pad6[736]; + /* Values defined by POWER_PROFILE* macros */ + u32 power_profile; + /* Microsecond value for DCT active cycle */ + u32 dct_active_us; + /* Microsecond value for DCT inactive cycle */ + u32 dct_inactive_us; + /* Unused/reserved: 0x488 - 0xFFF */ + u32 pad6[734]; }; -/* - * Magic numbers set between host and vpu to detect corruptio of tracing init - */ - +/* Magic numbers set between host and vpu to detect corruption of tracing init */ #define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) /* Tracing buffer message format definitions */ @@ -373,7 +416,9 @@ struct vpu_tracing_buffer_header { u32 host_canary_start; /* offset from start of buffer for trace entries */ u32 read_index; - u32 pad_to_cache_line_size_0[14]; + /* keeps track of wrapping on the reader side */ + u32 read_wrap_count; + u32 pad_to_cache_line_size_0[13]; /* End of first cache line */ /** diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7da7622742be..7215c144158c 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (c) 2020-2024, Intel Corporation. */ /** @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 15 +#define VPU_JSM_API_VER_MINOR 25 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -36,15 +36,18 @@ /* * Number of Priority Bands for Hardware Scheduling - * Bands: RealTime, Focus, Normal, Idle + * Bands: Idle(0), Normal(1), Focus(2), RealTime(3) */ #define VPU_HWS_NUM_PRIORITY_BANDS 4 /* Max number of impacted contexts that can be dealt with the engine reset command */ #define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3 -/** Pack the API structures for now, once alignment issues are fixed this can be removed */ -#pragma pack(push, 1) +/* + * Pack the API structures to enforce binary compatibility + * Align to 8 bytes for optimal performance + */ +#pragma pack(push, 8) /* * Engine indexes. @@ -71,6 +74,7 @@ #define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU /* Job status returned when the job was preempted mid-inference */ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU +#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU /* * Host <-> VPU IPC channels. @@ -83,18 +87,58 @@ /* * Job flags bit masks. */ -#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 -#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000 +enum { + /* + * Null submission mask. + * When set, batch buffer's commands are not processed but returned as + * successful immediately, except fences and timestamps. + * When cleared, batch buffer's commands are processed normally. + * Used for testing and profiling purposes. + */ + VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U), + /* + * Inline command mask. + * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below). + * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below). 
+ */ + VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U), + /* + * VPU private data mask. + * Reserved for the VPU to store private data about the job (or inline command) + * while being processed. + */ + VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U +}; /* - * Sizes of the reserved areas in jobs, in bytes. + * Job queue flags bit masks. */ -#define VPU_JOB_RESERVED_BYTES 8 +enum { + /* + * No job done notification mask. + * When set, indicates that no job done notification should be sent for any + * job from this queue. When cleared, indicates that job done notification + * should be sent for every job completed from this queue. + */ + VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U), + /* + * Native fence usage mask. + * When set, indicates that job queue uses native fences (as inline commands + * in job queue). Such queues may also use legacy fences (as commands in batch buffers). + * When cleared, indicates the job queue only uses legacy fences. + * NOTE: For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + */ + VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), -/* - * Sizes of the reserved areas in job queues, in bytes. - */ -#define VPU_JOB_QUEUE_RESERVED_BYTES 52 + /* + * Enable turbo mode for testing NPU performance; not recommended for regular usage. + */ + VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U) +}; /* * Max length (including trailing NULL char) of trace entity name (e.g., the @@ -125,23 +169,125 @@ #define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U /* + * vpu_jsm_engine_reset_context flag definitions + */ +#define VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK BIT(0) +#define VPU_ENGINE_RESET_CONTEXT_HANG_PRIMARY_CAUSE 0 +#define VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE 1 + +/* + * Invalid command queue handle identifier. Applies to cmdq_id and cmdq_group + * in this API. + */ +#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL + +/* + * Inline commands types. + */ +/* + * NOP. + * VPU does nothing other than consuming the inline command object. + */ +#define VPU_INLINE_CMD_TYPE_NOP 0x0 +/* + * Fence wait. + * VPU waits for the fence current value to reach monitored value. + * Fence wait operations are executed upon job dispatching. While waiting for + * the fence to be satisfied, VPU blocks fetching of the next objects in the queue. + * Jobs present in the queue prior to the fence wait object may be processed + * concurrently. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1 +/* + * Fence signal. + * VPU sets the fence current value to the provided value. If new current value + * is equal to or higher than monitored value, VPU sends fence signalled notification + * to the host. Fence signal operations are executed upon completion of all the jobs + * present in the queue prior to them, and in-order relative to each other in the queue. + * But jobs in-between them may be processed concurrently and may complete out-of-order. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2 + +/* + * Job scheduling priority bands for both hardware scheduling and OS scheduling. 
+ */ +enum vpu_job_scheduling_priority_band { + VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0, + VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1, + VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2, + VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3, + VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4, +}; + +/* * Job format. + * Jobs defines the actual workloads to be executed by a given engine. */ struct vpu_job_queue_entry { - u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ - u32 job_id; /**< Job ID */ - u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ - u64 root_page_table_addr; /**< Address of root page table to use for this job */ - u64 root_page_table_update_counter; /**< Page tables update events counter */ - u64 primary_preempt_buf_addr; + /**< Address of VPU commands batch buffer */ + u64 batch_buf_addr; + /**< Job ID */ + u32 job_id; + /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u32 flags; + /** + * Doorbell ring timestamp taken by KMD from SoC's global system clock, in + * microseconds. NPU can convert this value to its own fixed clock's timebase, + * to match other profiling timestamps. + */ + u64 doorbell_timestamp; + /**< Extra id for job tracking, used only in the firmware perf traces */ + u64 host_tracking_id; /**< Address of the primary preemption buffer to use for this job */ - u32 primary_preempt_buf_size; + u64 primary_preempt_buf_addr; /**< Size of the primary preemption buffer to use for this job */ - u32 secondary_preempt_buf_size; + u32 primary_preempt_buf_size; /**< Size of secondary preemption buffer to use for this job */ - u64 secondary_preempt_buf_addr; + u32 secondary_preempt_buf_size; /**< Address of secondary preemption buffer to use for this job */ - u8 reserved_0[VPU_JOB_RESERVED_BYTES]; + u64 secondary_preempt_buf_addr; + u64 reserved_0; +}; + +/* + * Inline command format. + * Inline commands are the commands executed at scheduler level (typically, + * synchronization directives). Inline command and job objects must be of + * the same size and have flags field at same offset. + */ +struct vpu_inline_cmd { + u64 reserved_0; + /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */ + u32 type; + /* Flags bit field, see VPU_JOB_FLAGS_* above. */ + u32 flags; + /* Inline command payload. Depends on inline command type. */ + union { + /* Fence (wait and signal) commands' payload. */ + struct { + /* Fence object handle. */ + u64 fence_handle; + /* User VA of the current fence value. */ + u64 current_value_va; + /* User VA of the monitored fence value (read-only). */ + u64 monitored_value_va; + /* Value to wait for or write in fence location. */ + u64 value; + /* User VA of the log buffer in which to add log entry on completion. */ + u64 log_buffer_va; + } fence; + /* Other commands do not have a payload. */ + /* Payload definition for future inline commands can be inserted here. */ + u64 reserved_1[6]; + } payload; +}; + +/* + * Job queue slots can be populated either with job objects or inline command objects. + */ +union vpu_jobq_slot { + struct vpu_job_queue_entry job; + struct vpu_inline_cmd inline_cmd; }; /* @@ -151,7 +297,21 @@ struct vpu_job_queue_header { u32 engine_idx; u32 head; u32 tail; - u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; + u32 flags; + /* Set to 1 to indicate priority_band field is valid */ + u32 priority_band_valid; + /* + * Priority for the work of this job queue, valid only if the HWS is NOT used + * and the `priority_band_valid` is set to 1. 
It is applied only during + * the VPU_JSM_MSG_REGISTER_DB message processing. + * The device firmware might use the `priority_band` to optimize the power + * management logic, but it will not affect the order of jobs. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ + u32 priority_band; + /* Inside realtime band assigns a further priority, limited to 0..31 range */ + u32 realtime_priority_level; + u32 reserved_0[9]; }; /* @@ -159,7 +319,7 @@ struct vpu_job_queue_header { */ struct vpu_job_queue { struct vpu_job_queue_header header; - struct vpu_job_queue_entry job[]; + union vpu_jobq_slot slot[]; }; /** @@ -181,9 +341,7 @@ enum vpu_trace_entity_type { struct vpu_hws_log_buffer_header { /* Written by VPU after adding a log entry. Initialised by host to 0. */ u32 first_free_entry_index; - /* Incremented by VPU every time the VPU overwrites the 0th entry; - * initialised by host to 0. - */ + /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */ u32 wraparound_count; /* * This is the number of buffers that can be stored in the log buffer provided by the host. @@ -214,14 +372,80 @@ struct vpu_hws_log_buffer_entry { u64 operation_data[2]; }; +/* Native fence log buffer types. */ +enum vpu_hws_native_fence_log_type { + VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1, + VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2 +}; + +/* HWS native fence log buffer header. */ +struct vpu_hws_native_fence_log_header { + union { + struct { + /* Index of the first free entry in buffer. */ + u32 first_free_entry_idx; + /* Incremented each time NPU wraps around the buffer to write next entry. */ + u32 wraparound_count; + }; + /* Field allowing atomic update of both fields above. */ + u64 atomic_wraparound_and_entry_idx; + }; + /* Log buffer type, see enum vpu_hws_native_fence_log_type. */ + u64 type; + /* Allocated number of entries in the log buffer. */ + u64 entry_nb; + u64 reserved[2]; +}; + +/* Native fence log operation types. */ +enum vpu_hws_native_fence_log_op { + VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0, + VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1 +}; + +/* HWS native fence log entry. */ +struct vpu_hws_native_fence_log_entry { + /* Newly signaled/unblocked fence value. */ + u64 fence_value; + /* Native fence object handle to which this operation belongs. */ + u64 fence_handle; + /* Operation type, see enum vpu_hws_native_fence_log_op. */ + u64 op_type; + u64 reserved_0; + /* + * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence + * wait was started (in NPU SysTime). + */ + u64 fence_wait_start_ts; + u64 reserved_1; + /* Timestamp at which fence operation was completed (in NPU SysTime). */ + u64 fence_end_ts; +}; + +/* Native fence log buffer. */ +struct vpu_hws_native_fence_log_buffer { + struct vpu_hws_native_fence_log_header header; + struct vpu_hws_native_fence_log_entry entry[]; +}; + /* * Host <-> VPU IPC messages types. */ enum vpu_ipc_msg_type { VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, + /* IPC Host -> Device, Async commands */ VPU_JSM_MSG_ASYNC_CMD = 0x1100, VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + /** + * Preempt engine. The NPU stops (preempts) all the jobs currently + * executing on the target engine making the engine become idle and ready to + * execute new jobs. 
+ * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of + * the target engine, but it stops processing them (until the queue doorbell + * is rung again); the host is responsible to reset the job queue, either + * after preemption or when resubmitting jobs to the queue. + */ VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, VPU_JSM_MSG_REGISTER_DB = 0x1102, VPU_JSM_MSG_UNREGISTER_DB = 0x1103, @@ -307,9 +531,10 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC commands before this one. * */ VPU_JSM_MSG_STATE_DUMP = 0x11FF, + /* IPC Host -> Device, General commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, - VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD, /** * Control dyndbg behavior by executing a dyndbg command; equivalent to * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. @@ -319,8 +544,12 @@ enum vpu_ipc_msg_type { * Perform the save procedure for the D0i3 entry */ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, + /* IPC Device -> Host, Job completion */ VPU_JSM_MSG_JOB_DONE = 0x2100, + /* IPC Device -> Host, Fence signalled */ + VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101, + /* IPC Device -> Host, Async command completion */ VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, @@ -406,6 +635,7 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC responses before this one. * */ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, + /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, @@ -584,11 +814,6 @@ struct vpu_jsm_metric_streamer_update { u64 next_buffer_size; }; -struct vpu_ipc_msg_payload_blob_deinit { - /* 64-bit unique ID for the blob to be de-initialized. */ - u64 blob_id; -}; - struct vpu_ipc_msg_payload_job_done { /* Engine to which the job was submitted. */ u32 engine_idx; @@ -606,6 +831,21 @@ struct vpu_ipc_msg_payload_job_done { u64 cmdq_id; }; +/* + * Notification message upon native fence signalling. + * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED + */ +struct vpu_ipc_msg_payload_native_fence_signalled { + /* Engine ID. */ + u32 engine_idx; + /* Host SSID. */ + u32 host_ssid; + /* CMDQ ID */ + u64 cmdq_id; + /* Fence object handle. */ + u64 fence_handle; +}; + struct vpu_jsm_engine_reset_context { /* Host SSID */ u32 host_ssid; @@ -613,7 +853,7 @@ struct vpu_jsm_engine_reset_context { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* Flags: 0: cause of hang; 1: collateral damage of reset */ + /* See VPU_ENGINE_RESET_CONTEXT_* defines */ u64 flags; }; @@ -684,11 +924,6 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { u8 power_limit[16]; }; -struct vpu_ipc_msg_payload_blob_deinit_done { - /* 64-bit unique ID for the blob de-initialized. */ - u64 blob_id; -}; - /* HWS priority band setup request / response */ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* @@ -730,11 +965,7 @@ struct vpu_ipc_msg_payload_hws_create_cmdq { u32 host_ssid; /* Engine for which queue is being created */ u32 engine_idx; - /* - * Cmdq group may be set to 0 or equal to - * cmdq_id while each priority band contains - * only single engine instances. 
- */ + /* Cmdq group: only used for HWS logging of state changes */ u64 cmdq_group; /* Command queue id */ u64 cmdq_id; @@ -782,7 +1013,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* Priority band to assign to work of this context */ + /* + * Priority band to assign to work of this context. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ u32 priority_band; /* Inside realtime band assigns a further priority */ u32 realtime_priority_level; @@ -856,6 +1090,12 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log { * is generated when an event log is written to this index. */ u64 notify_index; + /* + * Field is now deprecated, will be removed when KMD is updated to support removal + */ + u32 enable_extra_events; + /* Zero Padding */ + u32 reserved_0; }; /* @@ -1223,10 +1463,10 @@ union vpu_ipc_msg_payload { struct vpu_jsm_metric_streamer_start metric_streamer_start; struct vpu_jsm_metric_streamer_stop metric_streamer_stop; struct vpu_jsm_metric_streamer_update metric_streamer_update; - struct vpu_ipc_msg_payload_blob_deinit blob_deinit; struct vpu_ipc_msg_payload_ssid_release ssid_release; struct vpu_jsm_hws_register_db hws_register_db; struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled; struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; struct vpu_ipc_msg_payload_register_db_done register_db_done; @@ -1234,7 +1474,6 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; struct vpu_jsm_metric_streamer_done metric_streamer_done; - struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; struct vpu_ipc_msg_payload_trace_config trace_config; struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; struct vpu_ipc_msg_payload_trace_get_name trace_get_name; diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile index 3f7f6dfde7f2..35e883515629 100644 --- a/drivers/accel/qaic/Makefile +++ b/drivers/accel/qaic/Makefile @@ -10,4 +10,7 @@ qaic-y := \ qaic_control.o \ qaic_data.o \ qaic_drv.o \ - qaic_timesync.o + qaic_timesync.o \ + sahara.o + +qaic-$(CONFIG_DEBUG_FS) += qaic_debugfs.o diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c index cb77d048ed54..8ab82e78dd94 100644 --- a/drivers/accel/qaic/mhi_controller.c +++ b/drivers/accel/qaic/mhi_controller.c @@ -20,7 +20,7 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */ module_param(mhi_timeout_ms, uint, 0600); MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value"); -static struct mhi_channel_config aic100_channels[] = { +static const struct mhi_channel_config aic100_channels[] = { { .name = "QAIC_LOOPBACK", .num = 0, @@ -358,8 +358,8 @@ static struct mhi_channel_config aic100_channels[] = { .wake_capable = false, }, { - .num = 21, .name = "QAIC_TIMESYNC", + .num = 21, .num_elements = 32, .local_elements = 0, .event_ring = 0, @@ -390,8 +390,8 @@ static struct mhi_channel_config aic100_channels[] = { .wake_capable = false, }, { - .num = 23, .name = "QAIC_TIMESYNC_PERIODIC", + .num = 23, .num_elements = 32, .local_elements = 0, .event_ring = 0, @@ -405,6 +405,38 @@ static struct mhi_channel_config aic100_channels[] = { .auto_queue = false, .wake_capable = false, }, + { + .name = 
"IPCR", + .num = 24, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 25, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = true, + .wake_capable = false, + }, }; static struct mhi_event_config aic100_events[] = { diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index 582836f9538f..02561b6cecc6 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -30,6 +30,7 @@ #define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm) #define to_drm(qddev) (&(qddev)->drm) #define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */ +#define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev) enum __packed dev_states { /* Device is offline or will be very soon */ @@ -152,6 +153,14 @@ struct qaic_device { struct mhi_device *qts_ch; /* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */ struct workqueue_struct *qts_wq; + /* Head of list of page allocated by MHI bootlog device */ + struct list_head bootlog; + /* MHI bootlog channel device */ + struct mhi_device *bootlog_ch; + /* Work queue for tasks related to MHI bootlog device */ + struct workqueue_struct *bootlog_wq; + /* Synchronizes access of pages in MHI bootlog device */ + struct mutex bootlog_mutex; }; struct qaic_drm_device { @@ -191,8 +200,6 @@ struct qaic_bo { u32 nr_slice; /* Number of slice that have been transferred by DMA engine */ u32 nr_slice_xfer_done; - /* true = BO is queued for execution, true = BO is not queued */ - bool queued; /* * If true then user has attached slicing information to this BO by * calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl. @@ -281,6 +288,7 @@ int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); void release_dbc(struct qaic_device *qdev, u32 dbc_id); +void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail); void wake_all_cntl(struct qaic_device *qdev); void qaic_dev_reset_clean_local_state(struct qaic_device *qdev); diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index 9e8a8cbadf6b..d8bdab69f800 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -496,7 +496,7 @@ static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wr nents = sgt->nents; nents_dma = nents; *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans); - for_each_sgtable_sg(sgt, sg, i) { + for_each_sgtable_dma_sg(sgt, sg, i) { *size -= sizeof(*asp); /* Save 1K for possible follow-up transactions. 
*/ if (*size < SZ_1K) { diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 03c9a793da35..43aba57b48f0 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -141,6 +141,11 @@ struct dbc_rsp { __le16 status; } __packed; +static inline bool bo_queued(struct qaic_bo *bo) +{ + return !list_empty(&bo->xfer_list); +} + inline int get_dbc_req_elem_size(void) { return sizeof(struct dbc_req); @@ -167,9 +172,10 @@ static void free_slice(struct kref *kref) static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out, struct sg_table *sgt_in, u64 size, u64 offset) { - int total_len, len, nents, offf = 0, offl = 0; struct scatterlist *sg, *sgn, *sgf, *sgl; + unsigned int len, nents, offf, offl; struct sg_table *sgt; + size_t total_len; int ret, j; /* find out number of relevant nents needed for this mem */ @@ -177,9 +183,11 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl sgf = NULL; sgl = NULL; nents = 0; + offf = 0; + offl = 0; size = size ? size : PAGE_SIZE; - for (sg = sgt_in->sgl; sg; sg = sg_next(sg)) { + for_each_sgtable_dma_sg(sgt_in, sg, j) { len = sg_dma_len(sg); if (!len) @@ -216,7 +224,7 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl /* copy relevant sg node and fix page and length */ sgn = sgf; - for_each_sgtable_sg(sgt, sg, j) { + for_each_sgtable_dma_sg(sgt, sg, j) { memcpy(sg, sgn, sizeof(*sg)); if (sgn == sgf) { sg_dma_address(sg) += offf; @@ -296,7 +304,7 @@ static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice, * fence. */ dev_addr = req->dev_addr; - for_each_sgtable_sg(slice->sgt, sg, i) { + for_each_sgtable_dma_sg(slice->sgt, sg, i) { slice->reqs[i].cmd = cmd; slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? 
sg_dma_address(sg) : dev_addr); @@ -549,6 +557,7 @@ static bool invalid_sem(struct qaic_sem *sem) static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent, u32 count, u64 total_size) { + u64 total; int i; for (i = 0; i < count; i++) { @@ -558,7 +567,8 @@ static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_ invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3)) return -EINVAL; - if (slice_ent[i].offset + slice_ent[i].size > total_size) + if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) || + total > total_size) return -EINVAL; } @@ -569,6 +579,9 @@ static void qaic_free_sgt(struct sg_table *sgt) { struct scatterlist *sg; + if (!sgt) + return; + for (sg = sgt->sgl; sg; sg = sg_next(sg)) if (sg_page(sg)) __free_pages(sg_page(sg), get_order(sg->length)); @@ -648,6 +661,7 @@ static void qaic_init_bo(struct qaic_bo *bo, bool reinit) } complete_all(&bo->xfer_done); INIT_LIST_HEAD(&bo->slices); + INIT_LIST_HEAD(&bo->xfer_list); } static struct qaic_bo *qaic_alloc_init_bo(void) @@ -709,9 +723,13 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi if (ret) goto free_bo; + ret = drm_gem_create_mmap_offset(obj); + if (ret) + goto free_bo; + ret = drm_gem_handle_create(file_priv, obj, &args->handle); if (ret) - goto free_sgt; + goto free_bo; bo->handle = args->handle; drm_gem_object_put(obj); @@ -720,10 +738,8 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi return 0; -free_sgt: - qaic_free_sgt(bo->sgt); free_bo: - kfree(bo); + drm_gem_object_put(obj); unlock_dev_srcu: srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); unlock_usr_srcu: @@ -738,7 +754,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file struct drm_gem_object *obj; struct qaic_device *qdev; struct qaic_user *usr; - int ret; + int ret = 0; usr = file_priv->driver_priv; usr_rcu_id = srcu_read_lock(&usr->qddev_lock); @@ -760,9 +776,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file goto unlock_dev_srcu; } - ret = drm_gem_create_mmap_offset(obj); - if (ret == 0) - args->offset = drm_vma_node_offset_addr(&obj->vma_node); + args->offset = drm_vma_node_offset_addr(&obj->vma_node); drm_gem_object_put(obj); @@ -828,9 +842,6 @@ static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_h struct sg_table *sgt; int ret; - if (obj->import_attach->dmabuf->size < hdr->size) - return -EINVAL; - sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir); if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); @@ -847,9 +858,6 @@ static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo, { int ret; - if (bo->base.size < hdr->size) - return -EINVAL; - ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0); if (ret) return -EFAULT; @@ -950,9 +958,6 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi if (arg_size / args->hdr.count != sizeof(*slice_ent)) return -EINVAL; - if (args->hdr.size == 0) - return -EINVAL; - if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE)) return -EINVAL; @@ -992,16 +997,16 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi goto free_slice_ent; } - ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, args->hdr.size); - if (ret) - goto free_slice_ent; - obj = drm_gem_object_lookup(file_priv, args->hdr.handle); if (!obj) { ret = -ENOENT; goto free_slice_ent; } + ret = 
qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size); + if (ret) + goto put_bo; + bo = to_qaic_bo(obj); ret = mutex_lock_interruptible(&bo->lock); if (ret) @@ -1173,7 +1178,6 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil struct bo_slice *slice; unsigned long flags; struct qaic_bo *bo; - bool queued; int i, j; int ret; @@ -1205,9 +1209,7 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil } spin_lock_irqsave(&dbc->xfer_lock, flags); - queued = bo->queued; - bo->queued = true; - if (queued) { + if (bo_queued(bo)) { spin_unlock_irqrestore(&dbc->xfer_lock, flags); ret = -EINVAL; goto unlock_bo; @@ -1230,7 +1232,6 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil else ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail); if (ret) { - bo->queued = false; spin_unlock_irqrestore(&dbc->xfer_lock, flags); goto unlock_bo; } @@ -1253,8 +1254,7 @@ failed_to_send_bo: spin_lock_irqsave(&dbc->xfer_lock, flags); bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list); obj = &bo->base; - bo->queued = false; - list_del(&bo->xfer_list); + list_del_init(&bo->xfer_list); spin_unlock_irqrestore(&dbc->xfer_lock, flags); dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); drm_gem_object_put(obj); @@ -1615,8 +1615,7 @@ read_fifo: */ dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); bo->nr_slice_xfer_done = 0; - bo->queued = false; - list_del(&bo->xfer_list); + list_del_init(&bo->xfer_list); bo->perf_stats.req_processed_ts = ktime_get_ns(); complete_all(&bo->xfer_done); drm_gem_object_put(&bo->base); @@ -1875,7 +1874,7 @@ int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi /* Check if BO is committed to H/W for DMA */ spin_lock_irqsave(&dbc->xfer_lock, flags); - if (bo->queued) { + if (bo_queued(bo)) { spin_unlock_irqrestore(&dbc->xfer_lock, flags); ret = -EBUSY; goto unlock_ch_srcu; @@ -1905,8 +1904,7 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db spin_lock_irqsave(&dbc->xfer_lock, flags); while (!list_empty(&dbc->xfer_list)) { bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list); - bo->queued = false; - list_del(&bo->xfer_list); + list_del_init(&bo->xfer_list); spin_unlock_irqrestore(&dbc->xfer_lock, flags); bo->nr_slice_xfer_done = 0; bo->req_id = 0; @@ -1988,3 +1986,12 @@ void release_dbc(struct qaic_device *qdev, u32 dbc_id) dbc->in_use = false; wake_up(&dbc->dbc_release); } + +void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail) +{ + if (!dbc || !head || !tail) + return; + + *head = readl(dbc->dbc_base + REQHP_OFF); + *tail = readl(dbc->dbc_base + REQTP_OFF); +} diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c new file mode 100644 index 000000000000..ba0cf2f94732 --- /dev/null +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/mhi.h> +#include <linux/mutex.h> +#include <linux/overflow.h> +#include <linux/pci.h> +#include <linux/seq_file.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "qaic.h" +#include "qaic_debugfs.h" + +#define BOOTLOG_POOL_SIZE 16 +#define BOOTLOG_MSG_SIZE 512 +#define QAIC_DBC_DIR_NAME 9 + +struct bootlog_msg { + /* Buffer for bootlog messages */ + char str[BOOTLOG_MSG_SIZE]; + /* Root struct of device, used to access device resources */ + struct qaic_device *qdev; + /* Work struct to schedule work coming on QAIC_LOGGING channel */ + struct work_struct work; +}; + +struct bootlog_page { + /* Node in list of bootlog pages maintained by root device struct */ + struct list_head node; + /* Total size of the buffer that holds the bootlogs. It is PAGE_SIZE */ + unsigned int size; + /* Offset for the next bootlog */ + unsigned int offset; +}; + +static int bootlog_show(struct seq_file *s, void *unused) +{ + struct bootlog_page *page; + struct qaic_device *qdev; + void *page_end; + void *log; + + qdev = s->private; + mutex_lock(&qdev->bootlog_mutex); + list_for_each_entry(page, &qdev->bootlog, node) { + log = page + 1; + page_end = (void *)page + page->offset; + while (log < page_end) { + seq_printf(s, "%s", (char *)log); + log += strlen(log) + 1; + } + } + mutex_unlock(&qdev->bootlog_mutex); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(bootlog); + +static int fifo_size_show(struct seq_file *s, void *unused) +{ + struct dma_bridge_chan *dbc = s->private; + + seq_printf(s, "%u\n", dbc->nelem); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(fifo_size); + +static int queued_show(struct seq_file *s, void *unused) +{ + struct dma_bridge_chan *dbc = s->private; + u32 tail = 0, head = 0; + + qaic_data_get_fifo_info(dbc, &head, &tail); + + if (head == U32_MAX || tail == U32_MAX) + seq_printf(s, "%u\n", 0); + else if (head > tail) + seq_printf(s, "%u\n", dbc->nelem - head + tail); + else + seq_printf(s, "%u\n", tail - head); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(queued); + +void qaic_debugfs_init(struct qaic_drm_device *qddev) +{ + struct qaic_device *qdev = qddev->qdev; + struct dentry *debugfs_root; + struct dentry *debugfs_dir; + char name[QAIC_DBC_DIR_NAME]; + u32 i; + + debugfs_root = to_drm(qddev)->debugfs_root; + + debugfs_create_file("bootlog", 0400, debugfs_root, qdev, &bootlog_fops); + /* + * 256 dbcs per device is likely the max we will ever see and lets static checking see a + * reasonable range. 
+ */ + for (i = 0; i < qdev->num_dbc && i < 256; ++i) { + snprintf(name, QAIC_DBC_DIR_NAME, "dbc%03u", i); + debugfs_dir = debugfs_create_dir(name, debugfs_root); + debugfs_create_file("fifo_size", 0400, debugfs_dir, &qdev->dbc[i], &fifo_size_fops); + debugfs_create_file("queued", 0400, debugfs_dir, &qdev->dbc[i], &queued_fops); + } +} + +static struct bootlog_page *alloc_bootlog_page(struct qaic_device *qdev) +{ + struct bootlog_page *page; + + page = (struct bootlog_page *)devm_get_free_pages(&qdev->pdev->dev, GFP_KERNEL, 0); + if (!page) + return page; + + page->size = PAGE_SIZE; + page->offset = sizeof(*page); + list_add_tail(&page->node, &qdev->bootlog); + + return page; +} + +static int reset_bootlog(struct qaic_device *qdev) +{ + struct bootlog_page *page; + struct bootlog_page *i; + + mutex_lock(&qdev->bootlog_mutex); + list_for_each_entry_safe(page, i, &qdev->bootlog, node) { + list_del(&page->node); + devm_free_pages(&qdev->pdev->dev, (unsigned long)page); + } + + page = alloc_bootlog_page(qdev); + mutex_unlock(&qdev->bootlog_mutex); + if (!page) + return -ENOMEM; + + return 0; +} + +static void *bootlog_get_space(struct qaic_device *qdev, unsigned int size) +{ + struct bootlog_page *page; + + page = list_last_entry(&qdev->bootlog, struct bootlog_page, node); + + if (size_add(size, sizeof(*page)) > page->size) + return NULL; + + if (page->offset + size > page->size) { + page = alloc_bootlog_page(qdev); + if (!page) + return NULL; + } + + return (void *)page + page->offset; +} + +static void bootlog_commit(struct qaic_device *qdev, unsigned int size) +{ + struct bootlog_page *page; + + page = list_last_entry(&qdev->bootlog, struct bootlog_page, node); + + page->offset += size; +} + +static void bootlog_log(struct work_struct *work) +{ + struct bootlog_msg *msg = container_of(work, struct bootlog_msg, work); + unsigned int len = strlen(msg->str) + 1; + struct qaic_device *qdev = msg->qdev; + void *log; + + mutex_lock(&qdev->bootlog_mutex); + log = bootlog_get_space(qdev, len); + if (log) { + memcpy(log, msg, len); + bootlog_commit(qdev, len); + } + mutex_unlock(&qdev->bootlog_mutex); + + if (mhi_queue_buf(qdev->bootlog_ch, DMA_FROM_DEVICE, msg, BOOTLOG_MSG_SIZE, MHI_EOT)) + devm_kfree(&qdev->pdev->dev, msg); +} + +static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct bootlog_msg *msg; + int i, ret; + + qdev->bootlog_wq = alloc_ordered_workqueue("qaic_bootlog", 0); + if (!qdev->bootlog_wq) { + ret = -ENOMEM; + goto out; + } + + ret = reset_bootlog(qdev); + if (ret) + goto destroy_workqueue; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + goto destroy_workqueue; + + for (i = 0; i < BOOTLOG_POOL_SIZE; i++) { + msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto mhi_unprepare; + } + + msg->qdev = qdev; + INIT_WORK(&msg->work, bootlog_log); + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, msg, BOOTLOG_MSG_SIZE, MHI_EOT); + if (ret) + goto mhi_unprepare; + } + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->bootlog_ch = mhi_dev; + return 0; + +mhi_unprepare: + mhi_unprepare_from_transfer(mhi_dev); +destroy_workqueue: + flush_workqueue(qdev->bootlog_wq); + destroy_workqueue(qdev->bootlog_wq); +out: + return ret; +} + +static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + + 
mhi_unprepare_from_transfer(qdev->bootlog_ch); + flush_workqueue(qdev->bootlog_wq); + destroy_workqueue(qdev->bootlog_wq); + qdev->bootlog_ch = NULL; +} + +static void qaic_bootlog_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ +} + +static void qaic_bootlog_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct bootlog_msg *msg = mhi_result->buf_addr; + + if (mhi_result->transaction_status) { + devm_kfree(&qdev->pdev->dev, msg); + return; + } + + /* Force a null at the end of the transferred string */ + msg->str[mhi_result->bytes_xferd - 1] = 0; + + queue_work(qdev->bootlog_wq, &msg->work); +} + +static const struct mhi_device_id qaic_bootlog_mhi_match_table[] = { + { .chan = "QAIC_LOGGING", }, + {}, +}; + +static struct mhi_driver qaic_bootlog_mhi_driver = { + .id_table = qaic_bootlog_mhi_match_table, + .remove = qaic_bootlog_mhi_remove, + .probe = qaic_bootlog_mhi_probe, + .ul_xfer_cb = qaic_bootlog_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_bootlog_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_bootlog", + }, +}; + +int qaic_bootlog_register(void) +{ + return mhi_driver_register(&qaic_bootlog_mhi_driver); +} + +void qaic_bootlog_unregister(void) +{ + mhi_driver_unregister(&qaic_bootlog_mhi_driver); +} diff --git a/drivers/accel/qaic/qaic_debugfs.h b/drivers/accel/qaic/qaic_debugfs.h new file mode 100644 index 000000000000..05e74f84cf9f --- /dev/null +++ b/drivers/accel/qaic/qaic_debugfs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#ifndef __QAIC_DEBUGFS_H__ +#define __QAIC_DEBUGFS_H__ + +#include <drm/drm_file.h> + +#ifdef CONFIG_DEBUG_FS +int qaic_bootlog_register(void); +void qaic_bootlog_unregister(void); +void qaic_debugfs_init(struct qaic_drm_device *qddev); +#else +static inline int qaic_bootlog_register(void) { return 0; } +static inline void qaic_bootlog_unregister(void) {} +static inline void qaic_debugfs_init(struct qaic_drm_device *qddev) {} +#endif /* CONFIG_DEBUG_FS */ +#endif /* __QAIC_DEBUGFS_H__ */ diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 2a313eb69b12..81819b9ef8d4 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -28,10 +28,13 @@ #include "mhi_controller.h" #include "qaic.h" +#include "qaic_debugfs.h" #include "qaic_timesync.h" +#include "sahara.h" -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); +#define PCI_DEV_AIC080 0xa080 #define PCI_DEV_AIC100 0xa100 #define QAIC_NAME "qaic" #define QAIC_DESC "Qualcomm Cloud AI Accelerators" @@ -44,6 +47,53 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); static bool link_up; static DEFINE_IDA(qaic_usrs); +static void qaicm_wq_release(struct drm_device *dev, void *res) +{ + struct workqueue_struct *wq = res; + + destroy_workqueue(wq); +} + +static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *name) +{ + struct workqueue_struct *wq; + int ret; + + wq = alloc_workqueue("%s", WQ_UNBOUND, 0, name); + if (!wq) + return ERR_PTR(-ENOMEM); + ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq); + if (ret) + return ERR_PTR(ret); + + return wq; +} + +static void qaicm_srcu_release(struct drm_device *dev, void *res) +{ + struct srcu_struct *lock = res; + + cleanup_srcu_struct(lock); +} + +static int 
qaicm_srcu_init(struct drm_device *dev, struct srcu_struct *lock) +{ + int ret; + + ret = init_srcu_struct(lock); + if (ret) + return ret; + + return drmm_add_action_or_reset(dev, qaicm_srcu_release, lock); +} + +static void qaicm_pci_release(struct drm_device *dev, void *res) +{ + struct qaic_device *qdev = to_qaic_device(dev); + + pci_set_drvdata(qdev->pdev, NULL); +} + static void free_usr(struct kref *kref) { struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count); @@ -158,7 +208,6 @@ static const struct drm_driver qaic_accel_driver = { .name = QAIC_NAME, .desc = QAIC_DESC, - .date = "20190618", .fops = &qaic_accel_fops, .open = qaic_open, @@ -182,8 +231,12 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) qddev->partition_id = partition_id; ret = drm_dev_register(drm, 0); - if (ret) + if (ret) { pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); + return ret; + } + + qaic_debugfs_init(qddev); return ret; } @@ -299,74 +352,77 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) release_dbc(qdev, i); } -static void cleanup_qdev(struct qaic_device *qdev) -{ - int i; - - for (i = 0; i < qdev->num_dbc; ++i) - cleanup_srcu_struct(&qdev->dbc[i].ch_lock); - cleanup_srcu_struct(&qdev->dev_lock); - pci_set_drvdata(qdev->pdev, NULL); - destroy_workqueue(qdev->cntl_wq); - destroy_workqueue(qdev->qts_wq); -} - static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id) { + struct device *dev = &pdev->dev; struct qaic_drm_device *qddev; struct qaic_device *qdev; - int i; + struct drm_device *drm; + int i, ret; - qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL); + qdev = devm_kzalloc(dev, sizeof(*qdev), GFP_KERNEL); if (!qdev) return NULL; qdev->dev_state = QAIC_OFFLINE; - if (id->device == PCI_DEV_AIC100) { + if (id->device == PCI_DEV_AIC080 || id->device == PCI_DEV_AIC100) { qdev->num_dbc = 16; - qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); + qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); if (!qdev->dbc) return NULL; } - qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0); - if (!qdev->cntl_wq) + qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm); + if (IS_ERR(qddev)) return NULL; - qdev->qts_wq = alloc_workqueue("qaic_ts", WQ_UNBOUND, 0); - if (!qdev->qts_wq) { - destroy_workqueue(qdev->cntl_wq); + drm = to_drm(qddev); + pci_set_drvdata(pdev, qdev); + + ret = drmm_mutex_init(drm, &qddev->users_mutex); + if (ret) + return NULL; + ret = drmm_add_action_or_reset(drm, qaicm_pci_release, NULL); + if (ret) + return NULL; + ret = drmm_mutex_init(drm, &qdev->cntl_mutex); + if (ret) + return NULL; + ret = drmm_mutex_init(drm, &qdev->bootlog_mutex); + if (ret) return NULL; - } - pci_set_drvdata(pdev, qdev); + qdev->cntl_wq = qaicm_wq_init(drm, "qaic_cntl"); + if (IS_ERR(qdev->cntl_wq)) + return NULL; + qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts"); + if (IS_ERR(qdev->qts_wq)) + return NULL; + + ret = qaicm_srcu_init(drm, &qdev->dev_lock); + if (ret) + return NULL; + + qdev->qddev = qddev; qdev->pdev = pdev; + qddev->qdev = qdev; - mutex_init(&qdev->cntl_mutex); INIT_LIST_HEAD(&qdev->cntl_xfer_list); - init_srcu_struct(&qdev->dev_lock); + INIT_LIST_HEAD(&qdev->bootlog); + INIT_LIST_HEAD(&qddev->users); for (i = 0; i < qdev->num_dbc; ++i) { spin_lock_init(&qdev->dbc[i].xfer_lock); qdev->dbc[i].qdev = qdev; qdev->dbc[i].id = i; INIT_LIST_HEAD(&qdev->dbc[i].xfer_list); - 
init_srcu_struct(&qdev->dbc[i].ch_lock); + ret = qaicm_srcu_init(drm, &qdev->dbc[i].ch_lock); + if (ret) + return NULL; init_waitqueue_head(&qdev->dbc[i].dbc_release); INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); } - qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm); - if (IS_ERR(qddev)) { - cleanup_qdev(qdev); - return NULL; - } - - drmm_mutex_init(to_drm(qddev), &qddev->users_mutex); - INIT_LIST_HEAD(&qddev->users); - qddev->qdev = qdev; - qdev->qddev = qddev; - return qdev; } @@ -391,9 +447,7 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) return ret; - ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX); - if (ret) - return ret; + dma_set_max_seg_size(&pdev->dev, UINT_MAX); qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]); if (IS_ERR(qdev->bar_0)) @@ -472,35 +526,28 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = init_pci(qdev, pdev); if (ret) - goto cleanup_qdev; + return ret; for (i = 0; i < qdev->num_dbc; ++i) qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i); mhi_irq = init_msi(qdev, pdev); - if (mhi_irq < 0) { - ret = mhi_irq; - goto cleanup_qdev; - } + if (mhi_irq < 0) + return mhi_irq; ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); if (ret) - goto cleanup_qdev; + return ret; qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq, qdev->single_msi); if (IS_ERR(qdev->mhi_cntrl)) { ret = PTR_ERR(qdev->mhi_cntrl); - goto cleanup_drm_dev; + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); + return ret; } return 0; - -cleanup_drm_dev: - qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); -cleanup_qdev: - cleanup_qdev(qdev); - return ret; } static void qaic_pci_remove(struct pci_dev *pdev) @@ -511,9 +558,8 @@ static void qaic_pci_remove(struct pci_dev *pdev) return; qaic_dev_reset_clean_local_state(qdev); - qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); qaic_mhi_free_controller(qdev->mhi_cntrl, link_up); - cleanup_qdev(qdev); + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); } static void qaic_pci_shutdown(struct pci_dev *pdev) @@ -561,6 +607,7 @@ static struct mhi_driver qaic_mhi_driver = { }; static const struct pci_device_id qaic_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC080), }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), }, { } }; @@ -597,12 +644,24 @@ static int __init qaic_init(void) goto free_pci; } + ret = sahara_register(); + if (ret) { + pr_debug("qaic: sahara_register failed %d\n", ret); + goto free_mhi; + } + ret = qaic_timesync_init(); if (ret) pr_debug("qaic: qaic_timesync_init failed %d\n", ret); + ret = qaic_bootlog_register(); + if (ret) + pr_debug("qaic: qaic_bootlog_register failed %d\n", ret); + return 0; +free_mhi: + mhi_driver_unregister(&qaic_mhi_driver); free_pci: pci_unregister_driver(&qaic_pci_driver); return ret; @@ -626,7 +685,9 @@ static void __exit qaic_exit(void) * reinitializing the link_up state after the cleanup is done. */ link_up = true; + qaic_bootlog_unregister(); qaic_timesync_deinit(); + sahara_unregister(); mhi_driver_unregister(&qaic_mhi_driver); pci_unregister_driver(&qaic_pci_driver); } diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c new file mode 100644 index 000000000000..21d58aed0deb --- /dev/null +++ b/drivers/accel/qaic/sahara.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ + +#include <linux/devcoredump.h> +#include <linux/firmware.h> +#include <linux/limits.h> +#include <linux/mhi.h> +#include <linux/minmax.h> +#include <linux/mod_devicetable.h> +#include <linux/overflow.h> +#include <linux/types.h> +#include <linux/vmalloc.h> +#include <linux/workqueue.h> + +#include "sahara.h" + +#define SAHARA_HELLO_CMD 0x1 /* Min protocol version 1.0 */ +#define SAHARA_HELLO_RESP_CMD 0x2 /* Min protocol version 1.0 */ +#define SAHARA_READ_DATA_CMD 0x3 /* Min protocol version 1.0 */ +#define SAHARA_END_OF_IMAGE_CMD 0x4 /* Min protocol version 1.0 */ +#define SAHARA_DONE_CMD 0x5 /* Min protocol version 1.0 */ +#define SAHARA_DONE_RESP_CMD 0x6 /* Min protocol version 1.0 */ +#define SAHARA_RESET_CMD 0x7 /* Min protocol version 1.0 */ +#define SAHARA_RESET_RESP_CMD 0x8 /* Min protocol version 1.0 */ +#define SAHARA_MEM_DEBUG_CMD 0x9 /* Min protocol version 2.0 */ +#define SAHARA_MEM_READ_CMD 0xa /* Min protocol version 2.0 */ +#define SAHARA_CMD_READY_CMD 0xb /* Min protocol version 2.1 */ +#define SAHARA_SWITCH_MODE_CMD 0xc /* Min protocol version 2.1 */ +#define SAHARA_EXECUTE_CMD 0xd /* Min protocol version 2.1 */ +#define SAHARA_EXECUTE_RESP_CMD 0xe /* Min protocol version 2.1 */ +#define SAHARA_EXECUTE_DATA_CMD 0xf /* Min protocol version 2.1 */ +#define SAHARA_MEM_DEBUG64_CMD 0x10 /* Min protocol version 2.5 */ +#define SAHARA_MEM_READ64_CMD 0x11 /* Min protocol version 2.5 */ +#define SAHARA_READ_DATA64_CMD 0x12 /* Min protocol version 2.8 */ +#define SAHARA_RESET_STATE_CMD 0x13 /* Min protocol version 2.9 */ +#define SAHARA_WRITE_DATA_CMD 0x14 /* Min protocol version 3.0 */ + +#define SAHARA_PACKET_MAX_SIZE 0xffffU /* MHI_MAX_MTU */ +#define SAHARA_TRANSFER_MAX_SIZE 0x80000 +#define SAHARA_READ_MAX_SIZE 0xfff0U /* Avoid unaligned requests */ +#define SAHARA_NUM_TX_BUF DIV_ROUND_UP(SAHARA_TRANSFER_MAX_SIZE,\ + SAHARA_PACKET_MAX_SIZE) +#define SAHARA_IMAGE_ID_NONE U32_MAX + +#define SAHARA_VERSION 2 +#define SAHARA_SUCCESS 0 +#define SAHARA_TABLE_ENTRY_STR_LEN 20 + +#define SAHARA_MODE_IMAGE_TX_PENDING 0x0 +#define SAHARA_MODE_IMAGE_TX_COMPLETE 0x1 +#define SAHARA_MODE_MEMORY_DEBUG 0x2 +#define SAHARA_MODE_COMMAND 0x3 + +#define SAHARA_HELLO_LENGTH 0x30 +#define SAHARA_READ_DATA_LENGTH 0x14 +#define SAHARA_END_OF_IMAGE_LENGTH 0x10 +#define SAHARA_DONE_LENGTH 0x8 +#define SAHARA_RESET_LENGTH 0x8 +#define SAHARA_MEM_DEBUG64_LENGTH 0x18 +#define SAHARA_MEM_READ64_LENGTH 0x18 + +struct sahara_packet { + __le32 cmd; + __le32 length; + + union { + struct { + __le32 version; + __le32 version_compat; + __le32 max_length; + __le32 mode; + } hello; + struct { + __le32 version; + __le32 version_compat; + __le32 status; + __le32 mode; + } hello_resp; + struct { + __le32 image; + __le32 offset; + __le32 length; + } read_data; + struct { + __le32 image; + __le32 status; + } end_of_image; + struct { + __le64 table_address; + __le64 table_length; + } memory_debug64; + struct { + __le64 memory_address; + __le64 memory_length; + } memory_read64; + }; +}; + +struct sahara_debug_table_entry64 { + __le64 type; + __le64 address; + __le64 length; + char description[SAHARA_TABLE_ENTRY_STR_LEN]; + char filename[SAHARA_TABLE_ENTRY_STR_LEN]; +}; + +struct sahara_dump_table_entry { + u64 type; + u64 address; + u64 length; + char description[SAHARA_TABLE_ENTRY_STR_LEN]; + char filename[SAHARA_TABLE_ENTRY_STR_LEN]; +}; + +#define SAHARA_DUMP_V1_MAGIC 0x1234567890abcdef +#define SAHARA_DUMP_V1_VER 1 +struct sahara_memory_dump_meta_v1 { + u64 magic; + u64 version; + u64 dump_size; + u64 
table_size; +}; + +/* + * Layout of crashdump provided to user via devcoredump + * +------------------------------------------+ + * | Crashdump Meta structure | + * | type: struct sahara_memory_dump_meta_v1 | + * +------------------------------------------+ + * | Crashdump Table | + * | type: array of struct | + * | sahara_dump_table_entry | + * | | + * | | + * +------------------------------------------+ + * | Crashdump | + * | | + * | | + * | | + * | | + * | | + * +------------------------------------------+ + * + * First is the metadata header. Userspace can use the magic number to verify + * the content type, and then check the version for the rest of the format. + * New versions should keep the magic number location/value, and version + * location, but increment the version value. + * + * For v1, the metadata lists the size of the entire dump (header + table + + * dump) and the size of the table. Then the dump image table, which describes + * the contents of the dump. Finally all the images are listed in order, with + * no deadspace in between. Userspace can use the sizes listed in the image + * table to reconstruct the individual images. + */ + +struct sahara_context { + struct sahara_packet *tx[SAHARA_NUM_TX_BUF]; + struct sahara_packet *rx; + struct work_struct fw_work; + struct work_struct dump_work; + struct mhi_device *mhi_dev; + const char **image_table; + u32 table_size; + u32 active_image_id; + const struct firmware *firmware; + u64 dump_table_address; + u64 dump_table_length; + size_t rx_size; + size_t rx_size_requested; + void *mem_dump; + size_t mem_dump_sz; + struct sahara_dump_table_entry *dump_image; + u64 dump_image_offset; + void *mem_dump_freespace; + u64 dump_images_left; + bool is_mem_dump_mode; +}; + +static const char *aic100_image_table[] = { + [1] = "qcom/aic100/fw1.bin", + [2] = "qcom/aic100/fw2.bin", + [4] = "qcom/aic100/fw4.bin", + [5] = "qcom/aic100/fw5.bin", + [6] = "qcom/aic100/fw6.bin", + [8] = "qcom/aic100/fw8.bin", + [9] = "qcom/aic100/fw9.bin", + [10] = "qcom/aic100/fw10.bin", +}; + +static int sahara_find_image(struct sahara_context *context, u32 image_id) +{ + int ret; + + if (image_id == context->active_image_id) + return 0; + + if (context->active_image_id != SAHARA_IMAGE_ID_NONE) { + dev_err(&context->mhi_dev->dev, "image id %d is not valid as %d is active\n", + image_id, context->active_image_id); + return -EINVAL; + } + + if (image_id >= context->table_size || !context->image_table[image_id]) { + dev_err(&context->mhi_dev->dev, "request for unknown image: %d\n", image_id); + return -EINVAL; + } + + /* + * This image might be optional. The device may continue without it. + * Only the device knows. Suppress error messages that could suggest an + * a problem when we were actually able to continue. 
+ */ + ret = firmware_request_nowarn(&context->firmware, + context->image_table[image_id], + &context->mhi_dev->dev); + if (ret) { + dev_dbg(&context->mhi_dev->dev, "request for image id %d / file %s failed %d\n", + image_id, context->image_table[image_id], ret); + return ret; + } + + context->active_image_id = image_id; + + return 0; +} + +static void sahara_release_image(struct sahara_context *context) +{ + if (context->active_image_id != SAHARA_IMAGE_ID_NONE) + release_firmware(context->firmware); + context->active_image_id = SAHARA_IMAGE_ID_NONE; +} + +static void sahara_send_reset(struct sahara_context *context) +{ + int ret; + + context->is_mem_dump_mode = false; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH); + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_RESET_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send reset response %d\n", ret); +} + +static void sahara_hello(struct sahara_context *context) +{ + int ret; + + dev_dbg(&context->mhi_dev->dev, + "HELLO cmd received. length:%d version:%d version_compat:%d max_length:%d mode:%d\n", + le32_to_cpu(context->rx->length), + le32_to_cpu(context->rx->hello.version), + le32_to_cpu(context->rx->hello.version_compat), + le32_to_cpu(context->rx->hello.max_length), + le32_to_cpu(context->rx->hello.mode)); + + if (le32_to_cpu(context->rx->length) != SAHARA_HELLO_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed hello packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + if (le32_to_cpu(context->rx->hello.version) != SAHARA_VERSION) { + dev_err(&context->mhi_dev->dev, "Unsupported hello packet - version %d\n", + le32_to_cpu(context->rx->hello.version)); + return; + } + + if (le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_PENDING && + le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE && + le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG) { + dev_err(&context->mhi_dev->dev, "Unsupported hello packet - mode %d\n", + le32_to_cpu(context->rx->hello.mode)); + return; + } + + context->tx[0]->cmd = cpu_to_le32(SAHARA_HELLO_RESP_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_HELLO_LENGTH); + context->tx[0]->hello_resp.version = cpu_to_le32(SAHARA_VERSION); + context->tx[0]->hello_resp.version_compat = cpu_to_le32(SAHARA_VERSION); + context->tx[0]->hello_resp.status = cpu_to_le32(SAHARA_SUCCESS); + context->tx[0]->hello_resp.mode = context->rx->hello_resp.mode; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_HELLO_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret); +} + +static void sahara_read_data(struct sahara_context *context) +{ + u32 image_id, data_offset, data_len, pkt_data_len; + int ret; + int i; + + dev_dbg(&context->mhi_dev->dev, + "READ_DATA cmd received. 
length:%d image:%d offset:%d data_length:%d\n", + le32_to_cpu(context->rx->length), + le32_to_cpu(context->rx->read_data.image), + le32_to_cpu(context->rx->read_data.offset), + le32_to_cpu(context->rx->read_data.length)); + + if (le32_to_cpu(context->rx->length) != SAHARA_READ_DATA_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed read_data packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + + image_id = le32_to_cpu(context->rx->read_data.image); + data_offset = le32_to_cpu(context->rx->read_data.offset); + data_len = le32_to_cpu(context->rx->read_data.length); + + ret = sahara_find_image(context, image_id); + if (ret) { + sahara_send_reset(context); + return; + } + + /* + * Image is released when the device is done with it via + * SAHARA_END_OF_IMAGE_CMD. sahara_send_reset() will either cause the + * device to retry the operation with a modification, or decide to be + * done with the image and trigger SAHARA_END_OF_IMAGE_CMD. + * release_image() is called from SAHARA_END_OF_IMAGE_CMD. processing + * and is not needed here on error. + */ + + if (data_len > SAHARA_TRANSFER_MAX_SIZE) { + dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n", + data_len, SAHARA_TRANSFER_MAX_SIZE); + sahara_send_reset(context); + return; + } + + if (data_offset >= context->firmware->size) { + dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data offset %d exceeds file size %zu\n", + data_offset, context->firmware->size); + sahara_send_reset(context); + return; + } + + if (size_add(data_offset, data_len) > context->firmware->size) { + dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data offset %d and length %d exceeds file size %zu\n", + data_offset, data_len, context->firmware->size); + sahara_send_reset(context); + return; + } + + for (i = 0; i < SAHARA_NUM_TX_BUF && data_len; ++i) { + pkt_data_len = min(data_len, SAHARA_PACKET_MAX_SIZE); + + memcpy(context->tx[i], &context->firmware->data[data_offset], pkt_data_len); + + data_offset += pkt_data_len; + data_len -= pkt_data_len; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, + context->tx[i], pkt_data_len, + !data_len ? MHI_EOT : MHI_CHAIN); + if (ret) { + dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", + ret); + return; + } + } +} + +static void sahara_end_of_image(struct sahara_context *context) +{ + int ret; + + dev_dbg(&context->mhi_dev->dev, + "END_OF_IMAGE cmd received. 
length:%d image:%d status:%d\n", + le32_to_cpu(context->rx->length), + le32_to_cpu(context->rx->end_of_image.image), + le32_to_cpu(context->rx->end_of_image.status)); + + if (le32_to_cpu(context->rx->length) != SAHARA_END_OF_IMAGE_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed end_of_image packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + + if (context->active_image_id != SAHARA_IMAGE_ID_NONE && + le32_to_cpu(context->rx->end_of_image.image) != context->active_image_id) { + dev_err(&context->mhi_dev->dev, "Malformed end_of_image packet - image %d is not the active image\n", + le32_to_cpu(context->rx->end_of_image.image)); + return; + } + + sahara_release_image(context); + + if (le32_to_cpu(context->rx->end_of_image.status)) + return; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_DONE_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_DONE_LENGTH); + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_DONE_LENGTH, MHI_EOT); + if (ret) + dev_dbg(&context->mhi_dev->dev, "Unable to send done response %d\n", ret); +} + +static void sahara_memory_debug64(struct sahara_context *context) +{ + int ret; + + dev_dbg(&context->mhi_dev->dev, + "MEMORY DEBUG64 cmd received. length:%d table_address:%#llx table_length:%#llx\n", + le32_to_cpu(context->rx->length), + le64_to_cpu(context->rx->memory_debug64.table_address), + le64_to_cpu(context->rx->memory_debug64.table_length)); + + if (le32_to_cpu(context->rx->length) != SAHARA_MEM_DEBUG64_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + + context->dump_table_address = le64_to_cpu(context->rx->memory_debug64.table_address); + context->dump_table_length = le64_to_cpu(context->rx->memory_debug64.table_length); + + if (context->dump_table_length % sizeof(struct sahara_debug_table_entry64) != 0 || + !context->dump_table_length) { + dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - table length %lld\n", + context->dump_table_length); + return; + } + + /* + * From this point, the protocol flips. We make memory_read requests to + * the device, and the device responds with the raw data. If the device + * has an error, it will send an End of Image command. First we need to + * request the memory dump table so that we know where all the pieces + * of the dump are that we can consume. + */ + + context->is_mem_dump_mode = true; + + /* + * Assume that the table is smaller than our MTU so that we can read it + * in one shot. The spec does not put an upper limit on the table, but + * no known device will exceed this. + */ + if (context->dump_table_length > SAHARA_PACKET_MAX_SIZE) { + dev_err(&context->mhi_dev->dev, "Memory dump table length %lld exceeds supported size. 
Discarding dump\n", + context->dump_table_length); + sahara_send_reset(context); + return; + } + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_table_address); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(context->dump_table_length); + + context->rx_size_requested = context->dump_table_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send read for dump table %d\n", ret); +} + +static void sahara_processing(struct work_struct *work) +{ + struct sahara_context *context = container_of(work, struct sahara_context, fw_work); + int ret; + + switch (le32_to_cpu(context->rx->cmd)) { + case SAHARA_HELLO_CMD: + sahara_hello(context); + break; + case SAHARA_READ_DATA_CMD: + sahara_read_data(context); + break; + case SAHARA_END_OF_IMAGE_CMD: + sahara_end_of_image(context); + break; + case SAHARA_DONE_RESP_CMD: + /* Intentional do nothing as we don't need to exit an app */ + break; + case SAHARA_RESET_RESP_CMD: + /* Intentional do nothing as we don't need to exit an app */ + break; + case SAHARA_MEM_DEBUG64_CMD: + sahara_memory_debug64(context); + break; + default: + dev_err(&context->mhi_dev->dev, "Unknown command %d\n", + le32_to_cpu(context->rx->cmd)); + break; + } + + ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx, + SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret); +} + +static void sahara_parse_dump_table(struct sahara_context *context) +{ + struct sahara_dump_table_entry *image_out_table; + struct sahara_debug_table_entry64 *dev_table; + struct sahara_memory_dump_meta_v1 *dump_meta; + u64 table_nents; + u64 dump_length; + int ret; + u64 i; + + table_nents = context->dump_table_length / sizeof(*dev_table); + context->dump_images_left = table_nents; + dump_length = 0; + + dev_table = (struct sahara_debug_table_entry64 *)(context->rx); + for (i = 0; i < table_nents; ++i) { + /* Do not trust the device, ensure the strings are terminated */ + dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0; + dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0; + + dump_length = size_add(dump_length, le64_to_cpu(dev_table[i].length)); + if (dump_length == SIZE_MAX) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + dev_dbg(&context->mhi_dev->dev, + "Memory dump table entry %lld type: %lld address: %#llx length: %#llx description: \"%s\" filename \"%s\"\n", + i, + le64_to_cpu(dev_table[i].type), + le64_to_cpu(dev_table[i].address), + le64_to_cpu(dev_table[i].length), + dev_table[i].description, + dev_table[i].filename); + } + + dump_length = size_add(dump_length, sizeof(*dump_meta)); + if (dump_length == SIZE_MAX) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + dump_length = size_add(dump_length, size_mul(sizeof(*image_out_table), table_nents)); + if (dump_length == SIZE_MAX) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + context->mem_dump_sz = dump_length; + context->mem_dump = vzalloc(dump_length); + if (!context->mem_dump) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + /* Populate the dump metadata and table for userspace */ + dump_meta = context->mem_dump; + dump_meta->magic = SAHARA_DUMP_V1_MAGIC; + 
dump_meta->version = SAHARA_DUMP_V1_VER; + dump_meta->dump_size = dump_length; + dump_meta->table_size = context->dump_table_length; + + image_out_table = context->mem_dump + sizeof(*dump_meta); + for (i = 0; i < table_nents; ++i) { + image_out_table[i].type = le64_to_cpu(dev_table[i].type); + image_out_table[i].address = le64_to_cpu(dev_table[i].address); + image_out_table[i].length = le64_to_cpu(dev_table[i].length); + strscpy(image_out_table[i].description, dev_table[i].description, + SAHARA_TABLE_ENTRY_STR_LEN); + strscpy(image_out_table[i].filename, + dev_table[i].filename, + SAHARA_TABLE_ENTRY_STR_LEN); + } + + context->mem_dump_freespace = &image_out_table[i]; + + /* Done parsing the table, switch to image dump mode */ + context->dump_table_length = 0; + + /* Request the first chunk of the first image */ + context->dump_image = &image_out_table[0]; + dump_length = min(context->dump_image->length, SAHARA_READ_MAX_SIZE); + /* Avoid requesting EOI sized data so that we can identify errors */ + if (dump_length == SAHARA_END_OF_IMAGE_LENGTH) + dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2; + + context->dump_image_offset = dump_length; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_image->address); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length); + + context->rx_size_requested = dump_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send read for dump content %d\n", ret); +} + +static void sahara_parse_dump_image(struct sahara_context *context) +{ + u64 dump_length; + int ret; + + memcpy(context->mem_dump_freespace, context->rx, context->rx_size); + context->mem_dump_freespace += context->rx_size; + + if (context->dump_image_offset >= context->dump_image->length) { + /* Need to move to next image */ + context->dump_image++; + context->dump_images_left--; + context->dump_image_offset = 0; + + if (!context->dump_images_left) { + /* Dump done */ + dev_coredumpv(context->mhi_dev->mhi_cntrl->cntrl_dev, + context->mem_dump, + context->mem_dump_sz, + GFP_KERNEL); + context->mem_dump = NULL; + sahara_send_reset(context); + return; + } + } + + /* Get next image chunk */ + dump_length = context->dump_image->length - context->dump_image_offset; + dump_length = min(dump_length, SAHARA_READ_MAX_SIZE); + /* Avoid requesting EOI sized data so that we can identify errors */ + if (dump_length == SAHARA_END_OF_IMAGE_LENGTH) + dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = + cpu_to_le64(context->dump_image->address + context->dump_image_offset); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length); + + context->dump_image_offset += dump_length; + context->rx_size_requested = dump_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, + "Unable to send read for dump content %d\n", ret); +} + +static void sahara_dump_processing(struct work_struct *work) +{ + struct sahara_context *context = container_of(work, struct sahara_context, dump_work); + int ret; + + /* + * We should get the expected raw data, but if 
the device has an error + * it is supposed to send EOI with an error code. + */ + if (context->rx_size != context->rx_size_requested && + context->rx_size != SAHARA_END_OF_IMAGE_LENGTH) { + dev_err(&context->mhi_dev->dev, + "Unexpected response to read_data. Expected size: %#zx got: %#zx\n", + context->rx_size_requested, + context->rx_size); + goto error; + } + + if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH && + le32_to_cpu(context->rx->cmd) == SAHARA_END_OF_IMAGE_CMD) { + dev_err(&context->mhi_dev->dev, + "Unexpected EOI response to read_data. Status: %d\n", + le32_to_cpu(context->rx->end_of_image.status)); + goto error; + } + + if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH && + le32_to_cpu(context->rx->cmd) != SAHARA_END_OF_IMAGE_CMD) { + dev_err(&context->mhi_dev->dev, + "Invalid EOI response to read_data. CMD: %d\n", + le32_to_cpu(context->rx->cmd)); + goto error; + } + + /* + * Need to know if we received the dump table, or part of a dump image. + * Since we get raw data, we cannot tell from the data itself. Instead, + * we use the stored dump_table_length, which we zero after we read and + * process the entire table. + */ + if (context->dump_table_length) + sahara_parse_dump_table(context); + else + sahara_parse_dump_image(context); + + ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx, + SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret); + + return; + +error: + vfree(context->mem_dump); + context->mem_dump = NULL; + sahara_send_reset(context); +} + +static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct sahara_context *context; + int ret; + int i; + + context = devm_kzalloc(&mhi_dev->dev, sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->rx = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL); + if (!context->rx) + return -ENOMEM; + + /* + * AIC100 defines SAHARA_TRANSFER_MAX_SIZE as the largest value it + * will request for READ_DATA. This is larger than + * SAHARA_PACKET_MAX_SIZE, and we need 9x SAHARA_PACKET_MAX_SIZE to + * cover SAHARA_TRANSFER_MAX_SIZE. When the remote side issues a + * READ_DATA, it requires a transfer of the exact size requested. We + * can use MHI_CHAIN to link multiple buffers into a single transfer + * but the remote side will not consume the buffers until it sees an + * EOT, thus we need to allocate enough buffers to put in the tx fifo + * to cover an entire READ_DATA request of the max size. 
+ */ + for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) { + context->tx[i] = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL); + if (!context->tx[i]) + return -ENOMEM; + } + + context->mhi_dev = mhi_dev; + INIT_WORK(&context->fw_work, sahara_processing); + INIT_WORK(&context->dump_work, sahara_dump_processing); + context->image_table = aic100_image_table; + context->table_size = ARRAY_SIZE(aic100_image_table); + context->active_image_id = SAHARA_IMAGE_ID_NONE; + dev_set_drvdata(&mhi_dev->dev, context); + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, context->rx, SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) { + mhi_unprepare_from_transfer(mhi_dev); + return ret; + } + + return 0; +} + +static void sahara_mhi_remove(struct mhi_device *mhi_dev) +{ + struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); + + cancel_work_sync(&context->fw_work); + cancel_work_sync(&context->dump_work); + vfree(context->mem_dump); + sahara_release_image(context); + mhi_unprepare_from_transfer(mhi_dev); +} + +static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ +} + +static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); + + if (!mhi_result->transaction_status) { + context->rx_size = mhi_result->bytes_xferd; + if (context->is_mem_dump_mode) + schedule_work(&context->dump_work); + else + schedule_work(&context->fw_work); + } + +} + +static const struct mhi_device_id sahara_mhi_match_table[] = { + { .chan = "QAIC_SAHARA", }, + {}, +}; + +static struct mhi_driver sahara_mhi_driver = { + .id_table = sahara_mhi_match_table, + .remove = sahara_mhi_remove, + .probe = sahara_mhi_probe, + .ul_xfer_cb = sahara_mhi_ul_xfer_cb, + .dl_xfer_cb = sahara_mhi_dl_xfer_cb, + .driver = { + .name = "sahara", + }, +}; + +int sahara_register(void) +{ + return mhi_driver_register(&sahara_mhi_driver); +} + +void sahara_unregister(void) +{ + mhi_driver_unregister(&sahara_mhi_driver); +} diff --git a/drivers/accel/qaic/sahara.h b/drivers/accel/qaic/sahara.h new file mode 100644 index 000000000000..640208acc0d1 --- /dev/null +++ b/drivers/accel/qaic/sahara.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#ifndef __SAHARA_H__ +#define __SAHARA_H__ + +int sahara_register(void); +void sahara_unregister(void); +#endif /* __SAHARA_H__ */ |
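A note on the qaic_validate_req() change above: slice offsets and sizes arrive from userspace as u64 values, so the old bound check of "offset + size > total_size" could wrap around and accept a slice that extends far past the buffer object. The patch validates against the gem object's actual size and uses check_add_overflow() so the wrap is caught. A minimal userspace sketch of the failure mode, built on the __builtin_add_overflow() primitive that the kernel helper is based on (illustrative only, not part of the patch):

/*
 * Illustrative only. Shows how "offset + size > total_size" wraps for
 * userspace-controlled u64 values, which is what the switch to
 * check_add_overflow() in qaic_validate_req() guards against.
 */
#include <stdint.h>
#include <stdio.h>

static int slice_in_bounds(uint64_t offset, uint64_t size, uint64_t total_size)
{
	uint64_t end;

	if (__builtin_add_overflow(offset, size, &end))
		return 0;		/* offset + size wrapped past U64_MAX */
	return end <= total_size;
}

int main(void)
{
	uint64_t offset = UINT64_MAX - 4, size = 8, total_size = 4096;

	/* The unchecked sum wraps to 3 and would pass "sum > total_size". */
	printf("wrapped sum: %llu\n", (unsigned long long)(offset + size));
	printf("accepted: %d\n", slice_in_bounds(offset, size, total_size));
	return 0;
}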
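The new per-DBC "queued" debugfs file is backed by qaic_data_get_fifo_info(), which reads the request FIFO head and tail registers (REQHP_OFF and REQTP_OFF); queued_show() then reports ring occupancy, treating an all-ones readback of either register as nothing queued. The wrap-around case is the only subtle part. A small illustration with made-up ring size and register values (illustrative only):

/* Illustrative only: the occupancy math used by the "queued" debugfs file. */
#include <stdint.h>
#include <stdio.h>

static uint32_t fifo_queued(uint32_t nelem, uint32_t head, uint32_t tail)
{
	return head > tail ? nelem - head + tail : tail - head;
}

int main(void)
{
	/* No wrap: 10 requests between head and tail. */
	printf("%u\n", fifo_queued(4096, 100, 110));
	/* Tail wrapped past the end of the ring: still 16 outstanding. */
	printf("%u\n", fifo_queued(4096, 4090, 10));
	return 0;
}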
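For the buffer pool sized by SAHARA_NUM_TX_BUF and allocated in sahara_mhi_probe(): with SAHARA_TRANSFER_MAX_SIZE = 0x80000 (524288 bytes) and SAHARA_PACKET_MAX_SIZE = 0xffff (65535 bytes), DIV_ROUND_UP(524288, 65535) = 9, since 8 * 65535 = 524280 falls just short of a full transfer. Nine preallocated TX buffers therefore cover one maximal READ_DATA request when the chunks are chained with MHI_CHAIN and terminated with MHI_EOT, as the comment above the allocation loop describes.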
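The crashdump layout documented in sahara.c (metadata header, image table, then the images back to back with no deadspace) is what lands in the devcoredump blob once sahara_parse_dump_image() finishes. A minimal userspace reader is sketched below; the struct layouts mirror the v1 format in sahara.c, while the input path, naming, and error handling are the example's own (the dump would typically be read from a devcoredump data node such as /sys/class/devcoredump/devcdN/data):

/*
 * Illustrative only: parses a v1 Sahara crashdump as laid out by
 * sahara_parse_dump_table()/sahara_parse_dump_image(). Struct layouts mirror
 * sahara.c; everything else (file handling, output) is example scaffolding.
 */
#include <stdint.h>
#include <stdio.h>

#define SAHARA_DUMP_V1_MAGIC 0x1234567890abcdefULL
#define SAHARA_DUMP_V1_VER 1

struct dump_meta_v1 {
	uint64_t magic;
	uint64_t version;
	uint64_t dump_size;
	uint64_t table_size;
};

struct dump_table_entry {
	uint64_t type;
	uint64_t address;
	uint64_t length;
	char description[20];
	char filename[20];
};

int main(int argc, char **argv)
{
	struct dump_table_entry ent;
	struct dump_meta_v1 meta;
	uint64_t nents, offset, i;
	FILE *f;

	f = fopen(argc > 1 ? argv[1] : "devcd.bin", "rb");
	if (!f || fread(&meta, sizeof(meta), 1, f) != 1)
		return 1;
	if (meta.magic != SAHARA_DUMP_V1_MAGIC || meta.version != SAHARA_DUMP_V1_VER)
		return 1;

	/* Both the device table and this table use 64-byte entries. */
	nents = meta.table_size / sizeof(ent);
	/* Images follow the header and table back to back, in table order. */
	offset = sizeof(meta) + nents * sizeof(ent);
	for (i = 0; i < nents; i++) {
		if (fread(&ent, sizeof(ent), 1, f) != 1)
			return 1;
		printf("%s: %llu bytes at dump offset %llu\n", ent.filename,
		       (unsigned long long)ent.length, (unsigned long long)offset);
		offset += ent.length;
	}
	fclose(f);
	return 0;
}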