Diffstat (limited to 'drivers/accel')
178 files changed, 46571 insertions, 11867 deletions
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig index c437206aa3f1..bdf48ccafcf2 100644 --- a/drivers/accel/Kconfig +++ b/drivers/accel/Kconfig @@ -24,7 +24,11 @@ menuconfig DRM_ACCEL different device files, called accel/accel* (in /dev, sysfs and debugfs). +source "drivers/accel/amdxdna/Kconfig" +source "drivers/accel/ethosu/Kconfig" source "drivers/accel/habanalabs/Kconfig" source "drivers/accel/ivpu/Kconfig" +source "drivers/accel/qaic/Kconfig" +source "drivers/accel/rocket/Kconfig" endif diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile index 07aa77aed1c8..1d3a7251b950 100644 --- a/drivers/accel/Makefile +++ b/drivers/accel/Makefile @@ -1,4 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += habanalabs/ -obj-y += ivpu/ +obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/ +obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) += ethosu/ +obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/ +obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/ +obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/ +obj-$(CONFIG_DRM_ACCEL_ROCKET) += rocket/
\ No newline at end of file diff --git a/drivers/accel/amdxdna/Kconfig b/drivers/accel/amdxdna/Kconfig new file mode 100644 index 000000000000..f39d7a87296c --- /dev/null +++ b/drivers/accel/amdxdna/Kconfig @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_AMDXDNA + tristate "AMD AI Engine" + depends on AMD_IOMMU + depends on DRM_ACCEL + depends on PCI && HAS_IOMEM + depends on X86_64 + select DRM_SCHED + select DRM_GEM_SHMEM_HELPER + select FW_LOADER + select HMM_MIRROR + help + Choose this option to enable support for the NPU integrated into + AMD client CPUs, such as the AMD Ryzen AI 300 series. The AMD NPU + can be used to accelerate machine learning applications. + + If "M" is selected, the driver module will be named amdxdna. diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile new file mode 100644 index 000000000000..6344aaf523fa --- /dev/null +++ b/drivers/accel/amdxdna/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0-only + +amdxdna-y := \ + aie2_ctx.o \ + aie2_error.o \ + aie2_message.o \ + aie2_pci.o \ + aie2_pm.o \ + aie2_psp.o \ + aie2_smu.o \ + aie2_solver.o \ + amdxdna_ctx.o \ + amdxdna_gem.o \ + amdxdna_mailbox.o \ + amdxdna_mailbox_helper.o \ + amdxdna_pci_drv.o \ + amdxdna_pm.o \ + amdxdna_sysfs.o \ + amdxdna_ubuf.o \ + npu1_regs.o \ + npu2_regs.o \ + npu4_regs.o \ + npu5_regs.o \ + npu6_regs.o +obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO new file mode 100644 index 000000000000..0e4bbebeaedf --- /dev/null +++ b/drivers/accel/amdxdna/TODO @@ -0,0 +1 @@ +- Add debugfs support diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c new file mode 100644 index 000000000000..42d876a427c5 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -0,0 +1,1079 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/drm_syncobj.h> +#include <linux/hmm.h> +#include <linux/types.h> +#include <linux/xarray.h> +#include <trace/events/amdxdna.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "aie2_solver.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" + +static bool force_cmdlist; +module_param(force_cmdlist, bool, 0600); +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); + +#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ + +static void aie2_job_release(struct kref *ref) +{ + struct amdxdna_sched_job *job; + + job = container_of(ref, struct amdxdna_sched_job, refcnt); + amdxdna_sched_job_cleanup(job); + atomic64_inc(&job->hwctx->job_free_cnt); + wake_up(&job->hwctx->priv->job_free_wq); + if (job->out_fence) + dma_fence_put(job->out_fence); + kfree(job); +} + +static void aie2_job_put(struct amdxdna_sched_job *job) +{ + kref_put(&job->refcnt, aie2_job_release); +} + +static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx) +{ + hwctx->old_status = hwctx->status; + hwctx->status = HWCTX_STAT_STOP; +} + +static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx) +{ + hwctx->status = hwctx->old_status; +} + +/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ +static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, + struct drm_sched_job *bad_job) +{ + drm_sched_stop(&hwctx->priv->sched, bad_job); + aie2_destroy_context(xdna->dev_handle, hwctx); +} + +static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_gem_obj *heap = hwctx->priv->heap; + int ret; + + ret = aie2_create_context(xdna->dev_handle, hwctx); + if (ret) { + XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); + goto out; + } + + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, + heap->mem.userptr, heap->mem.size); + if (ret) { + XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); + goto out; + } + + if (hwctx->status != HWCTX_STAT_READY) { + XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); + goto out; + } + + ret = aie2_config_cu(hwctx, NULL); + if (ret) { + XDNA_ERR(xdna, "Config cu failed, ret %d", ret); + goto out; + } + +out: + drm_sched_start(&hwctx->priv->sched, 0); + XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); + return ret; +} + +static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) +{ + struct dma_fence *fence, *out_fence = NULL; + int ret; + + fence = drm_syncobj_fence_get(hwctx->priv->syncobj); + if (!fence) + return NULL; + + ret = dma_fence_chain_find_seqno(&fence, seq); + if (ret) + goto out; + + out_fence = dma_fence_get(dma_fence_chain_contained(fence)); + +out: + dma_fence_put(fence); + return out_fence; +} + +static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) +{ + struct dma_fence *fence; + + fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); + if (!fence) + return; + + /* Wait up to 2 seconds for fw to finish all pending requests */ + dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000)); + dma_fence_put(fence); +} + +static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + aie2_hwctx_wait_for_idle(hwctx); + 
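+ /* + * The wait above drains in-flight commands; aie2_hwctx_stop() below + * stops the DRM scheduler and destroys the firmware context, which + * aborts anything still outstanding. + */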
aie2_hwctx_stop(xdna, hwctx, NULL); + aie2_hwctx_status_shift_stop(hwctx); + + return 0; +} + +void aie2_hwctx_suspend(struct amdxdna_client *client) +{ + struct amdxdna_dev *xdna = client->xdna; + + /* + * Command timeout is unlikely. But if it happens, it doesn't + * break the system. aie2_hwctx_stop() will destroy the mailbox + * and abort all commands. + */ + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb); +} + +static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + aie2_hwctx_status_restore(hwctx); + return aie2_hwctx_restart(xdna, hwctx); +} + +int aie2_hwctx_resume(struct amdxdna_client *client) +{ + /* + * The resume path cannot guarantee that the mailbox channel is + * regenerated. If it is not, submitting a message to the channel + * will return an error. + */ + return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb); +} + +static void +aie2_sched_notify(struct amdxdna_sched_job *job) +{ + struct dma_fence *fence = job->fence; + + trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); + + amdxdna_pm_suspend_put(job->hwctx->client->xdna); + job->hwctx->priv->completed++; + dma_fence_signal(fence); + + up(&job->hwctx->priv->job_sem); + job->job_done = true; + mmput_async(job->mm); + aie2_job_put(job); +} + +static int +aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) +{ + struct amdxdna_sched_job *job = handle; + struct amdxdna_gem_obj *cmd_abo; + int ret = 0; + u32 status; + + cmd_abo = job->cmd_bo; + + if (unlikely(job->job_timeout)) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + ret = -EINVAL; + goto out; + } + + if (unlikely(!data) || unlikely(size != sizeof(u32))) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + ret = -EINVAL; + goto out; + } + + status = readl(data); + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); + if (status == AIE2_STATUS_SUCCESS) + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); + else + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); + +out: + aie2_sched_notify(job); + return ret; +} + +static int +aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size) +{ + struct amdxdna_sched_job *job = handle; + int ret = 0; + + if (unlikely(!data)) + goto out; + + if (unlikely(size != sizeof(u32))) { + ret = -EINVAL; + goto out; + } + + job->drv_cmd->result = readl(data); + +out: + aie2_sched_notify(job); + return ret; +} + +static int +aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) +{ + struct amdxdna_sched_job *job = handle; + struct amdxdna_gem_obj *cmd_abo; + struct amdxdna_dev *xdna; + u32 fail_cmd_status; + u32 fail_cmd_idx; + u32 cmd_status; + int ret = 0; + + cmd_abo = job->cmd_bo; + + if (unlikely(job->job_timeout)) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + ret = -EINVAL; + goto out; + } + + if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + ret = -EINVAL; + goto out; + } + + cmd_status = readl(data + offsetof(struct cmd_chain_resp, status)); + xdna = job->hwctx->client->xdna; + XDNA_DBG(xdna, "Status 0x%x", cmd_status); + if (cmd_status == AIE2_STATUS_SUCCESS) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); + goto out; + } + + /* Slow path to handle error, read from ringbuf on BAR */ + fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp,
fail_cmd_idx)); + fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status)); + XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", + fail_cmd_idx, fail_cmd_status); + + if (fail_cmd_status == AIE2_STATUS_SUCCESS) { + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + ret = -EINVAL; + goto out; + } + amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); + + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { + struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); + + cc->error_index = fail_cmd_idx; + if (cc->error_index >= cc->command_count) + cc->error_index = 0; + } +out: + aie2_sched_notify(job); + return ret; +} + +static struct dma_fence * +aie2_sched_job_run(struct drm_sched_job *sched_job) +{ + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct amdxdna_hwctx *hwctx = job->hwctx; + struct dma_fence *fence; + int ret; + + if (!mmget_not_zero(job->mm)) + return ERR_PTR(-ESRCH); + + kref_get(&job->refcnt); + fence = dma_fence_get(job->fence); + + if (job->drv_cmd) { + switch (job->drv_cmd->opcode) { + case SYNC_DEBUG_BO: + ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); + break; + case ATTACH_DEBUG_BO: + ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); + break; + default: + ret = -EINVAL; + break; + } + goto out; + } + + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); + + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) + ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); + else if (force_cmdlist) + ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); + else + ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); + +out: + if (ret) { + dma_fence_put(job->fence); + aie2_job_put(job); + mmput(job->mm); + fence = ERR_PTR(ret); + } + trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); + + return fence; +} + +static void aie2_sched_job_free(struct drm_sched_job *sched_job) +{ + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); + struct amdxdna_hwctx *hwctx = job->hwctx; + + trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); + if (!job->job_done) + up(&hwctx->priv->job_sem); + + drm_sched_job_cleanup(sched_job); + aie2_job_put(job); +} + +static enum drm_gpu_sched_stat +aie2_sched_job_timedout(struct drm_sched_job *sched_job) +{ + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); + struct amdxdna_hwctx *hwctx = job->hwctx; + struct amdxdna_dev *xdna; + + xdna = hwctx->client->xdna; + trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); + job->job_timeout = true; + mutex_lock(&xdna->dev_lock); + aie2_hwctx_stop(xdna, hwctx, sched_job); + + aie2_hwctx_restart(xdna, hwctx); + mutex_unlock(&xdna->dev_lock); + + return DRM_GPU_SCHED_STAT_RESET; +} + +static const struct drm_sched_backend_ops sched_ops = { + .run_job = aie2_sched_job_run, + .free_job = aie2_sched_job_free, + .timedout_job = aie2_sched_job_timedout, +}; + +static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_dev_hdl *ndev; + int start, end, first, last; + u32 width = 1, entries = 0; + int i; + + if (!hwctx->num_tiles) { + XDNA_ERR(xdna, "Number of tiles is zero"); + return -EINVAL; + } + + ndev = xdna->dev_handle; + if (unlikely(!ndev->metadata.core.row_count)) { + XDNA_WARN(xdna, "Core tile row count is zero"); + return -EINVAL; + } + + hwctx->num_col = hwctx->num_tiles / 
ndev->metadata.core.row_count; + if (!hwctx->num_col || hwctx->num_col > ndev->total_col) { + XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col); + return -EINVAL; + } + + if (ndev->priv->col_align == COL_ALIGN_NATURE) + width = hwctx->num_col; + + /* + * In the range [start, end], find the columns that are multiples of width. + * 'first' is the first such column, + * 'last' is the last such column, + * 'entries' is the total number of such columns. + */ + start = xdna->dev_info->first_col; + end = ndev->total_col - hwctx->num_col; + if (start > 0 && end == 0) { + XDNA_DBG(xdna, "Force start from col 0"); + start = 0; + } + first = start + (width - start % width) % width; + last = end - end % width; + if (last >= first) + entries = (last - first) / width + 1; + XDNA_DBG(xdna, "start %d end %d first %d last %d", + start, end, first, last); + + if (unlikely(!entries)) { + XDNA_ERR(xdna, "Start %d end %d width %d", + start, end, width); + return -EINVAL; + } + + hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL); + if (!hwctx->col_list) + return -ENOMEM; + + hwctx->col_list_len = entries; + hwctx->col_list[0] = first; + for (i = 1; i < entries; i++) + hwctx->col_list[i] = hwctx->col_list[i - 1] + width; + + print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list, + entries * sizeof(*hwctx->col_list), false); + return 0; +} + +static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct alloc_requests *xrs_req; + int ret; + + xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); + if (!xrs_req) + return -ENOMEM; + + xrs_req->cdo.start_cols = hwctx->col_list; + xrs_req->cdo.cols_len = hwctx->col_list_len; + xrs_req->cdo.ncols = hwctx->num_col; + xrs_req->cdo.qos_cap.opc = hwctx->max_opc; + + xrs_req->rqos.gops = hwctx->qos.gops; + xrs_req->rqos.fps = hwctx->qos.fps; + xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; + xrs_req->rqos.latency = hwctx->qos.latency; + xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; + xrs_req->rqos.priority = hwctx->qos.priority; + + xrs_req->rid = (uintptr_t)hwctx; + + ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); + if (ret) + XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); + + kfree(xrs_req); + return ret; +} + +static void aie2_release_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + int ret; + + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); + if (ret) + XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); +} + +static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct drm_file *filp = hwctx->client->filp; + struct drm_syncobj *syncobj; + u32 hdl; + int ret; + + hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; + + ret = drm_syncobj_create(&syncobj, 0, NULL); + if (ret) { + XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); + return ret; + } + ret = drm_syncobj_get_handle(filp, syncobj, &hdl); + if (ret) { + drm_syncobj_put(syncobj); + XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); + return ret; + } + hwctx->priv->syncobj = syncobj; + hwctx->syncobj_hdl = hdl; + + return 0; +} + +static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) +{ + /* + * The syncobj_hdl is owned by user space and will be cleaned up + * separately.
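+ * Dropping the reference below does not invalidate a handle that + * user space still holds; the handle keeps its own reference until + * the DRM file is closed.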
+ */ + drm_syncobj_put(hwctx->priv->syncobj); +} + +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + const struct drm_sched_init_args args = { + .ops = &sched_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .credit_limit = HWCTX_MAX_CMDS, + .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT), + .name = "amdxdna_js", + .dev = xdna->ddev.dev, + }; + struct drm_gpu_scheduler *sched; + struct amdxdna_hwctx_priv *priv; + struct amdxdna_gem_obj *heap; + int i, ret; + + priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + hwctx->priv = priv; + + mutex_lock(&client->mm_lock); + heap = client->dev_heap; + if (!heap) { + XDNA_ERR(xdna, "The client dev heap object does not exist"); + mutex_unlock(&client->mm_lock); + ret = -ENOENT; + goto free_priv; + } + drm_gem_object_get(to_gobj(heap)); + mutex_unlock(&client->mm_lock); + priv->heap = heap; + sema_init(&priv->job_sem, HWCTX_MAX_CMDS); + + ret = amdxdna_gem_pin(heap); + if (ret) { + XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret); + goto put_heap; + } + + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { + struct amdxdna_gem_obj *abo; + struct amdxdna_drm_create_bo args = { + .flags = 0, + .type = AMDXDNA_BO_DEV, + .vaddr = 0, + .size = MAX_CHAIN_CMDBUF_SIZE, + }; + + abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp); + if (IS_ERR(abo)) { + ret = PTR_ERR(abo); + goto free_cmd_bufs; + } + + XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", + i, abo->mem.dev_addr, abo->mem.size); + priv->cmd_buf[i] = abo; + } + + sched = &priv->sched; + mutex_init(&priv->io_lock); + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&priv->io_lock); + fs_reclaim_release(GFP_KERNEL); + + ret = drm_sched_init(sched, &args); + if (ret) { + XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); + goto free_cmd_bufs; + } + + ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (ret) { + XDNA_ERR(xdna, "Failed to init sched entity.
ret %d", ret); + goto free_sched; + } + + ret = aie2_hwctx_col_list(hwctx); + if (ret) { + XDNA_ERR(xdna, "Create col list failed, ret %d", ret); + goto free_entity; + } + + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto free_col_list; + + ret = aie2_alloc_resource(hwctx); + if (ret) { + XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); + goto suspend_put; + } + + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, + heap->mem.userptr, heap->mem.size); + if (ret) { + XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); + goto release_resource; + } + + ret = aie2_ctx_syncobj_create(hwctx); + if (ret) { + XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); + goto release_resource; + } + amdxdna_pm_suspend_put(xdna); + + hwctx->status = HWCTX_STAT_INIT; + init_waitqueue_head(&priv->job_free_wq); + + XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); + + return 0; + +release_resource: + aie2_release_resource(hwctx); +suspend_put: + amdxdna_pm_suspend_put(xdna); +free_col_list: + kfree(hwctx->col_list); +free_entity: + drm_sched_entity_destroy(&priv->entity); +free_sched: + drm_sched_fini(&priv->sched); +free_cmd_bufs: + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { + if (!priv->cmd_buf[i]) + continue; + drm_gem_object_put(to_gobj(priv->cmd_buf[i])); + } + amdxdna_gem_unpin(heap); +put_heap: + drm_gem_object_put(to_gobj(heap)); +free_priv: + kfree(priv); + return ret; +} + +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna; + int idx; + + xdna = hwctx->client->xdna; + + XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); + aie2_hwctx_wait_for_idle(hwctx); + + /* Request fw to destroy hwctx and cancel the rest pending requests */ + aie2_release_resource(hwctx); + + mutex_unlock(&xdna->dev_lock); + drm_sched_entity_destroy(&hwctx->priv->entity); + + /* Wait for all submitted jobs to be completed or canceled */ + wait_event(hwctx->priv->job_free_wq, + atomic64_read(&hwctx->job_submit_cnt) == + atomic64_read(&hwctx->job_free_cnt)); + mutex_lock(&xdna->dev_lock); + + drm_sched_fini(&hwctx->priv->sched); + aie2_ctx_syncobj_destroy(hwctx); + + for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) + drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); + amdxdna_gem_unpin(hwctx->priv->heap); + drm_gem_object_put(to_gobj(hwctx->priv->heap)); + + mutex_destroy(&hwctx->priv->io_lock); + kfree(hwctx->col_list); + kfree(hwctx->priv); + kfree(hwctx->cus); +} + +static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size) +{ + struct amdxdna_hwctx *hwctx = handle; + + amdxdna_pm_suspend_put(hwctx->client->xdna); + return 0; +} + +static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size) +{ + struct amdxdna_hwctx_param_config_cu *config = buf; + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 total_size; + int ret; + + XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); + if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) + return -EINVAL; + + if (hwctx->status != HWCTX_STAT_INIT) { + XDNA_ERR(xdna, "Not support re-config CU"); + return -EINVAL; + } + + if (!config->num_cus) { + XDNA_ERR(xdna, "Number of CU is zero"); + return -EINVAL; + } + + total_size = struct_size(config, cu_configs, config->num_cus); + if (total_size > size) { + XDNA_ERR(xdna, "CU config larger than size"); + return -EINVAL; + } + + hwctx->cus = kmemdup(config, total_size, GFP_KERNEL); + if (!hwctx->cus) + return -ENOMEM; + + ret = amdxdna_pm_resume_get(xdna); + if 
(ret) + goto free_cus; + + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler); + if (ret) { + XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret); + goto pm_suspend_put; + } + + wmb(); /* To avoid locking in command submit when check status */ + hwctx->status = HWCTX_STAT_READY; + + return 0; + +pm_suspend_put: + amdxdna_pm_suspend_put(xdna); +free_cus: + kfree(hwctx->cus); + hwctx->cus = NULL; + return ret; +} + +static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq) +{ + struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq); + + if (!out_fence) { + XDNA_ERR(hwctx->client->xdna, "Failed to get fence"); + return; + } + + dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(out_fence); +} + +static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl, + bool attach) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_drv_cmd cmd = { 0 }; + struct amdxdna_gem_obj *abo; + u64 seq; + int ret; + + abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV); + if (!abo) { + XDNA_ERR(xdna, "Get bo %d failed", bo_hdl); + return -EINVAL; + } + + if (attach) { + if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) { + ret = -EBUSY; + goto put_obj; + } + cmd.opcode = ATTACH_DEBUG_BO; + } else { + if (abo->assigned_hwctx != hwctx->id) { + ret = -EINVAL; + goto put_obj; + } + cmd.opcode = DETACH_DEBUG_BO; + } + + ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, + &bo_hdl, 1, hwctx->id, &seq); + if (ret) { + XDNA_ERR(xdna, "Submit command failed"); + goto put_obj; + } + + aie2_cmd_wait(hwctx, seq); + if (cmd.result) { + XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); + goto put_obj; + } + + if (attach) + abo->assigned_hwctx = hwctx->id; + else + abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; + + XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name); + +put_obj: + amdxdna_gem_put_obj(abo); + return ret; +} + +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + switch (type) { + case DRM_AMDXDNA_HWCTX_CONFIG_CU: + return aie2_hwctx_cu_config(hwctx, buf, size); + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true); + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: + return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false); + default: + XDNA_DBG(xdna, "Not supported type %d", type); + return -EOPNOTSUPP; + } +} + +int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_drv_cmd cmd = { 0 }; + u64 seq; + int ret; + + cmd.opcode = SYNC_DEBUG_BO; + ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, + &debug_bo_hdl, 1, hwctx->id, &seq); + if (ret) { + XDNA_ERR(xdna, "Submit command failed"); + return ret; + } + + aie2_cmd_wait(hwctx, seq); + if (cmd.result) { + XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); + return -EINVAL; + } + + return 0; +} + +static int aie2_populate_range(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct amdxdna_umap *mapp; + unsigned long timeout; + struct mm_struct *mm; + bool found; + int ret; + + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); +again: + found = false; + down_write(&xdna->notifier_lock); + 
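+ /* + * Look for a mapping that the MMU notifier has marked invalid; if + * none is found, all ranges are still valid and no re-fault is + * needed. + */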
list_for_each_entry(mapp, &abo->mem.umap_list, node) { + if (mapp->invalid) { + found = true; + break; + } + } + + if (!found) { + abo->mem.map_invalid = false; + up_write(&xdna->notifier_lock); + return 0; + } + kref_get(&mapp->refcnt); + up_write(&xdna->notifier_lock); + + XDNA_DBG(xdna, "populate memory range %lx %lx", + mapp->vma->vm_start, mapp->vma->vm_end); + mm = mapp->notifier.mm; + if (!mmget_not_zero(mm)) { + amdxdna_umap_put(mapp); + return -EFAULT; + } + + mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier); + mmap_read_lock(mm); + ret = hmm_range_fault(&mapp->range); + mmap_read_unlock(mm); + if (ret) { + if (time_after(jiffies, timeout)) { + ret = -ETIME; + goto put_mm; + } + + if (ret == -EBUSY) { + amdxdna_umap_put(mapp); + goto again; + } + + goto put_mm; + } + + down_write(&xdna->notifier_lock); + if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) { + up_write(&xdna->notifier_lock); + amdxdna_umap_put(mapp); + goto again; + } + mapp->invalid = false; + up_write(&xdna->notifier_lock); + amdxdna_umap_put(mapp); + goto again; + +put_mm: + amdxdna_umap_put(mapp); + mmput(mm); + return ret; +} + +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct ww_acquire_ctx acquire_ctx; + struct dma_fence_chain *chain; + struct amdxdna_gem_obj *abo; + unsigned long timeout = 0; + int ret, i; + + ret = down_interruptible(&hwctx->priv->job_sem); + if (ret) { + XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); + return ret; + } + + chain = dma_fence_chain_alloc(); + if (!chain) { + XDNA_ERR(xdna, "Alloc fence chain failed"); + ret = -ENOMEM; + goto up_sem; + } + + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx, + hwctx->client->filp->client_id); + if (ret) { + XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); + goto free_chain; + } + + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto cleanup_job; + +retry: + ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + if (ret) { + XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); + goto suspend_put; + } + + for (i = 0; i < job->bo_cnt; i++) { + ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); + if (ret) { + XDNA_WARN(xdna, "Failed to reserve fences %d", ret); + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + goto suspend_put; + } + } + + down_read(&xdna->notifier_lock); + for (i = 0; i < job->bo_cnt; i++) { + abo = to_xdna_obj(job->bos[i]); + if (abo->mem.map_invalid) { + up_read(&xdna->notifier_lock); + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + if (!timeout) { + timeout = jiffies + + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + } else if (time_after(jiffies, timeout)) { + ret = -ETIME; + goto suspend_put; + } + + ret = aie2_populate_range(abo); + if (ret) + goto suspend_put; + goto retry; + } + } + + mutex_lock(&hwctx->priv->io_lock); + drm_sched_job_arm(&job->base); + job->out_fence = dma_fence_get(&job->base.s_fence->finished); + for (i = 0; i < job->bo_cnt; i++) + dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE); + job->seq = hwctx->priv->seq++; + kref_get(&job->refcnt); + drm_sched_entity_push_job(&job->base); + + *seq = job->seq; + drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq); + mutex_unlock(&hwctx->priv->io_lock); + + up_read(&xdna->notifier_lock); + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); + + aie2_job_put(job); + 
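+ /* + * job_submit_cnt below is balanced by job_free_cnt in + * aie2_job_release(); aie2_hwctx_fini() waits for the two counters + * to match before tearing the context down. + */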
atomic64_inc(&hwctx->job_submit_cnt); + + return 0; + +suspend_put: + amdxdna_pm_suspend_put(xdna); +cleanup_job: + drm_sched_job_cleanup(&job->base); +free_chain: + dma_fence_chain_free(chain); +up_sem: + up(&hwctx->priv->job_sem); + job->job_done = true; + return ret; +} + +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, + unsigned long cur_seq) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct drm_gem_object *gobj = to_gobj(abo); + long ret; + + ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, + true, MAX_SCHEDULE_TIMEOUT); + if (!ret || ret == -ERESTARTSYS) + XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); +} diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c new file mode 100644 index 000000000000..d452008ec4f4 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_error.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/drm_cache.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/kernel.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "amdxdna_error.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +struct async_event { + struct amdxdna_dev_hdl *ndev; + struct async_event_msg_resp resp; + struct workqueue_struct *wq; + struct work_struct work; + u8 *buf; + dma_addr_t addr; + u32 size; +}; + +struct async_events { + struct workqueue_struct *wq; + u8 *buf; + dma_addr_t addr; + u32 size; + u32 event_cnt; + struct async_event event[] __counted_by(event_cnt); +}; + +/* + * The enum, struct and lookup tables below are ported from the XAIE + * util header file. + * + * This data is defined by the AIE device and is used to decode error + * messages from the device.
+ */ + +enum aie_module_type { + AIE_MEM_MOD = 0, + AIE_CORE_MOD, + AIE_PL_MOD, + AIE_UNKNOWN_MOD, +}; + +enum aie_error_category { + AIE_ERROR_SATURATION = 0, + AIE_ERROR_FP, + AIE_ERROR_STREAM, + AIE_ERROR_ACCESS, + AIE_ERROR_BUS, + AIE_ERROR_INSTRUCTION, + AIE_ERROR_ECC, + AIE_ERROR_LOCK, + AIE_ERROR_DMA, + AIE_ERROR_MEM_PARITY, + /* Unknown is not from XAIE, added for better category */ + AIE_ERROR_UNKNOWN, +}; + +/* Don't pack, unless XAIE side changed */ +struct aie_error { + __u8 row; + __u8 col; + __u32 mod_type; + __u8 event_id; +}; + +struct aie_err_info { + u32 err_cnt; + u32 ret_code; + u32 rsvd; + struct aie_error payload[] __counted_by(err_cnt); +}; + +struct aie_event_category { + u8 event_id; + enum aie_error_category category; +}; + +#define EVENT_CATEGORY(id, cat) { id, cat } +static const struct aie_event_category aie_ml_mem_event_cat[] = { + EVENT_CATEGORY(88U, AIE_ERROR_ECC), + EVENT_CATEGORY(90U, AIE_ERROR_ECC), + EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY), + EVENT_CATEGORY(97U, AIE_ERROR_DMA), + EVENT_CATEGORY(98U, AIE_ERROR_DMA), + EVENT_CATEGORY(99U, AIE_ERROR_DMA), + EVENT_CATEGORY(100U, AIE_ERROR_DMA), + EVENT_CATEGORY(101U, AIE_ERROR_LOCK), +}; + +static const struct aie_event_category aie_ml_core_event_cat[] = { + EVENT_CATEGORY(55U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(56U, AIE_ERROR_STREAM), + EVENT_CATEGORY(57U, AIE_ERROR_STREAM), + EVENT_CATEGORY(58U, AIE_ERROR_BUS), + EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION), + EVENT_CATEGORY(60U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(62U, AIE_ERROR_ECC), + EVENT_CATEGORY(64U, AIE_ERROR_ECC), + EVENT_CATEGORY(65U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(66U, AIE_ERROR_ACCESS), + EVENT_CATEGORY(67U, AIE_ERROR_LOCK), + EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION), + EVENT_CATEGORY(71U, AIE_ERROR_STREAM), + EVENT_CATEGORY(72U, AIE_ERROR_BUS), +}; + +static const struct aie_event_category aie_ml_mem_tile_event_cat[] = { + EVENT_CATEGORY(130U, AIE_ERROR_ECC), + EVENT_CATEGORY(132U, AIE_ERROR_ECC), + EVENT_CATEGORY(133U, AIE_ERROR_DMA), + EVENT_CATEGORY(134U, AIE_ERROR_DMA), + EVENT_CATEGORY(135U, AIE_ERROR_STREAM), + EVENT_CATEGORY(136U, AIE_ERROR_STREAM), + EVENT_CATEGORY(137U, AIE_ERROR_STREAM), + EVENT_CATEGORY(138U, AIE_ERROR_BUS), + EVENT_CATEGORY(139U, AIE_ERROR_LOCK), +}; + +static const struct aie_event_category aie_ml_shim_tile_event_cat[] = { + EVENT_CATEGORY(64U, AIE_ERROR_BUS), + EVENT_CATEGORY(65U, AIE_ERROR_STREAM), + EVENT_CATEGORY(66U, AIE_ERROR_STREAM), + EVENT_CATEGORY(67U, AIE_ERROR_BUS), + EVENT_CATEGORY(68U, AIE_ERROR_BUS), + EVENT_CATEGORY(69U, AIE_ERROR_BUS), + EVENT_CATEGORY(70U, AIE_ERROR_BUS), + EVENT_CATEGORY(71U, AIE_ERROR_BUS), + EVENT_CATEGORY(72U, AIE_ERROR_DMA), + EVENT_CATEGORY(73U, AIE_ERROR_DMA), + EVENT_CATEGORY(74U, AIE_ERROR_LOCK), +}; + +static const enum amdxdna_error_num aie_cat_err_num_map[] = { + [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION, + [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP, + [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM, + [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS, + [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS, + [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION, + [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC, + [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK, + [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA, + 
[AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY, + [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN, +}; + +static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1); + +static const enum amdxdna_error_module aie_err_mod_map[] = { + [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY, + [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE, + [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL, + [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN, +}; + +static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1); + +static enum aie_error_category +aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type) +{ + const struct aie_event_category *lut; + int num_entry; + int i; + + switch (mod_type) { + case AIE_PL_MOD: + lut = aie_ml_shim_tile_event_cat; + num_entry = ARRAY_SIZE(aie_ml_shim_tile_event_cat); + break; + case AIE_CORE_MOD: + lut = aie_ml_core_event_cat; + num_entry = ARRAY_SIZE(aie_ml_core_event_cat); + break; + case AIE_MEM_MOD: + if (row == 1) { + lut = aie_ml_mem_tile_event_cat; + num_entry = ARRAY_SIZE(aie_ml_mem_tile_event_cat); + } else { + lut = aie_ml_mem_event_cat; + num_entry = ARRAY_SIZE(aie_ml_mem_event_cat); + } + break; + default: + return AIE_ERROR_UNKNOWN; + } + + for (i = 0; i < num_entry; i++) { + if (event_id != lut[i].event_id) + continue; + + if (lut[i].category > AIE_ERROR_UNKNOWN) + return AIE_ERROR_UNKNOWN; + + return lut[i].category; + } + + return AIE_ERROR_UNKNOWN; +} + +static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) +{ + struct aie_error *errs = err_info; + enum amdxdna_error_module err_mod; + enum aie_error_category aie_err; + enum amdxdna_error_num err_num; + struct aie_error *last_err; + + last_err = &errs[num_err - 1]; + if (last_err->mod_type >= AIE_UNKNOWN_MOD) { + err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN]; + err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD]; + } else { + aie_err = aie_get_error_category(last_err->row, + last_err->event_id, + last_err->mod_type); + err_num = aie_cat_err_num_map[aie_err]; + err_mod = aie_err_mod_map[last_err->mod_type]; + } + + ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod); + ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real()); + ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col); +} + +static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) +{ + struct aie_error *errs = err_info; + u32 err_col = 0; /* assume that AIE has less than 32 columns */ + int i; + + /* Get err column bitmap */ + for (i = 0; i < num_err; i++) { + struct aie_error *err = &errs[i]; + enum aie_error_category cat; + + cat = aie_get_error_category(err->row, err->event_id, err->mod_type); + XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d", + err->row, err->col, err->mod_type, + err->event_id, cat); + + if (err->col >= 32) { + XDNA_WARN(ndev->xdna, "Invalid column number"); + break; + } + + err_col |= (1 << err->col); + } + + return err_col; +} + +static int aie2_error_async_cb(void *handle, void __iomem *data, size_t size) +{ + struct async_event *e = handle; + + if (data) { + e->resp.type = readl(data + offsetof(struct async_event_msg_resp, type)); + wmb(); /* Update status in the end, so that no lock for here */ + e->resp.status = readl(data + offsetof(struct async_event_msg_resp, status)); + } + queue_work(e->wq, &e->work); + return 0; +} + +static int aie2_error_event_send(struct async_event *e) +{ + drm_clflush_virt_range(e->buf, 
e->size); /* device can access */ + return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e, + aie2_error_async_cb); +} + +static void aie2_error_worker(struct work_struct *err_work) +{ + struct aie_err_info *info; + struct amdxdna_dev *xdna; + struct async_event *e; + u32 max_err; + u32 err_col; + + e = container_of(err_work, struct async_event, work); + + xdna = e->ndev->xdna; + + if (e->resp.status == MAX_AIE2_STATUS_CODE) + return; + + e->resp.status = MAX_AIE2_STATUS_CODE; + + print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4, + e->buf, 0x100, false); + + info = (struct aie_err_info *)e->buf; + XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code); + + max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error); + if (unlikely(info->err_cnt > max_err)) { + WARN_ONCE(1, "Error count too large %d\n", info->err_cnt); + return; + } + err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt); + if (!err_col) { + XDNA_WARN(xdna, "Did not get error column"); + return; + } + + mutex_lock(&xdna->dev_lock); + aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt); + + /* Re-sent this event to firmware */ + if (aie2_error_event_send(e)) + XDNA_WARN(xdna, "Unable to register async event"); + mutex_unlock(&xdna->dev_lock); +} + +void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + struct async_events *events; + + events = ndev->async_events; + + mutex_unlock(&xdna->dev_lock); + destroy_workqueue(events->wq); + mutex_lock(&xdna->dev_lock); + + dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, + events->addr, DMA_FROM_DEVICE); + kfree(events); +} + +int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + u32 total_col = ndev->total_col; + u32 total_size = ASYNC_BUF_SIZE * total_col; + struct async_events *events; + int i, ret; + + events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL); + if (!events) + return -ENOMEM; + + events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!events->buf) { + ret = -ENOMEM; + goto free_events; + } + events->size = total_size; + events->event_cnt = total_col; + + events->wq = alloc_ordered_workqueue("async_wq", 0); + if (!events->wq) { + ret = -ENOMEM; + goto free_buf; + } + + for (i = 0; i < events->event_cnt; i++) { + struct async_event *e = &events->event[i]; + u32 offset = i * ASYNC_BUF_SIZE; + + e->ndev = ndev; + e->wq = events->wq; + e->buf = &events->buf[offset]; + e->addr = events->addr + offset; + e->size = ASYNC_BUF_SIZE; + e->resp.status = MAX_AIE2_STATUS_CODE; + INIT_WORK(&e->work, aie2_error_worker); + + ret = aie2_error_event_send(e); + if (ret) + goto free_wq; + } + + ndev->async_events = events; + + XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x", + events->event_cnt, events->size); + return 0; + +free_wq: + destroy_workqueue(events->wq); +free_buf: + dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, + events->addr, DMA_FROM_DEVICE); +free_events: + kfree(events); + return ret; +} + +int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + args->num_element = 1; + args->element_size = sizeof(ndev->last_async_err); + if (copy_to_user(u64_to_user_ptr(args->buffer), + &ndev->last_async_err, 
args->element_size)) + return -EFAULT; + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c new file mode 100644 index 000000000000..d493bb1c3360 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_message.c @@ -0,0 +1,1074 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_cache.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/bitfield.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/xarray.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_mailbox_helper.h" +#include "amdxdna_pci_drv.h" + +#define DECLARE_AIE2_MSG(name, op) \ + DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE) + +#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops) + +static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, + struct xdna_mailbox_msg *msg) +{ + struct amdxdna_dev *xdna = ndev->xdna; + struct xdna_notify *hdl = msg->handle; + int ret; + + if (!ndev->mgmt_chann) + return -ENODEV; + + drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock)); + ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); + if (ret == -ETIME) { + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; + } + + if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) { + XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", + msg->opcode, *hdl->data); + ret = -EINVAL; + } + + return ret; +} + +int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev) +{ + DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND); + + return aie2_send_mgmt_msg_wait(ndev, &msg); +} + +int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) +{ + DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME); + + return aie2_send_mgmt_msg_wait(ndev, &msg); +} + +int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value) +{ + DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG); + int ret; + + req.type = type; + req.value = value; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret); + return ret; + } + + return 0; +} + +int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value) +{ + DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG); + int ret; + + req.type = type; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret); + return ret; + } + + *value = resp.value; + return 0; +} + +int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid) +{ + DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID); + + req.pasid = pasid; + + return aie2_send_mgmt_msg_wait(ndev, &msg); +} + +int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version) +{ + DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed", + resp.major, resp.minor); + + version->major = resp.major; + version->minor = resp.minor; + + return 0; +} + +int aie2_query_aie_metadata(struct 
amdxdna_dev_hdl *ndev, struct aie_metadata *metadata) +{ + DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO); + int ret; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + metadata->size = resp.info.size; + metadata->cols = resp.info.cols; + metadata->rows = resp.info.rows; + + metadata->version.major = resp.info.major; + metadata->version.minor = resp.info.minor; + + metadata->core.row_count = resp.info.core_rows; + metadata->core.row_start = resp.info.core_row_start; + metadata->core.dma_channel_count = resp.info.core_dma_channels; + metadata->core.lock_count = resp.info.core_locks; + metadata->core.event_reg_count = resp.info.core_events; + + metadata->mem.row_count = resp.info.mem_rows; + metadata->mem.row_start = resp.info.mem_row_start; + metadata->mem.dma_channel_count = resp.info.mem_dma_channels; + metadata->mem.lock_count = resp.info.mem_locks; + metadata->mem.event_reg_count = resp.info.mem_events; + + metadata->shim.row_count = resp.info.shim_rows; + metadata->shim.row_start = resp.info.shim_row_start; + metadata->shim.dma_channel_count = resp.info.shim_dma_channels; + metadata->shim.lock_count = resp.info.shim_locks; + metadata->shim.event_reg_count = resp.info.shim_events; + + return 0; +} + +int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, + struct amdxdna_fw_ver *fw_ver) +{ + DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION); + int ret; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + fw_ver->major = resp.major; + fw_ver->minor = resp.minor; + fw_ver->sub = resp.sub; + fw_ver->build = resp.build; + + return 0; +} + +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + struct xdna_mailbox_chann_res x2i; + struct xdna_mailbox_chann_res i2x; + struct cq_pair *cq_pair; + u32 intr_reg; + int ret; + + req.aie_type = 1; + req.start_col = hwctx->start_col; + req.num_col = hwctx->num_col; + req.num_cq_pairs_requested = 1; + req.pasid = hwctx->client->pasid; + req.context_priority = 2; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + hwctx->fw_ctx_id = resp.context_id; + WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + + if (ndev->force_preempt_enabled) { + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id); + if (ret) { + XDNA_ERR(xdna, "failed to enable force preempt %d", ret); + return ret; + } + } + + cq_pair = &resp.cq_pair[0]; + x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); + x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); + x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr); + x2i.rb_size = cq_pair->x2i_q.buf_size; + + i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr); + i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr); + i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr); + i2x.rb_size = cq_pair->i2x_q.buf_size; + + ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id); + if (ret == -EINVAL) { + XDNA_ERR(xdna, "not able to create channel"); + goto out_destroy_context; + } + + intr_reg = i2x.mb_head_ptr_reg + 4; + hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, + intr_reg, ret); + if (!hwctx->priv->mbox_chann) { + XDNA_ERR(xdna, "not able to create channel"); + ret = -EINVAL; + goto out_destroy_context; + } + ndev->hwctx_num++; + + XDNA_DBG(xdna, "%s 
mailbox channel irq: %d, msix_id: %d", + hwctx->name, ret, resp.msix_id); + XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name, + hwctx->fw_ctx_id, hwctx->client->pasid); + + return 0; + +out_destroy_context: + aie2_destroy_context(ndev, hwctx); + return ret; +} + +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + if (hwctx->fw_ctx_id == -1) + return 0; + + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); + + req.context_id = hwctx->fw_ctx_id; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret); + + xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); + XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name, + hwctx->fw_ctx_id); + hwctx->priv->mbox_chann = NULL; + hwctx->fw_ctx_id = -1; + ndev->hwctx_num--; + + return ret; +} + +int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size) +{ + DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + req.context_id = context_id; + req.buf_addr = addr; + req.buf_size = size; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx", + context_id, addr, size); + + return 0; +} + +static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg) +{ + u32 *bitmap = arg; + + *bitmap |= GENMASK(hwctx->start_col + hwctx->num_col - 1, hwctx->start_col); + + return 0; +} + +int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, + u32 size, u32 *cols_filled) +{ + DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS); + struct amdxdna_dev *xdna = ndev->xdna; + struct amdxdna_client *client; + dma_addr_t dma_addr; + u32 aie_bitmap = 0; + u8 *buff_addr; + int ret; + + buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!buff_addr) + return -ENOMEM; + + /* Go through each hardware context and mark the AIE columns that are active */ + list_for_each_entry(client, &xdna->client_list, node) + amdxdna_hwctx_walk(client, &aie_bitmap, amdxdna_hwctx_col_map); + + *cols_filled = 0; + req.dump_buff_addr = dma_addr; + req.dump_buff_size = size; + req.num_cols = hweight32(aie_bitmap); + req.aie_bitmap = aie_bitmap; + + drm_clflush_virt_range(buff_addr, size); /* device can access */ + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(xdna, "Error during NPU query, status %d", ret); + goto fail; + } + + XDNA_DBG(xdna, "Query NPU status completed"); + + if (size < resp.size) { + ret = -EINVAL; + XDNA_ERR(xdna, "Bad buffer size. Available: %u. 
Needs: %u", size, resp.size); + goto fail; + } + + if (copy_to_user(buf, buff_addr, resp.size)) { + ret = -EFAULT; + XDNA_ERR(xdna, "Failed to copy NPU status to user space"); + goto fail; + } + + *cols_filled = aie_bitmap; + +fail: + dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE); + return ret; +} + +int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, + char __user *buf, u32 size, + struct amdxdna_drm_query_telemetry_header *header) +{ + DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY); + struct amdxdna_dev *xdna = ndev->xdna; + dma_addr_t dma_addr; + u8 *addr; + int ret; + + if (header->type >= MAX_TELEMETRY_TYPE) + return -EINVAL; + + addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + req.buf_addr = dma_addr; + req.buf_size = size; + req.type = header->type; + + drm_clflush_virt_range(addr, size); /* device can access */ + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(xdna, "Query telemetry failed, status %d", ret); + goto free_buf; + } + + if (size < resp.size) { + ret = -EINVAL; + XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size); + goto free_buf; + } + + if (copy_to_user(buf, addr, resp.size)) { + ret = -EFAULT; + XDNA_ERR(xdna, "Failed to copy telemetry to user space"); + goto free_buf; + } + + header->major = resp.major; + header->minor = resp.minor; + +free_buf: + dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE); + return ret; +} + +int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, + void *handle, int (*cb)(void*, void __iomem *, size_t)) +{ + struct async_event_msg_req req = { 0 }; + struct xdna_mailbox_msg msg = { + .send_data = (u8 *)&req, + .send_size = sizeof(req), + .handle = handle, + .opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG, + .notify_cb = cb, + }; + + req.buf_addr = addr; + req.buf_size = size; + + XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size); + return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT); +} + +int aie2_config_cu(struct amdxdna_hwctx *hwctx, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 shift = xdna->dev_info->dev_mem_buf_shift; + struct config_cu_req req = { 0 }; + struct xdna_mailbox_msg msg; + struct drm_gem_object *gobj; + struct amdxdna_gem_obj *abo; + int i; + + if (!chann) + return -ENODEV; + + if (hwctx->cus->num_cus > MAX_NUM_CUS) { + XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS); + return -EINVAL; + } + + for (i = 0; i < hwctx->cus->num_cus; i++) { + struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i]; + + if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad))) + return -EINVAL; + + gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo); + if (!gobj) { + XDNA_ERR(xdna, "Lookup GEM object failed"); + return -EINVAL; + } + abo = to_xdna_obj(gobj); + + if (abo->type != AMDXDNA_BO_DEV) { + drm_gem_object_put(gobj); + XDNA_ERR(xdna, "Invalid BO type"); + return -EINVAL; + } + + req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR, + abo->mem.dev_addr >> shift); + req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func); + XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i, + abo->mem.dev_addr, req.cfgs[i]); + drm_gem_object_put(gobj); + } + req.num_cus = hwctx->cus->num_cus; + + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + msg.handle = 
hwctx; + msg.opcode = MSG_OP_CONFIG_CU; + msg.notify_cb = notify_cb; + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); +} + +static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op) +{ + struct execute_buffer_req *cu_req = req; + u32 cmd_len; + void *cmd; + + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (cmd_len > sizeof(cu_req->payload)) + return -EINVAL; + + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (cu_req->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memcpy(cu_req->payload, cmd, cmd_len); + + *size = sizeof(*cu_req); + *msg_op = MSG_OP_EXECUTE_BUFFER_CF; + return 0; +} + +static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op) +{ + struct exec_dpu_req *dpu_req = req; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload)) + return -EINVAL; + + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (dpu_req->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + dpu_req->inst_buf_addr = sn->buffer; + dpu_req->inst_size = sn->buffer_size; + dpu_req->inst_prop_cnt = sn->prop_count; + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn)); + + *size = sizeof(*dpu_req); + *msg_op = MSG_OP_EXEC_DPU; + return 0; +} + +static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) +{ + struct cmd_chain_req *chain_req = req; + + chain_req->buf_addr = slot_addr; + chain_req->buf_size = size; + chain_req->count = cmd_cnt; +} + +static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) +{ + struct cmd_chain_npu_req *npu_chain_req = req; + + npu_chain_req->flags = 0; + npu_chain_req->reserved = 0; + npu_chain_req->buf_addr = slot_addr; + npu_chain_req->buf_size = size; + npu_chain_req->count = cmd_cnt; +} + +static int +aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_execbuf_cf *cf_slot = slot; + u32 cmd_len; + void *cmd; + + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (*size < sizeof(*cf_slot) + cmd_len) + return -EINVAL; + + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (cf_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + cf_slot->arg_cnt = cmd_len / sizeof(u32); + memcpy(cf_slot->args, cmd, cmd_len); + /* Accurate slot size to hint firmware to do necessary copy */ + *size = sizeof(*cf_slot) + cmd_len; + return 0; +} + +static int +aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_dpu *dpu_slot = slot; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + u32 arg_sz; + + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*sn); + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*dpu_slot) + arg_sz) + return -EINVAL; + + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (dpu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + dpu_slot->inst_buf_addr = sn->buffer; + dpu_slot->inst_size = sn->buffer_size; + dpu_slot->inst_prop_cnt = sn->prop_count; + dpu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(dpu_slot->args, sn->prop_args, arg_sz); + + /* Accurate slot size to hint firmware to do necessary copy */ + *size = sizeof(*dpu_slot) + arg_sz; + return 0; +} + +static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + return -EOPNOTSUPP; +} + +static u32 
aie2_get_chain_msg_op(u32 cmd_op) +{ + switch (cmd_op) { + case ERT_START_CU: + return MSG_OP_CHAIN_EXEC_BUFFER_CF; + case ERT_START_NPU: + return MSG_OP_CHAIN_EXEC_DPU; + default: + break; + } + + return MSG_OP_MAX_OPCODE; +} + +static struct aie2_exec_msg_ops legacy_exec_message_ops = { + .init_cu_req = aie2_init_exec_cu_req, + .init_dpu_req = aie2_init_exec_dpu_req, + .init_chain_req = aie2_init_exec_chain_req, + .fill_cf_slot = aie2_cmdlist_fill_cf, + .fill_dpu_slot = aie2_cmdlist_fill_dpu, + .fill_preempt_slot = aie2_cmdlist_unsupp, + .fill_elf_slot = aie2_cmdlist_unsupp, + .get_chain_msg_op = aie2_get_chain_msg_op, +}; + +static int +aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + u32 cmd_len; + void *cmd; + + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (*size < sizeof(*npu_slot) + cmd_len) + return -EINVAL; + + /* Zero the slot before filling it; setting cu_idx ahead of the memset would wipe it */ + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + npu_slot->type = EXEC_NPU_TYPE_NON_ELF; + npu_slot->arg_cnt = cmd_len / sizeof(u32); + memcpy(npu_slot->args, cmd, cmd_len); + + *size = sizeof(*npu_slot) + cmd_len; + return 0; +} + +static int +aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + u32 arg_sz; + + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*sn); + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; + npu_slot->inst_buf_addr = sn->buffer; + npu_slot->inst_size = sn->buffer_size; + npu_slot->inst_prop_cnt = sn->prop_count; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, sn->prop_args, arg_sz); + + *size = sizeof(*npu_slot) + arg_sz; + return 0; +} + +static int +aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + npu_slot->type = EXEC_NPU_TYPE_PREEMPT; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, pd->prop_args, arg_sz); + + *size = sizeof(*npu_slot) + arg_sz; + return 0; +} + +static int +aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = 
cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_ELF; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = 1; + npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN; + + *size = struct_size(npu_slot, args, npu_slot->arg_cnt); + return 0; +} + +static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) +{ + return MSG_OP_CHAIN_EXEC_NPU; +} + +static struct aie2_exec_msg_ops npu_exec_message_ops = { + .init_cu_req = aie2_init_exec_cu_req, + .init_dpu_req = aie2_init_exec_dpu_req, + .init_chain_req = aie2_init_npu_chain_req, + .fill_cf_slot = aie2_cmdlist_fill_npu_cf, + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, + .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt, + .fill_elf_slot = aie2_cmdlist_fill_npu_elf, + .get_chain_msg_op = aie2_get_npu_chain_msg_op, +}; + +static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo, + size_t *size, u32 *msg_op) +{ + struct amdxdna_dev *xdna = cmd_abo->client->xdna; + int ret; + u32 op; + + + op = amdxdna_cmd_get_op(cmd_abo); + switch (op) { + case ERT_START_CU: + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op); + if (ret) { + XDNA_DBG(xdna, "Init CU req failed ret %d", ret); + return ret; + } + break; + case ERT_START_NPU: + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op); + if (ret) { + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret); + return ret; + } + + break; + default: + XDNA_ERR(xdna, "Unsupported op %d", op); + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +static int +aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo, + size_t *size, u32 *cmd_op) +{ + struct amdxdna_dev *xdna = cmd_abo->client->xdna; + int ret; + u32 op; + + op = amdxdna_cmd_get_op(cmd_abo); + if (*cmd_op == ERT_INVALID_CMD) + *cmd_op = op; + else if (op != *cmd_op) + return -EINVAL; + + switch (op) { + case ERT_START_CU: + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU: + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU_PREEMPT: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU_PREEMPT_ELF: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size); + break; + default: + XDNA_INFO(xdna, "Unsupported op %d", op); + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +void aie2_msg_init(struct amdxdna_dev_hdl *ndev) +{ + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND)) + ndev->exec_msg_ops = &npu_exec_message_ops; + else + ndev->exec_msg_ops = &legacy_exec_message_ops; +} + +static inline struct amdxdna_gem_obj * +aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) +{ + int idx = get_job_idx(job->seq); + + return job->hwctx->priv->cmd_buf[idx]; +} + +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = 
hwctx->client->xdna; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct xdna_mailbox_msg msg; + union exec_req req; + int ret; + + if (!chann) + return -ENODEV; + + ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode); + if (ret) + return ret; + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, + 0x40, false); + + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; + } + + return 0; +} + +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_client *client = hwctx->client; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_cmd_chain *payload; + struct xdna_mailbox_msg msg; + union exec_chain_req req; + u32 payload_len; + u32 offset = 0; + size_t size; + int ret; + u32 op; + u32 i; + + op = amdxdna_cmd_get_op(cmd_abo); + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); + if (op != ERT_CMD_CHAIN || !payload || + payload_len < struct_size(payload, data, payload->command_count)) + return -EINVAL; + + op = ERT_INVALID_CMD; + for (i = 0; i < payload->command_count; i++) { + u32 boh = (u32)(payload->data[i]); + struct amdxdna_gem_obj *abo; + + abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); + if (!abo) { + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh); + return -ENOENT; + } + + size = cmdbuf_abo->mem.size - offset; + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset, + abo, &size, &op); + amdxdna_gem_put_obj(abo); + if (ret) + return ret; + + offset += size; + } + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); + if (msg.opcode == MSG_OP_MAX_OPCODE) + return -EOPNOTSUPP; + + /* The offset is the accumulated total size of the cmd buffer */ + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, + offset, payload->command_count); + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset); + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; + } + + return 0; +} + +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct xdna_mailbox_msg msg; + union exec_chain_req req; + u32 op = ERT_INVALID_CMD; + size_t size; + int ret; + + size = cmdbuf_abo->mem.size; + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op); + if (ret) + return ret; + + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); + if (msg.opcode == MSG_OP_MAX_OPCODE) + return -EOPNOTSUPP; + + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, + size, 1); + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { 
+ XDNA_ERR(xdna, "Send message failed"); + return ret; + } + + return 0; +} + +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct xdna_mailbox_msg msg; + struct sync_bo_req req; + int ret; + + req.src_addr = 0; + req.dst_addr = amdxdna_dev_bo_offset(abo); + req.size = abo->mem.size; + + /* Device to Host */ + req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) | + FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM); + + XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx)", + req.size, req.src_addr, req.dst_addr); + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + msg.opcode = MSG_OP_SYNC_BO; + + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; + } + + return 0; +} + +int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) +{ + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct config_debug_bo_req req; + struct xdna_mailbox_msg msg; + + if (job->drv_cmd->opcode == ATTACH_DEBUG_BO) + req.config = DEBUG_BO_REGISTER; + else + req.config = DEBUG_BO_UNREGISTER; + + req.offset = amdxdna_dev_bo_offset(abo); + req.size = abo->mem.size; + + XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d", + req.offset, req.size, req.config); + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + msg.send_size = sizeof(req); + msg.opcode = MSG_OP_CONFIG_DEBUG_BO; + + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); +} diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h new file mode 100644 index 000000000000..1c957a6298d3 --- --- /dev/null +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -0,0 +1,448 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. 
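+ * + * Private definitions for the driver/firmware message protocol: mailbox + * opcodes, status codes, and the packed request/response layouts below.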
+ */ + +#ifndef _AIE2_MSG_PRIV_H_ +#define _AIE2_MSG_PRIV_H_ + +enum aie2_msg_opcode { + MSG_OP_CREATE_CONTEXT = 0x2, + MSG_OP_DESTROY_CONTEXT = 0x3, + MSG_OP_GET_TELEMETRY = 0x4, + MSG_OP_SYNC_BO = 0x7, + MSG_OP_EXECUTE_BUFFER_CF = 0xC, + MSG_OP_QUERY_COL_STATUS = 0xD, + MSG_OP_QUERY_AIE_TILE_INFO = 0xE, + MSG_OP_QUERY_AIE_VERSION = 0xF, + MSG_OP_EXEC_DPU = 0x10, + MSG_OP_CONFIG_CU = 0x11, + MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12, + MSG_OP_CHAIN_EXEC_DPU = 0x13, + MSG_OP_CONFIG_DEBUG_BO = 0x14, + MSG_OP_CHAIN_EXEC_NPU = 0x18, + MSG_OP_MAX_XRT_OPCODE, + MSG_OP_SUSPEND = 0x101, + MSG_OP_RESUME = 0x102, + MSG_OP_ASSIGN_MGMT_PASID = 0x103, + MSG_OP_INVOKE_SELF_TEST = 0x104, + MSG_OP_MAP_HOST_BUFFER = 0x106, + MSG_OP_GET_FIRMWARE_VERSION = 0x108, + MSG_OP_SET_RUNTIME_CONFIG = 0x10A, + MSG_OP_GET_RUNTIME_CONFIG = 0x10B, + MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C, + MSG_OP_MAX_DRV_OPCODE, + MSG_OP_GET_PROTOCOL_VERSION = 0x301, + MSG_OP_MAX_OPCODE +}; + +enum aie2_msg_status { + AIE2_STATUS_SUCCESS = 0x0, + /* AIE Error codes */ + AIE2_STATUS_AIE_SATURATION_ERROR = 0x1000001, + AIE2_STATUS_AIE_FP_ERROR = 0x1000002, + AIE2_STATUS_AIE_STREAM_ERROR = 0x1000003, + AIE2_STATUS_AIE_ACCESS_ERROR = 0x1000004, + AIE2_STATUS_AIE_BUS_ERROR = 0x1000005, + AIE2_STATUS_AIE_INSTRUCTION_ERROR = 0x1000006, + AIE2_STATUS_AIE_ECC_ERROR = 0x1000007, + AIE2_STATUS_AIE_LOCK_ERROR = 0x1000008, + AIE2_STATUS_AIE_DMA_ERROR = 0x1000009, + AIE2_STATUS_AIE_MEM_PARITY_ERROR = 0x100000a, + AIE2_STATUS_AIE_PWR_CFG_ERROR = 0x100000b, + AIE2_STATUS_AIE_BACKTRACK_ERROR = 0x100000c, + AIE2_STATUS_MAX_AIE_STATUS_CODE, + /* MGMT ERT Error codes */ + AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE = 0x2000001, + AIE2_STATUS_MGMT_ERT_HASH_MISMATCH, + AIE2_STATUS_MGMT_ERT_NOAVAIL, + AIE2_STATUS_MGMT_ERT_INVALID_PARAM, + AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE, + AIE2_STATUS_MGMT_ERT_BUSY, + AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE, + MAX_MGMT_ERT_STATUS_CODE, + /* APP ERT Error codes */ + AIE2_STATUS_APP_ERT_FIRST_ERROR = 0x3000001, + AIE2_STATUS_APP_INVALID_INSTR, + AIE2_STATUS_APP_LOAD_PDI_FAIL, + MAX_APP_ERT_STATUS_CODE, + /* NPU RTOS Error Codes */ + AIE2_STATUS_INVALID_INPUT_BUFFER = 0x4000001, + AIE2_STATUS_INVALID_COMMAND, + AIE2_STATUS_INVALID_PARAM, + AIE2_STATUS_INVALID_OPERATION = 0x4000006, + AIE2_STATUS_ASYNC_EVENT_MSGS_FULL, + AIE2_STATUS_MAX_RTOS_STATUS_CODE, + MAX_AIE2_STATUS_CODE +}; + +struct assign_mgmt_pasid_req { + __u16 pasid; + __u16 reserved; +} __packed; + +struct assign_mgmt_pasid_resp { + enum aie2_msg_status status; +} __packed; + +struct map_host_buffer_req { + __u32 context_id; + __u64 buf_addr; + __u64 buf_size; +} __packed; + +struct map_host_buffer_resp { + enum aie2_msg_status status; +} __packed; + +#define MAX_CQ_PAIRS 2 +struct cq_info { + __u32 head_addr; + __u32 tail_addr; + __u32 buf_addr; + __u32 buf_size; +}; + +struct cq_pair { + struct cq_info x2i_q; + struct cq_info i2x_q; +}; + +struct create_ctx_req { + __u32 aie_type; + __u8 start_col; + __u8 num_col; + __u16 reserved; + __u8 num_cq_pairs_requested; + __u8 reserved1; + __u16 pasid; + __u32 pad[2]; + __u32 sec_comm_target_type; + __u32 context_priority; +} __packed; + +struct create_ctx_resp { + enum aie2_msg_status status; + __u32 context_id; + __u16 msix_id; + __u8 num_cq_pairs_allocated; + __u8 reserved; + struct cq_pair cq_pair[MAX_CQ_PAIRS]; +} __packed; + +struct destroy_ctx_req { + __u32 context_id; +} __packed; + +struct destroy_ctx_resp { + enum aie2_msg_status status; +} __packed; + +enum telemetry_type { + TELEMETRY_TYPE_DISABLED, + 
TELEMETRY_TYPE_HEALTH, + TELEMETRY_TYPE_ERROR_INFO, + TELEMETRY_TYPE_PROFILING, + TELEMETRY_TYPE_DEBUG, + MAX_TELEMETRY_TYPE +}; + +struct get_telemetry_req { + enum telemetry_type type; + __u64 buf_addr; + __u32 buf_size; +} __packed; + +struct get_telemetry_resp { + __u32 major; + __u32 minor; + __u32 size; + enum aie2_msg_status status; +} __packed; + +struct execute_buffer_req { + __u32 cu_idx; + __u32 payload[19]; +} __packed; + +struct exec_dpu_req { + __u64 inst_buf_addr; + __u32 inst_size; + __u32 inst_prop_cnt; + __u32 cu_idx; + __u32 payload[35]; +} __packed; + +enum exec_npu_type { + EXEC_NPU_TYPE_NON_ELF = 0x1, + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2, + EXEC_NPU_TYPE_PREEMPT = 0x3, + EXEC_NPU_TYPE_ELF = 0x4, +}; + +union exec_req { + struct execute_buffer_req ebuf; + struct exec_dpu_req dpu_req; +}; + +struct execute_buffer_resp { + enum aie2_msg_status status; +} __packed; + +struct aie_tile_info { + __u32 size; + __u16 major; + __u16 minor; + __u16 cols; + __u16 rows; + __u16 core_rows; + __u16 mem_rows; + __u16 shim_rows; + __u16 core_row_start; + __u16 mem_row_start; + __u16 shim_row_start; + __u16 core_dma_channels; + __u16 mem_dma_channels; + __u16 shim_dma_channels; + __u16 core_locks; + __u16 mem_locks; + __u16 shim_locks; + __u16 core_events; + __u16 mem_events; + __u16 shim_events; + __u16 reserved; +}; + +struct aie_tile_info_req { + __u32 reserved; +} __packed; + +struct aie_tile_info_resp { + enum aie2_msg_status status; + struct aie_tile_info info; +} __packed; + +struct aie_version_info_req { + __u32 reserved; +} __packed; + +struct aie_version_info_resp { + enum aie2_msg_status status; + __u16 major; + __u16 minor; +} __packed; + +struct aie_column_info_req { + __u64 dump_buff_addr; + __u32 dump_buff_size; + __u32 num_cols; + __u32 aie_bitmap; +} __packed; + +struct aie_column_info_resp { + enum aie2_msg_status status; + __u32 size; +} __packed; + +struct suspend_req { + __u32 place_holder; +} __packed; + +struct suspend_resp { + enum aie2_msg_status status; +} __packed; + +struct resume_req { + __u32 place_holder; +} __packed; + +struct resume_resp { + enum aie2_msg_status status; +} __packed; + +struct check_header_hash_req { + __u64 hash_high; + __u64 hash_low; +} __packed; + +struct check_header_hash_resp { + enum aie2_msg_status status; +} __packed; + +struct query_error_req { + __u64 buf_addr; + __u32 buf_size; + __u32 next_row; + __u32 next_column; + __u32 next_module; +} __packed; + +struct query_error_resp { + enum aie2_msg_status status; + __u32 num_err; + __u32 has_next_err; + __u32 next_row; + __u32 next_column; + __u32 next_module; +} __packed; + +struct protocol_version_req { + __u32 reserved; +} __packed; + +struct protocol_version_resp { + enum aie2_msg_status status; + __u32 major; + __u32 minor; +} __packed; + +struct firmware_version_req { + __u32 reserved; +} __packed; + +struct firmware_version_resp { + enum aie2_msg_status status; + __u32 major; + __u32 minor; + __u32 sub; + __u32 build; +} __packed; + +#define MAX_NUM_CUS 32 +#define AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0) +#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17) +struct config_cu_req { + __u32 num_cus; + __u32 cfgs[MAX_NUM_CUS]; +} __packed; + +struct config_cu_resp { + enum aie2_msg_status status; +} __packed; + +struct set_runtime_cfg_req { + __u32 type; + __u64 value; +} __packed; + +struct set_runtime_cfg_resp { + enum aie2_msg_status status; +} __packed; + +struct get_runtime_cfg_req { + __u32 type; +} __packed; + +struct get_runtime_cfg_resp { + enum aie2_msg_status status; + __u64 
value; +} __packed; + +enum async_event_type { + ASYNC_EVENT_TYPE_AIE_ERROR, + ASYNC_EVENT_TYPE_EXCEPTION, + MAX_ASYNC_EVENT_TYPE +}; + +#define ASYNC_BUF_SIZE SZ_8K +struct async_event_msg_req { + __u64 buf_addr; + __u32 buf_size; +} __packed; + +struct async_event_msg_resp { + enum aie2_msg_status status; + enum async_event_type type; +} __packed; + +#define MAX_CHAIN_CMDBUF_SIZE SZ_4K + +struct cmd_chain_slot_execbuf_cf { + __u32 cu_idx; + __u32 arg_cnt; + __u32 args[] __counted_by(arg_cnt); +}; + +struct cmd_chain_slot_dpu { + __u64 inst_buf_addr; + __u32 inst_size; + __u32 inst_prop_cnt; + __u32 cu_idx; + __u32 arg_cnt; +#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32)) + __u32 args[] __counted_by(arg_cnt); +}; + +#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32)) +#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3 +struct cmd_chain_slot_npu { + enum exec_npu_type type; + u64 inst_buf_addr; + u64 save_buf_addr; + u64 restore_buf_addr; + u32 inst_size; + u32 save_size; + u32 restore_size; + u32 inst_prop_cnt; + u32 cu_idx; + u32 arg_cnt; + u32 args[] __counted_by(arg_cnt); +} __packed; + +struct cmd_chain_req { + __u64 buf_addr; + __u32 buf_size; + __u32 count; +} __packed; + +struct cmd_chain_npu_req { + u32 flags; + u32 reserved; + u64 buf_addr; + u32 buf_size; + u32 count; +} __packed; + +union exec_chain_req { + struct cmd_chain_npu_req npu_req; + struct cmd_chain_req req; +}; + +struct cmd_chain_resp { + enum aie2_msg_status status; + __u32 fail_cmd_idx; + enum aie2_msg_status fail_cmd_status; +} __packed; + +#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0) +#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4) +struct sync_bo_req { + __u64 src_addr; + __u64 dst_addr; + __u32 size; +#define SYNC_BO_DEV_MEM 0 +#define SYNC_BO_HOST_MEM 2 + __u32 type; +} __packed; + +struct sync_bo_resp { + enum aie2_msg_status status; +} __packed; + +#define DEBUG_BO_UNREGISTER 0 +#define DEBUG_BO_REGISTER 1 +struct config_debug_bo_req { + __u64 offset; + __u64 size; + /* + * config operations. + * DEBUG_BO_REGISTER: Register debug buffer + * DEBUG_BO_UNREGISTER: Unregister debug buffer + */ + __u32 config; +} __packed; + +struct config_debug_bo_resp { + enum aie2_msg_status status; +} __packed; +#endif /* _AIE2_MSG_PRIV_H_ */ diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c new file mode 100644 index 000000000000..ceef1c502e9e --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -0,0 +1,1187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_drv.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_managed.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/firmware.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/pci.h> +#include <linux/xarray.h> + +#include "aie2_msg_priv.h" +#include "aie2_pci.h" +#include "aie2_solver.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" + +static int aie2_max_col = XRS_MAX_COL; +module_param(aie2_max_col, uint, 0600); +MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used"); + +/* + * The management mailbox channel is allocated by firmware. + * The related register and ring buffer information is on SRAM BAR. + * This struct is the register layout. 
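+ * The magic field must read back as MGMT_MBOX_MAGIC before the rest of + * the block is trusted; the head/tail entries hold mailbox register + * addresses, and each buf/buf_sz pair locates one direction's ring + * buffer in SRAM.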
+ */ +#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */ +struct mgmt_mbox_chann_info { + __u32 x2i_tail; + __u32 x2i_head; + __u32 x2i_buf; + __u32 x2i_buf_sz; + __u32 i2x_tail; + __u32 i2x_head; + __u32 i2x_buf; + __u32 i2x_buf_sz; + __u32 magic; + __u32 msi_id; + __u32 prot_major; + __u32 prot_minor; + __u32 rsvd[4]; +}; + +static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor) +{ + const struct aie2_fw_feature_tbl *feature; + struct amdxdna_dev *xdna = ndev->xdna; + + /* + * The mailbox behavior the driver supports is defined by + * ndev->priv->protocol_major and protocol_minor. + * + * When protocol_major and fw_major differ, the driver and firmware + * are incompatible. + */ + if (ndev->priv->protocol_major != fw_major) { + XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d", + fw_major, fw_minor); + return -EINVAL; + } + + /* + * When protocol_minor is greater than fw_minor, the driver relies on + * operations the installed firmware does not support. + */ + if (ndev->priv->protocol_minor > fw_minor) { + XDNA_ERR(xdna, "Firmware minor version smaller than supported"); + return -EINVAL; + } + + for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor; + feature++) { + if (fw_minor < feature->min_minor) + continue; + if (feature->max_minor > 0 && fw_minor > feature->max_minor) + continue; + + set_bit(feature->feature, &ndev->feature_mask); + } + + return 0; +} + +static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg); + XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg); + XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr); + XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size); + XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg); + XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg); + XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr); + XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size); + XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx); + XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major); + XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor); +} + +static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) +{ + struct mgmt_mbox_chann_info info_regs; + struct xdna_mailbox_chann_res *i2x; + struct xdna_mailbox_chann_res *x2i; + u32 addr, off; + u32 *reg; + int ret; + int i; + + /* + * Once firmware is alive, it will write management channel + * information in the SRAM BAR and write the address of that + * information at the FW_ALIVE_OFF offset in the SRAM BAR. + * + * Reading a non-zero value from FW_ALIVE_OFF implies that the firmware + * is alive. 
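+ * + * The poll below retries every AIE2_INTERVAL microseconds and gives up + * after AIE2_TIMEOUT microseconds, returning -ETIME if no address appears.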
+ */ + ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF), + addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT); + if (ret || !addr) + return -ETIME; + + off = AIE2_SRAM_OFF(ndev, addr); + reg = (u32 *)&info_regs; + for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++) + reg[i] = readl(ndev->sram_base + off + i * sizeof(u32)); + + if (info_regs.magic != MGMT_MBOX_MAGIC) { + XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic); + ret = -EINVAL; + goto done; + } + + i2x = &ndev->mgmt_i2x; + x2i = &ndev->mgmt_x2i; + + i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head); + i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail); + i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf); + i2x->rb_size = info_regs.i2x_buf_sz; + + x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head); + x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail); + x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf); + x2i->rb_size = info_regs.x2i_buf_sz; + + ndev->mgmt_chan_idx = info_regs.msi_id; + ndev->mgmt_prot_major = info_regs.prot_major; + ndev->mgmt_prot_minor = info_regs.prot_minor; + + ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor); + +done: + aie2_dump_chann_info_debug(ndev); + + /* Must clear address at FW_ALIVE_OFF */ + writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF)); + + return ret; +} + +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val) +{ + const struct rt_config *cfg; + u32 value; + int ret; + + for (cfg = ndev->priv->rt_config; cfg->type; cfg++) { + if (cfg->category != category) + continue; + + if (cfg->feature_mask && + bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX)) + continue; + + value = val ? 
*val : cfg->value; + ret = aie2_set_runtime_cfg(ndev, cfg->type, value); + if (ret) { + XDNA_ERR(ndev->xdna, "Set type %d value %d failed", + cfg->type, value); + return ret; + } + } + + return 0; +} + +static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_suspend_fw(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Suspend firmware failed"); + return ret; + } + + ret = aie2_resume_fw(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Resume firmware failed"); + return ret; + } + + return 0; +} + +static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Runtime config failed"); + return ret; + } + + ret = aie2_assign_mgmt_pasid(ndev, 0); + if (ret) { + XDNA_ERR(ndev->xdna, "Cannot assign PASID"); + return ret; + } + + ret = aie2_xdna_reset(ndev); + if (ret) { + XDNA_ERR(ndev->xdna, "Reset firmware failed"); + return ret; + } + + return 0; +} + +static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver); + if (ret) { + XDNA_ERR(ndev->xdna, "Query firmware version failed"); + return ret; + } + + ret = aie2_query_aie_version(ndev, &ndev->version); + if (ret) { + XDNA_ERR(ndev->xdna, "Query AIE version failed"); + return ret; + } + + ret = aie2_query_aie_metadata(ndev, &ndev->metadata); + if (ret) { + XDNA_ERR(ndev->xdna, "Query AIE metadata failed"); + return ret; + } + + ndev->total_col = min(aie2_max_col, ndev->metadata.cols); + + return 0; +} + +static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev) +{ + if (aie2_suspend_fw(ndev)) + XDNA_ERR(ndev->xdna, "Suspend firmware failed"); + XDNA_DBG(ndev->xdna, "Firmware suspended"); +} + +static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action) +{ + struct amdxdna_hwctx *hwctx = cb_arg; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + + hwctx->start_col = action->part.start_col; + hwctx->num_col = action->part.ncols; + ret = aie2_create_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "create context failed, ret %d", ret); + + return ret; +} + +static int aie2_xrs_unload(void *cb_arg) +{ + struct amdxdna_hwctx *hwctx = cb_arg; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + + ret = aie2_destroy_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "destroy context failed, ret %d", ret); + + return ret; +} + +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) +{ + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + struct amdxdna_dev_hdl *ndev; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + ndev = xdna->dev_handle; + ndev->dft_dpm_level = dpm_level; + if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) + return 0; + + return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); +} + +static struct xrs_action_ops aie2_xrs_actions = { + .load = aie2_xrs_load, + .unload = aie2_xrs_unload, + .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level, +}; + +static void aie2_hw_stop(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + + if (ndev->dev_status <= AIE2_DEV_INIT) { + XDNA_ERR(xdna, "device is already stopped"); + return; + } + + aie2_mgmt_fw_fini(ndev); + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; + drmm_kfree(&xdna->ddev, ndev->mbox); + ndev->mbox 
= NULL; + aie2_psp_stop(ndev->psp_hdl); + aie2_smu_fini(ndev); + aie2_error_async_events_free(ndev); + pci_disable_device(pdev); + + ndev->dev_status = AIE2_DEV_INIT; +} + +static int aie2_hw_start(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + struct xdna_mailbox_res mbox_res; + u32 xdna_mailbox_intr_reg; + int mgmt_mb_irq, ret; + + if (ndev->dev_status >= AIE2_DEV_START) { + XDNA_INFO(xdna, "device is already started"); + return 0; + } + + ret = pci_enable_device(pdev); + if (ret) { + XDNA_ERR(xdna, "failed to enable device, ret %d", ret); + return ret; + } + pci_set_master(pdev); + + ret = aie2_smu_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init smu, ret %d", ret); + goto disable_dev; + } + + ret = aie2_psp_start(ndev->psp_hdl); + if (ret) { + XDNA_ERR(xdna, "failed to start psp, ret %d", ret); + goto fini_smu; + } + + ret = aie2_get_mgmt_chann_info(ndev); + if (ret) { + XDNA_ERR(xdna, "firmware is not alive"); + goto stop_psp; + } + + mbox_res.ringbuf_base = ndev->sram_base; + mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar); + mbox_res.mbox_base = ndev->mbox_base; + mbox_res.mbox_size = MBOX_SIZE(ndev); + mbox_res.name = "xdna_mailbox"; + ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res); + if (!ndev->mbox) { + XDNA_ERR(xdna, "failed to create mailbox device"); + ret = -ENODEV; + goto stop_psp; + } + + mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx); + if (mgmt_mb_irq < 0) { + ret = mgmt_mb_irq; + XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret); + goto stop_psp; + } + + xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4; + ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox, + &ndev->mgmt_x2i, + &ndev->mgmt_i2x, + xdna_mailbox_intr_reg, + mgmt_mb_irq); + if (!ndev->mgmt_chann) { + XDNA_ERR(xdna, "failed to create management mailbox channel"); + ret = -EINVAL; + goto stop_psp; + } + + ret = aie2_pm_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); + goto destroy_mgmt_chann; + } + + ret = aie2_mgmt_fw_init(ndev); + if (ret) { + XDNA_ERR(xdna, "init mgmt firmware failed, ret %d", ret); + goto destroy_mgmt_chann; + } + + ret = aie2_mgmt_fw_query(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to query fw, ret %d", ret); + goto destroy_mgmt_chann; + } + + ret = aie2_error_async_events_alloc(ndev); + if (ret) { + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret); + goto destroy_mgmt_chann; + } + + ndev->dev_status = AIE2_DEV_START; + + return 0; + +destroy_mgmt_chann: + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); +stop_psp: + aie2_psp_stop(ndev->psp_hdl); +fini_smu: + aie2_smu_fini(ndev); +disable_dev: + pci_disable_device(pdev); + + return ret; +} + +static int aie2_hw_suspend(struct amdxdna_dev *xdna) +{ + struct amdxdna_client *client; + + guard(mutex)(&xdna->dev_lock); + list_for_each_entry(client, &xdna->client_list, node) + aie2_hwctx_suspend(client); + + aie2_hw_stop(xdna); + + return 0; +} + +static int aie2_hw_resume(struct amdxdna_dev *xdna) +{ + struct amdxdna_client *client; + int ret; + + ret = aie2_hw_start(xdna); + if (ret) { + XDNA_ERR(xdna, "Start hardware failed, %d", ret); + return ret; + } + + list_for_each_entry(client, &xdna->client_list, node) { + ret = aie2_hwctx_resume(client); + if (ret) + break; + } + + return ret; +} + +static int aie2_init(struct amdxdna_dev *xdna) +{ + struct pci_dev *pdev = 
to_pci_dev(xdna->ddev.dev); + void __iomem *tbl[PCI_NUM_RESOURCES] = {0}; + struct init_config xrs_cfg = { 0 }; + struct amdxdna_dev_hdl *ndev; + struct psp_config psp_conf; + const struct firmware *fw; + unsigned long bars = 0; + int i, nvec, ret; + + ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL); + if (!ndev) + return -ENOMEM; + + ndev->priv = xdna->dev_info->dev_priv; + ndev->xdna = xdna; + + ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev); + if (ret) { + XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", + ndev->priv->fw_path, ret); + return ret; + } + + ret = pcim_enable_device(pdev); + if (ret) { + XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret); + goto release_fw; + } + + for (i = 0; i < PSP_MAX_REGS; i++) + set_bit(PSP_REG_BAR(ndev, i), &bars); + + set_bit(xdna->dev_info->sram_bar, &bars); + set_bit(xdna->dev_info->smu_bar, &bars); + set_bit(xdna->dev_info->mbox_bar, &bars); + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!test_bit(i, &bars)) + continue; + tbl[i] = pcim_iomap(pdev, i, 0); + if (!tbl[i]) { + XDNA_ERR(xdna, "map bar %d failed", i); + ret = -ENOMEM; + goto release_fw; + } + } + + ndev->sram_base = tbl[xdna->dev_info->sram_bar]; + ndev->smu_base = tbl[xdna->dev_info->smu_bar]; + ndev->mbox_base = tbl[xdna->dev_info->mbox_bar]; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret); + goto release_fw; + } + + nvec = pci_msix_vec_count(pdev); + if (nvec <= 0) { + XDNA_ERR(xdna, "failed to get the number of interrupt vectors"); + ret = -EINVAL; + goto release_fw; + } + + ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX); + if (ret < 0) { + XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret); + goto release_fw; + } + + psp_conf.fw_size = fw->size; + psp_conf.fw_buf = fw->data; + for (i = 0; i < PSP_MAX_REGS; i++) + psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i); + ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf); + if (!ndev->psp_hdl) { + XDNA_ERR(xdna, "failed to create psp"); + ret = -ENOMEM; + goto release_fw; + } + xdna->dev_handle = ndev; + + ret = aie2_hw_start(xdna); + if (ret) { + XDNA_ERR(xdna, "start npu failed, ret %d", ret); + goto release_fw; + } + + xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1; + for (i = 0; i < xrs_cfg.clk_list.num_levels; i++) + xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk; + xrs_cfg.sys_eff_factor = 1; + xrs_cfg.ddev = &xdna->ddev; + xrs_cfg.actions = &aie2_xrs_actions; + xrs_cfg.total_col = ndev->total_col; + + xdna->xrs_hdl = xrsm_init(&xrs_cfg); + if (!xdna->xrs_hdl) { + XDNA_ERR(xdna, "Initialize resolver failed"); + ret = -EINVAL; + goto stop_hw; + } + + release_firmware(fw); + aie2_msg_init(ndev); + amdxdna_pm_init(xdna); + return 0; + +stop_hw: + aie2_hw_stop(xdna); +release_fw: + release_firmware(fw); + + return ret; +} + +static void aie2_fini(struct amdxdna_dev *xdna) +{ + amdxdna_pm_fini(xdna); + aie2_hw_stop(xdna); +} + +static int aie2_get_aie_status(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_aie_status status; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + int ret; + + ndev = xdna->dev_handle; + if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) { + XDNA_ERR(xdna, "Failed to copy AIE request into kernel"); + return -EFAULT; + } + + if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) { + XDNA_ERR(xdna, "Invalid 
buffer size. Given Size: %u. Need Size: %u.", + status.buffer_size, ndev->metadata.cols * ndev->metadata.size); + return -EINVAL; + } + + ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer), + status.buffer_size, &status.cols_filled); + if (ret) { + XDNA_ERR(xdna, "Failed to get AIE status info. Ret: %d", ret); + return ret; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) { + XDNA_ERR(xdna, "Failed to copy AIE request info to user space"); + return -EFAULT; + } + + return 0; +} + +static int aie2_get_aie_metadata(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_aie_metadata *meta; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + int ret = 0; + + ndev = xdna->dev_handle; + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->col_size = ndev->metadata.size; + meta->cols = ndev->metadata.cols; + meta->rows = ndev->metadata.rows; + + meta->version.major = ndev->metadata.version.major; + meta->version.minor = ndev->metadata.version.minor; + + meta->core.row_count = ndev->metadata.core.row_count; + meta->core.row_start = ndev->metadata.core.row_start; + meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count; + meta->core.lock_count = ndev->metadata.core.lock_count; + meta->core.event_reg_count = ndev->metadata.core.event_reg_count; + + meta->mem.row_count = ndev->metadata.mem.row_count; + meta->mem.row_start = ndev->metadata.mem.row_start; + meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count; + meta->mem.lock_count = ndev->metadata.mem.lock_count; + meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count; + + meta->shim.row_count = ndev->metadata.shim.row_count; + meta->shim.row_start = ndev->metadata.shim.row_start; + meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count; + meta->shim.lock_count = ndev->metadata.shim.lock_count; + meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count; + + if (copy_to_user(u64_to_user_ptr(args->buffer), meta, sizeof(*meta))) + ret = -EFAULT; + + kfree(meta); + return ret; +} + +static int aie2_get_aie_version(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_aie_version version; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + version.major = ndev->version.major; + version.minor = ndev->version.minor; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +static int aie2_get_firmware_version(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_firmware_version version; + struct amdxdna_dev *xdna = client->xdna; + + version.major = xdna->fw_ver.major; + version.minor = xdna->fw_ver.minor; + version.patch = xdna->fw_ver.sub; + version.build = xdna->fw_ver.build; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +static int aie2_get_power_mode(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_power_mode mode = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + mode.power_mode = ndev->pw_mode; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode))) + return -EFAULT; + + return 0; +} + +static int aie2_get_clock_metadata(struct amdxdna_client *client, + struct 
amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_clock_metadata *clock; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + int ret = 0; + + ndev = xdna->dev_handle; + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return -ENOMEM; + + snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name), + "MP-NPU Clock"); + clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq; + snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock"); + clock->h_clock.freq_mhz = ndev->hclk_freq; + + if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock))) + ret = -EFAULT; + + kfree(clock); + return ret; +} + +static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg) +{ + struct amdxdna_drm_hwctx_entry *tmp __free(kfree) = NULL; + struct amdxdna_drm_get_array *array_args = arg; + struct amdxdna_drm_hwctx_entry __user *buf; + u32 size; + + if (!array_args->num_element) + return -EINVAL; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + tmp->pid = hwctx->client->pid; + tmp->context_id = hwctx->id; + tmp->start_col = hwctx->start_col; + tmp->num_col = hwctx->num_col; + tmp->command_submissions = hwctx->priv->seq; + tmp->command_completions = hwctx->priv->completed; + tmp->pasid = hwctx->client->pasid; + tmp->priority = hwctx->qos.priority; + tmp->gops = hwctx->qos.gops; + tmp->fps = hwctx->qos.fps; + tmp->dma_bandwidth = hwctx->qos.dma_bandwidth; + tmp->latency = hwctx->qos.latency; + tmp->frame_exec_time = hwctx->qos.frame_exec_time; + tmp->state = AMDXDNA_HWCTX_STATE_ACTIVE; + + buf = u64_to_user_ptr(array_args->buffer); + size = min(sizeof(*tmp), array_args->element_size); + + if (copy_to_user(buf, tmp, size)) + return -EFAULT; + + array_args->buffer += size; + array_args->num_element--; + + return 0; +} + +static int aie2_get_hwctx_status(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_array array_args; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_client *tmp_client; + int ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + array_args.element_size = sizeof(struct amdxdna_drm_query_hwctx); + array_args.buffer = args->buffer; + array_args.num_element = args->buffer_size / array_args.element_size; + list_for_each_entry(tmp_client, &xdna->client_list, node) { + ret = amdxdna_hwctx_walk(tmp_client, &array_args, + aie2_hwctx_status_cb); + if (ret) + break; + } + + args->buffer_size -= (u32)(array_args.buffer - args->buffer); + return 0; +} + +static int aie2_query_resource_info(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_resource_info res_info; + const struct amdxdna_dev_priv *priv; + struct amdxdna_dev_hdl *ndev; + struct amdxdna_dev *xdna; + + xdna = client->xdna; + ndev = xdna->dev_handle; + priv = ndev->priv; + + res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk; + res_info.npu_tops_max = ndev->max_tops; + res_info.npu_task_max = priv->hwctx_limit; + res_info.npu_tops_curr = ndev->curr_tops; + res_info.npu_task_curr = ndev->hwctx_num; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &res_info, sizeof(res_info))) + return -EFAULT; + + return 0; +} + +static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 *map = arg; + + if (hwctx->fw_ctx_id >= xdna->dev_handle->priv->hwctx_limit) { + XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id, + 
xdna->dev_handle->priv->hwctx_limit); + return -EINVAL; + } + + map[hwctx->fw_ctx_id] = hwctx->id; + return 0; +} + +static int aie2_get_telemetry(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL; + u32 telemetry_data_sz, header_sz, elem_num; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_client *tmp_client; + int ret; + + elem_num = xdna->dev_handle->priv->hwctx_limit; + header_sz = struct_size(header, map, elem_num); + if (args->buffer_size <= header_sz) { + XDNA_ERR(xdna, "Invalid buffer size"); + return -EINVAL; + } + + telemetry_data_sz = args->buffer_size - header_sz; + if (telemetry_data_sz > SZ_4M) { + XDNA_ERR(xdna, "Buffer size is too big, %d", telemetry_data_sz); + return -EINVAL; + } + + header = kzalloc(header_sz, GFP_KERNEL); + if (!header) + return -ENOMEM; + + if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) { + XDNA_ERR(xdna, "Failed to copy telemetry header from user"); + return -EFAULT; + } + + header->map_num_elements = elem_num; + list_for_each_entry(tmp_client, &xdna->client_list, node) { + ret = amdxdna_hwctx_walk(tmp_client, &header->map, + aie2_fill_hwctx_map); + if (ret) + return ret; + } + + ret = aie2_query_telemetry(xdna->dev_handle, + u64_to_user_ptr(args->buffer + header_sz), + telemetry_data_sz, header); + if (ret) { + XDNA_ERR(xdna, "Query telemetry failed ret %d", ret); + return ret; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) { + XDNA_ERR(xdna, "Copy header failed"); + return -EFAULT; + } + + return 0; +} + +static int aie2_get_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_attribute_state state = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE) + state.state = ndev->force_preempt_enabled; + else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE) + state.state = ndev->frame_boundary_preempt; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state))) + return -EFAULT; + + return 0; +} + +static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) +{ + struct amdxdna_dev *xdna = client->xdna; + int ret, idx; + + if (!drm_dev_enter(&xdna->ddev, &idx)) + return -ENODEV; + + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto dev_exit; + + switch (args->param) { + case DRM_AMDXDNA_QUERY_AIE_STATUS: + ret = aie2_get_aie_status(client, args); + break; + case DRM_AMDXDNA_QUERY_AIE_METADATA: + ret = aie2_get_aie_metadata(client, args); + break; + case DRM_AMDXDNA_QUERY_AIE_VERSION: + ret = aie2_get_aie_version(client, args); + break; + case DRM_AMDXDNA_QUERY_CLOCK_METADATA: + ret = aie2_get_clock_metadata(client, args); + break; + case DRM_AMDXDNA_QUERY_HW_CONTEXTS: + ret = aie2_get_hwctx_status(client, args); + break; + case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION: + ret = aie2_get_firmware_version(client, args); + break; + case DRM_AMDXDNA_GET_POWER_MODE: + ret = aie2_get_power_mode(client, args); + break; + case DRM_AMDXDNA_QUERY_TELEMETRY: + ret = aie2_get_telemetry(client, args); + break; + case DRM_AMDXDNA_QUERY_RESOURCE_INFO: + ret = aie2_query_resource_info(client, args); + break; + case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE: + case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE: + ret = aie2_get_preempt_state(client, args); + break; + default: + XDNA_ERR(xdna, "Not 
supported request parameter %u", args->param); + ret = -EOPNOTSUPP; + } + + amdxdna_pm_suspend_put(xdna); + XDNA_DBG(xdna, "Got param %d", args->param); + +dev_exit: + drm_dev_exit(idx); + return ret; +} + +static int aie2_query_ctx_status_array(struct amdxdna_client *client, + struct amdxdna_drm_get_array *args) +{ + struct amdxdna_drm_get_array array_args; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_client *tmp_client; + int ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (args->element_size > SZ_4K || args->num_element > SZ_1K) { + XDNA_DBG(xdna, "Invalid element size %d or number of element %d", + args->element_size, args->num_element); + return -EINVAL; + } + + array_args.element_size = min(args->element_size, + sizeof(struct amdxdna_drm_hwctx_entry)); + array_args.buffer = args->buffer; + array_args.num_element = args->num_element * args->element_size / + array_args.element_size; + list_for_each_entry(tmp_client, &xdna->client_list, node) { + ret = amdxdna_hwctx_walk(tmp_client, &array_args, + aie2_hwctx_status_cb); + if (ret) + break; + } + + args->element_size = array_args.element_size; + args->num_element = (u32)((array_args.buffer - args->buffer) / + args->element_size); + + return 0; +} + +static int aie2_get_array(struct amdxdna_client *client, + struct amdxdna_drm_get_array *args) +{ + struct amdxdna_dev *xdna = client->xdna; + int ret, idx; + + if (!drm_dev_enter(&xdna->ddev, &idx)) + return -ENODEV; + + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto dev_exit; + + switch (args->param) { + case DRM_AMDXDNA_HW_CONTEXT_ALL: + ret = aie2_query_ctx_status_array(client, args); + break; + case DRM_AMDXDNA_HW_LAST_ASYNC_ERR: + ret = aie2_get_array_async_error(xdna->dev_handle, args); + break; + default: + XDNA_ERR(xdna, "Not supported request parameter %u", args->param); + ret = -EOPNOTSUPP; + } + + amdxdna_pm_suspend_put(xdna); + XDNA_DBG(xdna, "Got param %d", args->param); + +dev_exit: + drm_dev_exit(idx); + return ret; +} + +static int aie2_set_power_mode(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_drm_set_power_mode power_state; + enum amdxdna_power_mode_type power_mode; + struct amdxdna_dev *xdna = client->xdna; + + if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer), + sizeof(power_state))) { + XDNA_ERR(xdna, "Failed to copy power mode request into kernel"); + return -EFAULT; + } + + if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad))) + return -EINVAL; + + power_mode = power_state.power_mode; + if (power_mode > POWER_MODE_TURBO) { + XDNA_ERR(xdna, "Invalid power mode %d", power_mode); + return -EINVAL; + } + + return aie2_pm_set_mode(xdna->dev_handle, power_mode); +} + +static int aie2_set_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle; + struct amdxdna_drm_attribute_state state; + u32 val; + int ret; + + if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state))) + return -EFAULT; + + if (state.state > 1) + return -EINVAL; + + if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad))) + return -EINVAL; + + if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) { + ndev->force_preempt_enabled = state.state; + } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) { + val = state.state; + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, + &val); + if (ret) + return ret; + + ndev->frame_boundary_preempt = state.state; + } + + 
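+ /* Only the two preempt parameters are dispatched here by aie2_set_state() */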
return 0; +} + +static int aie2_set_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev *xdna = client->xdna; + int ret, idx; + + if (!drm_dev_enter(&xdna->ddev, &idx)) + return -ENODEV; + + ret = amdxdna_pm_resume_get(xdna); + if (ret) + goto dev_exit; + + switch (args->param) { + case DRM_AMDXDNA_SET_POWER_MODE: + ret = aie2_set_power_mode(client, args); + break; + case DRM_AMDXDNA_SET_FORCE_PREEMPT: + case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT: + ret = aie2_set_preempt_state(client, args); + break; + default: + XDNA_ERR(xdna, "Not supported request parameter %u", args->param); + ret = -EOPNOTSUPP; + break; + } + + amdxdna_pm_suspend_put(xdna); +dev_exit: + drm_dev_exit(idx); + return ret; +} + +const struct amdxdna_dev_ops aie2_ops = { + .init = aie2_init, + .fini = aie2_fini, + .resume = aie2_hw_resume, + .suspend = aie2_hw_suspend, + .get_aie_info = aie2_get_info, + .set_aie_state = aie2_set_state, + .hwctx_init = aie2_hwctx_init, + .hwctx_fini = aie2_hwctx_fini, + .hwctx_config = aie2_hwctx_config, + .hwctx_sync_debug_bo = aie2_hwctx_sync_debug_bo, + .cmd_submit = aie2_cmd_submit, + .hmm_invalidate = aie2_hmm_invalidate, + .get_array = aie2_get_array, +}; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h new file mode 100644 index 000000000000..a5f9c42155d1 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -0,0 +1,346 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AIE2_PCI_H_ +#define _AIE2_PCI_H_ + +#include <drm/amdxdna_accel.h> +#include <linux/semaphore.h> + +#include "amdxdna_mailbox.h" + +#define AIE2_INTERVAL 20000 /* us */ +#define AIE2_TIMEOUT 1000000 /* us */ + +/* Firmware determines device memory base address and size */ +#define AIE2_DEVM_BASE 0x4000000 +#define AIE2_DEVM_SIZE SZ_64M + +#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev)) + +#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr) +#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr) + +#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx) +#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset) +#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset) + +#define SMU_REG(ndev, idx) \ +({ \ + typeof(ndev) _ndev = ndev; \ + ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \ +}) +#define SRAM_GET_ADDR(ndev, idx) \ +({ \ + typeof(ndev) _ndev = ndev; \ + ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \ +}) + +#define CHAN_SLOT_SZ SZ_8K +#define MBOX_SIZE(ndev) \ +({ \ + typeof(ndev) _ndev = (ndev); \ + ((_ndev)->priv->mbox_size) ? 
(_ndev)->priv->mbox_size : \ + pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \ +}) + +enum aie2_smu_reg_idx { + SMU_CMD_REG = 0, + SMU_ARG_REG, + SMU_INTR_REG, + SMU_RESP_REG, + SMU_OUT_REG, + SMU_MAX_REGS /* Keep this at the end */ +}; + +enum aie2_sram_reg_idx { + MBOX_CHANN_OFF = 0, + FW_ALIVE_OFF, + SRAM_MAX_INDEX /* Keep this at the end */ +}; + +enum psp_reg_idx { + PSP_CMD_REG = 0, + PSP_ARG0_REG, + PSP_ARG1_REG, + PSP_ARG2_REG, + PSP_NUM_IN_REGS, /* number of input registers */ + PSP_INTR_REG = PSP_NUM_IN_REGS, + PSP_STATUS_REG, + PSP_RESP_REG, + PSP_MAX_REGS /* Keep this at the end */ +}; + +struct amdxdna_client; +struct amdxdna_fw_ver; +struct amdxdna_hwctx; +struct amdxdna_sched_job; + +struct psp_config { + const void *fw_buf; + u32 fw_size; + void __iomem *psp_regs[PSP_MAX_REGS]; +}; + +struct aie_version { + u16 major; + u16 minor; +}; + +struct aie_tile_metadata { + u16 row_count; + u16 row_start; + u16 dma_channel_count; + u16 lock_count; + u16 event_reg_count; +}; + +struct aie_metadata { + u32 size; + u16 cols; + u16 rows; + struct aie_version version; + struct aie_tile_metadata core; + struct aie_tile_metadata mem; + struct aie_tile_metadata shim; +}; + +enum rt_config_category { + AIE2_RT_CFG_INIT, + AIE2_RT_CFG_CLK_GATING, + AIE2_RT_CFG_FORCE_PREEMPT, + AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, +}; + +struct rt_config { + u32 type; + u32 value; + u32 category; + unsigned long feature_mask; +}; + +struct dpm_clk_freq { + u32 npuclk; + u32 hclk; +}; + +/* + * Define the maximum number of pending commands in a hardware context. + * Must be power of 2! + */ +#define HWCTX_MAX_CMDS 4 +#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) +struct amdxdna_hwctx_priv { + struct amdxdna_gem_obj *heap; + void *mbox_chann; + + struct drm_gpu_scheduler sched; + struct drm_sched_entity entity; + + struct mutex io_lock; /* protect seq and cmd order */ + struct wait_queue_head job_free_wq; + u32 num_pending; + u64 seq; + struct semaphore job_sem; + bool job_done; + + /* Completed job counter */ + u64 completed; + + struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; + struct drm_syncobj *syncobj; +}; + +enum aie2_dev_status { + AIE2_DEV_UNINIT, + AIE2_DEV_INIT, + AIE2_DEV_START, +}; + +struct aie2_exec_msg_ops { + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op); + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op); + void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + u32 (*get_chain_msg_op)(u32 cmd_op); +}; + +struct amdxdna_dev_hdl { + struct amdxdna_dev *xdna; + const struct amdxdna_dev_priv *priv; + void __iomem *sram_base; + void __iomem *smu_base; + void __iomem *mbox_base; + struct psp_device *psp_hdl; + + struct xdna_mailbox_chann_res mgmt_x2i; + struct xdna_mailbox_chann_res mgmt_i2x; + u32 mgmt_chan_idx; + u32 mgmt_prot_major; + u32 mgmt_prot_minor; + + u32 total_col; + struct aie_version version; + struct aie_metadata metadata; + unsigned long feature_mask; + struct aie2_exec_msg_ops *exec_msg_ops; + + /* power management and clock*/ + enum amdxdna_power_mode_type pw_mode; + u32 dpm_level; + u32 dft_dpm_level; + u32 
max_dpm_level; + u32 clk_gating; + u32 npuclk_freq; + u32 hclk_freq; + u32 max_tops; + u32 curr_tops; + u32 force_preempt_enabled; + u32 frame_boundary_preempt; + + /* Mailbox and the management channel */ + struct mailbox *mbox; + struct mailbox_channel *mgmt_chann; + struct async_events *async_events; + + enum aie2_dev_status dev_status; + u32 hwctx_num; + + struct amdxdna_async_error last_async_err; +}; + +#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ + [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE} + +struct aie2_bar_off_pair { + int bar_idx; + u32 offset; +}; + +struct aie2_hw_ops { + int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +}; + +enum aie2_fw_feature { + AIE2_NPU_COMMAND, + AIE2_PREEMPT, + AIE2_FEATURE_MAX +}; + +struct aie2_fw_feature_tbl { + enum aie2_fw_feature feature; + u32 max_minor; + u32 min_minor; +}; + +#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask) + +struct amdxdna_dev_priv { + const char *fw_path; + u64 protocol_major; + u64 protocol_minor; + const struct rt_config *rt_config; + const struct dpm_clk_freq *dpm_clk_tbl; + const struct aie2_fw_feature_tbl *fw_feature_tbl; + +#define COL_ALIGN_NONE 0 +#define COL_ALIGN_NATURE 1 + u32 col_align; + u32 mbox_dev_addr; + /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */ + u32 mbox_size; + u32 hwctx_limit; + u32 sram_dev_addr; + struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; + struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; + struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS]; + struct aie2_hw_ops hw_ops; +}; + +extern const struct amdxdna_dev_ops aie2_ops; + +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val); + +/* aie2 npu hw config */ +extern const struct dpm_clk_freq npu1_dpm_clk_table[]; +extern const struct dpm_clk_freq npu4_dpm_clk_table[]; +extern const struct rt_config npu1_default_rt_cfg[]; +extern const struct rt_config npu4_default_rt_cfg[]; +extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[]; + +/* aie2_smu.c */ +int aie2_smu_init(struct amdxdna_dev_hdl *ndev); +void aie2_smu_fini(struct amdxdna_dev_hdl *ndev); +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); + +/* aie2_pm.c */ +int aie2_pm_init(struct amdxdna_dev_hdl *ndev); +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); + +/* aie2_psp.c */ +struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); +int aie2_psp_start(struct psp_device *psp); +void aie2_psp_stop(struct psp_device *psp); + +/* aie2_error.c */ +int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); +void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev); +int aie2_error_async_msg_thread(void *data); +int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, + struct amdxdna_drm_get_array *args); + +/* aie2_message.c */ +void aie2_msg_init(struct amdxdna_dev_hdl *ndev); +int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); +int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); +int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); +int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value); +int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid); +int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version); +int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); +int 
aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, + struct amdxdna_fw_ver *fw_ver); +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); +int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); +int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled); +int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, + char __user *buf, u32 size, + struct amdxdna_drm_query_telemetry_header *header); +int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, + void *handle, int (*cb)(void*, void __iomem *, size_t)); +int aie2_config_cu(struct amdxdna_hwctx *hwctx, + int (*notify_cb)(void *, void __iomem *, size_t)); +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)); +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)); +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, + struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)); +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)); +int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)); + +/* aie2_hwctx.c */ +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); +int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl); +void aie2_hwctx_suspend(struct amdxdna_client *client); +int aie2_hwctx_resume(struct amdxdna_client *client); +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); + +#endif /* _AIE2_PCI_H_ */ diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c new file mode 100644 index 000000000000..426c38fce848 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> + +#include "aie2_pci.h" +#include "amdxdna_pci_drv.h" + +#define AIE2_CLK_GATING_ENABLE 1 +#define AIE2_CLK_GATING_DISABLE 0 + +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val) +{ + int ret; + + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val); + if (ret) + return ret; + + ndev->clk_gating = val; + return 0; +} + +int aie2_pm_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + if (ndev->dev_status != AIE2_DEV_UNINIT) { + /* Resume device */ + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating); + if (ret) + return ret; + + return 0; + } + + while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk) + ndev->max_dpm_level++; + ndev->max_dpm_level--; + + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE); + if (ret) + return ret; + + ndev->pw_mode = POWER_MODE_DEFAULT; + ndev->dft_dpm_level = ndev->max_dpm_level; + + return 0; +} + +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target) +{ + struct amdxdna_dev *xdna = ndev->xdna; + u32 clk_gating, dpm_level; + int ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (ndev->pw_mode == target) + return 0; + + switch (target) { + case POWER_MODE_TURBO: + if (ndev->hwctx_num) { + XDNA_ERR(xdna, "Can not set turbo when there is active hwctx"); + return -EINVAL; + } + + clk_gating = AIE2_CLK_GATING_DISABLE; + dpm_level = ndev->max_dpm_level; + break; + case POWER_MODE_HIGH: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->max_dpm_level; + break; + case POWER_MODE_DEFAULT: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->dft_dpm_level; + break; + default: + return -EOPNOTSUPP; + } + + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, clk_gating); + if (ret) + return ret; + + ndev->pw_mode = target; + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c new file mode 100644 index 000000000000..f28a060a8810 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_managed.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/bitfield.h> +#include <linux/iopoll.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +#define PSP_STATUS_READY BIT(31) + +/* PSP commands */ +#define PSP_VALIDATE 1 +#define PSP_START 2 +#define PSP_RELEASE_TMR 3 + +/* PSP special arguments */ +#define PSP_START_COPY_FW 1 + +/* PSP response error code */ +#define PSP_ERROR_CANCEL 0xFFFF0002 +#define PSP_ERROR_BAD_STATE 0xFFFF0007 + +#define PSP_FW_ALIGN 0x10000 +#define PSP_POLL_INTERVAL 20000 /* us */ +#define PSP_POLL_TIMEOUT 1000000 /* us */ + +#define PSP_REG(p, reg) ((p)->psp_regs[reg]) + +struct psp_device { + struct drm_device *ddev; + struct psp_config conf; + u32 fw_buf_sz; + u64 fw_paddr; + void *fw_buffer; + void __iomem *psp_regs[PSP_MAX_REGS]; +}; + +static int psp_exec(struct psp_device *psp, u32 *reg_vals) +{ + u32 resp_code; + int ret, i; + u32 ready; + + /* Write command and argument registers */ + for (i = 0; i < PSP_NUM_IN_REGS; i++) + writel(reg_vals[i], PSP_REG(psp, i)); + + /* clear and set PSP INTR register to kick off */ + writel(0, PSP_REG(psp, PSP_INTR_REG)); + writel(1, PSP_REG(psp, PSP_INTR_REG)); + + /* PSP should be busy. Wait for ready, so we know task is done. */ + ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready, + FIELD_GET(PSP_STATUS_READY, ready), + PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT); + if (ret) { + drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret); + return ret; + } + + resp_code = readl(PSP_REG(psp, PSP_RESP_REG)); + if (resp_code) { + drm_err(psp->ddev, "fw return error 0x%x", resp_code); + return -EIO; + } + + return 0; +} + +void aie2_psp_stop(struct psp_device *psp) +{ + u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, }; + int ret; + + ret = psp_exec(psp, reg_vals); + if (ret) + drm_err(psp->ddev, "release tmr failed, ret %d", ret); +} + +int aie2_psp_start(struct psp_device *psp) +{ + u32 reg_vals[PSP_NUM_IN_REGS]; + int ret; + + reg_vals[0] = PSP_VALIDATE; + reg_vals[1] = lower_32_bits(psp->fw_paddr); + reg_vals[2] = upper_32_bits(psp->fw_paddr); + reg_vals[3] = psp->fw_buf_sz; + + ret = psp_exec(psp, reg_vals); + if (ret) { + drm_err(psp->ddev, "failed to validate fw, ret %d", ret); + return ret; + } + + memset(reg_vals, 0, sizeof(reg_vals)); + reg_vals[0] = PSP_START; + reg_vals[1] = PSP_START_COPY_FW; + ret = psp_exec(psp, reg_vals); + if (ret) { + drm_err(psp->ddev, "failed to start fw, ret %d", ret); + return ret; + } + + return 0; +} + +struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf) +{ + struct psp_device *psp; + u64 offset; + + psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL); + if (!psp) + return NULL; + + psp->ddev = ddev; + memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); + + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); + if (!psp->fw_buffer) { + drm_err(ddev, "no memory for fw buffer"); + return NULL; + } + + /* + * AMD Platform Security Processor(PSP) requires host physical + * address to load NPU firmware. 
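+ *
+ * Worked example of the alignment below (addresses are illustrative only):
+ *
+ *	fw_paddr = virt_to_phys(fw_buffer);                  e.g. 0x12345678
+ *	offset = ALIGN(fw_paddr, PSP_FW_ALIGN) - fw_paddr;        0x0000a988
+ *	fw_paddr += offset;                          0x12350000, 64K aligned
+ *
+ * The buffer is over-allocated by PSP_FW_ALIGN, so copying the firmware
+ * image at 'offset' stays within the allocation.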
+ */ + psp->fw_paddr = virt_to_phys(psp->fw_buffer); + offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr; + psp->fw_paddr += offset; + memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size); + + return psp; +} diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c new file mode 100644 index 000000000000..bd94ee96c2bc --- /dev/null +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/iopoll.h> + +#include "aie2_pci.h" +#include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" + +#define SMU_RESULT_OK 1 + +/* SMU commands */ +#define AIE2_SMU_POWER_ON 0x3 +#define AIE2_SMU_POWER_OFF 0x4 +#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5 +#define AIE2_SMU_SET_HCLK_FREQ 0x6 +#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7 +#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8 + +#define NPU4_DPM_TOPS(ndev, dpm_level) \ +({ \ + typeof(ndev) _ndev = ndev; \ + (4096 * (_ndev)->total_col * \ + (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \ +}) + +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, + u32 reg_arg, u32 *out) +{ + u32 resp; + int ret; + + writel(0, SMU_REG(ndev, SMU_RESP_REG)); + writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG)); + writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG)); + + /* Clear and set SMU_INTR_REG to kick off */ + writel(0, SMU_REG(ndev, SMU_INTR_REG)); + writel(1, SMU_REG(ndev, SMU_INTR_REG)); + + ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp, + resp, AIE2_INTERVAL, AIE2_TIMEOUT); + if (ret) { + XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd); + return ret; + } + + if (out) + *out = readl(SMU_REG(ndev, SMU_OUT_REG)); + + if (resp != SMU_RESULT_OK) { + XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp); + return -EINVAL; + } + + return 0; +} + +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +{ + u32 freq; + int ret; + + ret = amdxdna_pm_resume_get(ndev->xdna); + if (ret) + return ret; + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); + goto suspend_put; + } + ndev->npuclk_freq = freq; + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); + goto suspend_put; + } + + amdxdna_pm_suspend_put(ndev->xdna); + ndev->hclk_freq = freq; + ndev->dpm_level = dpm_level; + ndev->max_tops = 2 * ndev->total_col; + ndev->curr_tops = ndev->max_tops * freq / 1028; + + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); + + return 0; + +suspend_put: + amdxdna_pm_suspend_put(ndev->xdna); + return ret; +} + +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +{ + int ret; + + ret = amdxdna_pm_resume_get(ndev->xdna); + if (ret) + return ret; + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", + dpm_level, ret); + goto suspend_put; + } + + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Set soft dpm level %d 
failed, ret %d",
+			 dpm_level, ret);
+		goto suspend_put;
+	}
+
+	amdxdna_pm_suspend_put(ndev->xdna);
+	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+	ndev->dpm_level = dpm_level;
+	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
+	ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
+
+	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+		 ndev->npuclk_freq, ndev->hclk_freq);
+
+	return 0;
+
+suspend_put:
+	amdxdna_pm_suspend_put(ndev->xdna);
+	return ret;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	/*
+	 * Failing to set power off indicates an unrecoverable hardware or
+	 * firmware error.
+	 */
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+		return ret;
+	}
+
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	ndev->priv->hw_ops.set_dpm(ndev, 0);
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
+	if (ret)
+		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
new file mode 100644
index 000000000000..2013d1f13aae
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
+#include "aie2_solver.h"
+
+struct partition_node {
+	struct list_head list;
+	u32 nshared;	/* # shared requests */
+	u32 start_col;	/* start column */
+	u32 ncols;	/* # columns */
+	bool exclusive;	/* cannot be shared if set */
+};
+
+struct solver_node {
+	struct list_head list;
+	u64 rid;	/* Request ID from consumer */
+
+	struct partition_node *pt_node;
+	void *cb_arg;
+	u32 dpm_level;
+	u32 cols_len;
+	u32 start_cols[] __counted_by(cols_len);
+};
+
+struct solver_rgroup {
+	u32 rgid;
+	u32 nnode;
+	u32 npartition_node;
+
+	DECLARE_BITMAP(resbit, XRS_MAX_COL);
+	struct list_head node_list;
+	struct list_head pt_node_list;
+};
+
+struct solver_state {
+	struct solver_rgroup rgp;
+	struct init_config cfg;
+	struct xrs_action_ops *actions;
+};
+
+static u32 calculate_gops(struct aie_qos *rqos)
+{
+	u32 service_rate = 0;
+
+	if (rqos->latency)
+		service_rate = (1000 / rqos->latency);
+
+	if (rqos->fps > service_rate)
+		return rqos->fps * rqos->gops;
+
+	return service_rate * rqos->gops;
+}
+
+/*
+ * qos_meet() - Check whether the QoS request can be met.
+ */
+static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
+{
+	u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
+
+	if (request_gops <= cgops)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * sanity_check() - Do a basic sanity check on the allocation request.
+ */
+static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
+{
+	struct cdo_parts *cdop = &req->cdo;
+	struct aie_qos *rqos = &req->rqos;
+	u32 cu_clk_freq;
+
+	if (cdop->ncols > xrs->cfg.total_col)
+		return -EINVAL;
+
+	/*
+	 * Check that at least one CDO group can meet the GOPs
+	 * requirement at the highest available clock frequency.
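+	 *
+	 * For example (numbers are illustrative only): qos_cap.opc = 2048
+	 * ops/cycle at cu_clk_freq = 1000 MHz gives 2048 * 1000 / 1000 =
+	 * 2048 GOPs of capacity, while a request of gops = 10 at fps = 30
+	 * with sys_eff_factor = 1 needs 10 * 30 = 300 GOPs, so it fits.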
+ */ + cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1]; + + if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000)) + return -EINVAL; + + return 0; +} + +static bool is_valid_qos_dpm_params(struct aie_qos *rqos) +{ + /* + * gops is retrieved from the xmodel, so it's always set + * fps and latency are the configurable params from the application + */ + if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0)) + return true; + + return false; +} + +static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level) +{ + struct solver_rgroup *rgp = &xrs->rgp; + struct cdo_parts *cdop = &req->cdo; + struct aie_qos *rqos = &req->rqos; + u32 freq, max_dpm_level, level; + struct solver_node *node; + + max_dpm_level = xrs->cfg.clk_list.num_levels - 1; + /* If no QoS parameters are passed, set it to the max DPM level */ + if (!is_valid_qos_dpm_params(rqos)) { + level = max_dpm_level; + goto set_dpm; + } + + /* Find one CDO group that meet the GOPs requirement. */ + for (level = 0; level < max_dpm_level; level++) { + freq = xrs->cfg.clk_list.cu_clk_list[level]; + if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000)) + break; + } + + /* set the dpm level which fits all the sessions */ + list_for_each_entry(node, &rgp->node_list, list) { + if (node->dpm_level > level) + level = node->dpm_level; + } + +set_dpm: + *dpm_level = level; + return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level); +} + +static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid) +{ + struct solver_node *node; + + list_for_each_entry(node, &rgp->node_list, list) { + if (node->rid == rid) + return node; + } + + return NULL; +} + +static void remove_partition_node(struct solver_rgroup *rgp, + struct partition_node *pt_node) +{ + pt_node->nshared--; + if (pt_node->nshared > 0) + return; + + list_del(&pt_node->list); + rgp->npartition_node--; + + bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols); + kfree(pt_node); +} + +static void remove_solver_node(struct solver_rgroup *rgp, + struct solver_node *node) +{ + list_del(&node->list); + rgp->nnode--; + + if (node->pt_node) + remove_partition_node(rgp, node->pt_node); + + kfree(node); +} + +static int get_free_partition(struct solver_state *xrs, + struct solver_node *snode, + struct alloc_requests *req) +{ + struct partition_node *pt_node; + u32 ncols = req->cdo.ncols; + u32 col, i; + + for (i = 0; i < snode->cols_len; i++) { + col = snode->start_cols[i]; + if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols) + break; + } + + if (i == snode->cols_len) + return -ENODEV; + + pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL); + if (!pt_node) + return -ENOMEM; + + pt_node->nshared = 1; + pt_node->start_col = col; + pt_node->ncols = ncols; + + /* + * Always set exclusive to false for now. 
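+	 * As a result, allocate_partition() may later share this partition
+	 * with another request whose start column and width match (bumping
+	 * ->nshared); setting it true would reserve the columns for a
+	 * single context.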
+ */ + pt_node->exclusive = false; + + list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list); + xrs->rgp.npartition_node++; + bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols); + + snode->pt_node = pt_node; + + return 0; +} + +static int allocate_partition(struct solver_state *xrs, + struct solver_node *snode, + struct alloc_requests *req) +{ + struct partition_node *pt_node, *rpt_node = NULL; + int idx, ret; + + ret = get_free_partition(xrs, snode, req); + if (!ret) + return ret; + + /* try to get a share-able partition */ + list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) { + if (pt_node->exclusive) + continue; + + if (rpt_node && pt_node->nshared >= rpt_node->nshared) + continue; + + for (idx = 0; idx < snode->cols_len; idx++) { + if (snode->start_cols[idx] != pt_node->start_col) + continue; + + if (req->cdo.ncols != pt_node->ncols) + continue; + + rpt_node = pt_node; + break; + } + } + + if (!rpt_node) + return -ENODEV; + + rpt_node->nshared++; + snode->pt_node = rpt_node; + + return 0; +} + +static struct solver_node *create_solver_node(struct solver_state *xrs, + struct alloc_requests *req) +{ + struct cdo_parts *cdop = &req->cdo; + struct solver_node *node; + int ret; + + node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->rid = req->rid; + node->cols_len = cdop->cols_len; + memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32)); + + ret = allocate_partition(xrs, node, req); + if (ret) + goto free_node; + + list_add_tail(&node->list, &xrs->rgp.node_list); + xrs->rgp.nnode++; + return node; + +free_node: + kfree(node); + return ERR_PTR(ret); +} + +static void fill_load_action(struct solver_state *xrs, + struct solver_node *snode, + struct xrs_action_load *action) +{ + action->rid = snode->rid; + action->part.start_col = snode->pt_node->start_col; + action->part.ncols = snode->pt_node->ncols; +} + +int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg) +{ + struct xrs_action_load load_act; + struct solver_node *snode; + struct solver_state *xrs; + u32 dpm_level; + int ret; + + xrs = (struct solver_state *)hdl; + + ret = sanity_check(xrs, req); + if (ret) { + drm_err(xrs->cfg.ddev, "invalid request"); + return ret; + } + + if (rg_search_node(&xrs->rgp, req->rid)) { + drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid); + return -EEXIST; + } + + snode = create_solver_node(xrs, req); + if (IS_ERR(snode)) + return PTR_ERR(snode); + + fill_load_action(xrs, snode, &load_act); + ret = xrs->cfg.actions->load(cb_arg, &load_act); + if (ret) + goto free_node; + + ret = set_dpm_level(xrs, req, &dpm_level); + if (ret) + goto free_node; + + snode->dpm_level = dpm_level; + snode->cb_arg = cb_arg; + + drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n", + snode->pt_node->start_col, snode->pt_node->ncols); + + return 0; + +free_node: + remove_solver_node(&xrs->rgp, snode); + + return ret; +} + +int xrs_release_resource(void *hdl, u64 rid) +{ + struct solver_state *xrs = hdl; + struct solver_node *node; + + node = rg_search_node(&xrs->rgp, rid); + if (!node) { + drm_err(xrs->cfg.ddev, "node not exist"); + return -ENODEV; + } + + xrs->cfg.actions->unload(node->cb_arg); + remove_solver_node(&xrs->rgp, node); + + return 0; +} + +void *xrsm_init(struct init_config *cfg) +{ + struct solver_rgroup *rgp; + struct solver_state *xrs; + + xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL); + if (!xrs) + return NULL; + + memcpy(&xrs->cfg, cfg, sizeof(*cfg)); + + rgp = 
&xrs->rgp; + INIT_LIST_HEAD(&rgp->node_list); + INIT_LIST_HEAD(&rgp->pt_node_list); + + return xrs; +} diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h new file mode 100644 index 000000000000..a2e3c52229e9 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_solver.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AIE2_SOLVER_H +#define _AIE2_SOLVER_H + +#define XRS_MAX_COL 128 + +/* + * Structure used to describe a partition. A partition is column based + * allocation unit described by its start column and number of columns. + */ +struct aie_part { + u32 start_col; + u32 ncols; +}; + +/* + * The QoS capabilities of a given AIE partition. + */ +struct aie_qos_cap { + u32 opc; /* operations per cycle */ + u32 dma_bw; /* DMA bandwidth */ +}; + +/* + * QoS requirement of a resource allocation. + */ +struct aie_qos { + u32 gops; /* Giga operations */ + u32 fps; /* Frames per second */ + u32 dma_bw; /* DMA bandwidth */ + u32 latency; /* Frame response latency */ + u32 exec_time; /* Frame execution time */ + u32 priority; /* Request priority */ +}; + +/* + * Structure used to describe a relocatable CDO (Configuration Data Object). + */ +struct cdo_parts { + u32 *start_cols; /* Start column array */ + u32 cols_len; /* Length of start column array */ + u32 ncols; /* # of column */ + struct aie_qos_cap qos_cap; /* CDO QoS capabilities */ +}; + +/* + * Structure used to describe a request to allocate. + */ +struct alloc_requests { + u64 rid; + struct cdo_parts cdo; + struct aie_qos rqos; /* Requested QoS */ +}; + +/* + * Load callback argument + */ +struct xrs_action_load { + u32 rid; + struct aie_part part; +}; + +/* + * Define the power level available + * + * POWER_LEVEL_MIN: + * Lowest power level. Usually set when all actions are unloaded. + * + * POWER_LEVEL_n + * Power levels 0 - n, is a step increase in system frequencies + */ +enum power_level { + POWER_LEVEL_MIN = 0x0, + POWER_LEVEL_0 = 0x1, + POWER_LEVEL_1 = 0x2, + POWER_LEVEL_2 = 0x3, + POWER_LEVEL_3 = 0x4, + POWER_LEVEL_4 = 0x5, + POWER_LEVEL_5 = 0x6, + POWER_LEVEL_6 = 0x7, + POWER_LEVEL_7 = 0x8, + POWER_LEVEL_NUM, +}; + +/* + * Structure used to describe the frequency table. + * Resource solver chooses the frequency from the table + * to meet the QOS requirements. + */ +struct clk_list_info { + u32 num_levels; /* available power levels */ + u32 cu_clk_list[POWER_LEVEL_NUM]; /* available aie clock frequencies in Mhz*/ +}; + +struct xrs_action_ops { + int (*load)(void *cb_arg, struct xrs_action_load *action); + int (*unload)(void *cb_arg); + int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level); +}; + +/* + * Structure used to describe information for solver during initialization. + */ +struct init_config { + u32 total_col; + u32 sys_eff_factor; /* system efficiency factor */ + u32 latency_adj; /* latency adjustment in ms */ + struct clk_list_info clk_list; /* List of frequencies available in system */ + struct drm_device *ddev; + struct xrs_action_ops *actions; +}; + +/* + * xrsm_init() - Register resource solver. Resource solver client needs + * to call this function to register itself. + * + * @cfg: The system metrics for resource solver to use + * + * Return: A resource solver handle + * + * Note: We should only create one handle per AIE array to be managed. 
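 *
 * A minimal usage sketch (illustrative: the callback table and the column
 * count are made-up values, and cfg.clk_list is omitted for brevity):
 *
 *	struct init_config cfg = {
 *		.total_col = 4,
 *		.sys_eff_factor = 1,
 *		.ddev = ddev,
 *		.actions = &my_xrs_ops,
 *	};
 *	void *hdl = xrsm_init(&cfg);
 *
 *	if (!hdl)
 *		return -ENOMEM;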
+ */
+void *xrsm_init(struct init_config *cfg);
+
+/*
+ * xrs_allocate_resource() - Request to allocate resources for a given context
+ *                           and partition metadata. (See struct part_meta)
+ *
+ * @hdl:	Resource solver handle obtained from xrsm_init()
+ * @req:	Input to the Resource solver including request id
+ *		and partition metadata.
+ * @cb_arg:	callback argument pointer
+ *
+ * Return:	0 on success, or a standard error number on failure.
+ *
+ * Note:
+ *      There is no locking inside the resource solver, so it is
+ *      the caller's responsibility to lock down XCLBINs and take
+ *      any necessary locks.
+ */
+int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg);
+
+/*
+ * xrs_release_resource() - Request to free resources for a given context.
+ *
+ * @hdl:	Resource solver handle obtained from xrsm_init()
+ * @rid:	The Request ID to identify the requesting context
+ */
+int xrs_release_resource(void *hdl, u64 rid);
+#endif /* _AIE2_SOLVER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
new file mode 100644
index 000000000000..d17aef89a0ad
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/xarray.h>
+#include <trace/events/amdxdna.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+
+#define MAX_HWCTX_ID	255
+#define MAX_ARG_COUNT	4095
+
+struct amdxdna_fence {
+	struct dma_fence base;
+	spinlock_t lock; /* for base */
+	struct amdxdna_hwctx *hwctx;
+};
+
+static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
+{
+	return KBUILD_MODNAME;
+}
+
+static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct amdxdna_fence *xdna_fence;
+
+	xdna_fence = container_of(fence, struct amdxdna_fence, base);
+
+	return xdna_fence->hwctx->name;
+}
+
+static const struct dma_fence_ops fence_ops = {
+	.get_driver_name = amdxdna_fence_get_driver_name,
+	.get_timeline_name = amdxdna_fence_get_timeline_name,
+};
+
+static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence->hwctx = hwctx;
+	spin_lock_init(&fence->lock);
+	dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
+	return &fence->base;
+}
+
+static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
+				      struct srcu_struct *ss)
+{
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+	synchronize_srcu(ss);
+
+	/* At this point, user is not able to submit new commands */
+	xdna->dev_info->ops->hwctx_fini(hwctx);
+
+	kfree(hwctx->name);
+	kfree(hwctx);
+}
+
+int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
+		       int (*walk)(struct amdxdna_hwctx *hwctx, void *arg))
+{
+	struct amdxdna_hwctx *hwctx;
+	unsigned long hwctx_id;
+	int ret = 0, idx;
+
+	idx = srcu_read_lock(&client->hwctx_srcu);
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+		ret = walk(hwctx, arg);
+		if (ret)
+			break;
+	}
+	srcu_read_unlock(&client->hwctx_srcu, idx);
+
+	return ret;
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
+{
+	struct amdxdna_cmd *cmd = abo->mem.kva;
+	u32 num_masks, count;
+
+	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+		num_masks = 0;
+	else
+		num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+
+	if (size) {
+		count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
+		if (unlikely(count <= num_masks)) {
+			*size = 0;
+			return NULL;
+		}
+		*size = (count - num_masks) * sizeof(u32);
+	}
+	return &cmd->data[num_masks];
+}
+
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_cmd *cmd = abo->mem.kva;
+	u32 num_masks, i;
+	u32 *cu_mask;
+
+	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+		return INVALID_CU_IDX;
+
+	num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+	cu_mask = cmd->data;
+	for (i = 0; i < num_masks; i++) {
+		if (cu_mask[i])
+			return ffs(cu_mask[i]) - 1;
+	}
+
+	return INVALID_CU_IDX;
+}
+
+/*
+ * This should be called in close() and remove(). DO NOT call in other syscalls.
+ * This guarantees that the hwctx and its resources are released even if the
+ * user never called amdxdna_drm_destroy_hwctx_ioctl().
+ */
+void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
+{
+	struct amdxdna_hwctx *hwctx;
+	unsigned long hwctx_id;
+
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+		XDNA_DBG(client->xdna, "PID %d close HW context %d",
+			 client->pid, hwctx->id);
+		xa_erase(&client->hwctx_xa, hwctx->id);
+		amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
+	}
+}
+
+int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_drm_create_hwctx *args = data;
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_hwctx *hwctx;
+	int ret, idx;
+
+	if (args->ext || args->ext_flags)
+		return -EINVAL;
+
+	hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
+	if (!hwctx)
+		return -ENOMEM;
+
+	if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
+		XDNA_ERR(xdna, "Access QoS info failed");
+		kfree(hwctx);
+		return -EFAULT;
+	}
+
+	hwctx->client = client;
+	hwctx->fw_ctx_id = -1;
+	hwctx->num_tiles = args->num_tiles;
+	hwctx->mem_size = args->mem_size;
+	hwctx->max_opc = args->max_opc;
+
+	guard(mutex)(&xdna->dev_lock);
+
+	if (!drm_dev_enter(dev, &idx)) {
+		ret = -ENODEV;
+		goto free_hwctx;
+	}
+
+	ret = xdna->dev_info->ops->hwctx_init(hwctx);
+	if (ret) {
+		XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+		goto dev_exit;
+	}
+
+	hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
+	if (!hwctx->name) {
+		ret = -ENOMEM;
+		goto fini_hwctx;
+	}
+
+	ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+			      XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+			      &client->next_hwctxid, GFP_KERNEL);
+	if (ret < 0) {
+		XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+		goto free_name;
+	}
+
+	args->handle = hwctx->id;
+	args->syncobj_handle = hwctx->syncobj_hdl;
+
+	atomic64_set(&hwctx->job_submit_cnt, 0);
+	atomic64_set(&hwctx->job_free_cnt, 0);
+	XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
+	drm_dev_exit(idx);
+	return 0;

free_name:
+	kfree(hwctx->name);
fini_hwctx:
+	xdna->dev_info->ops->hwctx_fini(hwctx);
dev_exit:
+	drm_dev_exit(idx);
free_hwctx:
+	kfree(hwctx);
+	return ret;
+}
+
+int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_drm_destroy_hwctx *args = data;
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct
amdxdna_hwctx *hwctx; + int ret = 0, idx; + + if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) + return -EINVAL; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; + + mutex_lock(&xdna->dev_lock); + hwctx = xa_erase(&client->hwctx_xa, args->handle); + if (!hwctx) { + ret = -EINVAL; + XDNA_DBG(xdna, "PID %d HW context %d not exist", + client->pid, args->handle); + goto out; + } + + /* + * The pushed jobs are handled by DRM scheduler during destroy. + * SRCU to synchronize with exec command ioctls. + */ + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); + + XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); +out: + mutex_unlock(&xdna->dev_lock); + drm_dev_exit(idx); + return ret; +} + +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_config_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + int ret, idx; + u32 buf_size; + void *buf; + u64 val; + + if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) + return -EINVAL; + + if (!xdna->dev_info->ops->hwctx_config) + return -EOPNOTSUPP; + + val = args->param_val; + buf_size = args->param_val_size; + + switch (args->param_type) { + case DRM_AMDXDNA_HWCTX_CONFIG_CU: + /* For those types that param_val is pointer */ + if (buf_size > PAGE_SIZE) { + XDNA_ERR(xdna, "Config CU param buffer too large"); + return -E2BIG; + } + + /* Hwctx needs to keep buf */ + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) { + kfree(buf); + return -EFAULT; + } + + break; + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: + /* For those types that param_val is a value */ + buf = NULL; + buf_size = 0; + break; + default: + XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type); + return -EINVAL; + } + + mutex_lock(&xdna->dev_lock); + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, args->handle); + if (!hwctx) { + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); + ret = -EINVAL; + goto unlock_srcu; + } + + ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); + +unlock_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + mutex_unlock(&xdna->dev_lock); + kfree(buf); + return ret; +} + +int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx *hwctx; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + int ret, idx; + + if (!xdna->dev_info->ops->hwctx_sync_debug_bo) + return -EOPNOTSUPP; + + gobj = drm_gem_object_lookup(client->filp, debug_bo_hdl); + if (!gobj) + return -EINVAL; + + abo = to_xdna_obj(gobj); + guard(mutex)(&xdna->dev_lock); + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx); + if (!hwctx) { + ret = -EINVAL; + goto unlock_srcu; + } + + ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl); + +unlock_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + drm_gem_object_put(gobj); + return ret; +} + +static void +amdxdna_arg_bos_put(struct amdxdna_sched_job *job) +{ + int i; + + for (i = 0; i < job->bo_cnt; i++) { + if (!job->bos[i]) + break; + drm_gem_object_put(job->bos[i]); + } +} + +static int +amdxdna_arg_bos_lookup(struct amdxdna_client *client, + struct 
amdxdna_sched_job *job, + u32 *bo_hdls, u32 bo_cnt) +{ + struct drm_gem_object *gobj; + int i, ret; + + job->bo_cnt = bo_cnt; + for (i = 0; i < job->bo_cnt; i++) { + struct amdxdna_gem_obj *abo; + + gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]); + if (!gobj) { + ret = -ENOENT; + goto put_shmem_bo; + } + abo = to_xdna_obj(gobj); + + mutex_lock(&abo->lock); + if (abo->pinned) { + mutex_unlock(&abo->lock); + job->bos[i] = gobj; + continue; + } + + ret = amdxdna_gem_pin_nolock(abo); + if (ret) { + mutex_unlock(&abo->lock); + drm_gem_object_put(gobj); + goto put_shmem_bo; + } + abo->pinned = true; + mutex_unlock(&abo->lock); + + job->bos[i] = gobj; + } + + return 0; + +put_shmem_bo: + amdxdna_arg_bos_put(job); + return ret; +} + +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) +{ + trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); + amdxdna_arg_bos_put(job); + amdxdna_gem_put_obj(job->cmd_bo); + dma_fence_put(job->fence); +} + +int amdxdna_cmd_submit(struct amdxdna_client *client, + struct amdxdna_drv_cmd *drv_cmd, + u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt, + u32 hwctx_hdl, u64 *seq) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_sched_job *job; + struct amdxdna_hwctx *hwctx; + int ret, idx; + + XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt); + job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL); + if (!job) + return -ENOMEM; + + job->drv_cmd = drv_cmd; + + if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) { + job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD); + if (!job->cmd_bo) { + XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl); + ret = -EINVAL; + goto free_job; + } + } + + ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt); + if (ret) { + XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret); + goto cmd_put; + } + + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, hwctx_hdl); + if (!hwctx) { + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", + client->pid, hwctx_hdl); + ret = -EINVAL; + goto unlock_srcu; + } + + + job->hwctx = hwctx; + job->mm = current->mm; + + job->fence = amdxdna_fence_create(hwctx); + if (!job->fence) { + XDNA_ERR(xdna, "Failed to create fence"); + ret = -ENOMEM; + goto unlock_srcu; + } + kref_init(&job->refcnt); + + ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq); + if (ret) + goto put_fence; + + /* + * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated + * resource after synchronize_srcu(). The submitted jobs should be + * handled by the queue, for example DRM scheduler, in device layer. + * For here we can unlock SRCU. + */ + srcu_read_unlock(&client->hwctx_srcu, idx); + trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed"); + + return 0; + +put_fence: + dma_fence_put(job->fence); +unlock_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + amdxdna_arg_bos_put(job); +cmd_put: + amdxdna_gem_put_obj(job->cmd_bo); +free_job: + kfree(job); + return ret; +} + +/* + * The submit command ioctl submits a command to firmware. One firmware command + * may contain multiple command BOs for processing as a whole. + * The command sequence number is returned which can be used for wait command ioctl. 
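 *
 * A userspace sketch (hedged: the ioctl wrapper name and handle values are
 * assumed from the uapi header, not defined in this file):
 *
 *	struct amdxdna_drm_exec_cmd ec = {
 *		.hwctx = hwctx_handle,
 *		.type = AMDXDNA_CMD_SUBMIT_EXEC_BUF,
 *		.cmd_handles = cmd_bo_handle,
 *		.cmd_count = 1,
 *		.args = (uintptr_t)arg_bo_handles,
 *		.arg_count = nargs,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_AMDXDNA_EXEC_CMD, &ec);
 *	then wait on ec.seq with the wait command ioctl.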
+ */ +static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, + struct amdxdna_drm_exec_cmd *args) +{ + struct amdxdna_dev *xdna = client->xdna; + u32 *arg_bo_hdls = NULL; + u32 cmd_bo_hdl; + int ret; + + if (args->arg_count > MAX_ARG_COUNT) { + XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); + return -EINVAL; + } + + /* Only support single command for now. */ + if (args->cmd_count != 1) { + XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count); + return -EINVAL; + } + + cmd_bo_hdl = (u32)args->cmd_handles; + if (args->arg_count) { + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); + if (!arg_bo_hdls) + return -ENOMEM; + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), + args->arg_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_cmd_bo_hdls; + } + } + + ret = amdxdna_cmd_submit(client, NULL, cmd_bo_hdl, arg_bo_hdls, + args->arg_count, args->hwctx, &args->seq); + if (ret) + XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret); + +free_cmd_bo_hdls: + kfree(arg_bo_hdls); + if (!ret) + XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq); + return ret; +} + +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_exec_cmd *args = data; + + if (args->ext || args->ext_flags) + return -EINVAL; + + switch (args->type) { + case AMDXDNA_CMD_SUBMIT_EXEC_BUF: + return amdxdna_drm_submit_execbuf(client, args); + } + + XDNA_ERR(client->xdna, "Invalid command type %d", args->type); + return -EINVAL; +} diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h new file mode 100644 index 000000000000..b6151244d64f --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_CTX_H_ +#define _AMDXDNA_CTX_H_ + +#include <linux/bitfield.h> + +#include "amdxdna_gem.h" + +struct amdxdna_hwctx_priv; + +enum ert_cmd_opcode { + ERT_START_CU = 0, + ERT_CMD_CHAIN = 19, + ERT_START_NPU = 20, + ERT_START_NPU_PREEMPT = 21, + ERT_START_NPU_PREEMPT_ELF = 22, + ERT_INVALID_CMD = ~0U, +}; + +enum ert_cmd_state { + ERT_CMD_STATE_INVALID, + ERT_CMD_STATE_NEW, + ERT_CMD_STATE_QUEUED, + ERT_CMD_STATE_RUNNING, + ERT_CMD_STATE_COMPLETED, + ERT_CMD_STATE_ERROR, + ERT_CMD_STATE_ABORT, + ERT_CMD_STATE_SUBMITTED, + ERT_CMD_STATE_TIMEOUT, + ERT_CMD_STATE_NORESPONSE, +}; + +/* + * Interpretation of the beginning of data payload for ERT_START_NPU in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. + */ +struct amdxdna_cmd_start_npu { + u64 buffer; /* instruction buffer address */ + u32 buffer_size; /* size of buffer in bytes */ + u32 prop_count; /* properties count */ + u32 prop_args[]; /* properties and regular kernel arguments */ +}; + +/* + * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles. + */ +struct amdxdna_cmd_chain { + u32 command_count; + u32 submit_index; + u32 error_index; + u32 reserved[3]; + u64 data[] __counted_by(command_count); +}; + +/* + * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. 
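 *
 * A decode sketch (illustrative; it mirrors how the driver reads other
 * payloads via amdxdna_cmd_get_payload()):
 *
 *	u32 size;
 *	struct amdxdna_cmd_preempt_data *pd;
 *
 *	pd = amdxdna_cmd_get_payload(abo, &size);
 *	if (pd && size >= sizeof(*pd))
 *		use pd->inst_buf, pd->save_buf, pd->restore_buf, ...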
+ */ +struct amdxdna_cmd_preempt_data { + u64 inst_buf; /* instruction buffer address */ + u64 save_buf; /* save buffer address */ + u64 restore_buf; /* restore buffer address */ + u32 inst_size; /* size of instruction buffer in bytes */ + u32 save_size; /* size of save buffer in bytes */ + u32 restore_size; /* size of restore buffer in bytes */ + u32 inst_prop_cnt; /* properties count */ + u32 prop_args[]; /* properties and regular kernel arguments */ +}; + +/* Exec buffer command header format */ +#define AMDXDNA_CMD_STATE GENMASK(3, 0) +#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) +#define AMDXDNA_CMD_COUNT GENMASK(22, 12) +#define AMDXDNA_CMD_OPCODE GENMASK(27, 23) +struct amdxdna_cmd { + u32 header; + u32 data[]; +}; + +#define INVALID_CU_IDX (~0U) + +struct amdxdna_hwctx { + struct amdxdna_client *client; + struct amdxdna_hwctx_priv *priv; + char *name; + + u32 id; + u32 max_opc; + u32 num_tiles; + u32 mem_size; + u32 fw_ctx_id; + u32 col_list_len; + u32 *col_list; + u32 start_col; + u32 num_col; +#define HWCTX_STAT_INIT 0 +#define HWCTX_STAT_READY 1 +#define HWCTX_STAT_STOP 2 + u32 status; + u32 old_status; + + struct amdxdna_qos_info qos; + struct amdxdna_hwctx_param_config_cu *cus; + u32 syncobj_hdl; + + atomic64_t job_submit_cnt; + atomic64_t job_free_cnt ____cacheline_aligned_in_smp; +}; + +#define drm_job_to_xdna_job(j) \ + container_of(j, struct amdxdna_sched_job, base) + +enum amdxdna_job_opcode { + SYNC_DEBUG_BO, + ATTACH_DEBUG_BO, + DETACH_DEBUG_BO, +}; + +struct amdxdna_drv_cmd { + enum amdxdna_job_opcode opcode; + u32 result; +}; + +struct amdxdna_sched_job { + struct drm_sched_job base; + struct kref refcnt; + struct amdxdna_hwctx *hwctx; + struct mm_struct *mm; + /* The fence to notice DRM scheduler that job is done by hardware */ + struct dma_fence *fence; + /* user can wait on this fence */ + struct dma_fence *out_fence; + bool job_done; + bool job_timeout; + u64 seq; + struct amdxdna_drv_cmd *drv_cmd; + struct amdxdna_gem_obj *cmd_bo; + size_t bo_cnt; + struct drm_gem_object *bos[] __counted_by(bo_cnt); +}; + +static inline u32 +amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + + return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header); +} + +static inline void +amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + + cmd->header &= ~AMDXDNA_CMD_STATE; + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s); +} + +static inline enum ert_cmd_state +amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_cmd *cmd = abo->mem.kva; + + return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header); +} + +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); + +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); +void amdxdna_hwctx_remove_all(struct amdxdna_client *client); +int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg, + int (*walk)(struct amdxdna_hwctx *hwctx, void *arg)); +int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl); + +int amdxdna_cmd_submit(struct amdxdna_client *client, + struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls, + u32 *arg_bo_hdls, u32 arg_bo_cnt, + u32 hwctx_hdl, u64 *seq); + +int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl, + u64 seq, u32 timeout); + +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_config_hwctx_ioctl(struct drm_device 
*dev, void *data, struct drm_file *filp); +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +#endif /* _AMDXDNA_CTX_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h new file mode 100644 index 000000000000..c51de86ec12b --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_error.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_ERROR_H_ +#define _AMDXDNA_ERROR_H_ + +#include <linux/bitfield.h> +#include <linux/bits.h> + +#define AMDXDNA_ERR_DRV_AIE 4 +#define AMDXDNA_ERR_SEV_CRITICAL 3 +#define AMDXDNA_ERR_CLASS_AIE 2 + +#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0) +#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16) +#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24) +#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32) +#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40) + +enum amdxdna_error_num { + AMDXDNA_ERROR_NUM_AIE_SATURATION = 3, + AMDXDNA_ERROR_NUM_AIE_FP, + AMDXDNA_ERROR_NUM_AIE_STREAM, + AMDXDNA_ERROR_NUM_AIE_ACCESS, + AMDXDNA_ERROR_NUM_AIE_BUS, + AMDXDNA_ERROR_NUM_AIE_INSTRUCTION, + AMDXDNA_ERROR_NUM_AIE_ECC, + AMDXDNA_ERROR_NUM_AIE_LOCK, + AMDXDNA_ERROR_NUM_AIE_DMA, + AMDXDNA_ERROR_NUM_AIE_MEM_PARITY, + AMDXDNA_ERROR_NUM_UNKNOWN = 15, +}; + +enum amdxdna_error_module { + AMDXDNA_ERROR_MODULE_AIE_CORE = 3, + AMDXDNA_ERROR_MODULE_AIE_MEMORY, + AMDXDNA_ERROR_MODULE_AIE_SHIM, + AMDXDNA_ERROR_MODULE_AIE_NOC, + AMDXDNA_ERROR_MODULE_AIE_PL, + AMDXDNA_ERROR_MODULE_UNKNOWN = 8, +}; + +#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \ + (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \ + FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE)) + +#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0) +#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8) + +#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \ + (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \ + FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row)) + +#endif /* _AMDXDNA_ERROR_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c new file mode 100644 index 000000000000..dfa916eeb2d9 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -0,0 +1,972 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_cache.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/dma-buf.h> +#include <linux/dma-direct.h> +#include <linux/iosys-map.h> +#include <linux/pagemap.h> +#include <linux/vmalloc.h> + +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_pci_drv.h" +#include "amdxdna_ubuf.h" + +#define XDNA_MAX_CMD_BO_SIZE SZ_32K + +MODULE_IMPORT_NS("DMA_BUF"); + +static int +amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_client *client = abo->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_mem *mem = &abo->mem; + struct amdxdna_gem_obj *heap; + u64 offset; + u32 align; + int ret; + + mutex_lock(&client->mm_lock); + + heap = client->dev_heap; + if (!heap) { + ret = -EINVAL; + goto unlock_out; + } + + if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) { + XDNA_ERR(xdna, "Invalid dev heap userptr"); + ret = -EINVAL; + goto unlock_out; + } + + if (mem->size == 0 || mem->size > heap->mem.size) { + XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx", + mem->size, heap->mem.size); + ret = -EINVAL; + goto unlock_out; + } + + align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift); + ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node, + mem->size, align, + 0, DRM_MM_INSERT_BEST); + if (ret) { + XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret); + goto unlock_out; + } + + mem->dev_addr = abo->mm_node.start; + offset = mem->dev_addr - heap->mem.dev_addr; + mem->userptr = heap->mem.userptr + offset; + mem->kva = heap->mem.kva + offset; + + drm_gem_object_get(to_gobj(heap)); + +unlock_out: + mutex_unlock(&client->mm_lock); + + return ret; +} + +static void +amdxdna_gem_destroy_obj(struct amdxdna_gem_obj *abo) +{ + mutex_destroy(&abo->lock); + kfree(abo); +} + +static void +amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_gem_obj *heap; + + mutex_lock(&abo->client->mm_lock); + + drm_mm_remove_node(&abo->mm_node); + + heap = abo->client->dev_heap; + drm_gem_object_put(to_gobj(heap)); + + mutex_unlock(&abo->client->mm_lock); +} + +static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier); + struct amdxdna_gem_obj *abo = mapp->abo; + struct amdxdna_dev *xdna; + + xdna = to_xdna_dev(to_gobj(abo)->dev); + XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d", + mapp->vma->vm_start, mapp->vma->vm_end, abo->type); + + if (!mmu_notifier_range_blockable(range)) + return false; + + down_write(&xdna->notifier_lock); + abo->mem.map_invalid = true; + mapp->invalid = true; + mmu_interval_set_seq(&mapp->notifier, cur_seq); + up_write(&xdna->notifier_lock); + + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); + + if (range->event == MMU_NOTIFY_UNMAP) { + down_write(&xdna->notifier_lock); + if (!mapp->unmapped) { + queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work); + mapp->unmapped = true; + } + up_write(&xdna->notifier_lock); + } + + return true; +} + +static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = { + .invalidate = amdxdna_hmm_invalidate, +}; + +static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct amdxdna_umap *mapp; + + 
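+	/*
+	 * Tear down the mapping that matches @vma or, when @vma is NULL,
+	 * every mapping of this BO. The actual notifier removal is deferred
+	 * to notifier_wq: amdxdna_umap_release() itself takes notifier_lock
+	 * for list_del(), so it cannot run under the lock held here.
+	 */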
down_read(&xdna->notifier_lock); + list_for_each_entry(mapp, &abo->mem.umap_list, node) { + if (!vma || mapp->vma == vma) { + if (!mapp->unmapped) { + queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work); + mapp->unmapped = true; + } + if (vma) + break; + } + } + up_read(&xdna->notifier_lock); +} + +static void amdxdna_umap_release(struct kref *ref) +{ + struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt); + struct vm_area_struct *vma = mapp->vma; + struct amdxdna_dev *xdna; + + mmu_interval_notifier_remove(&mapp->notifier); + if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping) + mapping_clear_unevictable(vma->vm_file->f_mapping); + + xdna = to_xdna_dev(to_gobj(mapp->abo)->dev); + down_write(&xdna->notifier_lock); + list_del(&mapp->node); + up_write(&xdna->notifier_lock); + + kvfree(mapp->range.hmm_pfns); + kfree(mapp); +} + +void amdxdna_umap_put(struct amdxdna_umap *mapp) +{ + kref_put(&mapp->refcnt, amdxdna_umap_release); +} + +static void amdxdna_hmm_unreg_work(struct work_struct *work) +{ + struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap, + hmm_unreg_work); + + amdxdna_umap_put(mapp); +} + +static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + unsigned long len = vma->vm_end - vma->vm_start; + unsigned long addr = vma->vm_start; + struct amdxdna_umap *mapp; + u32 nr_pages; + int ret; + + if (!xdna->dev_info->ops->hmm_invalidate) + return 0; + + mapp = kzalloc(sizeof(*mapp), GFP_KERNEL); + if (!mapp) + return -ENOMEM; + + nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT; + mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns), + GFP_KERNEL); + if (!mapp->range.hmm_pfns) { + ret = -ENOMEM; + goto free_map; + } + + ret = mmu_interval_notifier_insert_locked(&mapp->notifier, + current->mm, + addr, + len, + &amdxdna_hmm_ops); + if (ret) { + XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret); + goto free_pfns; + } + + mapp->range.notifier = &mapp->notifier; + mapp->range.start = vma->vm_start; + mapp->range.end = vma->vm_end; + mapp->range.default_flags = HMM_PFN_REQ_FAULT; + mapp->vma = vma; + mapp->abo = abo; + kref_init(&mapp->refcnt); + + if (abo->mem.userptr == AMDXDNA_INVALID_ADDR) + abo->mem.userptr = addr; + INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work); + if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping) + mapping_set_unevictable(vma->vm_file->f_mapping); + + down_write(&xdna->notifier_lock); + list_add_tail(&mapp->node, &abo->mem.umap_list); + up_write(&xdna->notifier_lock); + + return 0; + +free_pfns: + kvfree(mapp->range.hmm_pfns); +free_map: + kfree(mapp); + return ret; +} + +static void amdxdna_gem_dev_obj_free(struct drm_gem_object *gobj) +{ + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + + XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); + if (abo->pinned) + amdxdna_gem_unpin(abo); + + amdxdna_gem_heap_free(abo); + drm_gem_object_release(gobj); + amdxdna_gem_destroy_obj(abo); +} + +static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + unsigned long num_pages = vma_pages(vma); + unsigned long offset = 0; + int ret; + + if (!is_import_bo(abo)) { + ret = drm_gem_shmem_mmap(&abo->base, vma); + if (ret) { + XDNA_ERR(xdna, "Failed shmem mmap %d", ret); + return 
ret; + } + + /* The buffer is based on memory pages. Fix the flag. */ + vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, + &num_pages); + if (ret) { + XDNA_ERR(xdna, "Failed insert pages %d", ret); + vma->vm_ops->close(vma); + return ret; + } + + return 0; + } + + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + ret = dma_buf_mmap(abo->dma_buf, vma, 0); + if (ret) { + XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret); + return ret; + } + + do { + vm_fault_t fault_ret; + + fault_ret = handle_mm_fault(vma, vma->vm_start + offset, + FAULT_FLAG_WRITE, NULL); + if (fault_ret & VM_FAULT_ERROR) { + vma->vm_ops->close(vma); + XDNA_ERR(xdna, "Fault in page failed"); + return -EFAULT; + } + + offset += PAGE_SIZE; + } while (--num_pages); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + drm_gem_object_put(to_gobj(abo)); + + return 0; +} + +static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + int ret; + + ret = amdxdna_hmm_register(abo, vma); + if (ret) + return ret; + + ret = amdxdna_insert_pages(abo, vma); + if (ret) { + XDNA_ERR(xdna, "Failed insert pages, ret %d", ret); + goto hmm_unreg; + } + + XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx", + drm_vma_node_offset_addr(&gobj->vma_node), abo->type, + vma->vm_start, gobj->size); + return 0; + +hmm_unreg: + amdxdna_hmm_unregister(abo, vma); + return ret; +} + +static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + struct drm_gem_object *gobj = dma_buf->priv; + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + unsigned long num_pages = vma_pages(vma); + int ret; + + vma->vm_ops = &drm_gem_shmem_vm_ops; + vma->vm_private_data = gobj; + + drm_gem_object_get(gobj); + ret = drm_gem_shmem_mmap(&abo->base, vma); + if (ret) + goto put_obj; + + /* The buffer is based on memory pages. Fix the flag. 
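+	 * vm_insert_pages() below works on struct-page-backed memory: it
+	 * requires a VM_MIXEDMAP mapping and rejects VM_PFNMAP VMAs, hence
+	 * the flag fixup.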
*/ + vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, + &num_pages); + if (ret) + goto close_vma; + + return 0; + +close_vma: + vma->vm_ops->close(vma); +put_obj: + drm_gem_object_put(gobj); + return ret; +} + +static const struct dma_buf_ops amdxdna_dmabuf_ops = { + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .mmap = amdxdna_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +static int amdxdna_gem_obj_vmap(struct amdxdna_gem_obj *abo, void **vaddr) +{ + struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL); + int ret; + + if (is_import_bo(abo)) + ret = dma_buf_vmap_unlocked(abo->dma_buf, &map); + else + ret = drm_gem_vmap(to_gobj(abo), &map); + + *vaddr = map.vaddr; + return ret; +} + +static void amdxdna_gem_obj_vunmap(struct amdxdna_gem_obj *abo) +{ + struct iosys_map map; + + if (!abo->mem.kva) + return; + + iosys_map_set_vaddr(&map, abo->mem.kva); + + if (is_import_bo(abo)) + dma_buf_vunmap_unlocked(abo->dma_buf, &map); + else + drm_gem_vunmap(to_gobj(abo), &map); +} + +static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags) +{ + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + if (abo->dma_buf) { + get_dma_buf(abo->dma_buf); + return abo->dma_buf; + } + + exp_info.ops = &amdxdna_dmabuf_ops; + exp_info.size = gobj->size; + exp_info.flags = flags; + exp_info.priv = gobj; + exp_info.resv = gobj->resv; + + return drm_gem_dmabuf_export(gobj->dev, &exp_info); +} + +static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo) +{ + dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL); + dma_buf_detach(abo->dma_buf, abo->attach); + dma_buf_put(abo->dma_buf); + drm_gem_object_release(to_gobj(abo)); + kfree(abo); +} + +static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) +{ + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + + XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); + + amdxdna_hmm_unregister(abo, NULL); + flush_workqueue(xdna->notifier_wq); + + if (abo->pinned) + amdxdna_gem_unpin(abo); + + if (abo->type == AMDXDNA_BO_DEV_HEAP) + drm_mm_takedown(&abo->mm); + + amdxdna_gem_obj_vunmap(abo); + mutex_destroy(&abo->lock); + + if (is_import_bo(abo)) { + amdxdna_imported_obj_free(abo); + return; + } + + drm_gem_shmem_free(&abo->base); +} + +static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = { + .free = amdxdna_gem_dev_obj_free, +}; + +static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { + .free = amdxdna_gem_obj_free, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = amdxdna_gem_obj_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, + .export = amdxdna_gem_prime_export, +}; + +static struct amdxdna_gem_obj * +amdxdna_gem_create_obj(struct drm_device *dev, size_t size) +{ + struct amdxdna_gem_obj *abo; + + abo = kzalloc(sizeof(*abo), GFP_KERNEL); + if (!abo) + return ERR_PTR(-ENOMEM); + + abo->pinned = false; + abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; + mutex_init(&abo->lock); + + abo->mem.userptr = AMDXDNA_INVALID_ADDR; + 
abo->mem.dev_addr = AMDXDNA_INVALID_ADDR; + abo->mem.size = size; + INIT_LIST_HEAD(&abo->mem.umap_list); + + return abo; +} + +/* For drm_driver->gem_create_object callback */ +struct drm_gem_object * +amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size) +{ + struct amdxdna_gem_obj *abo; + + abo = amdxdna_gem_create_obj(dev, size); + if (IS_ERR(abo)) + return ERR_CAST(abo); + + to_gobj(abo)->funcs = &amdxdna_gem_shmem_funcs; + + return to_gobj(abo); +} + +static struct amdxdna_gem_obj * +amdxdna_gem_create_shmem_object(struct drm_device *dev, size_t size) +{ + struct drm_gem_shmem_object *shmem = drm_gem_shmem_create(dev, size); + + if (IS_ERR(shmem)) + return ERR_CAST(shmem); + + shmem->map_wc = false; + return to_xdna_obj(&shmem->base); +} + +static struct amdxdna_gem_obj * +amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + enum amdxdna_ubuf_flag flags = 0; + struct amdxdna_drm_va_tbl va_tbl; + struct drm_gem_object *gobj; + struct dma_buf *dma_buf; + + if (copy_from_user(&va_tbl, u64_to_user_ptr(args->vaddr), sizeof(va_tbl))) { + XDNA_DBG(xdna, "Access va table failed"); + return ERR_PTR(-EINVAL); + } + + if (va_tbl.num_entries) { + if (args->type == AMDXDNA_BO_CMD) + flags |= AMDXDNA_UBUF_FLAG_MAP_DMA; + + dma_buf = amdxdna_get_ubuf(dev, flags, va_tbl.num_entries, + u64_to_user_ptr(args->vaddr + sizeof(va_tbl))); + } else { + dma_buf = dma_buf_get(va_tbl.dmabuf_fd); + } + + if (IS_ERR(dma_buf)) + return ERR_CAST(dma_buf); + + gobj = amdxdna_gem_prime_import(dev, dma_buf); + if (IS_ERR(gobj)) { + dma_buf_put(dma_buf); + return ERR_CAST(gobj); + } + + dma_buf_put(dma_buf); + + return to_xdna_obj(gobj); +} + +static struct amdxdna_gem_obj * +amdxdna_gem_create_object(struct drm_device *dev, + struct amdxdna_drm_create_bo *args) +{ + size_t aligned_sz = PAGE_ALIGN(args->size); + + if (args->vaddr) + return amdxdna_gem_create_ubuf_object(dev, args); + + return amdxdna_gem_create_shmem_object(dev, aligned_sz); +} + +struct drm_gem_object * +amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + struct sg_table *sgt; + int ret; + + get_dma_buf(dma_buf); + + attach = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attach)) { + ret = PTR_ERR(attach); + goto put_buf; + } + + sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto fail_detach; + } + + gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(gobj)) { + ret = PTR_ERR(gobj); + goto fail_unmap; + } + + abo = to_xdna_obj(gobj); + abo->attach = attach; + abo->dma_buf = dma_buf; + + return gobj; + +fail_unmap: + dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL); +fail_detach: + dma_buf_detach(dma_buf, attach); +put_buf: + dma_buf_put(dma_buf); + + return ERR_PTR(ret); +} + +static struct amdxdna_gem_obj * +amdxdna_drm_alloc_shmem(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_gem_obj *abo; + + abo = amdxdna_gem_create_object(dev, args); + if (IS_ERR(abo)) + return ERR_CAST(abo); + + abo->client = client; + abo->type = AMDXDNA_BO_SHMEM; + + return abo; +} + +static struct amdxdna_gem_obj * +amdxdna_drm_create_dev_heap(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + 
struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_gem_obj *abo; + int ret; + + if (args->size > xdna->dev_info->dev_mem_size) { + XDNA_DBG(xdna, "Invalid dev heap size 0x%llx, limit 0x%lx", + args->size, xdna->dev_info->dev_mem_size); + return ERR_PTR(-EINVAL); + } + + mutex_lock(&client->mm_lock); + if (client->dev_heap) { + XDNA_DBG(client->xdna, "dev heap is already created"); + ret = -EBUSY; + goto mm_unlock; + } + + abo = amdxdna_gem_create_object(dev, args); + if (IS_ERR(abo)) { + ret = PTR_ERR(abo); + goto mm_unlock; + } + + abo->type = AMDXDNA_BO_DEV_HEAP; + abo->client = client; + abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base; + drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size); + + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); + if (ret) { + XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret); + goto release_obj; + } + + client->dev_heap = abo; + drm_gem_object_get(to_gobj(abo)); + mutex_unlock(&client->mm_lock); + + return abo; + +release_obj: + drm_gem_object_put(to_gobj(abo)); +mm_unlock: + mutex_unlock(&client->mm_lock); + return ERR_PTR(ret); +} + +struct amdxdna_gem_obj * +amdxdna_drm_alloc_dev_bo(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + size_t aligned_sz = PAGE_ALIGN(args->size); + struct amdxdna_gem_obj *abo; + int ret; + + abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz); + if (IS_ERR(abo)) + return abo; + + to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs; + abo->type = AMDXDNA_BO_DEV; + abo->client = client; + + ret = amdxdna_gem_heap_alloc(abo); + if (ret) { + XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret); + amdxdna_gem_destroy_obj(abo); + return ERR_PTR(ret); + } + + drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz); + + return abo; +} + +static struct amdxdna_gem_obj * +amdxdna_drm_create_cmd_bo(struct drm_device *dev, + struct amdxdna_drm_create_bo *args, + struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_gem_obj *abo; + int ret; + + if (args->size > XDNA_MAX_CMD_BO_SIZE) { + XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size); + return ERR_PTR(-EINVAL); + } + + if (args->size < sizeof(struct amdxdna_cmd)) { + XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size); + return ERR_PTR(-EINVAL); + } + + abo = amdxdna_gem_create_object(dev, args); + if (IS_ERR(abo)) + return ERR_CAST(abo); + + abo->type = AMDXDNA_BO_CMD; + abo->client = filp->driver_priv; + + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); + if (ret) { + XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); + goto release_obj; + } + + return abo; + +release_obj: + drm_gem_object_put(to_gobj(abo)); + return ERR_PTR(ret); +} + +int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_create_bo *args = data; + struct amdxdna_gem_obj *abo; + int ret; + + if (args->flags) + return -EINVAL; + + XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx", + args->type, args->vaddr, args->size, args->flags); + switch (args->type) { + case AMDXDNA_BO_SHMEM: + abo = amdxdna_drm_alloc_shmem(dev, args, filp); + break; + case AMDXDNA_BO_DEV_HEAP: + abo = amdxdna_drm_create_dev_heap(dev, args, filp); + break; + case AMDXDNA_BO_DEV: + abo = amdxdna_drm_alloc_dev_bo(dev, args, filp); 
+ break; + case AMDXDNA_BO_CMD: + abo = amdxdna_drm_create_cmd_bo(dev, args, filp); + break; + default: + return -EINVAL; + } + if (IS_ERR(abo)) + return PTR_ERR(abo); + + /* ready to publish object to userspace */ + ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle); + if (ret) { + XDNA_ERR(xdna, "Create handle failed"); + goto put_obj; + } + + XDNA_DBG(xdna, "BO hdl %d type %d userptr 0x%llx xdna_addr 0x%llx size 0x%lx", + args->handle, args->type, abo->mem.userptr, + abo->mem.dev_addr, abo->mem.size); +put_obj: + /* Dereference object reference. Handle holds it now. */ + drm_gem_object_put(to_gobj(abo)); + return ret; +} + +int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + int ret; + + if (abo->type == AMDXDNA_BO_DEV) + abo = abo->client->dev_heap; + + if (is_import_bo(abo)) + return 0; + + ret = drm_gem_shmem_pin(&abo->base); + + XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret); + return ret; +} + +int amdxdna_gem_pin(struct amdxdna_gem_obj *abo) +{ + int ret; + + mutex_lock(&abo->lock); + ret = amdxdna_gem_pin_nolock(abo); + mutex_unlock(&abo->lock); + + return ret; +} + +void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo) +{ + if (abo->type == AMDXDNA_BO_DEV) + abo = abo->client->dev_heap; + + if (is_import_bo(abo)) + return; + + mutex_lock(&abo->lock); + drm_gem_shmem_unpin(&abo->base); + mutex_unlock(&abo->lock); +} + +struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, + u32 bo_hdl, u8 bo_type) +{ + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + + gobj = drm_gem_object_lookup(client->filp, bo_hdl); + if (!gobj) { + XDNA_DBG(xdna, "Can not find bo %d", bo_hdl); + return NULL; + } + + abo = to_xdna_obj(gobj); + if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type) + return abo; + + drm_gem_object_put(gobj); + return NULL; +} + +int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_drm_get_bo_info *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + int ret = 0; + + if (args->ext || args->ext_flags || args->pad) + return -EINVAL; + + gobj = drm_gem_object_lookup(filp, args->handle); + if (!gobj) { + XDNA_DBG(xdna, "Lookup GEM object %d failed", args->handle); + return -ENOENT; + } + + abo = to_xdna_obj(gobj); + args->vaddr = abo->mem.userptr; + args->xdna_addr = abo->mem.dev_addr; + + if (abo->type != AMDXDNA_BO_DEV) + args->map_offset = drm_vma_node_offset_addr(&gobj->vma_node); + else + args->map_offset = AMDXDNA_INVALID_ADDR; + + XDNA_DBG(xdna, "BO hdl %d map_offset 0x%llx vaddr 0x%llx xdna_addr 0x%llx", + args->handle, args->map_offset, args->vaddr, args->xdna_addr); + + drm_gem_object_put(gobj); + return ret; +} + +/* + * The sync bo ioctl is to make sure the CPU cache is in sync with memory. + * This is required because NPU is not cache coherent device. CPU cache + * flushing/invalidation is expensive so it is best to handle this outside + * of the command submission path. This ioctl allows explicit cache + * flushing/invalidation outside of the critical path. 
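+ *
+ * An illustrative userspace sequence (the ioctl macro name follows the
+ * usual DRM convention; see drm/amdxdna_accel.h for the exact UAPI):
+ *
+ *	struct amdxdna_drm_sync_bo sync = {
+ *		.handle = bo_handle,
+ *		.direction = SYNC_DIRECT_FROM_DEVICE,
+ *		.offset = 0,
+ *		.size = bo_size,
+ *	};
+ *	ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync);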
+ */
+int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
+			      void *data, struct drm_file *filp)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_drm_sync_bo *args = data;
+	struct amdxdna_gem_obj *abo;
+	struct drm_gem_object *gobj;
+	int ret;
+
+	gobj = drm_gem_object_lookup(filp, args->handle);
+	if (!gobj) {
+		XDNA_ERR(xdna, "Lookup GEM object failed");
+		return -ENOENT;
+	}
+	abo = to_xdna_obj(gobj);
+
+	ret = amdxdna_gem_pin(abo);
+	if (ret) {
+		XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret);
+		goto put_obj;
+	}
+
+	if (is_import_bo(abo))
+		drm_clflush_sg(abo->base.sgt);
+	else if (abo->mem.kva)
+		drm_clflush_virt_range(abo->mem.kva + args->offset, args->size);
+	else if (abo->base.pages)
+		drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
+	else
+		drm_WARN(&xdna->ddev, 1, "Cannot find memory to flush");
+
+	amdxdna_gem_unpin(abo);
+
+	XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx",
+		 args->handle, args->offset, args->size);
+
+	if (args->direction == SYNC_DIRECT_FROM_DEVICE)
+		ret = amdxdna_hwctx_sync_debug_bo(abo->client, args->handle);
+
+put_obj:
+	drm_gem_object_put(gobj);
+	return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
new file mode 100644
index 000000000000..f79fc7f3c93b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_GEM_H_
+#define _AMDXDNA_GEM_H_
+
+#include <linux/hmm.h>
+#include "amdxdna_pci_drv.h"
+
+struct amdxdna_umap {
+	struct vm_area_struct *vma;
+	struct mmu_interval_notifier notifier;
+	struct hmm_range range;
+	struct work_struct hmm_unreg_work;
+	struct amdxdna_gem_obj *abo;
+	struct list_head node;
+	struct kref refcnt;
+	bool invalid;
+	bool unmapped;
+};
+
+struct amdxdna_mem {
+	u64 userptr;
+	void *kva;
+	u64 dev_addr;
+	size_t size;
+	struct page **pages;
+	u32 nr_pages;
+	struct list_head umap_list;
+	bool map_invalid;
+};
+
+struct amdxdna_gem_obj {
+	struct drm_gem_shmem_object base;
+	struct amdxdna_client *client;
+	u8 type;
+	bool pinned;
+	struct mutex lock; /* Protects: pinned */
+	struct amdxdna_mem mem;
+
+	/* The members below are only initialized when needed */
+	struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
+	struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */
+	u32 assigned_hwctx;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *attach;
+};
+
+#define to_gobj(obj) (&(obj)->base.base)
+#define is_import_bo(obj) ((obj)->attach)
+
+static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
+{
+	return container_of(gobj, struct amdxdna_gem_obj, base.base);
+}
+
+struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
+					    u32 bo_hdl, u8 bo_type);
+static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
+{
+	drm_gem_object_put(to_gobj(abo));
+}
+
+static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo)
+{
+	return abo->mem.dev_addr - abo->client->dev_heap->mem.dev_addr;
+}
+
+void amdxdna_umap_put(struct amdxdna_umap *mapp);
+
+struct drm_gem_object *
+amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
+struct drm_gem_object *
+amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
+struct amdxdna_gem_obj *
+amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
+			 struct amdxdna_drm_create_bo *args,
+			 struct drm_file *filp);
+
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
+int
amdxdna_gem_pin(struct amdxdna_gem_obj *abo); +void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo); + +int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +#endif /* _AMDXDNA_GEM_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c new file mode 100644 index 000000000000..858df97cd3fb --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -0,0 +1,575 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/drm_device.h> +#include <drm/drm_managed.h> +#include <linux/bitfield.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/slab.h> +#include <linux/xarray.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/amdxdna.h> + +#include "amdxdna_mailbox.h" + +#define MB_ERR(chann, fmt, args...) \ +({ \ + typeof(chann) _chann = chann; \ + dev_err((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \ + (_chann)->msix_irq, ##args); \ +}) +#define MB_DBG(chann, fmt, args...) \ +({ \ + typeof(chann) _chann = chann; \ + dev_dbg((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \ + (_chann)->msix_irq, ##args); \ +}) +#define MB_WARN_ONCE(chann, fmt, args...) \ +({ \ + typeof(chann) _chann = chann; \ + dev_warn_once((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \ + (_chann)->msix_irq, ##args); \ +}) + +#define MAGIC_VAL 0x1D000000U +#define MAGIC_VAL_MASK 0xFF000000 +#define MAX_MSG_ID_ENTRIES 256 +#define MSG_RX_TIMER 200 /* milliseconds */ +#define MAILBOX_NAME "xdna_mailbox" + +enum channel_res_type { + CHAN_RES_X2I, + CHAN_RES_I2X, + CHAN_RES_NUM +}; + +struct mailbox { + struct device *dev; + struct xdna_mailbox_res res; +}; + +struct mailbox_channel { + struct mailbox *mb; + struct xdna_mailbox_chann_res res[CHAN_RES_NUM]; + int msix_irq; + u32 iohub_int_addr; + struct xarray chan_xa; + u32 next_msgid; + u32 x2i_tail; + + /* Received msg related fields */ + struct workqueue_struct *work_q; + struct work_struct rx_work; + u32 i2x_head; + bool bad_state; +}; + +#define MSG_BODY_SZ GENMASK(10, 0) +#define MSG_PROTO_VER GENMASK(23, 16) +struct xdna_msg_header { + __u32 total_size; + __u32 sz_ver; + __u32 id; + __u32 opcode; +} __packed; + +static_assert(sizeof(struct xdna_msg_header) == 16); + +struct mailbox_pkg { + struct xdna_msg_header header; + __u32 payload[]; +}; + +/* The protocol version. */ +#define MSG_PROTOCOL_VERSION 0x1 +/* The tombstone value. 
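+ * It is written at the current tail when the next message would not fit
+ * contiguously before the end of the ring buffer; the consumer treats it
+ * as an instruction to wrap and continue reading from offset 0 (see
+ * mailbox_send_msg() and mailbox_get_msg()).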
*/ +#define TOMBSTONE 0xDEADFACE + +struct mailbox_msg { + void *handle; + int (*notify_cb)(void *handle, void __iomem *data, size_t size); + size_t pkg_size; /* package size in bytes */ + struct mailbox_pkg pkg; +}; + +static void mailbox_reg_write(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 data) +{ + struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; + void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; + + writel(data, ringbuf_addr); +} + +static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg) +{ + struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; + void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; + + return readl(ringbuf_addr); +} + +static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val) +{ + struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; + void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; + int ret, value; + + /* Poll till value is not zero */ + ret = readx_poll_timeout(readl, ringbuf_addr, value, + value, 1 /* us */, 100); + if (ret < 0) + return ret; + + *val = value; + return 0; +} + +static inline void +mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val) +{ + mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_head_ptr_reg, headptr_val); + mb_chann->i2x_head = headptr_val; +} + +static inline void +mailbox_set_tailptr(struct mailbox_channel *mb_chann, u32 tailptr_val) +{ + mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_X2I].mb_tail_ptr_reg, tailptr_val); + mb_chann->x2i_tail = tailptr_val; +} + +static inline u32 +mailbox_get_headptr(struct mailbox_channel *mb_chann, enum channel_res_type type) +{ + return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_head_ptr_reg); +} + +static inline u32 +mailbox_get_tailptr(struct mailbox_channel *mb_chann, enum channel_res_type type) +{ + return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_tail_ptr_reg); +} + +static inline u32 +mailbox_get_ringbuf_size(struct mailbox_channel *mb_chann, enum channel_res_type type) +{ + return mb_chann->res[type].rb_size; +} + +static inline int mailbox_validate_msgid(int msg_id) +{ + return (msg_id & MAGIC_VAL_MASK) == MAGIC_VAL; +} + +static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) +{ + u32 msg_id; + int ret; + + ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg, + XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1), + &mb_chann->next_msgid, GFP_NOWAIT); + if (ret < 0) + return ret; + + /* + * Add MAGIC_VAL to the higher bits. 
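+	 * The lower bits carry the xarray index (0 .. MAX_MSG_ID_ENTRIES - 1)
+	 * and the top byte carries the magic, e.g. index 0x12 becomes message
+	 * ID 0x1D000012. mailbox_validate_msgid() checks the magic to reject
+	 * corrupted IDs echoed back by the firmware.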
+ */ + msg_id |= MAGIC_VAL; + return msg_id; +} + +static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id) +{ + msg_id &= ~MAGIC_VAL_MASK; + xa_erase_irq(&mb_chann->chan_xa, msg_id); +} + +static void mailbox_release_msg(struct mailbox_channel *mb_chann, + struct mailbox_msg *mb_msg) +{ + MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x", + mb_msg->pkg.header.id, mb_msg->pkg.header.opcode); + if (mb_msg->notify_cb) + mb_msg->notify_cb(mb_msg->handle, NULL, 0); + kfree(mb_msg); +} + +static int +mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) +{ + void __iomem *write_addr; + u32 ringbuf_size; + u32 head, tail; + u32 start_addr; + u32 tmp_tail; + + head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I); + tail = mb_chann->x2i_tail; + ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I); + start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr; + tmp_tail = tail + mb_msg->pkg_size; + + if (tail < head && tmp_tail >= head) + goto no_space; + + if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) && + mb_msg->pkg_size >= head)) + goto no_space; + + if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) { + write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail; + writel(TOMBSTONE, write_addr); + + /* tombstone is set. Write from the start of the ringbuf */ + tail = 0; + } + + write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail; + memcpy_toio(write_addr, &mb_msg->pkg, mb_msg->pkg_size); + mailbox_set_tailptr(mb_chann, tail + mb_msg->pkg_size); + + trace_mbox_set_tail(MAILBOX_NAME, mb_chann->msix_irq, + mb_msg->pkg.header.opcode, + mb_msg->pkg.header.id); + + return 0; + +no_space: + return -ENOSPC; +} + +static int +mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header, + void __iomem *data) +{ + struct mailbox_msg *mb_msg; + int msg_id; + int ret = 0; + + msg_id = header->id; + if (!mailbox_validate_msgid(msg_id)) { + MB_ERR(mb_chann, "Bad message ID 0x%x", msg_id); + return -EINVAL; + } + + msg_id &= ~MAGIC_VAL_MASK; + mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id); + if (!mb_msg) { + MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id); + return -EINVAL; + } + + MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x", + header->opcode, header->total_size, header->id); + if (mb_msg->notify_cb) { + ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size); + if (unlikely(ret)) + MB_ERR(mb_chann, "Message callback ret %d", ret); + } + + kfree(mb_msg); + return ret; +} + +static int mailbox_get_msg(struct mailbox_channel *mb_chann) +{ + struct xdna_msg_header header; + void __iomem *read_addr; + u32 msg_size, rest; + u32 ringbuf_size; + u32 head, tail; + u32 start_addr; + int ret; + + if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail)) + return -EINVAL; + head = mb_chann->i2x_head; + ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X); + start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr; + + if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) { + MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail); + return -EINVAL; + } + + /* ringbuf empty */ + if (head == tail) + return -ENOENT; + + if (head == ringbuf_size) + head = 0; + + /* Peek size of the message or TOMBSTONE */ + read_addr = mb_chann->mb->res.ringbuf_base + start_addr + head; + header.total_size = readl(read_addr); + /* size is TOMBSTONE, set next read from 0 */ + if (header.total_size == TOMBSTONE) { + if (head < tail) { + MB_WARN_ONCE(mb_chann, "Tombstone, head 
0x%x tail 0x%x",
+				     head, tail);
+			return -EINVAL;
+		}
+		mailbox_set_headptr(mb_chann, 0);
+		return 0;
+	}
+
+	if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) {
+		MB_WARN_ONCE(mb_chann, "Invalid total size 0x%x", header.total_size);
+		return -EINVAL;
+	}
+	msg_size = sizeof(header) + header.total_size;
+
+	if (msg_size > ringbuf_size - head || msg_size > tail - head) {
+		MB_WARN_ONCE(mb_chann, "Invalid message size %d, tail %d, head %d",
+			     msg_size, tail, head);
+		return -EINVAL;
+	}
+
+	rest = sizeof(header) - sizeof(u32);
+	read_addr += sizeof(u32);
+	memcpy_fromio((u32 *)&header + 1, read_addr, rest);
+	read_addr += rest;
+
+	ret = mailbox_get_resp(mb_chann, &header, read_addr);
+
+	mailbox_set_headptr(mb_chann, head + msg_size);
+	/* After the update, head may equal ringbuf_size. This is expected. */
+	trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq,
+			    header.opcode, header.id);
+
+	return ret;
+}
+
+static irqreturn_t mailbox_irq_handler(int irq, void *p)
+{
+	struct mailbox_channel *mb_chann = p;
+
+	trace_mbox_irq_handle(MAILBOX_NAME, irq);
+	/* Schedule rx_work to run the callback functions */
+	queue_work(mb_chann->work_q, &mb_chann->rx_work);
+
+	return IRQ_HANDLED;
+}
+
+static void mailbox_rx_worker(struct work_struct *rx_work)
+{
+	struct mailbox_channel *mb_chann;
+	int ret;
+
+	mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+
+	if (READ_ONCE(mb_chann->bad_state)) {
+		MB_ERR(mb_chann, "Channel in bad state, work aborted");
+		return;
+	}
+
+again:
+	mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
+
+	while (1) {
+		/*
+		 * If the return is 0, keep consuming messages until there
+		 * are none left or an error happens.
+		 */
+		ret = mailbox_get_msg(mb_chann);
+		if (ret == -ENOENT)
+			break;
+
+		/* Any other error means the device is unhealthy; mark the channel bad. */
+		if (unlikely(ret)) {
+			MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret);
+			WRITE_ONCE(mb_chann->bad_state, true);
+			return;
+		}
+	}
+
+	/*
+	 * The hardware will not generate an interrupt if firmware creates a
+	 * new response right after the driver clears the interrupt register.
+	 * Check the interrupt register to make sure there is no new response
+	 * before exiting.
+	 */
+	if (mailbox_reg_read(mb_chann, mb_chann->iohub_int_addr))
+		goto again;
+}
+
+int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann,
+			  const struct xdna_mailbox_msg *msg, u64 tx_timeout)
+{
+	struct xdna_msg_header *header;
+	struct mailbox_msg *mb_msg;
+	size_t pkg_size;
+	int ret;
+
+	pkg_size = sizeof(*header) + msg->send_size;
+	if (pkg_size > mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I)) {
+		MB_ERR(mb_chann, "Message size larger than ringbuf size");
+		return -EINVAL;
+	}
+
+	if (unlikely(!IS_ALIGNED(msg->send_size, 4))) {
+		MB_ERR(mb_chann, "Message must be 4-byte aligned");
+		return -EINVAL;
+	}
+
+	/* The first word of the payload must NOT be TOMBSTONE */
+	if (unlikely(((u32 *)msg->send_data)[0] == TOMBSTONE)) {
+		MB_ERR(mb_chann, "Tombstone in data");
+		return -EINVAL;
+	}
+
+	if (READ_ONCE(mb_chann->bad_state)) {
+		MB_ERR(mb_chann, "Channel in bad state");
+		return -EPIPE;
+	}
+
+	mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL);
+	if (!mb_msg)
+		return -ENOMEM;
+
+	mb_msg->handle = msg->handle;
+	mb_msg->notify_cb = msg->notify_cb;
+	mb_msg->pkg_size = pkg_size;
+
+	header = &mb_msg->pkg.header;
+	/*
+	 * Hardware uses total_size and size to split huge messages.
+	 * That is not supported here, so both fields carry the same value.
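+	 *
+	 * For example (illustrative), a 16-byte payload encodes as:
+	 *   total_size = 16
+	 *   sz_ver     = FIELD_PREP(MSG_BODY_SZ, 16) |
+	 *                FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION)
+	 *              = 0x00010010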
+ */ + header->total_size = msg->send_size; + header->sz_ver = FIELD_PREP(MSG_BODY_SZ, msg->send_size) | + FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION); + header->opcode = msg->opcode; + memcpy(mb_msg->pkg.payload, msg->send_data, msg->send_size); + + ret = mailbox_acquire_msgid(mb_chann, mb_msg); + if (unlikely(ret < 0)) { + MB_ERR(mb_chann, "mailbox_acquire_msgid failed"); + goto msg_id_failed; + } + header->id = ret; + + MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x", + header->opcode, header->total_size, header->id); + + ret = mailbox_send_msg(mb_chann, mb_msg); + if (ret) { + MB_DBG(mb_chann, "Error in mailbox send msg, ret %d", ret); + goto release_id; + } + + return 0; + +release_id: + mailbox_release_msgid(mb_chann, header->id); +msg_id_failed: + kfree(mb_msg); + return ret; +} + +struct mailbox_channel * +xdna_mailbox_create_channel(struct mailbox *mb, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 iohub_int_addr, + int mb_irq) +{ + struct mailbox_channel *mb_chann; + int ret; + + if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { + pr_err("Ring buf size must be power of 2"); + return NULL; + } + + mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL); + if (!mb_chann) + return NULL; + + mb_chann->mb = mb; + mb_chann->msix_irq = mb_irq; + mb_chann->iohub_int_addr = iohub_int_addr; + memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i)); + memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x)); + + xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); + mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I); + mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X); + + INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); + mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME); + if (!mb_chann->work_q) { + MB_ERR(mb_chann, "Create workqueue failed"); + goto free_and_out; + } + + /* Everything look good. Time to enable irq handler */ + ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann); + if (ret) { + MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret); + goto destroy_wq; + } + + mb_chann->bad_state = false; + mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); + + MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); + return mb_chann; + +destroy_wq: + destroy_workqueue(mb_chann->work_q); +free_and_out: + kfree(mb_chann); + return NULL; +} + +int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) +{ + struct mailbox_msg *mb_msg; + unsigned long msg_id; + + MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); + free_irq(mb_chann->msix_irq, mb_chann); + destroy_workqueue(mb_chann->work_q); + /* We can clean up and release resources */ + + xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) + mailbox_release_msg(mb_chann, mb_msg); + + xa_destroy(&mb_chann->chan_xa); + + MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq); + kfree(mb_chann); + return 0; +} + +void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann) +{ + /* Disable an irq and wait. This might sleep. 
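+	 * disable_irq() waits for any in-flight handler to finish, and
+	 * cancel_work_sync() below drains the RX worker, so no message
+	 * callback can run once this function returns. Hence it must be
+	 * called from a context that may sleep, never from the IRQ path.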
*/
+	disable_irq(mb_chann->msix_irq);
+
+	/* Cancel RX work and wait for it to finish */
+	cancel_work_sync(&mb_chann->rx_work);
+	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+}
+
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+				     const struct xdna_mailbox_res *res)
+{
+	struct mailbox *mb;
+
+	mb = drmm_kzalloc(ddev, sizeof(*mb), GFP_KERNEL);
+	if (!mb)
+		return NULL;
+	mb->dev = ddev->dev;
+
+	/* mailbox and ring buffer base and size information */
+	memcpy(&mb->res, res, sizeof(*res));
+
+	return mb;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h
new file mode 100644
index 000000000000..ea367f2fb738
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_MAILBOX_H_
+#define _AIE2_MAILBOX_H_
+
+struct mailbox;
+struct mailbox_channel;
+
+/*
+ * xdna_mailbox_msg - message struct
+ *
+ * @opcode: opcode for firmware
+ * @handle: handle used for the notify callback
+ * @notify_cb: callback function to notify the sender when there is a response
+ * @send_data: pointer to the data to send
+ * @send_size: size of the data to send, in bytes
+ *
+ * The mailbox will split the data into multiple firmware messages if it is
+ * too big; this is transparent to the sender, which receives a single
+ * notification.
+ */
+struct xdna_mailbox_msg {
+	u32 opcode;
+	void *handle;
+	int (*notify_cb)(void *handle, void __iomem *data, size_t size);
+	u8 *send_data;
+	size_t send_size;
+};
+
+/*
+ * xdna_mailbox_res - mailbox hardware resource
+ *
+ * @ringbuf_base: ring buffer base address
+ * @ringbuf_size: ring buffer size
+ * @mbox_base: mailbox base address
+ * @mbox_size: mailbox size
+ * @name: mailbox name
+ */
+struct xdna_mailbox_res {
+	void __iomem *ringbuf_base;
+	size_t ringbuf_size;
+	void __iomem *mbox_base;
+	size_t mbox_size;
+	const char *name;
+};
+
+/*
+ * xdna_mailbox_chann_res - per-direction channel resources
+ *
+ * @rb_start_addr: ring buffer start address
+ * @rb_size: ring buffer size
+ * @mb_head_ptr_reg: mailbox head pointer register
+ * @mb_tail_ptr_reg: mailbox tail pointer register
+ */
+struct xdna_mailbox_chann_res {
+	u32 rb_start_addr;
+	u32 rb_size;
+	u32 mb_head_ptr_reg;
+	u32 mb_tail_ptr_reg;
+};
+
+/*
+ * xdnam_mailbox_create() -- create and initialize the mailbox subsystem
+ *
+ * @ddev: device pointer
+ * @res: SRAM and mailbox resources
+ *
+ * Return: On success, a handle to the mailbox subsystem; otherwise NULL.
+ */
+struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
+				     const struct xdna_mailbox_res *res);
+
+/*
+ * xdna_mailbox_create_channel() -- Create a mailbox channel instance
+ *
+ * @mailbox: the handle returned from xdnam_mailbox_create()
+ * @x2i: host to firmware mailbox resources
+ * @i2x: firmware to host mailbox resources
+ * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt
+ * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index
+ *
+ * Return: On success, a handle to the mailbox channel; otherwise NULL.
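+ *
+ * Typical usage (illustrative):
+ *
+ *	chann = xdna_mailbox_create_channel(mb, &x2i, &i2x, intr_reg, irq);
+ *	...
+ *	xdna_mailbox_stop_channel(chann);
+ *	xdna_mailbox_destroy_channel(chann);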
+ */
+struct mailbox_channel *
+xdna_mailbox_create_channel(struct mailbox *mailbox,
+			    const struct xdna_mailbox_chann_res *x2i,
+			    const struct xdna_mailbox_chann_res *i2x,
+			    u32 xdna_mailbox_intr_reg,
+			    int mb_irq);
+
+/*
+ * xdna_mailbox_destroy_channel() -- destroy mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Return: 0 on success; otherwise an error code
+ */
+int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_stop_channel() -- stop mailbox channel
+ *
+ * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
+ *
+ * Return: none
+ */
+void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
+
+/*
+ * xdna_mailbox_send_msg() -- Send a message
+ *
+ * @mailbox_chann: Mailbox channel handle
+ * @msg: message struct for message information
+ * @tx_timeout: the timeout for sending the message, in milliseconds
+ *
+ * Return: 0 on success; otherwise an error code
+ */
+int xdna_mailbox_send_msg(struct mailbox_channel *mailbox_chann,
+			  const struct xdna_mailbox_msg *msg, u64 tx_timeout);
+
+#endif /* _AIE2_MAILBOX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
new file mode 100644
index 000000000000..6d0c24513476
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/completion.h>
+
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size)
+{
+	struct xdna_notify *cb_arg = handle;
+	int ret;
+
+	if (unlikely(!data))
+		goto out;
+
+	if (unlikely(cb_arg->size != size)) {
+		cb_arg->error = -EINVAL;
+		goto out;
+	}
+
+	memcpy_fromio(cb_arg->data, data, cb_arg->size);
+	print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
+			     16, 4, cb_arg->data, cb_arg->size, true);
+out:
+	ret = cb_arg->error;
+	complete(&cb_arg->comp);
+	return ret;
+}
+
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+		       struct xdna_mailbox_msg *msg)
+{
+	struct xdna_notify *hdl = msg->handle;
+	int ret;
+
+	ret = xdna_mailbox_send_msg(chann, msg, TX_TIMEOUT);
+	if (ret) {
+		XDNA_ERR(xdna, "Send message failed, ret %d", ret);
+		return ret;
+	}
+
+	ret = wait_for_completion_timeout(&hdl->comp,
+					  msecs_to_jiffies(RX_TIMEOUT));
+	if (!ret) {
+		XDNA_ERR(xdna, "Wait for completion timeout");
+		return -ETIME;
+	}
+
+	return hdl->error;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
new file mode 100644
index 000000000000..556c712cad0a
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_MAILBOX_HELPER_H
+#define _AMDXDNA_MAILBOX_HELPER_H
+
+#define TX_TIMEOUT 2000 /* milliseconds */
+#define RX_TIMEOUT 5000 /* milliseconds */
+
+struct amdxdna_dev;
+
+struct xdna_notify {
+	struct completion comp;
+	u32 *data;
+	size_t size;
+	int error;
+	u32 *status;
+};
+
+#define DECLARE_XDNA_MSG_COMMON(name, op, s)			\
+	struct name##_req req = { 0 };				\
+	struct name##_resp resp = { .status = s };		\
+	struct xdna_notify hdl = {				\
+		.error = 0,					\
+		.data = (u32 *)&resp,				\
+		.size = sizeof(resp),				\
+		.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+		.status = (u32 *)&resp.status,			\
+	};							\
+	struct xdna_mailbox_msg msg = {				\
+		.send_data = (u8 *)&req,			\
+		.send_size = sizeof(req),			\
+		.handle = &hdl,					\
+		.opcode = op,					\
+		.notify_cb = xdna_msg_cb,			\
+	}
+
+int xdna_msg_cb(void *handle, void __iomem *data, size_t size);
+int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
+		       struct xdna_mailbox_msg *msg);
+
+#endif /* _AMDXDNA_MAILBOX_HELPER_H */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
new file mode 100644
index 000000000000..1973ab67721b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
+
+MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+
+/*
+ * 0.0: Initial version
+ * 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY
+ * 0.2: Support getting the last hardware error
+ * 0.3: Support firmware debug buffer
+ * 0.4: Support getting resource information
+ * 0.5: Support getting telemetry data
+ * 0.6: Support preemption
+ */
+#define AMDXDNA_DRIVER_MAJOR 0
+#define AMDXDNA_DRIVER_MINOR 6
+
+/*
+ * Bind the driver based on the (vendor_id, device_id) pair, and later use
+ * the (device_id, rev_id) pair as a key to select the device. Devices with
+ * the same device_id expose a very similar interface to the host driver.
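+ *
+ * For example, (device 0x17f0, revision 0x10) selects dev_npu4_info in the
+ * amdxdna_ids table below, while the PCI core matches the device on
+ * (PCI_VENDOR_ID_AMD, 0x17f0) alone.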
+ */ +static const struct pci_device_id pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) }, + {0} +}; + +MODULE_DEVICE_TABLE(pci, pci_ids); + +static const struct amdxdna_device_id amdxdna_ids[] = { + { 0x1502, 0x0, &dev_npu1_info }, + { 0x17f0, 0x0, &dev_npu2_info }, + { 0x17f0, 0x10, &dev_npu4_info }, + { 0x17f0, 0x11, &dev_npu5_info }, + { 0x17f0, 0x20, &dev_npu6_info }, + {0} +}; + +static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + struct amdxdna_client *client; + int ret; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return -ENOMEM; + + client->pid = pid_nr(rcu_access_pointer(filp->pid)); + client->xdna = xdna; + + client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm); + if (IS_ERR(client->sva)) { + ret = PTR_ERR(client->sva); + XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret); + goto failed; + } + client->pasid = iommu_sva_get_pasid(client->sva); + if (client->pasid == IOMMU_PASID_INVALID) { + XDNA_ERR(xdna, "SVA get pasid failed"); + ret = -ENODEV; + goto unbind_sva; + } + init_srcu_struct(&client->hwctx_srcu); + xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC); + mutex_init(&client->mm_lock); + + mutex_lock(&xdna->dev_lock); + list_add_tail(&client->node, &xdna->client_list); + mutex_unlock(&xdna->dev_lock); + + filp->driver_priv = client; + client->filp = filp; + + XDNA_DBG(xdna, "pid %d opened", client->pid); + return 0; + +unbind_sva: + iommu_sva_unbind_device(client->sva); +failed: + kfree(client); + + return ret; +} + +static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + + XDNA_DBG(xdna, "closing pid %d", client->pid); + + xa_destroy(&client->hwctx_xa); + cleanup_srcu_struct(&client->hwctx_srcu); + mutex_destroy(&client->mm_lock); + if (client->dev_heap) + drm_gem_object_put(to_gobj(client->dev_heap)); + + iommu_sva_unbind_device(client->sva); + + XDNA_DBG(xdna, "pid %d closed", client->pid); + kfree(client); +} + +static int amdxdna_flush(struct file *f, fl_owner_t id) +{ + struct drm_file *filp = f->private_data; + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = client->xdna; + int idx; + + XDNA_DBG(xdna, "PID %d flushing...", client->pid); + if (!drm_dev_enter(&xdna->ddev, &idx)) + return 0; + + mutex_lock(&xdna->dev_lock); + list_del_init(&client->node); + amdxdna_hwctx_remove_all(client); + mutex_unlock(&xdna->dev_lock); + + drm_dev_exit(idx); + return 0; +} + +static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_get_info *args = data; + int ret; + + if (!xdna->dev_info->ops->get_aie_info) + return -EOPNOTSUPP; + + XDNA_DBG(xdna, "Request parameter %u", args->param); + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->get_aie_info(client, args); + mutex_unlock(&xdna->dev_lock); + return ret; +} + +static int amdxdna_drm_get_array_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_get_array *args = data; + + if (!xdna->dev_info->ops->get_array) + return -EOPNOTSUPP; + + if (args->pad || !args->num_element || !args->element_size) + return 
-EINVAL; + + guard(mutex)(&xdna->dev_lock); + return xdna->dev_info->ops->get_array(client, args); +} + +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_set_state *args = data; + int ret; + + if (!xdna->dev_info->ops->set_aie_state) + return -EOPNOTSUPP; + + XDNA_DBG(xdna, "Request parameter %u", args->param); + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->set_aie_state(client, args); + mutex_unlock(&xdna->dev_lock); + + return ret; +} + +static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { + /* Context */ + DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0), + /* BO */ + DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0), + /* Execution */ + DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), + /* AIE hardware */ + DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY), +}; + +static const struct file_operations amdxdna_fops = { + .owner = THIS_MODULE, + .open = accel_open, + .release = drm_release, + .flush = amdxdna_flush, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .poll = drm_poll, + .read = drm_read, + .llseek = noop_llseek, + .mmap = drm_gem_mmap, + .fop_flags = FOP_UNSIGNED_OFFSET, +}; + +const struct drm_driver amdxdna_drm_drv = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL | + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, + .fops = &amdxdna_fops, + .name = "amdxdna_accel_driver", + .desc = "AMD XDNA DRM implementation", + .major = AMDXDNA_DRIVER_MAJOR, + .minor = AMDXDNA_DRIVER_MINOR, + .open = amdxdna_drm_open, + .postclose = amdxdna_drm_close, + .ioctls = amdxdna_drm_ioctls, + .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls), + + .gem_create_object = amdxdna_gem_create_object_cb, + .gem_prime_import = amdxdna_gem_prime_import, +}; + +static const struct amdxdna_dev_info * +amdxdna_get_dev_info(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(amdxdna_ids); i++) { + if (pdev->device == amdxdna_ids[i].device && + pdev->revision == amdxdna_ids[i].revision) + return amdxdna_ids[i].dev_info; + } + return NULL; +} + +static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct amdxdna_dev *xdna; + int ret; + + xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev); + if (IS_ERR(xdna)) + return PTR_ERR(xdna); + + xdna->dev_info = amdxdna_get_dev_info(pdev); + if (!xdna->dev_info) + return -ENODEV; + + drmm_mutex_init(&xdna->ddev, &xdna->dev_lock); + init_rwsem(&xdna->notifier_lock); + INIT_LIST_HEAD(&xdna->client_list); + pci_set_drvdata(pdev, xdna); + + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&xdna->notifier_lock); + fs_reclaim_release(GFP_KERNEL); + } + + xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0); + if (!xdna->notifier_wq) + return -ENOMEM; + + mutex_lock(&xdna->dev_lock); + ret = 
xdna->dev_info->ops->init(xdna); + mutex_unlock(&xdna->dev_lock); + if (ret) { + XDNA_ERR(xdna, "Hardware init failed, ret %d", ret); + goto destroy_notifier_wq; + } + + ret = amdxdna_sysfs_init(xdna); + if (ret) { + XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret); + goto failed_dev_fini; + } + + ret = drm_dev_register(&xdna->ddev, 0); + if (ret) { + XDNA_ERR(xdna, "DRM register failed, ret %d", ret); + goto failed_sysfs_fini; + } + + return 0; + +failed_sysfs_fini: + amdxdna_sysfs_fini(xdna); +failed_dev_fini: + mutex_lock(&xdna->dev_lock); + xdna->dev_info->ops->fini(xdna); + mutex_unlock(&xdna->dev_lock); +destroy_notifier_wq: + destroy_workqueue(xdna->notifier_wq); + return ret; +} + +static void amdxdna_remove(struct pci_dev *pdev) +{ + struct amdxdna_dev *xdna = pci_get_drvdata(pdev); + struct amdxdna_client *client; + + destroy_workqueue(xdna->notifier_wq); + + drm_dev_unplug(&xdna->ddev); + amdxdna_sysfs_fini(xdna); + + mutex_lock(&xdna->dev_lock); + client = list_first_entry_or_null(&xdna->client_list, + struct amdxdna_client, node); + while (client) { + list_del_init(&client->node); + amdxdna_hwctx_remove_all(client); + + client = list_first_entry_or_null(&xdna->client_list, + struct amdxdna_client, node); + } + + xdna->dev_info->ops->fini(xdna); + mutex_unlock(&xdna->dev_lock); +} + +static const struct dev_pm_ops amdxdna_pm_ops = { + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume) + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL) +}; + +static struct pci_driver amdxdna_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = pci_ids, + .probe = amdxdna_probe, + .remove = amdxdna_remove, + .driver.pm = &amdxdna_pm_ops, +}; + +module_pci_driver(amdxdna_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>"); +MODULE_DESCRIPTION("amdxdna driver"); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h new file mode 100644 index 000000000000..c99477f5e454 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_PCI_DRV_H_ +#define _AMDXDNA_PCI_DRV_H_ + +#include <drm/drm_print.h> +#include <linux/workqueue.h> +#include <linux/xarray.h> + +#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) +#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args) +#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args) +#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args) +#define XDNA_INFO_ONCE(xdna, fmt, args...) 
drm_info_once(&(xdna)->ddev, fmt, ##args)
+
+#define XDNA_MBZ_DBG(xdna, ptr, sz)					\
+	({								\
+		int __i;						\
+		int __ret = 0;						\
+		u8 *__ptr = (u8 *)(ptr);				\
+		for (__i = 0; __i < (sz); __i++) {			\
+			if (__ptr[__i]) {				\
+				XDNA_DBG(xdna, "MBZ check failed");	\
+				__ret = -EINVAL;			\
+				break;					\
+			}						\
+		}							\
+		__ret;							\
+	})
+
+#define to_xdna_dev(drm_dev) \
+	((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
+
+extern const struct drm_driver amdxdna_drm_drv;
+
+struct amdxdna_client;
+struct amdxdna_dev;
+struct amdxdna_drm_get_array;
+struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
+struct amdxdna_gem_obj;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+/*
+ * struct amdxdna_dev_ops - Device hardware operation callbacks
+ */
+struct amdxdna_dev_ops {
+	int (*init)(struct amdxdna_dev *xdna);
+	void (*fini)(struct amdxdna_dev *xdna);
+	int (*resume)(struct amdxdna_dev *xdna);
+	int (*suspend)(struct amdxdna_dev *xdna);
+	int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
+	void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
+	int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+	int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
+	void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+	int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+	int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
+	int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
+};
+
+/*
+ * struct amdxdna_dev_info - Device hardware information
+ * Records static device information, such as the reg, mbox, PSP and SMU BAR indexes
+ */
+struct amdxdna_dev_info {
+	int reg_bar;
+	int mbox_bar;
+	int sram_bar;
+	int psp_bar;
+	int smu_bar;
+	int device_type;
+	int first_col;
+	u32 dev_mem_buf_shift;
+	u64 dev_mem_base;
+	size_t dev_mem_size;
+	char *vbnv;
+	const struct amdxdna_dev_priv *dev_priv;
+	const struct amdxdna_dev_ops *ops;
+};
+
+struct amdxdna_fw_ver {
+	u32 major;
+	u32 minor;
+	u32 sub;
+	u32 build;
+};
+
+struct amdxdna_dev {
+	struct drm_device ddev;
+	struct amdxdna_dev_hdl *dev_handle;
+	const struct amdxdna_dev_info *dev_info;
+	void *xrs_hdl;
+
+	struct mutex dev_lock; /* per device lock */
+	struct list_head client_list;
+	struct amdxdna_fw_ver fw_ver;
+	struct rw_semaphore notifier_lock; /* for mmu notifier */
+	struct workqueue_struct *notifier_wq;
+	bool rpm_on;
+};
+
+/*
+ * struct amdxdna_device_id - PCI device info
+ */
+struct amdxdna_device_id {
+	unsigned short device;
+	u8 revision;
+	const struct amdxdna_dev_info *dev_info;
+};
+
+/*
+ * struct amdxdna_client - amdxdna client
+ * A per-fd data structure for managing contexts and other per-process state.
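+ * Allocated in amdxdna_drm_open(), unlinked from the device's client_list
+ * in amdxdna_flush() and freed in amdxdna_drm_close(). hwctx_srcu protects
+ * readers of hwctx_xa against concurrent hardware context removal.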
+ */ +struct amdxdna_client { + struct list_head node; + pid_t pid; + struct srcu_struct hwctx_srcu; + struct xarray hwctx_xa; + u32 next_hwctxid; + struct amdxdna_dev *xdna; + struct drm_file *filp; + + struct mutex mm_lock; /* protect memory related */ + struct amdxdna_gem_obj *dev_heap; + + struct iommu_sva *sva; + int pasid; +}; + +#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \ + xa_for_each(&(client)->hwctx_xa, hwctx_id, entry) + +/* Add device info below */ +extern const struct amdxdna_dev_info dev_npu1_info; +extern const struct amdxdna_dev_info dev_npu2_info; +extern const struct amdxdna_dev_info dev_npu4_info; +extern const struct amdxdna_dev_info dev_npu5_info; +extern const struct amdxdna_dev_info dev_npu6_info; + +int amdxdna_sysfs_init(struct amdxdna_dev *xdna); +void amdxdna_sysfs_fini(struct amdxdna_dev *xdna); + +#endif /* _AMDXDNA_PCI_DRV_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c new file mode 100644 index 000000000000..fa38e65d617c --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_pm.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_drv.h> +#include <linux/pm_runtime.h> + +#include "amdxdna_pm.h" + +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */ + +int amdxdna_pm_suspend(struct device *dev) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); + int ret = -EOPNOTSUPP; + bool rpm; + + if (xdna->dev_info->ops->suspend) { + rpm = xdna->rpm_on; + xdna->rpm_on = false; + ret = xdna->dev_info->ops->suspend(xdna); + xdna->rpm_on = rpm; + } + + XDNA_DBG(xdna, "Suspend done ret %d", ret); + return ret; +} + +int amdxdna_pm_resume(struct device *dev) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); + int ret = -EOPNOTSUPP; + bool rpm; + + if (xdna->dev_info->ops->resume) { + rpm = xdna->rpm_on; + xdna->rpm_on = false; + ret = xdna->dev_info->ops->resume(xdna); + xdna->rpm_on = rpm; + } + + XDNA_DBG(xdna, "Resume done ret %d", ret); + return ret; +} + +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + int ret; + + if (!xdna->rpm_on) + return 0; + + ret = pm_runtime_resume_and_get(dev); + if (ret) { + XDNA_ERR(xdna, "Resume failed: %d", ret); + pm_runtime_set_suspended(dev); + } + + return ret; +} + +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + + if (!xdna->rpm_on) + return; + + pm_runtime_put_autosuspend(dev); +} + +void amdxdna_pm_init(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + + pm_runtime_set_active(dev); + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(dev); + pm_runtime_allow(dev); + pm_runtime_put_autosuspend(dev); + xdna->rpm_on = true; +} + +void amdxdna_pm_fini(struct amdxdna_dev *xdna) +{ + struct device *dev = xdna->ddev.dev; + + xdna->rpm_on = false; + pm_runtime_get_noresume(dev); + pm_runtime_forbid(dev); +} diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h new file mode 100644 index 000000000000..77b2d6e45570 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_pm.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. 
+ */ + +#ifndef _AMDXDNA_PM_H_ +#define _AMDXDNA_PM_H_ + +#include "amdxdna_pci_drv.h" + +int amdxdna_pm_suspend(struct device *dev); +int amdxdna_pm_resume(struct device *dev); +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna); +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna); +void amdxdna_pm_init(struct amdxdna_dev *xdna); +void amdxdna_pm_fini(struct amdxdna_dev *xdna); + +#endif /* _AMDXDNA_PM_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c new file mode 100644 index 000000000000..f27e4ee960a0 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_sysfs.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/types.h> + +#include "amdxdna_gem.h" +#include "amdxdna_pci_drv.h" + +static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct amdxdna_dev *xdna = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", xdna->dev_info->vbnv); +} +static DEVICE_ATTR_RO(vbnv); + +static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct amdxdna_dev *xdna = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", xdna->dev_info->device_type); +} +static DEVICE_ATTR_RO(device_type); + +static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct amdxdna_dev *xdna = dev_get_drvdata(dev); + + return sprintf(buf, "%d.%d.%d.%d\n", xdna->fw_ver.major, + xdna->fw_ver.minor, xdna->fw_ver.sub, + xdna->fw_ver.build); +} +static DEVICE_ATTR_RO(fw_version); + +static struct attribute *amdxdna_attrs[] = { + &dev_attr_device_type.attr, + &dev_attr_vbnv.attr, + &dev_attr_fw_version.attr, + NULL, +}; + +static struct attribute_group amdxdna_attr_group = { + .attrs = amdxdna_attrs, +}; + +int amdxdna_sysfs_init(struct amdxdna_dev *xdna) +{ + int ret; + + ret = sysfs_create_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group); + if (ret) + XDNA_ERR(xdna, "Create attr group failed"); + + return ret; +} + +void amdxdna_sysfs_fini(struct amdxdna_dev *xdna) +{ + sysfs_remove_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group); +} diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c new file mode 100644 index 000000000000..077b2261cf2a --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <linux/dma-buf.h> +#include <linux/pagemap.h> +#include <linux/vmalloc.h> + +#include "amdxdna_pci_drv.h" +#include "amdxdna_ubuf.h" + +struct amdxdna_ubuf_priv { + struct page **pages; + u64 nr_pages; + enum amdxdna_ubuf_flag flags; + struct mm_struct *mm; +}; + +static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach, + enum dma_data_direction direction) +{ + struct amdxdna_ubuf_priv *ubuf = attach->dmabuf->priv; + struct sg_table *sg; + int ret; + + sg = kzalloc(sizeof(*sg), GFP_KERNEL); + if (!sg) + return ERR_PTR(-ENOMEM); + + ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0, + ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL); + if (ret) + return ERR_PTR(ret); + + if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) { + ret = dma_map_sgtable(attach->dev, sg, direction, 0); + if (ret) + return ERR_PTR(ret); + } + + return sg; +} + +static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach, + struct sg_table *sg, + enum dma_data_direction direction) +{ + struct amdxdna_ubuf_priv *ubuf = attach->dmabuf->priv; + + if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) + dma_unmap_sgtable(attach->dev, sg, direction, 0); + + sg_free_table(sg); + kfree(sg); +} + +static void amdxdna_ubuf_release(struct dma_buf *dbuf) +{ + struct amdxdna_ubuf_priv *ubuf = dbuf->priv; + + unpin_user_pages(ubuf->pages, ubuf->nr_pages); + kvfree(ubuf->pages); + atomic64_sub(ubuf->nr_pages, &ubuf->mm->pinned_vm); + mmdrop(ubuf->mm); + kfree(ubuf); +} + +static vm_fault_t amdxdna_ubuf_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct amdxdna_ubuf_priv *ubuf; + unsigned long pfn; + pgoff_t pgoff; + + ubuf = vma->vm_private_data; + pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + + pfn = page_to_pfn(ubuf->pages[pgoff]); + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static const struct vm_operations_struct amdxdna_ubuf_vm_ops = { + .fault = amdxdna_ubuf_vm_fault, +}; + +static int amdxdna_ubuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma) +{ + struct amdxdna_ubuf_priv *ubuf = dbuf->priv; + + vma->vm_ops = &amdxdna_ubuf_vm_ops; + vma->vm_private_data = ubuf; + vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); + + return 0; +} + +static int amdxdna_ubuf_vmap(struct dma_buf *dbuf, struct iosys_map *map) +{ + struct amdxdna_ubuf_priv *ubuf = dbuf->priv; + void *kva; + + kva = vmap(ubuf->pages, ubuf->nr_pages, VM_MAP, PAGE_KERNEL); + if (!kva) + return -EINVAL; + + iosys_map_set_vaddr(map, kva); + return 0; +} + +static void amdxdna_ubuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map) +{ + vunmap(map->vaddr); +} + +static const struct dma_buf_ops amdxdna_ubuf_dmabuf_ops = { + .map_dma_buf = amdxdna_ubuf_map, + .unmap_dma_buf = amdxdna_ubuf_unmap, + .release = amdxdna_ubuf_release, + .mmap = amdxdna_ubuf_mmap, + .vmap = amdxdna_ubuf_vmap, + .vunmap = amdxdna_ubuf_vunmap, +}; + +struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, + enum amdxdna_ubuf_flag flags, + u32 num_entries, void __user *va_entries) +{ + struct amdxdna_dev *xdna = to_xdna_dev(dev); + unsigned long lock_limit, new_pinned; + struct amdxdna_drm_va_entry *va_ent; + struct amdxdna_ubuf_priv *ubuf; + u32 npages, start = 0; + struct dma_buf *dbuf; + int i, ret; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + if (!can_do_mlock()) + return ERR_PTR(-EPERM); + + ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL); + if (!ubuf) + return ERR_PTR(-ENOMEM); + + ubuf->flags = flags; 
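+	/*
+	 * Grab a reference on the creating process' mm: the pinned_vm
+	 * accounting added below is unwound in amdxdna_ubuf_release(),
+	 * which can run after the process itself has exited.
+	 */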
+ ubuf->mm = current->mm; + mmgrab(ubuf->mm); + + va_ent = kvcalloc(num_entries, sizeof(*va_ent), GFP_KERNEL); + if (!va_ent) { + ret = -ENOMEM; + goto free_ubuf; + } + + if (copy_from_user(va_ent, va_entries, sizeof(*va_ent) * num_entries)) { + XDNA_DBG(xdna, "Access va entries failed"); + ret = -EINVAL; + goto free_ent; + } + + for (i = 0, exp_info.size = 0; i < num_entries; i++) { + if (!IS_ALIGNED(va_ent[i].vaddr, PAGE_SIZE) || + !IS_ALIGNED(va_ent[i].len, PAGE_SIZE)) { + XDNA_ERR(xdna, "Invalid address or len %llx, %llx", + va_ent[i].vaddr, va_ent[i].len); + ret = -EINVAL; + goto free_ent; + } + + exp_info.size += va_ent[i].len; + } + + ubuf->nr_pages = exp_info.size >> PAGE_SHIFT; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + new_pinned = atomic64_add_return(ubuf->nr_pages, &ubuf->mm->pinned_vm); + if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) { + XDNA_DBG(xdna, "New pin %ld, limit %ld, cap %d", + new_pinned, lock_limit, capable(CAP_IPC_LOCK)); + ret = -ENOMEM; + goto sub_pin_cnt; + } + + ubuf->pages = kvmalloc_array(ubuf->nr_pages, sizeof(*ubuf->pages), GFP_KERNEL); + if (!ubuf->pages) { + ret = -ENOMEM; + goto sub_pin_cnt; + } + + for (i = 0; i < num_entries; i++) { + npages = va_ent[i].len >> PAGE_SHIFT; + + ret = pin_user_pages_fast(va_ent[i].vaddr, npages, + FOLL_WRITE | FOLL_LONGTERM, + &ubuf->pages[start]); + if (ret < 0 || ret != npages) { + ret = -ENOMEM; + XDNA_ERR(xdna, "Failed to pin pages ret %d", ret); + goto destroy_pages; + } + + start += ret; + } + + exp_info.ops = &amdxdna_ubuf_dmabuf_ops; + exp_info.priv = ubuf; + exp_info.flags = O_RDWR | O_CLOEXEC; + + dbuf = dma_buf_export(&exp_info); + if (IS_ERR(dbuf)) { + ret = PTR_ERR(dbuf); + goto destroy_pages; + } + kvfree(va_ent); + + return dbuf; + +destroy_pages: + if (start) + unpin_user_pages(ubuf->pages, start); + kvfree(ubuf->pages); +sub_pin_cnt: + atomic64_sub(ubuf->nr_pages, &ubuf->mm->pinned_vm); +free_ent: + kvfree(va_ent); +free_ubuf: + mmdrop(ubuf->mm); + kfree(ubuf); + return ERR_PTR(ret); +} diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.h b/drivers/accel/amdxdna/amdxdna_ubuf.h new file mode 100644 index 000000000000..e5cb3bdb3ec9 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ubuf.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ +#ifndef _AMDXDNA_UBUF_H_ +#define _AMDXDNA_UBUF_H_ + +#include <drm/drm_device.h> +#include <linux/dma-buf.h> + +enum amdxdna_ubuf_flag { + AMDXDNA_UBUF_FLAG_MAP_DMA = 1, +}; + +struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, + enum amdxdna_ubuf_flag flags, + u32 num_entries, void __user *va_entries); + +#endif /* _AMDXDNA_UBUF_H_ */ diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c new file mode 100644 index 000000000000..ec407f3b48fc --- /dev/null +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 
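+ *
+ * Register offsets, BAR layout and platform tables for the first
+ * NPU generation.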
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* Address definition from NPU1 docs */ +#define MPNPU_PUB_SEC_INTR 0x3010090 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010094 +#define MPNPU_PUB_SCRATCH2 0x30100A0 +#define MPNPU_PUB_SCRATCH3 0x30100A4 +#define MPNPU_PUB_SCRATCH4 0x30100A8 +#define MPNPU_PUB_SCRATCH5 0x30100AC +#define MPNPU_PUB_SCRATCH6 0x30100B0 +#define MPNPU_PUB_SCRATCH7 0x30100B4 +#define MPNPU_PUB_SCRATCH9 0x30100BC + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x30A0000 +#define MPNPU_SRAM_X2I_MAILBOX_1 0x30A2000 +#define MPNPU_SRAM_I2X_MAILBOX_15 0x30BF000 + +#define MPNPU_APERTURE0_BASE 0x3000000 +#define MPNPU_APERTURE1_BASE 0x3080000 +#define MPNPU_APERTURE2_BASE 0x30C0000 + +/* PCIe BAR Index for NPU1 */ +#define NPU1_REG_BAR_INDEX 0 +#define NPU1_MBOX_BAR_INDEX 4 +#define NPU1_PSP_BAR_INDEX 0 +#define NPU1_SMU_BAR_INDEX 0 +#define NPU1_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU1_REG_BAR_BASE MPNPU_APERTURE0_BASE +#define NPU1_MBOX_BAR_BASE MPNPU_APERTURE2_BASE +#define NPU1_PSP_BAR_BASE MPNPU_APERTURE0_BASE +#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE +#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE + +const struct rt_config npu1_default_rt_cfg[] = { + { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 4, 1, AIE2_RT_CFG_INIT }, /* Debug BO */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 0 }, +}; + +const struct dpm_clk_freq npu1_dpm_clk_table[] = { + {400, 800}, + {600, 1024}, + {600, 1024}, + {600, 1024}, + {600, 1024}, + {720, 1309}, + {720, 1309}, + {847, 1600}, + { 0 } +}; + +static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 }, + { 0 } +}; + +static const struct amdxdna_dev_priv npu1_dev_priv = { + .fw_path = "amdnpu/1502_00/npu.sbin", + .protocol_major = 0x5, + .protocol_minor = 0x7, + .rt_config = npu1_default_rt_cfg, + .dpm_clk_tbl = npu1_dpm_clk_table, + .fw_feature_tbl = npu1_fw_feature_table, + .col_align = COL_ALIGN_NONE, + .mbox_dev_addr = NPU1_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU1_SRAM_BAR_BASE, + .hwctx_limit = 6, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU1_PSP, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU1_PSP, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), + }, + .hw_ops = { + .set_dpm = npu1_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu1_info = { + .reg_bar = NPU1_REG_BAR_INDEX, + .mbox_bar = NPU1_MBOX_BAR_INDEX, + .sram_bar = NPU1_SRAM_BAR_INDEX, + .psp_bar = NPU1_PSP_BAR_INDEX, + .smu_bar = 
NPU1_SMU_BAR_INDEX, + .first_col = 1, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu1", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu1_dev_priv, + .ops = &aie2_ops, +}; diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c new file mode 100644 index 000000000000..86f87d0d1354 --- /dev/null +++ b/drivers/accel/amdxdna/npu2_regs.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU2 */ +#define NPU2_REG_BAR_INDEX 0 +#define NPU2_MBOX_BAR_INDEX 0 +#define NPU2_PSP_BAR_INDEX 4 +#define NPU2_SMU_BAR_INDEX 5 +#define NPU2_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU2_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU2_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU2_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +static const struct amdxdna_dev_priv npu2_dev_priv = { + .fw_path = "amdnpu/17f0_00/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 0x6, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU2_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU2_SRAM_BAR_BASE, + .hwctx_limit = 16, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123), + 
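	/* Status and response alias the command and ARG0 scratch registers. */
+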
DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu2_info = { + .reg_bar = NPU2_REG_BAR_INDEX, + .mbox_bar = NPU2_MBOX_BAR_INDEX, + .sram_bar = NPU2_SRAM_BAR_INDEX, + .psp_bar = NPU2_PSP_BAR_INDEX, + .smu_bar = NPU2_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu2", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu2_dev_priv, + .ops = &aie2_ops, /* NPU2 can share NPU1's callback */ +}; diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c new file mode 100644 index 000000000000..986a5f28ba24 --- /dev/null +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU4 */ +#define NPU4_REG_BAR_INDEX 0 +#define NPU4_MBOX_BAR_INDEX 0 +#define NPU4_PSP_BAR_INDEX 4 +#define NPU4_SMU_BAR_INDEX 5 +#define NPU4_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU4_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU4_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU4_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +const struct rt_config npu4_default_rt_cfg[] = { + { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */ + { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 2, 1, AIE2_RT_CFG_CLK_GATING }, 
/* Clock gating on */ + { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT }, + { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT }, + { 0 }, +}; + +const struct dpm_clk_freq npu4_dpm_clk_table[] = { + {396, 792}, + {600, 1056}, + {792, 1152}, + {975, 1267}, + {975, 1267}, + {1056, 1408}, + {1152, 1584}, + {1267, 1800}, + { 0 } +}; + +const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, + { .feature = AIE2_PREEMPT, .min_minor = 12 }, + { 0 } +}; + +static const struct amdxdna_dev_priv npu4_dev_priv = { + .fw_path = "amdnpu/17f0_10/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU4_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU4_SRAM_BAR_BASE, + .hwctx_limit = 16, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu4_info = { + .reg_bar = NPU4_REG_BAR_INDEX, + .mbox_bar = NPU4_MBOX_BAR_INDEX, + .sram_bar = NPU4_SRAM_BAR_INDEX, + .psp_bar = NPU4_PSP_BAR_INDEX, + .smu_bar = NPU4_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu4", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu4_dev_priv, + .ops = &aie2_ops, /* NPU4 can share NPU1's callback */ +}; diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c new file mode 100644 index 000000000000..75ad97f0b937 --- /dev/null +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. 
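+ *
+ * NPU5 shares the NPU4 runtime configuration, DPM clock table, firmware
+ * feature table and hw_ops; the firmware path and VBNV differ.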
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU5 */ +#define NPU5_REG_BAR_INDEX 0 +#define NPU5_MBOX_BAR_INDEX 0 +#define NPU5_PSP_BAR_INDEX 4 +#define NPU5_SMU_BAR_INDEX 5 +#define NPU5_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU5_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU5_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU5_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +static const struct amdxdna_dev_priv npu5_dev_priv = { + .fw_path = "amdnpu/17f0_11/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU5_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU5_SRAM_BAR_BASE, + .hwctx_limit = 16, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU5_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU5_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU5_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU5_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, +}; + +const struct amdxdna_dev_info dev_npu5_info = { + .reg_bar = NPU5_REG_BAR_INDEX, + .mbox_bar = NPU5_MBOX_BAR_INDEX, + .sram_bar = 
NPU5_SRAM_BAR_INDEX, + .psp_bar = NPU5_PSP_BAR_INDEX, + .smu_bar = NPU5_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu5", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu5_dev_priv, + .ops = &aie2_ops, +}; diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c new file mode 100644 index 000000000000..758dc013fe13 --- /dev/null +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU6 */ +#define NPU6_REG_BAR_INDEX 0 +#define NPU6_MBOX_BAR_INDEX 0 +#define NPU6_PSP_BAR_INDEX 4 +#define NPU6_SMU_BAR_INDEX 5 +#define NPU6_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +static const struct amdxdna_dev_priv npu6_dev_priv = { + .fw_path = "amdnpu/17f0_10/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU6_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU6_SRAM_BAR_BASE, + .hwctx_limit = 16, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73), + 
DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, + +}; + +const struct amdxdna_dev_info dev_npu6_info = { + .reg_bar = NPU6_REG_BAR_INDEX, + .mbox_bar = NPU6_MBOX_BAR_INDEX, + .sram_bar = NPU6_SRAM_BAR_INDEX, + .psp_bar = NPU6_PSP_BAR_INDEX, + .smu_bar = NPU6_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu6", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu6_dev_priv, + .ops = &aie2_ops, +}; diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c index 1b69824286fd..ca3357acd127 100644 --- a/drivers/accel/drm_accel.c +++ b/drivers/accel/drm_accel.c @@ -8,22 +8,19 @@ #include <linux/debugfs.h> #include <linux/device.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <drm/drm_accel.h> +#include <drm/drm_auth.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> #include <drm/drm_print.h> -static DEFINE_SPINLOCK(accel_minor_lock); -static struct idr accel_minors_idr; +DEFINE_XARRAY_ALLOC(accel_minors_xa); -static struct dentry *accel_debugfs_root; -static struct class *accel_class; - -static struct device_type accel_sysfs_device_minor = { +static const struct device_type accel_sysfs_device_minor = { .name = "accel_minor" }; @@ -32,23 +29,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev)); } +static const struct class accel_class = { + .name = "accel", + .devnode = accel_devnode, +}; + static int accel_sysfs_init(void) { - accel_class = class_create(THIS_MODULE, "accel"); - if (IS_ERR(accel_class)) - return PTR_ERR(accel_class); - - accel_class->devnode = accel_devnode; - - return 0; + return class_register(&accel_class); } static void accel_sysfs_destroy(void) { - if (IS_ERR_OR_NULL(accel_class)) - return; - class_destroy(accel_class); - accel_class = NULL; + class_unregister(&accel_class); } static int accel_name_info(struct seq_file *m, void *data) @@ -79,29 +72,19 @@ static const struct drm_info_list accel_debugfs_list[] = { #define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list) /** - * accel_debugfs_init() - Initialize debugfs for accel minor - * @minor: Pointer to the drm_minor instance. - * @minor_id: The minor's id + * accel_debugfs_register() - Register debugfs for device + * @dev: Pointer to the device instance. * - * This function initializes the drm minor's debugfs members and creates - * a root directory for the minor in debugfs. It also creates common files - * for accelerators and calls the driver's debugfs init callback. + * Creates common files for accelerators. 
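+ * Unlike the old accel_debugfs_init(), no private accel directory is
+ * created; the accel minor reuses the DRM device's debugfs root.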
*/ -void accel_debugfs_init(struct drm_minor *minor, int minor_id) +void accel_debugfs_register(struct drm_device *dev) { - struct drm_device *dev = minor->dev; - char name[64]; + struct drm_minor *minor = dev->accel; - INIT_LIST_HEAD(&minor->debugfs_list); - mutex_init(&minor->debugfs_lock); - sprintf(name, "%d", minor_id); - minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root); + minor->debugfs_root = dev->debugfs_root; drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES, - minor->debugfs_root, minor); - - if (dev->driver->debugfs_init) - dev->driver->debugfs_init(minor); + dev->debugfs_root, minor); } /** @@ -116,104 +99,11 @@ void accel_debugfs_init(struct drm_minor *minor, int minor_id) void accel_set_device_instance_params(struct device *kdev, int index) { kdev->devt = MKDEV(ACCEL_MAJOR, index); - kdev->class = accel_class; + kdev->class = &accel_class; kdev->type = &accel_sysfs_device_minor; } /** - * accel_minor_alloc() - Allocates a new accel minor - * - * This function access the accel minors idr and allocates from it - * a new id to represent a new accel minor - * - * Return: A new id on success or error code in case idr_alloc failed - */ -int accel_minor_alloc(void) -{ - unsigned long flags; - int r; - - spin_lock_irqsave(&accel_minor_lock, flags); - r = idr_alloc(&accel_minors_idr, NULL, 0, ACCEL_MAX_MINORS, GFP_NOWAIT); - spin_unlock_irqrestore(&accel_minor_lock, flags); - - return r; -} - -/** - * accel_minor_remove() - Remove an accel minor - * @index: The minor id to remove. - * - * This function access the accel minors idr and removes from - * it the member with the id that is passed to this function. - */ -void accel_minor_remove(int index) -{ - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - idr_remove(&accel_minors_idr, index); - spin_unlock_irqrestore(&accel_minor_lock, flags); -} - -/** - * accel_minor_replace() - Replace minor pointer in accel minors idr. - * @minor: Pointer to the new minor. - * @index: The minor id to replace. - * - * This function access the accel minors idr structure and replaces the pointer - * that is associated with an existing id. Because the minor pointer can be - * NULL, we need to explicitly pass the index. - * - * Return: 0 for success, negative value for error - */ -void accel_minor_replace(struct drm_minor *minor, int index) -{ - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - idr_replace(&accel_minors_idr, minor, index); - spin_unlock_irqrestore(&accel_minor_lock, flags); -} - -/* - * Looks up the given minor-ID and returns the respective DRM-minor object. The - * refence-count of the underlying device is increased so you must release this - * object with accel_minor_release(). - * - * The object can be only a drm_minor that represents an accel device. - * - * As long as you hold this minor, it is guaranteed that the object and the - * minor->dev pointer will stay valid! However, the device may get unplugged and - * unregistered while you hold the minor. 
- */ -static struct drm_minor *accel_minor_acquire(unsigned int minor_id) -{ - struct drm_minor *minor; - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - minor = idr_find(&accel_minors_idr, minor_id); - if (minor) - drm_dev_get(minor->dev); - spin_unlock_irqrestore(&accel_minor_lock, flags); - - if (!minor) { - return ERR_PTR(-ENODEV); - } else if (drm_dev_is_unplugged(minor->dev)) { - drm_dev_put(minor->dev); - return ERR_PTR(-ENODEV); - } - - return minor; -} - -static void accel_minor_release(struct drm_minor *minor) -{ - drm_dev_put(minor->dev); -} - -/** * accel_open - open method for ACCEL file * @inode: device inode * @filp: file pointer. @@ -230,7 +120,7 @@ int accel_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int retcode; - minor = accel_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&accel_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); @@ -249,7 +139,7 @@ int accel_open(struct inode *inode, struct file *filp) err_undo: atomic_dec(&dev->open_count); - accel_minor_release(minor); + drm_minor_release(minor); return retcode; } EXPORT_SYMBOL_GPL(accel_open); @@ -260,7 +150,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int err; - minor = accel_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&accel_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); @@ -277,7 +167,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp) err = 0; out: - accel_minor_release(minor); + drm_minor_release(minor); return err; } @@ -291,25 +181,20 @@ static const struct file_operations accel_stub_fops = { void accel_core_exit(void) { unregister_chrdev(ACCEL_MAJOR, "accel"); - debugfs_remove(accel_debugfs_root); accel_sysfs_destroy(); - idr_destroy(&accel_minors_idr); + WARN_ON(!xa_empty(&accel_minors_xa)); } int __init accel_core_init(void) { int ret; - idr_init(&accel_minors_idr); - ret = accel_sysfs_init(); if (ret < 0) { DRM_ERROR("Cannot create ACCEL class: %d\n", ret); goto error; } - accel_debugfs_root = debugfs_create_dir("accel", NULL); - ret = register_chrdev(ACCEL_MAJOR, "accel", &accel_stub_fops); if (ret < 0) DRM_ERROR("Cannot register ACCEL major: %d\n", ret); diff --git a/drivers/accel/ethosu/Kconfig b/drivers/accel/ethosu/Kconfig new file mode 100644 index 000000000000..d25f9b3eb317 --- /dev/null +++ b/drivers/accel/ethosu/Kconfig @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_ARM_ETHOSU + tristate "Arm Ethos-U65/U85 NPU" + depends on HAS_IOMEM + depends on DRM_ACCEL + select DRM_GEM_DMA_HELPER + select DRM_SCHED + select GENERIC_ALLOCATOR + help + Enables driver for Arm Ethos-U65/U85 NPUs diff --git a/drivers/accel/ethosu/Makefile b/drivers/accel/ethosu/Makefile new file mode 100644 index 000000000000..17db5a600416 --- /dev/null +++ b/drivers/accel/ethosu/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) := ethosu.o +ethosu-y += ethosu_drv.o ethosu_gem.o ethosu_job.o diff --git a/drivers/accel/ethosu/ethosu_device.h b/drivers/accel/ethosu/ethosu_device.h new file mode 100644 index 000000000000..b189fa783d6a --- /dev/null +++ b/drivers/accel/ethosu/ethosu_device.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0-only or MIT */ +/* Copyright 2025 Arm, Ltd. 
*/ + +#ifndef __ETHOSU_DEVICE_H__ +#define __ETHOSU_DEVICE_H__ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/types.h> + +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> + +#include <drm/ethosu_accel.h> + +struct clk; +struct gen_pool; + +#define NPU_REG_ID 0x0000 +#define NPU_REG_STATUS 0x0004 +#define NPU_REG_CMD 0x0008 +#define NPU_REG_RESET 0x000c +#define NPU_REG_QBASE 0x0010 +#define NPU_REG_QBASE_HI 0x0014 +#define NPU_REG_QREAD 0x0018 +#define NPU_REG_QCONFIG 0x001c +#define NPU_REG_QSIZE 0x0020 +#define NPU_REG_PROT 0x0024 +#define NPU_REG_CONFIG 0x0028 +#define NPU_REG_REGIONCFG 0x003c +#define NPU_REG_AXILIMIT0 0x0040 // U65 +#define NPU_REG_AXILIMIT1 0x0044 // U65 +#define NPU_REG_AXILIMIT2 0x0048 // U65 +#define NPU_REG_AXILIMIT3 0x004c // U65 +#define NPU_REG_MEM_ATTR0 0x0040 // U85 +#define NPU_REG_MEM_ATTR1 0x0044 // U85 +#define NPU_REG_MEM_ATTR2 0x0048 // U85 +#define NPU_REG_MEM_ATTR3 0x004c // U85 +#define NPU_REG_AXI_SRAM 0x0050 // U85 +#define NPU_REG_AXI_EXT 0x0054 // U85 + +#define NPU_REG_BASEP(x) (0x0080 + (x) * 8) +#define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8) +#define NPU_BASEP_REGION_MAX 8 + +#define ID_ARCH_MAJOR_MASK GENMASK(31, 28) +#define ID_ARCH_MINOR_MASK GENMASK(27, 20) +#define ID_ARCH_PATCH_MASK GENMASK(19, 16) +#define ID_VER_MAJOR_MASK GENMASK(11, 8) +#define ID_VER_MINOR_MASK GENMASK(7, 4) + +#define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0) +#define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4) + +#define STATUS_STATE_RUNNING BIT(0) +#define STATUS_IRQ_RAISED BIT(1) +#define STATUS_BUS_STATUS BIT(2) +#define STATUS_RESET_STATUS BIT(3) +#define STATUS_CMD_PARSE_ERR BIT(4) +#define STATUS_CMD_END_REACHED BIT(5) + +#define CMD_CLEAR_IRQ BIT(1) +#define CMD_TRANSITION_TO_RUN BIT(0) + +#define RESET_PENDING_CSL BIT(1) +#define RESET_PENDING_CPL BIT(0) + +#define PROT_ACTIVE_CSL BIT(1) + +enum ethosu_cmds { + NPU_OP_CONV = 0x2, + NPU_OP_DEPTHWISE = 0x3, + NPU_OP_POOL = 0x5, + NPU_OP_ELEMENTWISE = 0x6, + NPU_OP_RESIZE = 0x7, // U85 only + NPU_OP_DMA_START = 0x10, + NPU_SET_IFM_PAD_TOP = 0x100, + NPU_SET_IFM_PAD_LEFT = 0x101, + NPU_SET_IFM_PAD_RIGHT = 0x102, + NPU_SET_IFM_PAD_BOTTOM = 0x103, + NPU_SET_IFM_DEPTH_M1 = 0x104, + NPU_SET_IFM_PRECISION = 0x105, + NPU_SET_IFM_BROADCAST = 0x108, + NPU_SET_IFM_WIDTH0_M1 = 0x10a, + NPU_SET_IFM_HEIGHT0_M1 = 0x10b, + NPU_SET_IFM_HEIGHT1_M1 = 0x10c, + NPU_SET_IFM_REGION = 0x10f, + NPU_SET_OFM_WIDTH_M1 = 0x111, + NPU_SET_OFM_HEIGHT_M1 = 0x112, + NPU_SET_OFM_DEPTH_M1 = 0x113, + NPU_SET_OFM_PRECISION = 0x114, + NPU_SET_OFM_WIDTH0_M1 = 0x11a, + NPU_SET_OFM_HEIGHT0_M1 = 0x11b, + NPU_SET_OFM_HEIGHT1_M1 = 0x11c, + NPU_SET_OFM_REGION = 0x11f, + NPU_SET_KERNEL_WIDTH_M1 = 0x120, + NPU_SET_KERNEL_HEIGHT_M1 = 0x121, + NPU_SET_KERNEL_STRIDE = 0x122, + NPU_SET_WEIGHT_REGION = 0x128, + NPU_SET_SCALE_REGION = 0x129, + NPU_SET_DMA0_SRC_REGION = 0x130, + NPU_SET_DMA0_DST_REGION = 0x131, + NPU_SET_DMA0_SIZE0 = 0x132, + NPU_SET_DMA0_SIZE1 = 0x133, + NPU_SET_IFM2_BROADCAST = 0x180, + NPU_SET_IFM2_PRECISION = 0x185, + NPU_SET_IFM2_WIDTH0_M1 = 0x18a, + NPU_SET_IFM2_HEIGHT0_M1 = 0x18b, + NPU_SET_IFM2_HEIGHT1_M1 = 0x18c, + NPU_SET_IFM2_REGION = 0x18f, + NPU_SET_IFM_BASE0 = 0x4000, + NPU_SET_IFM_BASE1 = 0x4001, + NPU_SET_IFM_BASE2 = 0x4002, + NPU_SET_IFM_BASE3 = 0x4003, + NPU_SET_IFM_STRIDE_X = 0x4004, + NPU_SET_IFM_STRIDE_Y = 0x4005, + NPU_SET_IFM_STRIDE_C = 0x4006, + NPU_SET_OFM_BASE0 = 0x4010, + NPU_SET_OFM_BASE1 = 0x4011, + NPU_SET_OFM_BASE2 = 0x4012, + NPU_SET_OFM_BASE3 = 0x4013, + NPU_SET_OFM_STRIDE_X = 
0x4014, + NPU_SET_OFM_STRIDE_Y = 0x4015, + NPU_SET_OFM_STRIDE_C = 0x4016, + NPU_SET_WEIGHT_BASE = 0x4020, + NPU_SET_WEIGHT_LENGTH = 0x4021, + NPU_SET_SCALE_BASE = 0x4022, + NPU_SET_SCALE_LENGTH = 0x4023, + NPU_SET_DMA0_SRC = 0x4030, + NPU_SET_DMA0_DST = 0x4031, + NPU_SET_DMA0_LEN = 0x4032, + NPU_SET_DMA0_SRC_STRIDE0 = 0x4033, + NPU_SET_DMA0_SRC_STRIDE1 = 0x4034, + NPU_SET_DMA0_DST_STRIDE0 = 0x4035, + NPU_SET_DMA0_DST_STRIDE1 = 0x4036, + NPU_SET_IFM2_BASE0 = 0x4080, + NPU_SET_IFM2_BASE1 = 0x4081, + NPU_SET_IFM2_BASE2 = 0x4082, + NPU_SET_IFM2_BASE3 = 0x4083, + NPU_SET_IFM2_STRIDE_X = 0x4084, + NPU_SET_IFM2_STRIDE_Y = 0x4085, + NPU_SET_IFM2_STRIDE_C = 0x4086, + NPU_SET_WEIGHT1_BASE = 0x4090, + NPU_SET_WEIGHT1_LENGTH = 0x4091, + NPU_SET_SCALE1_BASE = 0x4092, + NPU_SET_WEIGHT2_BASE = 0x4092, + NPU_SET_SCALE1_LENGTH = 0x4093, + NPU_SET_WEIGHT2_LENGTH = 0x4093, + NPU_SET_WEIGHT3_BASE = 0x4094, + NPU_SET_WEIGHT3_LENGTH = 0x4095, +}; + +#define ETHOSU_SRAM_REGION 2 /* Matching Vela compiler */ + +/** + * struct ethosu_device - Ethosu device + */ +struct ethosu_device { + /** @base: Base drm_device. */ + struct drm_device base; + + /** @iomem: CPU mapping of the registers. */ + void __iomem *regs; + + void __iomem *sram; + struct gen_pool *srampool; + dma_addr_t sramphys; + + struct clk_bulk_data *clks; + int num_clks; + int irq; + + struct drm_ethosu_npu_info npu_info; + + struct ethosu_job *in_flight_job; + /* For in_flight_job and ethosu_job_hw_submit() */ + struct mutex job_lock; + + /* For dma_fence */ + spinlock_t fence_lock; + + struct drm_gpu_scheduler sched; + /* For ethosu_job_do_push() */ + struct mutex sched_lock; + u64 fence_context; + u64 emit_seqno; +}; + +#define to_ethosu_device(drm_dev) \ + ((struct ethosu_device *)container_of(drm_dev, struct ethosu_device, base)) + +static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev) +{ + return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1; +} + +#endif diff --git a/drivers/accel/ethosu/ethosu_drv.c b/drivers/accel/ethosu/ethosu_drv.c new file mode 100644 index 000000000000..e05a69bf5574 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_drv.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0-only or MIT +// Copyright (C) 2025 Arm, Ltd. 
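+
+/*
+ * Illustrative userspace flow (a sketch only; it assumes the
+ * DRM_IOCTL_ETHOSU_* numbers generated from the drm/ethosu_accel.h uapi
+ * header and the ioctl table below):
+ *
+ *	struct drm_ethosu_cmdstream_bo_create cs = {
+ *		.size = cs_size,
+ *		.data = (__u64)(uintptr_t)cs_data,
+ *	};
+ *	ioctl(fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, &cs);
+ *
+ * The returned cs.handle can then be referenced by DRM_IOCTL_ETHOSU_SUBMIT,
+ * and DRM_IOCTL_ETHOSU_BO_WAIT blocks until the BO's reservation object
+ * signals completion.
+ */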
+ +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/genalloc.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_drv.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_utils.h> +#include <drm/drm_gem.h> +#include <drm/drm_accel.h> +#include <drm/ethosu_accel.h> + +#include "ethosu_drv.h" +#include "ethosu_device.h" +#include "ethosu_gem.h" +#include "ethosu_job.h" + +static int ethosu_ioctl_dev_query(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct ethosu_device *ethosudev = to_ethosu_device(ddev); + struct drm_ethosu_dev_query *args = data; + + if (!args->pointer) { + switch (args->type) { + case DRM_ETHOSU_DEV_QUERY_NPU_INFO: + args->size = sizeof(ethosudev->npu_info); + return 0; + default: + return -EINVAL; + } + } + + switch (args->type) { + case DRM_ETHOSU_DEV_QUERY_NPU_INFO: + if (args->size < offsetofend(struct drm_ethosu_npu_info, sram_size)) + return -EINVAL; + return copy_struct_to_user(u64_to_user_ptr(args->pointer), + args->size, + ðosudev->npu_info, + sizeof(ethosudev->npu_info), NULL); + default: + return -EINVAL; + } +} + +#define ETHOSU_BO_FLAGS DRM_ETHOSU_BO_NO_MMAP + +static int ethosu_ioctl_bo_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_bo_create *args = data; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + if (!args->size || (args->flags & ~ETHOSU_BO_FLAGS)) { + ret = -EINVAL; + goto out_dev_exit; + } + + ret = ethosu_gem_create_with_handle(file, ddev, &args->size, + args->flags, &args->handle); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +static int ethosu_ioctl_bo_wait(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_bo_wait *args = data; + int cookie, ret; + unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + if (args->pad) + return -EINVAL; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + ret = drm_gem_dma_resv_wait(file, args->handle, true, timeout); + + drm_dev_exit(cookie); + return ret; +} + +static int ethosu_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_bo_mmap_offset *args = data; + struct drm_gem_object *obj; + + if (args->pad) + return -EINVAL; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + args->offset = drm_vma_node_offset_addr(&obj->vma_node); + drm_gem_object_put(obj); + return 0; +} + +static int ethosu_ioctl_cmdstream_bo_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_cmdstream_bo_create *args = data; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + if (!args->size || !args->data || args->pad || args->flags) { + ret = -EINVAL; + goto out_dev_exit; + } + + args->flags |= DRM_ETHOSU_BO_NO_MMAP; + + ret = ethosu_gem_cmdstream_create(file, ddev, args->size, args->data, + args->flags, &args->handle); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +static int ethosu_open(struct drm_device *ddev, struct drm_file *file) +{ + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + struct ethosu_file_priv __free(kfree) *priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto err_put_mod; + } + priv->edev = to_ethosu_device(ddev); + + ret = ethosu_job_open(priv); + if (ret) + 
goto err_put_mod; + + file->driver_priv = no_free_ptr(priv); + return 0; + +err_put_mod: + module_put(THIS_MODULE); + return ret; +} + +static void ethosu_postclose(struct drm_device *ddev, struct drm_file *file) +{ + ethosu_job_close(file->driver_priv); + kfree(file->driver_priv); + module_put(THIS_MODULE); +} + +static const struct drm_ioctl_desc ethosu_drm_driver_ioctls[] = { +#define ETHOSU_IOCTL(n, func, flags) \ + DRM_IOCTL_DEF_DRV(ETHOSU_##n, ethosu_ioctl_##func, flags) + + ETHOSU_IOCTL(DEV_QUERY, dev_query, 0), + ETHOSU_IOCTL(BO_CREATE, bo_create, 0), + ETHOSU_IOCTL(BO_WAIT, bo_wait, 0), + ETHOSU_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, 0), + ETHOSU_IOCTL(CMDSTREAM_BO_CREATE, cmdstream_bo_create, 0), + ETHOSU_IOCTL(SUBMIT, submit, 0), +}; + +DEFINE_DRM_ACCEL_FOPS(ethosu_drm_driver_fops); + +/* + * Ethosu driver version: + * - 1.0 - initial interface + */ +static const struct drm_driver ethosu_drm_driver = { + .driver_features = DRIVER_COMPUTE_ACCEL | DRIVER_GEM, + .open = ethosu_open, + .postclose = ethosu_postclose, + .ioctls = ethosu_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(ethosu_drm_driver_ioctls), + .fops = ðosu_drm_driver_fops, + .name = "ethosu", + .desc = "Arm Ethos-U Accel driver", + .major = 1, + .minor = 0, + + .gem_create_object = ethosu_gem_create_object, +}; + +#define U65_DRAM_AXI_LIMIT_CFG 0x1f3f0002 +#define U65_SRAM_AXI_LIMIT_CFG 0x1f3f00b0 +#define U85_AXI_EXT_CFG 0x00021f3f +#define U85_AXI_SRAM_CFG 0x00021f3f +#define U85_MEM_ATTR0_CFG 0x00000000 +#define U85_MEM_ATTR2_CFG 0x000000b7 + +static int ethosu_reset(struct ethosu_device *ethosudev) +{ + int ret; + u32 reg; + + writel_relaxed(RESET_PENDING_CSL, ethosudev->regs + NPU_REG_RESET); + ret = readl_poll_timeout(ethosudev->regs + NPU_REG_STATUS, reg, + !FIELD_GET(STATUS_RESET_STATUS, reg), + USEC_PER_MSEC, USEC_PER_SEC); + if (ret) + return ret; + + if (!FIELD_GET(PROT_ACTIVE_CSL, readl_relaxed(ethosudev->regs + NPU_REG_PROT))) { + dev_warn(ethosudev->base.dev, "Could not reset to non-secure mode (PROT = %x)\n", + readl_relaxed(ethosudev->regs + NPU_REG_PROT)); + } + + /* + * Assign region 2 (SRAM) to AXI M0 (AXILIMIT0), + * everything else to AXI M1 (AXILIMIT2) + */ + writel_relaxed(0x0000aa8a, ethosudev->regs + NPU_REG_REGIONCFG); + if (ethosu_is_u65(ethosudev)) { + writel_relaxed(U65_SRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT0); + writel_relaxed(U65_DRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT2); + } else { + writel_relaxed(U85_AXI_SRAM_CFG, ethosudev->regs + NPU_REG_AXI_SRAM); + writel_relaxed(U85_AXI_EXT_CFG, ethosudev->regs + NPU_REG_AXI_EXT); + writel_relaxed(U85_MEM_ATTR0_CFG, ethosudev->regs + NPU_REG_MEM_ATTR0); // SRAM + writel_relaxed(U85_MEM_ATTR2_CFG, ethosudev->regs + NPU_REG_MEM_ATTR2); // DRAM + } + + if (ethosudev->sram) + memset_io(ethosudev->sram, 0, ethosudev->npu_info.sram_size); + + return 0; +} + +static int ethosu_device_resume(struct device *dev) +{ + struct ethosu_device *ethosudev = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_prepare_enable(ethosudev->num_clks, ethosudev->clks); + if (ret) + return ret; + + ret = ethosu_reset(ethosudev); + if (!ret) + return 0; + + clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks); + return ret; +} + +static int ethosu_device_suspend(struct device *dev) +{ + struct ethosu_device *ethosudev = dev_get_drvdata(dev); + + clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks); + return 0; +} + +static int ethosu_sram_init(struct ethosu_device *ethosudev) +{ + ethosudev->npu_info.sram_size = 0; + + 
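/*
+	 * The dedicated SRAM is optional: without an "sram" gen_pool in the
+	 * device tree node, the NPU simply runs from external memory.
+	 */
+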
ethosudev->srampool = of_gen_pool_get(ethosudev->base.dev->of_node, "sram", 0); + if (!ethosudev->srampool) + return 0; + + ethosudev->npu_info.sram_size = gen_pool_size(ethosudev->srampool); + + ethosudev->sram = (void __iomem *)gen_pool_dma_alloc(ethosudev->srampool, + ethosudev->npu_info.sram_size, + ðosudev->sramphys); + if (!ethosudev->sram) { + dev_err(ethosudev->base.dev, "failed to allocate from SRAM pool\n"); + return -ENOMEM; + } + + return 0; +} + +static int ethosu_init(struct ethosu_device *ethosudev) +{ + int ret; + u32 id, config; + + ret = ethosu_device_resume(ethosudev->base.dev); + if (ret) + return ret; + + pm_runtime_set_autosuspend_delay(ethosudev->base.dev, 50); + pm_runtime_use_autosuspend(ethosudev->base.dev); + ret = devm_pm_runtime_set_active_enabled(ethosudev->base.dev); + if (ret) + return ret; + pm_runtime_get_noresume(ethosudev->base.dev); + + ethosudev->npu_info.id = id = readl_relaxed(ethosudev->regs + NPU_REG_ID); + ethosudev->npu_info.config = config = readl_relaxed(ethosudev->regs + NPU_REG_CONFIG); + + ethosu_sram_init(ethosudev); + + dev_info(ethosudev->base.dev, + "Ethos-U NPU, arch v%ld.%ld.%ld, rev r%ldp%ld, cmd stream ver%ld, %d MACs, %dKB SRAM\n", + FIELD_GET(ID_ARCH_MAJOR_MASK, id), + FIELD_GET(ID_ARCH_MINOR_MASK, id), + FIELD_GET(ID_ARCH_PATCH_MASK, id), + FIELD_GET(ID_VER_MAJOR_MASK, id), + FIELD_GET(ID_VER_MINOR_MASK, id), + FIELD_GET(CONFIG_CMD_STREAM_VER_MASK, config), + 1 << FIELD_GET(CONFIG_MACS_PER_CC_MASK, config), + ethosudev->npu_info.sram_size / 1024); + + return 0; +} + +static int ethosu_probe(struct platform_device *pdev) +{ + int ret; + struct ethosu_device *ethosudev; + + ethosudev = devm_drm_dev_alloc(&pdev->dev, ðosu_drm_driver, + struct ethosu_device, base); + if (IS_ERR(ethosudev)) + return -ENOMEM; + platform_set_drvdata(pdev, ethosudev); + + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + + ethosudev->regs = devm_platform_ioremap_resource(pdev, 0); + + ethosudev->num_clks = devm_clk_bulk_get_all(&pdev->dev, ðosudev->clks); + if (ethosudev->num_clks < 0) + return ethosudev->num_clks; + + ret = ethosu_job_init(ethosudev); + if (ret) + return ret; + + ret = ethosu_init(ethosudev); + if (ret) + return ret; + + ret = drm_dev_register(ðosudev->base, 0); + if (ret) + pm_runtime_dont_use_autosuspend(ethosudev->base.dev); + + pm_runtime_put_autosuspend(ethosudev->base.dev); + return ret; +} + +static void ethosu_remove(struct platform_device *pdev) +{ + struct ethosu_device *ethosudev = dev_get_drvdata(&pdev->dev); + + drm_dev_unregister(ðosudev->base); + ethosu_job_fini(ethosudev); + if (ethosudev->sram) + gen_pool_free(ethosudev->srampool, (unsigned long)ethosudev->sram, + ethosudev->npu_info.sram_size); +} + +static const struct of_device_id dt_match[] = { + { .compatible = "arm,ethos-u65" }, + { .compatible = "arm,ethos-u85" }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static DEFINE_RUNTIME_DEV_PM_OPS(ethosu_pm_ops, + ethosu_device_suspend, + ethosu_device_resume, + NULL); + +static struct platform_driver ethosu_driver = { + .probe = ethosu_probe, + .remove = ethosu_remove, + .driver = { + .name = "ethosu", + .pm = pm_ptr(ðosu_pm_ops), + .of_match_table = dt_match, + }, +}; +module_platform_driver(ethosu_driver); + +MODULE_AUTHOR("Rob Herring <robh@kernel.org>"); +MODULE_DESCRIPTION("Arm Ethos-U Accel Driver"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/accel/ethosu/ethosu_drv.h b/drivers/accel/ethosu/ethosu_drv.h new file mode 100644 index 000000000000..9e21dfe94184 --- /dev/null +++ 
b/drivers/accel/ethosu/ethosu_drv.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright 2025 Arm, Ltd. */
+#ifndef __ETHOSU_DRV_H__
+#define __ETHOSU_DRV_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct ethosu_device;
+
+struct ethosu_file_priv {
+	struct ethosu_device *edev;
+	struct drm_sched_entity sched_entity;
+};
+
+#endif
diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
new file mode 100644
index 000000000000..473b5f5d7514
--- /dev/null
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright 2025 Arm, Ltd. */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <drm/ethosu_accel.h>
+
+#include "ethosu_device.h"
+#include "ethosu_gem.h"
+
+static void ethosu_gem_free_object(struct drm_gem_object *obj)
+{
+	struct ethosu_gem_object *bo = to_ethosu_bo(obj);
+
+	kfree(bo->info);
+	drm_gem_free_mmap_offset(&bo->base.base);
+	drm_gem_dma_free(&bo->base);
+}
+
+static int ethosu_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct ethosu_gem_object *bo = to_ethosu_bo(obj);
+
+	/* Don't allow mmap on objects that have the NO_MMAP flag set. */
+	if (bo->flags & DRM_ETHOSU_BO_NO_MMAP)
+		return -EINVAL;
+
+	return drm_gem_dma_object_mmap(obj, vma);
+}
+
+static const struct drm_gem_object_funcs ethosu_gem_funcs = {
+	.free = ethosu_gem_free_object,
+	.print_info = drm_gem_dma_object_print_info,
+	.get_sg_table = drm_gem_dma_object_get_sg_table,
+	.vmap = drm_gem_dma_object_vmap,
+	.mmap = ethosu_gem_mmap,
+	.vm_ops = &drm_gem_dma_vm_ops,
+};
+
+/**
+ * ethosu_gem_create_object - Implementation of driver->gem_create_object.
+ * @ddev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * This lets the GEM helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t size)
+{
+	struct ethosu_gem_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	obj->base.base.funcs = &ethosu_gem_funcs;
+	return &obj->base.base;
+}
+
+/**
+ * ethosu_gem_create_with_handle() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @size: Size of the GEM object to allocate; updated with the actual size on return.
+ * @flags: Combination of drm_ethosu_bo_flags flags.
+ * @handle: Pointer holding the handle pointing to the new GEM object.
+ *
+ * Return: Zero on success, negative errno on failure.
+ */
+int ethosu_gem_create_with_handle(struct drm_file *file,
+				  struct drm_device *ddev,
+				  u64 *size, u32 flags, u32 *handle)
+{
+	struct drm_gem_dma_object *mem;
+	struct ethosu_gem_object *bo;
+	int ret;
+
+	mem = drm_gem_dma_create(ddev, *size);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+
+	bo = to_ethosu_bo(&mem->base);
+	bo->flags = flags;
+
+	/*
+	 * Allocate an id in the handle table where the object is registered;
+	 * the handle returned to userspace refers to that id.
+	 */
+	ret = drm_gem_handle_create(file, &mem->base, handle);
+	if (!ret)
+		*size = bo->base.base.size;
+
+	/* Drop the reference from allocation - the handle holds it now.
+ +struct dma { + s8 region; + u64 len; + u64 offset; + s64 stride[2]; +}; + +struct dma_state { + u16 size0; + u16 size1; + s8 mode; + struct dma src; + struct dma dst; +}; + +struct buffer { + u64 base; + u32 length; + s8 region; +}; + +struct feat_matrix { + u64 base[4]; + s64 stride_x; + s64 stride_y; + s64 stride_c; + s8 region; + u8 broadcast; + u16 stride_kernel; + u16 precision; + u16 depth; + u16 width; + u16 width0; + u16 height[3]; + u8 pad_top; + u8 pad_left; + u8 pad_bottom; + u8 pad_right; +}; + +struct cmd_state { + struct dma_state dma; + struct buffer scale[2]; + struct buffer weight[4]; + struct feat_matrix ofm; + struct feat_matrix ifm; + struct feat_matrix ifm2; +}; + +static void cmd_state_init(struct cmd_state *st) +{ + /* Initialize to all 1s to detect missing setup */ + memset(st, 0xff, sizeof(*st)); +} + +static u64 cmd_to_addr(u32 *cmd) +{ + return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1]; +} + +static u64 dma_length(struct ethosu_validated_cmdstream_info *info, + struct dma_state *dma_st, struct dma *dma) +{ + s8 mode = dma_st->mode; + u64 len = dma->len; + + if (mode >= 1) { + len += dma->stride[0]; + len *= dma_st->size0; + } + if (mode == 2) { + len += dma->stride[1]; + len *= dma_st->size1; + } + if (dma->region >= 0) + info->region_size[dma->region] = max(info->region_size[dma->region], + len + dma->offset); + + return len; +} + +static u64 feat_matrix_length(struct ethosu_validated_cmdstream_info *info, + struct feat_matrix *fm, + u32 x, u32 y, u32 c) +{ + u32 element_size, storage = fm->precision >> 14; + int tile = 0; + u64 addr; + + if (fm->region < 0) + return U64_MAX; + + switch (storage) { + case 0: + if (x >= fm->width0 + 1) { + x -= fm->width0 + 1; + tile += 1; + } + if (y >= fm->height[tile] + 1) { + y -= fm->height[tile] + 1; + tile += 2; + } + break; + case 1: + if (y >= fm->height[1] + 1) { + y -= fm->height[1] + 1; + tile = 2; + } else if (y >= fm->height[0] + 1) { + y -= fm->height[0] + 1; + tile = 1; + } + break; + } + if (fm->base[tile] == U64_MAX) + return U64_MAX; + + addr = fm->base[tile] + y * fm->stride_y; + + switch ((fm->precision >> 6) & 0x3) { /* format */ + case 0: /* nhwc */ + addr += x * fm->stride_x + c; + break; + case 1: /* nhcwb16 */ + element_size = BIT((fm->precision >> 1) & 0x3); + + addr += (c / 16) * fm->stride_c + (16 * x + (c & 0xf)) * element_size; + break; + } + + info->region_size[fm->region] = max(info->region_size[fm->region], addr + 1); + + return addr; +} + +static int calc_sizes(struct drm_device *ddev, + struct ethosu_validated_cmdstream_info *info, + u16 op, struct cmd_state *st, + bool ifm, bool ifm2, bool weight, bool scale) +{ + u64 len; + + if (ifm) { + if (st->ifm.stride_kernel == U16_MAX) + return -EINVAL; + u32 stride_y = ((st->ifm.stride_kernel >> 8) & 0x2) + + ((st->ifm.stride_kernel >> 1) & 0x1) + 1; + u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) + + (st->ifm.stride_kernel & 0x1) + 1; + u32 ifm_height = st->ofm.height[2] * stride_y + + st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom); + u32 ifm_width = st->ofm.width * stride_x + + st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right); + + len = feat_matrix_length(info, &st->ifm, ifm_width, + ifm_height, st->ifm.depth); + dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n", + op, st->ifm.region, st->ifm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + if (ifm2) { + len = feat_matrix_length(info, &st->ifm2, st->ifm.depth, + 0, st->ofm.depth); + dev_dbg(ddev->dev, "op %d: 
IFM2:%d:0x%llx-0x%llx\n", + op, st->ifm2.region, st->ifm2.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + if (weight) { + dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n", + op, st->weight[0].region, st->weight[0].base, + st->weight[0].base + st->weight[0].length - 1); + if (st->weight[0].region < 0 || st->weight[0].base == U64_MAX || + st->weight[0].length == U32_MAX) + return -EINVAL; + info->region_size[st->weight[0].region] = + max(info->region_size[st->weight[0].region], + st->weight[0].base + st->weight[0].length); + } + + if (scale) { + dev_dbg(ddev->dev, "op %d: S:%d:0x%llx-0x%llx\n", + op, st->scale[0].region, st->scale[0].base, + st->scale[0].base + st->scale[0].length - 1); + if (st->scale[0].region < 0 || st->scale[0].base == U64_MAX || + st->scale[0].length == U32_MAX) + return -EINVAL; + info->region_size[st->scale[0].region] = + max(info->region_size[st->scale[0].region], + st->scale[0].base + st->scale[0].length); + } + + len = feat_matrix_length(info, &st->ofm, st->ofm.width, + st->ofm.height[2], st->ofm.depth); + dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n", + op, st->ofm.region, st->ofm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + info->output_region[st->ofm.region] = true; + + return 0; +} + +static int calc_sizes_elemwise(struct drm_device *ddev, + struct ethosu_validated_cmdstream_info *info, + u16 op, struct cmd_state *st, + bool ifm, bool ifm2) +{ + u32 height, width, depth; + u64 len; + + if (ifm) { + height = st->ifm.broadcast & 0x1 ? 0 : st->ofm.height[2]; + width = st->ifm.broadcast & 0x2 ? 0 : st->ofm.width; + depth = st->ifm.broadcast & 0x4 ? 0 : st->ofm.depth; + + len = feat_matrix_length(info, &st->ifm, width, + height, depth); + dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n", + op, st->ifm.region, st->ifm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + if (ifm2) { + height = st->ifm2.broadcast & 0x1 ? 0 : st->ofm.height[2]; + width = st->ifm2.broadcast & 0x2 ? 0 : st->ofm.width; + depth = st->ifm2.broadcast & 0x4 ? 
0 : st->ofm.depth; + + len = feat_matrix_length(info, &st->ifm2, width, + height, depth); + dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n", + op, st->ifm2.region, st->ifm2.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + len = feat_matrix_length(info, &st->ofm, st->ofm.width, + st->ofm.height[2], st->ofm.depth); + dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n", + op, st->ofm.region, st->ofm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + info->output_region[st->ofm.region] = true; + + return 0; +} + +static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev, + u32 __user *ucmds, + struct ethosu_gem_object *bo, + u32 size) +{ + struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc(sizeof(*info), GFP_KERNEL); + struct ethosu_device *edev = to_ethosu_device(ddev); + u32 *bocmds = bo->base.vaddr; + struct cmd_state st; + int i, ret; + + if (!info) + return -ENOMEM; + info->cmd_size = size; + + cmd_state_init(&st); + + for (i = 0; i < size / 4; i++) { + bool use_ifm, use_ifm2, use_scale; + u64 dstlen, srclen; + u16 cmd, param; + u32 cmds[2]; + u64 addr; + + if (get_user(cmds[0], ucmds++)) + return -EFAULT; + + bocmds[i] = cmds[0]; + + cmd = cmds[0]; + param = cmds[0] >> 16; + + if (cmd & 0x4000) { + if (get_user(cmds[1], ucmds++)) + return -EFAULT; + + i++; + bocmds[i] = cmds[1]; + addr = cmd_to_addr(cmds); + } + + switch (cmd) { + case NPU_OP_DMA_START: + srclen = dma_length(info, &st.dma, &st.dma.src); + dstlen = dma_length(info, &st.dma, &st.dma.dst); + + if (st.dma.dst.region >= 0) + info->output_region[st.dma.dst.region] = true; + dev_dbg(ddev->dev, "cmd: DMA SRC:%d:0x%llx+0x%llx DST:%d:0x%llx+0x%llx\n", + st.dma.src.region, st.dma.src.offset, srclen, + st.dma.dst.region, st.dma.dst.offset, dstlen); + break; + case NPU_OP_CONV: + case NPU_OP_DEPTHWISE: + use_ifm2 = param & 0x1; // weights_ifm2 + use_scale = !(st.ofm.precision & 0x100); + ret = calc_sizes(ddev, info, cmd, &st, true, use_ifm2, + !use_ifm2, use_scale); + if (ret) + return ret; + break; + case NPU_OP_POOL: + use_ifm = param != 0x4; // pooling mode + use_scale = !(st.ofm.precision & 0x100); + ret = calc_sizes(ddev, info, cmd, &st, use_ifm, false, + false, use_scale); + if (ret) + return ret; + break; + case NPU_OP_ELEMENTWISE: + use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) || + (param == 6) || (param == 7) || (param == 0x24)); + use_ifm = st.ifm.broadcast != 8; + ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2); + if (ret) + return ret; + break; + case NPU_OP_RESIZE: // U85 only + WARN_ON(1); // TODO + break; + case NPU_SET_KERNEL_WIDTH_M1: + st.ifm.width = param; + break; + case NPU_SET_KERNEL_HEIGHT_M1: + st.ifm.height[2] = param; + break; + case NPU_SET_KERNEL_STRIDE: + st.ifm.stride_kernel = param; + break; + case NPU_SET_IFM_PAD_TOP: + st.ifm.pad_top = param & 0x7f; + break; + case NPU_SET_IFM_PAD_LEFT: + st.ifm.pad_left = param & 0x7f; + break; + case NPU_SET_IFM_PAD_RIGHT: + st.ifm.pad_right = param & 0xff; + break; + case NPU_SET_IFM_PAD_BOTTOM: + st.ifm.pad_bottom = param & 0xff; + break; + case NPU_SET_IFM_DEPTH_M1: + st.ifm.depth = param; + break; + case NPU_SET_IFM_PRECISION: + st.ifm.precision = param; + break; + case NPU_SET_IFM_BROADCAST: + st.ifm.broadcast = param; + break; + case NPU_SET_IFM_REGION: + st.ifm.region = param & 0x7; + break; + case NPU_SET_IFM_WIDTH0_M1: + st.ifm.width0 = param; + break; + case NPU_SET_IFM_HEIGHT0_M1: + st.ifm.height[0] = param; + break; + case NPU_SET_IFM_HEIGHT1_M1: + st.ifm.height[1] = 
param; + break; + case NPU_SET_IFM_BASE0: + case NPU_SET_IFM_BASE1: + case NPU_SET_IFM_BASE2: + case NPU_SET_IFM_BASE3: + st.ifm.base[cmd & 0x3] = addr; + break; + case NPU_SET_IFM_STRIDE_X: + st.ifm.stride_x = addr; + break; + case NPU_SET_IFM_STRIDE_Y: + st.ifm.stride_y = addr; + break; + case NPU_SET_IFM_STRIDE_C: + st.ifm.stride_c = addr; + break; + + case NPU_SET_OFM_WIDTH_M1: + st.ofm.width = param; + break; + case NPU_SET_OFM_HEIGHT_M1: + st.ofm.height[2] = param; + break; + case NPU_SET_OFM_DEPTH_M1: + st.ofm.depth = param; + break; + case NPU_SET_OFM_PRECISION: + st.ofm.precision = param; + break; + case NPU_SET_OFM_REGION: + st.ofm.region = param & 0x7; + break; + case NPU_SET_OFM_WIDTH0_M1: + st.ofm.width0 = param; + break; + case NPU_SET_OFM_HEIGHT0_M1: + st.ofm.height[0] = param; + break; + case NPU_SET_OFM_HEIGHT1_M1: + st.ofm.height[1] = param; + break; + case NPU_SET_OFM_BASE0: + case NPU_SET_OFM_BASE1: + case NPU_SET_OFM_BASE2: + case NPU_SET_OFM_BASE3: + st.ofm.base[cmd & 0x3] = addr; + break; + case NPU_SET_OFM_STRIDE_X: + st.ofm.stride_x = addr; + break; + case NPU_SET_OFM_STRIDE_Y: + st.ofm.stride_y = addr; + break; + case NPU_SET_OFM_STRIDE_C: + st.ofm.stride_c = addr; + break; + + case NPU_SET_IFM2_BROADCAST: + st.ifm2.broadcast = param; + break; + case NPU_SET_IFM2_PRECISION: + st.ifm2.precision = param; + break; + case NPU_SET_IFM2_REGION: + st.ifm2.region = param & 0x7; + break; + case NPU_SET_IFM2_WIDTH0_M1: + st.ifm2.width0 = param; + break; + case NPU_SET_IFM2_HEIGHT0_M1: + st.ifm2.height[0] = param; + break; + case NPU_SET_IFM2_HEIGHT1_M1: + st.ifm2.height[1] = param; + break; + case NPU_SET_IFM2_BASE0: + case NPU_SET_IFM2_BASE1: + case NPU_SET_IFM2_BASE2: + case NPU_SET_IFM2_BASE3: + st.ifm2.base[cmd & 0x3] = addr; + break; + case NPU_SET_IFM2_STRIDE_X: + st.ifm2.stride_x = addr; + break; + case NPU_SET_IFM2_STRIDE_Y: + st.ifm2.stride_y = addr; + break; + case NPU_SET_IFM2_STRIDE_C: + st.ifm2.stride_c = addr; + break; + + case NPU_SET_WEIGHT_REGION: + st.weight[0].region = param & 0x7; + break; + case NPU_SET_SCALE_REGION: + st.scale[0].region = param & 0x7; + break; + case NPU_SET_WEIGHT_BASE: + st.weight[0].base = addr; + break; + case NPU_SET_WEIGHT_LENGTH: + st.weight[0].length = cmds[1]; + break; + case NPU_SET_SCALE_BASE: + st.scale[0].base = addr; + break; + case NPU_SET_SCALE_LENGTH: + st.scale[0].length = cmds[1]; + break; + case NPU_SET_WEIGHT1_BASE: + st.weight[1].base = addr; + break; + case NPU_SET_WEIGHT1_LENGTH: + st.weight[1].length = cmds[1]; + break; + case NPU_SET_SCALE1_BASE: // NPU_SET_WEIGHT2_BASE (U85) + if (ethosu_is_u65(edev)) + st.scale[1].base = addr; + else + st.weight[2].base = addr; + break; + case NPU_SET_SCALE1_LENGTH: // NPU_SET_WEIGHT2_LENGTH (U85) + if (ethosu_is_u65(edev)) + st.scale[1].length = cmds[1]; + else + st.weight[2].length = cmds[1]; + break; + case NPU_SET_WEIGHT3_BASE: + st.weight[3].base = addr; + break; + case NPU_SET_WEIGHT3_LENGTH: + st.weight[3].length = cmds[1]; + break; + + case NPU_SET_DMA0_SRC_REGION: + if (param & 0x100) + st.dma.src.region = -1; + else + st.dma.src.region = param & 0x7; + st.dma.mode = (param >> 9) & 0x3; + break; + case NPU_SET_DMA0_DST_REGION: + if (param & 0x100) + st.dma.dst.region = -1; + else + st.dma.dst.region = param & 0x7; + break; + case NPU_SET_DMA0_SIZE0: + st.dma.size0 = param; + break; + case NPU_SET_DMA0_SIZE1: + st.dma.size1 = param; + break; + case NPU_SET_DMA0_SRC_STRIDE0: + st.dma.src.stride[0] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_SRC_STRIDE1: + 
st.dma.src.stride[1] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_DST_STRIDE0: + st.dma.dst.stride[0] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_DST_STRIDE1: + st.dma.dst.stride[1] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_SRC: + st.dma.src.offset = addr; + break; + case NPU_SET_DMA0_DST: + st.dma.dst.offset = addr; + break; + case NPU_SET_DMA0_LEN: + st.dma.src.len = st.dma.dst.len = addr; + break; + default: + break; + } + } + + for (i = 0; i < NPU_BASEP_REGION_MAX; i++) { + if (!info->region_size[i]) + continue; + dev_dbg(ddev->dev, "region %d max size: 0x%llx\n", + i, info->region_size[i]); + } + + bo->info = no_free_ptr(info); + return 0; +} + +/** + * ethosu_gem_cmdstream_create() - Create a GEM object and attach it to a handle. + * @file: DRM file. + * @ddev: DRM device. + * @size: Size of the GEM object to allocate. + * @data: User pointer to the command stream to copy in and validate. + * @flags: Combination of drm_ethosu_bo_flags flags. + * @handle: Pointer holding the handle pointing to the new GEM object. + * + * Return: Zero on success + */ +int ethosu_gem_cmdstream_create(struct drm_file *file, + struct drm_device *ddev, + u32 size, u64 data, u32 flags, u32 *handle) +{ + int ret; + struct drm_gem_dma_object *mem; + struct ethosu_gem_object *bo; + + mem = drm_gem_dma_create(ddev, size); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + bo = to_ethosu_bo(&mem->base); + bo->flags = flags; + + ret = ethosu_gem_cmdstream_copy_and_validate(ddev, + (void __user *)(uintptr_t)data, + bo, size); + if (ret) + goto fail; + + /* + * Allocate an id in the idr table where the obj is registered; + * the handle holds the id that userspace sees. + */ + ret = drm_gem_handle_create(file, &mem->base, handle); + +fail: + /* drop reference from allocate - handle holds it now. */ + drm_gem_object_put(&mem->base); + + return ret; +} diff --git a/drivers/accel/ethosu/ethosu_gem.h b/drivers/accel/ethosu/ethosu_gem.h new file mode 100644 index 000000000000..3922895a60fb --- /dev/null +++ b/drivers/accel/ethosu/ethosu_gem.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* Copyright 2025 Arm, Ltd. */ + +#ifndef __ETHOSU_GEM_H__ +#define __ETHOSU_GEM_H__ + +#include "ethosu_device.h" +#include <drm/drm_gem_dma_helper.h> + +struct ethosu_validated_cmdstream_info { + u32 cmd_size; + u64 region_size[NPU_BASEP_REGION_MAX]; + bool output_region[NPU_BASEP_REGION_MAX]; +}; + +/** + * struct ethosu_gem_object - Driver specific GEM object. + */ +struct ethosu_gem_object { + /** @base: Inherit from drm_gem_dma_object. */ + struct drm_gem_dma_object base; + + /** @info: Validated command stream info; non-NULL only for command stream BOs. */ + struct ethosu_validated_cmdstream_info *info; + + /** @flags: Combination of drm_ethosu_bo_flags flags. 
*/ + u32 flags; +}; + +static inline +struct ethosu_gem_object *to_ethosu_bo(struct drm_gem_object *obj) +{ + return container_of(to_drm_gem_dma_obj(obj), struct ethosu_gem_object, base); +} + +struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, + size_t size); + +int ethosu_gem_create_with_handle(struct drm_file *file, + struct drm_device *ddev, + u64 *size, u32 flags, uint32_t *handle); + +int ethosu_gem_cmdstream_create(struct drm_file *file, + struct drm_device *ddev, + u32 size, u64 data, u32 flags, u32 *handle); + +#endif /* __ETHOSU_GEM_H__ */ diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c new file mode 100644 index 000000000000..26e7a2f64d71 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_job.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ +/* Copyright 2025 Arm, Ltd. */ + +#include <linux/bitfield.h> +#include <linux/genalloc.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_dma_helper.h> +#include <drm/drm_print.h> +#include <drm/ethosu_accel.h> + +#include "ethosu_device.h" +#include "ethosu_drv.h" +#include "ethosu_gem.h" +#include "ethosu_job.h" + +#define JOB_TIMEOUT_MS 500 + +static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct ethosu_job, base); +} + +static const char *ethosu_fence_get_driver_name(struct dma_fence *fence) +{ + return "ethosu"; +} + +static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence) +{ + return "ethosu-npu"; +} + +static const struct dma_fence_ops ethosu_fence_ops = { + .get_driver_name = ethosu_fence_get_driver_name, + .get_timeline_name = ethosu_fence_get_timeline_name, +}; + +static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job) +{ + struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo); + struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info; + + for (int i = 0; i < job->region_cnt; i++) { + struct drm_gem_dma_object *bo; + int region = job->region_bo_num[i]; + + bo = to_drm_gem_dma_obj(job->region_bo[i]); + writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region)); + writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region)); + dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr); + } + + if (job->sram_size) { + writel_relaxed(lower_32_bits(dev->sramphys), + dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION)); + writel_relaxed(upper_32_bits(dev->sramphys), + dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION)); + dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n", + ETHOSU_SRAM_REGION, &dev->sramphys); + } + + writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE); + writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI); + writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE); + + writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD); + + dev_dbg(dev->base.dev, + "Submitted cmd at %pad to core\n", &cmd_bo->dma_addr); +} + +static int ethosu_acquire_object_fences(struct ethosu_job *job) +{ + int i, ret; + struct drm_gem_object **bos = job->region_bo; + struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info; + + for (i = 0; i < job->region_cnt; i++) { + bool 
is_write; + + if (!bos[i]) + break; + + ret = dma_resv_reserve_fences(bos[i]->resv, 1); + if (ret) + return ret; + + is_write = info->output_region[job->region_bo_num[i]]; + ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i], + is_write); + if (ret) + return ret; + } + + return 0; +} + +static void ethosu_attach_object_fences(struct ethosu_job *job) +{ + int i; + struct dma_fence *fence = job->inference_done_fence; + struct drm_gem_object **bos = job->region_bo; + struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info; + + for (i = 0; i < job->region_cnt; i++) + if (info->output_region[job->region_bo_num[i]]) + dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); +} + +static int ethosu_job_push(struct ethosu_job *job) +{ + struct ww_acquire_ctx acquire_ctx; + int ret; + + ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx); + if (ret) + return ret; + + ret = ethosu_acquire_object_fences(job); + if (ret) + goto out; + + ret = pm_runtime_resume_and_get(job->dev->base.dev); + if (!ret) { + guard(mutex)(&job->dev->sched_lock); + + drm_sched_job_arm(&job->base); + job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished); + kref_get(&job->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&job->base); + ethosu_attach_object_fences(job); + } + +out: + drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx); + return ret; +} + +static void ethosu_job_cleanup(struct kref *ref) +{ + struct ethosu_job *job = container_of(ref, struct ethosu_job, + refcount); + unsigned int i; + + pm_runtime_put_autosuspend(job->dev->base.dev); + + dma_fence_put(job->done_fence); + dma_fence_put(job->inference_done_fence); + + for (i = 0; i < job->region_cnt; i++) + drm_gem_object_put(job->region_bo[i]); + + drm_gem_object_put(job->cmd_bo); + + kfree(job); +} + +static void ethosu_job_put(struct ethosu_job *job) +{ + kref_put(&job->refcount, ethosu_job_cleanup); +} + +static void ethosu_job_free(struct drm_sched_job *sched_job) +{ + struct ethosu_job *job = to_ethosu_job(sched_job); + + drm_sched_job_cleanup(sched_job); + ethosu_job_put(job); +} + +static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job) +{ + struct ethosu_job *job = to_ethosu_job(sched_job); + struct ethosu_device *dev = job->dev; + struct dma_fence *fence = job->done_fence; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock, + dev->fence_context, ++dev->emit_seqno); + dma_fence_get(fence); + + scoped_guard(mutex, &dev->job_lock) { + dev->in_flight_job = job; + ethosu_job_hw_submit(dev, job); + } + + return fence; +} + +static void ethosu_job_handle_irq(struct ethosu_device *dev) +{ + u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); + + if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) { + dev_err(dev->base.dev, "Error IRQ - %x\n", status); + drm_sched_fault(&dev->sched); + return; + } + + scoped_guard(mutex, &dev->job_lock) { + if (dev->in_flight_job) { + dma_fence_signal(dev->in_flight_job->done_fence); + dev->in_flight_job = NULL; + } + } +} + +static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data) +{ + struct ethosu_device *dev = data; + + ethosu_job_handle_irq(dev); + + return IRQ_HANDLED; +}
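ethosu_job_run() and ethosu_job_handle_irq() above rely on the scope-based lock guards from <linux/cleanup.h> rather than explicit lock/unlock pairs. A short reminder of the idiom, since it governs where dev->in_flight_job may be touched (demo() is hypothetical):

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	static void demo(struct mutex *lock, int *counter)
	{
		scoped_guard(mutex, lock) {
			/* mutex_lock(lock) ran at scope entry */
			(*counter)++;
		}	/* mutex_unlock(lock) runs here, even on early exit */

		guard(mutex)(lock);	/* held until the function returns */
		(*counter)++;
	}

Every reader and writer of in_flight_job sits inside such a scope, so the IRQ thread and the scheduler callbacks cannot race on it.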
+ +static irqreturn_t ethosu_job_irq_handler(int irq, void *data) +{ + struct ethosu_device *dev = data; + u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); + + if (!(status & STATUS_IRQ_RAISED)) + return IRQ_NONE; + + writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD); + return IRQ_WAKE_THREAD; +} + +static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad) +{ + struct ethosu_job *job = to_ethosu_job(bad); + struct ethosu_device *dev = job->dev; + bool running; + u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr; + u32 cmdaddr; + + cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD); + running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS)); + + if (running) { + int ret; + u32 reg; + + ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD, + reg, + reg != cmdaddr, + USEC_PER_MSEC, 100 * USEC_PER_MSEC); + + /* If still running and progress is being made, just return */ + if (!ret) + return DRM_GPU_SCHED_STAT_NO_HANG; + } + + dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n", + running ? "running" : "stopped", + cmdaddr, bocmds[cmdaddr / 4]); + + drm_sched_stop(&dev->sched, bad); + + scoped_guard(mutex, &dev->job_lock) + dev->in_flight_job = NULL; + + /* Proceed with reset now. */ + pm_runtime_force_suspend(dev->base.dev); + pm_runtime_force_resume(dev->base.dev); + + /* Restart the scheduler */ + drm_sched_start(&dev->sched, 0); + + return DRM_GPU_SCHED_STAT_RESET; +} + +static const struct drm_sched_backend_ops ethosu_sched_ops = { + .run_job = ethosu_job_run, + .timedout_job = ethosu_job_timedout, + .free_job = ethosu_job_free +}; + +int ethosu_job_init(struct ethosu_device *edev) +{ + struct device *dev = edev->base.dev; + struct drm_sched_init_args args = { + .ops = &ethosu_sched_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .credit_limit = 1, + .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), + .name = dev_name(dev), + .dev = dev, + }; + int ret; + + spin_lock_init(&edev->fence_lock); + ret = devm_mutex_init(dev, &edev->job_lock); + if (ret) + return ret; + ret = devm_mutex_init(dev, &edev->sched_lock); + if (ret) + return ret; + + edev->irq = platform_get_irq(to_platform_device(dev), 0); + if (edev->irq < 0) + return edev->irq; + + ret = devm_request_threaded_irq(dev, edev->irq, + ethosu_job_irq_handler, + ethosu_job_irq_handler_thread, + IRQF_SHARED, KBUILD_MODNAME, + edev); + if (ret) { + dev_err(dev, "failed to request irq\n"); + return ret; + } + + edev->fence_context = dma_fence_context_alloc(1); + + ret = drm_sched_init(&edev->sched, &args); + if (ret) + dev_err(dev, "Failed to create scheduler: %d\n", ret); + + return ret; +} + +void ethosu_job_fini(struct ethosu_device *dev) +{ + drm_sched_fini(&dev->sched); +} + +int ethosu_job_open(struct ethosu_file_priv *ethosu_priv) +{ + struct ethosu_device *dev = ethosu_priv->edev; + struct drm_gpu_scheduler *sched = &dev->sched; + int ret; + + ret = drm_sched_entity_init(&ethosu_priv->sched_entity, + DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + WARN_ON(ret); + return ret; +} + +void ethosu_job_close(struct ethosu_file_priv *ethosu_priv) +{ + struct drm_sched_entity *entity = &ethosu_priv->sched_entity; + + drm_sched_entity_destroy(entity); +}
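Before the ioctl plumbing below, a sketch of the submit ABI from userspace's point of view. The struct and field names come from the uapi used here; the ioctl request macro is assumed to be the usual DRM_IOCTL_* generated name and is not spelled out in this patch, and the region indices are illustrative:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/ethosu_accel.h>

	static int demo_submit(int fd, uint32_t cmd_handle,
			       uint32_t in_bo, uint32_t out_bo)
	{
		struct drm_ethosu_job job = {
			.cmd_bo = cmd_handle,	/* validated cmdstream BO */
			.region_bo_handles = { [0] = in_bo, [1] = out_bo },
		};
		struct drm_ethosu_submit args = {
			.jobs = (uintptr_t)&job,
			.job_count = 1,
			/* .pad must stay zero */
		};

		/* DRM_IOCTL_ETHOSU_SUBMIT is assumed from the uapi header */
		return ioctl(fd, DRM_IOCTL_ETHOSU_SUBMIT, &args);
	}

Each job's BO handles must cover exactly the regions its validated command stream touches; ethosu_ioctl_submit_job() below enforces that in both directions.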
+ +static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file, + struct drm_ethosu_job *job) +{ + struct ethosu_device *edev = to_ethosu_device(dev); + struct ethosu_file_priv *file_priv = file->driver_priv; + struct ethosu_job *ejob = NULL; + struct ethosu_validated_cmdstream_info *cmd_info; + int ret = 0; + + /* BO region 2 is reserved if SRAM is used */ + if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size) + return -EINVAL; + + if (edev->npu_info.sram_size < job->sram_size) + return -EINVAL; + + ejob = kzalloc(sizeof(*ejob), GFP_KERNEL); + if (!ejob) + return -ENOMEM; + + kref_init(&ejob->refcount); + + ejob->dev = edev; + ejob->sram_size = job->sram_size; + + ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL); + if (!ejob->done_fence) { + ret = -ENOMEM; + goto out_put_job; + } + + ret = drm_sched_job_init(&ejob->base, + &file_priv->sched_entity, + 1, NULL, file->client_id); + if (ret) + goto out_put_job; + + ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo); + if (!ejob->cmd_bo) { + ret = -ENOENT; + goto out_cleanup_job; + } + cmd_info = to_ethosu_bo(ejob->cmd_bo)->info; + if (!cmd_info) { + ret = -EINVAL; + goto out_cleanup_job; + } + + for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) { + struct drm_gem_object *gem; + + /* Can only omit a BO handle if the region is not used or used for SRAM */ + if (!job->region_bo_handles[i] && + (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size))) + continue; + + if (job->region_bo_handles[i] && !cmd_info->region_size[i]) { + dev_err(dev->dev, + "Cmdstream BO handle %d set for unused region %d\n", + job->region_bo_handles[i], i); + ret = -EINVAL; + goto out_cleanup_job; + } + + gem = drm_gem_object_lookup(file, job->region_bo_handles[i]); + if (!gem) { + dev_err(dev->dev, + "Invalid BO handle %d for region %d\n", + job->region_bo_handles[i], i); + ret = -ENOENT; + goto out_cleanup_job; + } + + ejob->region_bo[ejob->region_cnt] = gem; + ejob->region_bo_num[ejob->region_cnt] = i; + ejob->region_cnt++; + + if (to_ethosu_bo(gem)->info) { + dev_err(dev->dev, + "Cmdstream BO handle %d used for region %d\n", + job->region_bo_handles[i], i); + ret = -EINVAL; + goto out_cleanup_job; + } + + /* Verify the command stream doesn't have accesses outside the BO */ + if (cmd_info->region_size[i] > gem->size) { + dev_err(dev->dev, + "cmd stream region %d size greater than BO size (%llu > %zu)\n", + i, cmd_info->region_size[i], gem->size); + ret = -EOVERFLOW; + goto out_cleanup_job; + } + } + ret = ethosu_job_push(ejob); + +out_cleanup_job: + if (ret) + drm_sched_job_cleanup(&ejob->base); +out_put_job: + ethosu_job_put(ejob); + + return ret; +} + +int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ethosu_submit *args = data; + int ret = 0; + unsigned int i = 0; + + if (args->pad) { + drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n"); + return -EINVAL; + } + + struct drm_ethosu_job __free(kvfree) *jobs = + kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); + if (!jobs) + return -ENOMEM; + + if (copy_from_user(jobs, + (void __user *)(uintptr_t)args->jobs, + args->job_count * sizeof(*jobs))) { + drm_dbg(dev, "Failed to copy incoming job array\n"); + return -EFAULT; + } + + for (i = 0; i < args->job_count; i++) { + ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]); + if (ret) + return ret; + } + + return 0; +}
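ethosu_ioctl_submit() above and ethosu_gem_cmdstream_copy_and_validate() earlier both lean on the scope-based allocation cleanup from <linux/cleanup.h>: __free() ties kfree()/kvfree() to the variable's scope, and no_free_ptr() transfers ownership out of that scope. A condensed sketch (demo_copy() is hypothetical):

	#include <linux/cleanup.h>
	#include <linux/slab.h>
	#include <linux/uaccess.h>

	static void *demo_copy(const void __user *src, size_t len)
	{
		void *buf __free(kfree) = kmalloc(len, GFP_KERNEL);

		if (!buf)
			return NULL;
		if (copy_from_user(buf, src, len))
			return NULL;	/* kfree(buf) runs automatically */

		return no_free_ptr(buf); /* caller now owns the buffer */
	}

This is why the validate path can return on any error without an explicit kfree() and still hand the surviving info structure over via no_free_ptr().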
diff --git a/drivers/accel/ethosu/ethosu_job.h b/drivers/accel/ethosu/ethosu_job.h new file mode 100644 index 000000000000..ff1cf448d094 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_job.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ +/* Copyright 2025 Arm, Ltd. */ + +#ifndef __ETHOSU_JOB_H__ +#define __ETHOSU_JOB_H__ + +#include <linux/kref.h> +#include <drm/gpu_scheduler.h> + +#include "ethosu_device.h" /* for NPU_BASEP_REGION_MAX */ + +struct ethosu_device; +struct ethosu_file_priv; + +struct ethosu_job { + struct drm_sched_job base; + struct ethosu_device *dev; + + struct drm_gem_object *cmd_bo; + struct drm_gem_object *region_bo[NPU_BASEP_REGION_MAX]; + u8 region_bo_num[NPU_BASEP_REGION_MAX]; + u8 region_cnt; + u32 sram_size; + + /* Fence to be signaled by drm-sched once it's done with the job */ + struct dma_fence *inference_done_fence; + + /* Fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + struct kref refcount; +}; + +int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file); + +int ethosu_job_init(struct ethosu_device *dev); +void ethosu_job_fini(struct ethosu_device *dev); +int ethosu_job_open(struct ethosu_file_priv *ethosu_priv); +void ethosu_job_close(struct ethosu_file_priv *ethosu_priv); + +#endif diff --git a/drivers/accel/habanalabs/Kconfig b/drivers/accel/habanalabs/Kconfig index be85336107f9..6d1506acbd72 100644 --- a/drivers/accel/habanalabs/Kconfig +++ b/drivers/accel/habanalabs/Kconfig @@ -6,7 +6,7 @@ config DRM_ACCEL_HABANALABS tristate "HabanaLabs AI accelerators" depends on DRM_ACCEL - depends on X86_64 + depends on X86 && X86_64 depends on PCI && HAS_IOMEM select GENERIC_ALLOCATOR select HWMON @@ -27,3 +27,26 @@ config DRM_ACCEL_HABANALABS To compile this driver as a module, choose M here: the module will be called habanalabs. + +if DRM_ACCEL_HABANALABS + +config HL_HLDIO + bool "Habanalabs NVMe Direct I/O (HLDIO)" + depends on PCI_P2PDMA + depends on BLOCK + help + Enable NVMe peer-to-peer direct I/O support for Habanalabs AI + accelerators. + + This allows direct data transfers between NVMe storage devices + and Habanalabs accelerators without involving system memory, + using PCI peer-to-peer DMA capabilities. + + Requirements: + - CONFIG_PCI_P2PDMA=y + - NVMe device and Habanalabs accelerator under same PCI root complex + - IOMMU disabled or in passthrough mode + - Hardware supporting PCI P2P DMA + + If unsure, say N +endif # DRM_ACCEL_HABANALABS diff --git a/drivers/accel/habanalabs/common/Makefile b/drivers/accel/habanalabs/common/Makefile index e6abffea9f87..b6d00de09db5 100644 --- a/drivers/accel/habanalabs/common/Makefile +++ b/drivers/accel/habanalabs/common/Makefile @@ -13,3 +13,8 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_submission.o common/firmware_if.o \ common/security.o common/state_dump.o \ common/memory_mgr.o common/decoder.o + +# Conditionally add HLDIO support +ifdef CONFIG_HL_HLDIO +HL_COMMON_FILES += common/hldio.o +endif
\ No newline at end of file diff --git a/drivers/accel/habanalabs/common/command_buffer.c b/drivers/accel/habanalabs/common/command_buffer.c index 3a0535ac28b1..0f0d295116e7 100644 --- a/drivers/accel/habanalabs/common/command_buffer.c +++ b/drivers/accel/habanalabs/common/command_buffer.c @@ -27,12 +27,6 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) return -EINVAL; } - if (!hdev->mmu_enable) { - dev_err_ratelimited(hdev->dev, - "Cannot map CB because MMU is disabled\n"); - return -EINVAL; - } - if (cb->is_mmu_mapped) return 0; @@ -45,20 +39,29 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) } mutex_lock(&hdev->mmu_lock); + rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size); if (rc) { dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr); - goto err_va_umap; + goto err_va_pool_free; } + rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV); + if (rc) + goto err_mmu_unmap; + mutex_unlock(&hdev->mmu_lock); cb->is_mmu_mapped = true; - return rc; -err_va_umap: + return 0; + +err_mmu_unmap: + hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size); +err_va_pool_free: mutex_unlock(&hdev->mmu_lock); gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); + return rc; } @@ -358,10 +361,11 @@ out: return rc; } -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { - union hl_cb_args *args = data; + struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = hpriv->hdev; + union hl_cb_args *args = data; u64 handle = 0, device_va = 0; enum hl_device_status status; u32 usage_cnt = 0; diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 8270db0a72a2..dee487724918 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -14,10 +14,10 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ - HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) + HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) -#define MAX_TS_ITER_NUM 10 +#define MAX_TS_ITER_NUM 100 /** * enum hl_cs_wait_status - cs wait status @@ -31,6 +31,24 @@ enum hl_cs_wait_status { CS_WAIT_STATUS_GONE }; +/* + * Data used while handling wait/timestamp nodes. + * The purpose of this struct is to store the needed data for both operations + * in one variable instead of passing large number of arguments to functions. + */ +struct wait_interrupt_data { + struct hl_user_interrupt *interrupt; + struct hl_mmap_mem_buf *buf; + struct hl_mem_mgr *mmg; + struct hl_cb *cq_cb; + u64 ts_handle; + u64 ts_offset; + u64 cq_handle; + u64 cq_offset; + u64 target_value; + u64 intr_timeout_us; +}; + static void job_wq_completion(struct work_struct *work); static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, enum hl_cs_wait_status *status, s64 *timestamp); @@ -280,14 +298,8 @@ bool cs_needs_timeout(struct hl_cs *cs) static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) { - /* - * Patched CB is created for external queues jobs, and for H/W queues - * jobs if the user CB was allocated by driver and MMU is disabled. 
- */ - return (job->queue_type == QUEUE_TYPE_EXT || - (job->queue_type == QUEUE_TYPE_HW && - job->is_kernel_allocated_cb && - !hdev->mmu_enable)); + /* Patched CB is created for external queues jobs */ + return (job->queue_type == QUEUE_TYPE_EXT); } /* @@ -363,14 +375,13 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job) } } - /* For H/W queue jobs, if a user CB was allocated by driver and MMU is - * enabled, the user CB isn't released in cs_parser() and thus should be + /* For H/W queue jobs, if a user CB was allocated by driver, + * the user CB isn't released in cs_parser() and thus should be * released here. This is also true for INT queues jobs which were * allocated by driver. */ - if ((job->is_kernel_allocated_cb && - ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) || - job->queue_type == QUEUE_TYPE_INT))) { + if (job->is_kernel_allocated_cb && + (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) { atomic_dec(&job->user_cb->cs_cnt); hl_cb_put(job->user_cb); } @@ -657,7 +668,7 @@ static inline void cs_release_sob_reset_handler(struct hl_device *hdev, /* * we get refcount upon reservation of signals or signal/wait cs for the * hw_sob object, and need to put it when the first staged cs - * (which cotains the encaps signals) or cs signal/wait is completed. + * (which contains the encaps signals) or cs signal/wait is completed. */ if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || (hl_cs_cmpl->type == CS_TYPE_WAIT) || @@ -804,12 +815,14 @@ out: static void cs_timedout(struct work_struct *work) { + struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); + bool skip_reset_on_timeout, device_reset = false; struct hl_device *hdev; u64 event_mask = 0x0; + uint timeout_sec; int rc; - struct hl_cs *cs = container_of(work, struct hl_cs, - work_tdr.work); - bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false; + + skip_reset_on_timeout = cs->skip_reset_on_timeout; rc = cs_get_unless_zero(cs); if (!rc) @@ -840,29 +853,31 @@ static void cs_timedout(struct work_struct *work) event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; } + timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; + switch (cs->type) { case CS_TYPE_SIGNAL: dev_err(hdev->dev, - "Signal command submission %llu has not finished in time!\n", - cs->sequence); + "Signal command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; case CS_TYPE_WAIT: dev_err(hdev->dev, - "Wait command submission %llu has not finished in time!\n", - cs->sequence); + "Wait command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; case CS_TYPE_COLLECTIVE_WAIT: dev_err(hdev->dev, - "Collective Wait command submission %llu has not finished in time!\n", - cs->sequence); + "Collective Wait command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; default: dev_err(hdev->dev, - "Command submission %llu has not finished in time!\n", - cs->sequence); + "Command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; } @@ -1085,17 +1100,19 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) unsigned long flags; spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { - if (pend->ts_reg_info.buf) { - list_del(&pend->wait_list_node); - hl_mmap_mem_buf_put(pend->ts_reg_info.buf); - hl_cb_put(pend->ts_reg_info.cq_cb); - } else { - 
pend->fence.error = -EIO; - complete_all(&pend->fence.completion); - } + list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); } spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + + spin_lock_irqsave(&interrupt->ts_list_lock, flags); + list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) { + list_del(&pend->list_node); + hl_mmap_mem_buf_put(pend->ts_reg_info.buf); + hl_cb_put(pend->ts_reg_info.cq_cb); + } + spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); } void hl_release_pending_user_interrupts(struct hl_device *hdev) @@ -1140,11 +1157,10 @@ static void force_complete_cs(struct hl_device *hdev) spin_unlock(&hdev->cs_mirror_lock); } -void hl_abort_waitings_for_completion(struct hl_device *hdev) +void hl_abort_waiting_for_cs_completions(struct hl_device *hdev) { force_complete_cs(hdev); force_complete_multi_cs(hdev); - hl_release_pending_user_interrupts(hdev); } static void job_wq_completion(struct work_struct *work) @@ -1168,6 +1184,22 @@ static void cs_completion(struct work_struct *work) hl_complete_job(hdev, job); } +u32 hl_get_active_cs_num(struct hl_device *hdev) +{ + u32 active_cs_num = 0; + struct hl_cs *cs; + + spin_lock(&hdev->cs_mirror_lock); + + list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) + if (!cs->completed) + active_cs_num++; + + spin_unlock(&hdev->cs_mirror_lock); + + return active_cs_num; +} + static int validate_queue_index(struct hl_device *hdev, struct hl_cs_chunk *chunk, enum hl_queue_type *queue_type, @@ -1304,6 +1336,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) return CS_UNRESERVE_SIGNALS; else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) return CS_TYPE_ENGINE_CORE; + else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND) + return CS_TYPE_ENGINES; else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) return CS_TYPE_FLUSH_PCI_HBW_WRITES; else @@ -1326,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) return -EINVAL; } - if (!hl_device_operational(hdev, &status)) { + if (!hl_device_operational(hdev, &status)) return -EBUSY; - } if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && !hdev->supports_staged_submission) { @@ -1717,16 +1750,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, /* Need to wait for restore completion before execution phase */ if (num_chunks) { enum hl_cs_wait_status status; -wait_again: + ret = _hl_cs_wait_ioctl(hdev, ctx, jiffies_to_usecs(hdev->timeout_jiffies), *cs_seq, &status, NULL); if (ret) { - if (ret == -ERESTARTSYS) { - usleep_range(100, 200); - goto wait_again; - } - dev_err(hdev->dev, "Restore CS for context %d failed to complete %d\n", ctx->asid, ret); @@ -1931,8 +1959,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, else cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); - cb = hl_cb_kernel_create(hdev, cb_size, - q_type == QUEUE_TYPE_HW && hdev->mmu_enable); + cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW); if (!cb) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -2135,7 +2162,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) hdev->asic_funcs->hw_queues_unlock(hdev); rc = -EINVAL; - goto out; + goto out_unlock; } /* @@ -2150,15 +2177,21 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) /* Release the id and free allocated 
memory of the handle */ idr_remove(&mgr->handles, handle_id); + + /* unlock before calling ctx_put, where we might sleep */ + spin_unlock(&mgr->lock); hl_ctx_put(encaps_sig_hdl->ctx); kfree(encaps_sig_hdl); + goto out; } else { rc = -EINVAL; dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); } -out: + +out_unlock: spin_unlock(&mgr->lock); +out: return rc; } @@ -2429,10 +2462,13 @@ out: static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, u32 num_engine_cores, u32 core_command) { - int rc; struct hl_device *hdev = hpriv->hdev; void __user *engine_cores_arr; u32 *cores; + int rc; + + if (!hdev->asic_prop.supports_engine_modes) + return -EPERM; if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); @@ -2461,6 +2497,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, return rc; } +static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr, + u32 num_engines, enum hl_engine_command command) +{ + struct hl_device *hdev = hpriv->hdev; + u32 *engines, max_num_of_engines; + void __user *engines_arr; + int rc; + + if (!hdev->asic_prop.supports_engine_modes) + return -EPERM; + + if (command >= HL_ENGINE_COMMAND_MAX) { + dev_err(hdev->dev, "Engine command is invalid\n"); + return -EINVAL; + } + + max_num_of_engines = hdev->asic_prop.max_num_of_engines; + if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT) + max_num_of_engines = hdev->asic_prop.num_engine_cores; + + if (!num_engines || num_engines > max_num_of_engines) { + dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); + return -EINVAL; + } + + engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr; + engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL); + if (!engines) + return -ENOMEM; + + if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) { + dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); + kfree(engines); + return -EFAULT; + } + + rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); + kfree(engines); + + return rc; +} + static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) { struct hl_device *hdev = hpriv->hdev; @@ -2476,8 +2554,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) return 0; } -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; union hl_cs_args *args = data; enum hl_cs_type cs_type = 0; u64 cs_seq = ULONG_MAX; @@ -2507,7 +2586,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) cs_seq = args->in.seq; timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT - ? msecs_to_jiffies(args->in.timeout * 1000) + ? 
secs_to_jiffies(args->in.timeout) : hpriv->hdev->timeout_jiffies; switch (cs_type) { @@ -2532,6 +2611,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, args->in.num_engine_cores, args->in.core_command); break; + case CS_TYPE_ENGINES: + rc = cs_ioctl_engines(hpriv, args->in.engines, + args->in.num_engines, args->in.engine_command); + break; case CS_TYPE_FLUSH_PCI_HBW_WRITES: rc = cs_ioctl_flush_pci_hbw_writes(hpriv); break; @@ -3130,162 +3213,228 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } -static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, - struct hl_cb *cq_cb, - u64 ts_offset, u64 cq_offset, u64 target_value, - spinlock_t *wait_list_lock, - struct hl_user_pending_interrupt **pend) +static inline void set_record_cq_info(struct hl_user_pending_interrupt *record, + struct hl_cb *cq_cb, u32 cq_offset, u32 target_value) +{ + record->ts_reg_info.cq_cb = cq_cb; + record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset; + record->cq_target_value = target_value; +} + +static int validate_and_get_ts_record(struct device *dev, + struct hl_ts_buff *ts_buff, u64 ts_offset, + struct hl_user_pending_interrupt **req_event_record) { - struct hl_ts_buff *ts_buff = buf->private; - struct hl_user_pending_interrupt *requested_offset_record = - (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + - ts_offset; - struct hl_user_pending_interrupt *cb_last = - (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + struct hl_user_pending_interrupt *ts_cb_last; + + *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + ts_offset; + ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); - unsigned long flags, iter_counter = 0; - u64 current_cq_counter; /* Validate ts_offset not exceeding last max */ - if (requested_offset_record >= cb_last) { - dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", - (u64)(uintptr_t)cb_last); + if (*req_event_record >= ts_cb_last) { + dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n", + ts_offset, (u64)(uintptr_t)ts_cb_last); return -EINVAL; } -start_over: - spin_lock_irqsave(wait_list_lock, flags); + return 0; +} - /* Unregister only if we didn't reach the target value - * since in this case there will be no handling in irq context - * and then it's safe to delete the node out of the interrupt list - * then re-use it on other interrupt - */ - if (requested_offset_record->ts_reg_info.in_use) { - current_cq_counter = *requested_offset_record->cq_kernel_addr; - if (current_cq_counter < requested_offset_record->cq_target_value) { - list_del(&requested_offset_record->wait_list_node); - spin_unlock_irqrestore(wait_list_lock, flags); +static void unregister_timestamp_node(struct hl_device *hdev, + struct hl_user_pending_interrupt *record, bool need_lock) +{ + struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt; + bool ts_rec_found = false; + unsigned long flags; - hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); - hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); + if (need_lock) + spin_lock_irqsave(&interrupt->ts_list_lock, flags); - dev_dbg(buf->mmg->dev, - "ts node removed from interrupt list now can re-use\n"); - } else { - dev_dbg(buf->mmg->dev, - "ts node in middle of irq handling\n"); - - /* irq handling in the middle give it time to 
finish */ - spin_unlock_irqrestore(wait_list_lock, flags); - usleep_range(1, 10); - if (++iter_counter == MAX_TS_ITER_NUM) { - dev_err(buf->mmg->dev, - "handling registration interrupt took too long!!\n"); - return -EINVAL; - } + if (record->ts_reg_info.in_use) { + record->ts_reg_info.in_use = false; + list_del(&record->list_node); + ts_rec_found = true; + } + + if (need_lock) + spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); + + /* Put refcounts that were taken when we registered the event */ + if (ts_rec_found) { + hl_mmap_mem_buf_put(record->ts_reg_info.buf); + hl_cb_put(record->ts_reg_info.cq_cb); + } +} + +static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx, + struct wait_interrupt_data *data, unsigned long *flags, + struct hl_user_pending_interrupt **pend) +{ + struct hl_user_pending_interrupt *req_offset_record; + struct hl_ts_buff *ts_buff = data->buf->private; + bool need_lock = false; + int rc; + + rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset, + &req_offset_record); + if (rc) + return rc; + + /* In case the node already registered, need to unregister first then re-use */ + if (req_offset_record->ts_reg_info.in_use) { + /* + * Since interrupt here can be different than the one the node currently registered + * on, and we don't want to lock two lists while we're doing unregister, so + * unlock the new interrupt wait list here and acquire the lock again after you done + */ + if (data->interrupt->interrupt_id != + req_offset_record->ts_reg_info.interrupt->interrupt_id) { - goto start_over; + need_lock = true; + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags); } - } else { - /* Fill up the new registration node info */ - requested_offset_record->ts_reg_info.buf = buf; - requested_offset_record->ts_reg_info.cq_cb = cq_cb; - requested_offset_record->ts_reg_info.timestamp_kernel_addr = - (u64 *) ts_buff->user_buff_address + ts_offset; - requested_offset_record->cq_kernel_addr = - (u64 *) cq_cb->kernel_address + cq_offset; - requested_offset_record->cq_target_value = target_value; - spin_unlock_irqrestore(wait_list_lock, flags); + unregister_timestamp_node(hdev, req_offset_record, need_lock); + + if (need_lock) + spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags); } - *pend = requested_offset_record; + /* Fill up the new registration node info and add it to the list */ + req_offset_record->ts_reg_info.in_use = true; + req_offset_record->ts_reg_info.buf = data->buf; + req_offset_record->ts_reg_info.timestamp_kernel_addr = + (u64 *) ts_buff->user_buff_address + data->ts_offset; + req_offset_record->ts_reg_info.interrupt = data->interrupt; + set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset, + data->target_value); - dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", - requested_offset_record); - return 0; + *pend = req_offset_record; + + return rc; +} + +static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + struct wait_interrupt_data *data, + u32 *status, u64 *timestamp) +{ + struct hl_user_pending_interrupt *pend; + unsigned long flags; + int rc = 0; + + hl_ctx_get(ctx); + + data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); + if (!data->cq_cb) { + rc = -EINVAL; + goto put_ctx; + } + + /* Validate the cq offset */ + if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= + ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { + rc = -EINVAL; + goto put_cq_cb; + } + + data->buf = 
hl_mmap_mem_buf_get(data->mmg, data->ts_handle); + if (!data->buf) { + rc = -EINVAL; + goto put_cq_cb; + } + + spin_lock_irqsave(&data->interrupt->ts_list_lock, flags); + + /* get ts buffer record */ + rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend); + if (rc) { + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); + goto put_ts_buff; + } + + /* We check for completion value as interrupt could have been received + * before we add the timestamp node to the ts list. + */ + if (*pend->cq_kernel_addr >= data->target_value) { + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); + + pend->ts_reg_info.in_use = 0; + *status = HL_WAIT_CS_STATUS_COMPLETED; + *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); + + goto put_ts_buff; + } + + list_add_tail(&pend->list_node, &data->interrupt->ts_list_head); + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); + + rc = *status = HL_WAIT_CS_STATUS_COMPLETED; + + hl_ctx_put(ctx); + + return rc; + +put_ts_buff: + hl_mmap_mem_buf_put(data->buf); +put_cq_cb: + hl_cb_put(data->cq_cb); +put_ctx: + hl_ctx_put(ctx); + + return rc; } static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg, - u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, - u64 target_value, struct hl_user_interrupt *interrupt, - bool register_ts_record, u64 ts_handle, u64 ts_offset, + struct wait_interrupt_data *data, u32 *status, u64 *timestamp) { struct hl_user_pending_interrupt *pend; - struct hl_mmap_mem_buf *buf; - struct hl_cb *cq_cb; unsigned long timeout, flags; long completion_rc; int rc = 0; - timeout = hl_usecs64_to_jiffies(timeout_us); + timeout = hl_usecs64_to_jiffies(data->intr_timeout_us); hl_ctx_get(ctx); - cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); - if (!cq_cb) { + data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); + if (!data->cq_cb) { rc = -EINVAL; goto put_ctx; } /* Validate the cq offset */ - if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >= - ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) { + if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= + ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { rc = -EINVAL; goto put_cq_cb; } - if (register_ts_record) { - dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", - interrupt->interrupt_id, ts_offset, cq_counters_offset); - buf = hl_mmap_mem_buf_get(mmg, ts_handle); - if (!buf) { - rc = -EINVAL; - goto put_cq_cb; - } - - /* get ts buffer record */ - rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, - cq_counters_offset, target_value, - &interrupt->wait_list_lock, &pend); - if (rc) - goto put_ts_buff; - } else { - pend = kzalloc(sizeof(*pend), GFP_KERNEL); - if (!pend) { - rc = -ENOMEM; - goto put_cq_cb; - } - hl_fence_init(&pend->fence, ULONG_MAX); - pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; - pend->cq_target_value = target_value; + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + rc = -ENOMEM; + goto put_cq_cb; } - spin_lock_irqsave(&interrupt->wait_list_lock, flags); + hl_fence_init(&pend->fence, ULONG_MAX); + pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset; + pend->cq_target_value = data->target_value; + spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); + /* We check for completion value as interrupt could have been received - * before we added the node to the wait list + * 
before we add the wait node to the wait list. */ - if (*pend->cq_kernel_addr >= target_value) { - if (register_ts_record) - pend->ts_reg_info.in_use = 0; - spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) { + spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); - *status = HL_WAIT_CS_STATUS_COMPLETED; + if (*pend->cq_kernel_addr >= data->target_value) + *status = HL_WAIT_CS_STATUS_COMPLETED; + else + *status = HL_WAIT_CS_STATUS_BUSY; - if (register_ts_record) { - *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); - goto put_ts_buff; - } else { - pend->fence.timestamp = ktime_get(); - goto set_timestamp; - } - } else if (!timeout_us) { - spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); - *status = HL_WAIT_CS_STATUS_BUSY; pend->fence.timestamp = ktime_get(); goto set_timestamp; } @@ -3295,55 +3444,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * Note that we cannot have sorted list by target value, * in order to shorten the list pass loop, since * same list could have nodes for different cq counter handle. - * Note: - * Mark ts buff offset as in use here in the spinlock protection area - * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record - * before adding the node to the list. this scenario might happen when - * multiple threads are racing on same offset and one thread could - * set the ts buff in ts_buff_get_kernel_ts_record then the other thread - * takes over and get to ts_buff_get_kernel_ts_record and then we will try - * to re-use the same ts buff offset, and will try to delete a non existing - * node from the list. */ - if (register_ts_record) - pend->ts_reg_info.in_use = 1; - - list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); - spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); - - if (register_ts_record) { - rc = *status = HL_WAIT_CS_STATUS_COMPLETED; - goto ts_registration_exit; - } + list_add_tail(&pend->list_node, &data->interrupt->wait_list_head); + spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); /* Wait for interrupt handler to signal completion */ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, timeout); if (completion_rc > 0) { - *status = HL_WAIT_CS_STATUS_COMPLETED; + if (pend->fence.error == -EIO) { + dev_err_ratelimited(hdev->dev, + "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", + pend->fence.error); + rc = -EIO; + *status = HL_WAIT_CS_STATUS_ABORTED; + } else { + *status = HL_WAIT_CS_STATUS_COMPLETED; + } } else { if (completion_rc == -ERESTARTSYS) { dev_err_ratelimited(hdev->dev, "user process got signal while waiting for interrupt ID %d\n", - interrupt->interrupt_id); + data->interrupt->interrupt_id); rc = -EINTR; *status = HL_WAIT_CS_STATUS_ABORTED; } else { - if (pend->fence.error == -EIO) { - dev_err_ratelimited(hdev->dev, - "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", - pend->fence.error); - rc = -EIO; - *status = HL_WAIT_CS_STATUS_ABORTED; - } else { - /* The wait has timed-out. We don't know anything beyond that - * because the workload wasn't submitted through the driver. - * Therefore, from driver's perspective, the workload is still - * executing. - */ - rc = 0; - *status = HL_WAIT_CS_STATUS_BUSY; - } + /* The wait has timed-out. We don't know anything beyond that + * because the workload was not submitted through the driver. 
+ * Therefore, from driver's perspective, the workload is still + * executing. + */ + rc = 0; + *status = HL_WAIT_CS_STATUS_BUSY; } } @@ -3353,23 +3485,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * for ts record, the node will be deleted in the irq handler after * we reach the target value. */ - spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_del(&pend->wait_list_node); - spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); + list_del(&pend->list_node); + spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); set_timestamp: *timestamp = ktime_to_ns(pend->fence.timestamp); kfree(pend); - hl_cb_put(cq_cb); -ts_registration_exit: + hl_cb_put(data->cq_cb); hl_ctx_put(ctx); return rc; -put_ts_buff: - hl_mmap_mem_buf_put(buf); put_cq_cb: - hl_cb_put(cq_cb); + hl_cb_put(data->cq_cb); put_ctx: hl_ctx_put(ctx); @@ -3404,7 +3533,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ * handler to monitor */ spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); + list_add_tail(&pend->list_node, &interrupt->wait_list_head); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); /* We check for completion value as interrupt could have been received @@ -3481,7 +3610,7 @@ wait_again: remove_pending_user_interrupt: spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_del(&pend->wait_list_node); + list_del(&pend->list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); *timestamp = ktime_to_ns(pend->fence.timestamp); @@ -3540,19 +3669,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) return -EINVAL; } - if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, - args->in.interrupt_timeout_us, args->in.cq_counters_handle, - args->in.cq_counters_offset, - args->in.target, interrupt, - !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), - args->in.timestamp_handle, args->in.timestamp_offset, - &status, &timestamp); - else + if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) { + struct wait_interrupt_data wait_intr_data = {0}; + + wait_intr_data.interrupt = interrupt; + wait_intr_data.mmg = &hpriv->mem_mgr; + wait_intr_data.cq_handle = args->in.cq_counters_handle; + wait_intr_data.cq_offset = args->in.cq_counters_offset; + wait_intr_data.ts_handle = args->in.timestamp_handle; + wait_intr_data.ts_offset = args->in.timestamp_offset; + wait_intr_data.target_value = args->in.target; + wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us; + + if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) { + /* + * Allow only one registration at a time. This is needed in order to prevent + * issues while handling the flow of re-use of the same offset. + * Since the registration flow is protected only by the interrupt lock, + * the re-use flow might request to move a ts node to another interrupt list, + * and in such a case we're not protected. + */ + mutex_lock(&hpriv->ctx->ts_reg_lock); + + rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data, + &status, &timestamp); + + mutex_unlock(&hpriv->ctx->ts_reg_lock); + } else + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data, + &status, &timestamp); + } else { rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, args->in.target, interrupt, &status, &timestamp); + } + if (rc) return rc; @@ -3567,8 +3719,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; }
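The ts_reg_lock comment above is the load-bearing design note: a timestamp record can be re-used and moved between interrupt lists, so the lookup-and-move must be a single critical section per context. As a rough user-space model of that rule (a sketch under assumed names, not driver code):

/* toy_ts_reg.c - hypothetical model of serialized record re-registration.
 * Compile with: cc -pthread toy_ts_reg.c
 */
#include <pthread.h>
#include <stdio.h>

struct ts_record {
	int list_id;	/* which interrupt list currently holds the node */
};

static pthread_mutex_t ts_reg_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ts_record rec = { .list_id = 0 };

/* Re-registering moves the record; the read-modify-move must be one critical
 * section, otherwise two racing callers can both try to unlink the node from
 * the old list - the double-delete scenario the driver comment warns about. */
static void register_record(int new_list)
{
	pthread_mutex_lock(&ts_reg_lock);
	printf("move record: list %d -> list %d\n", rec.list_id, new_list);
	rec.list_id = new_list;
	pthread_mutex_unlock(&ts_reg_lock);
}

static void *worker(void *arg)
{
	register_record((int)(long)arg);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, (void *)1L);
	pthread_create(&b, NULL, worker, (void *)2L);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}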
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = hpriv->hdev; union hl_wait_cs_args *args = data; u32 flags = args->in.flags; diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c index 9c8b1b37b510..9f212b17611a 100644 --- a/drivers/accel/habanalabs/common/context.c +++ b/drivers/accel/habanalabs/common/context.c @@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) kfree(ctx->cs_pending); if (ctx->asid != HL_KERNEL_ASID_ID) { - dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid); + dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid); /* The engines are stopped as there is no executing CS, but the * Coresight might be still working by accessing addresses @@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr); + mutex_destroy(&ctx->ts_reg_lock); } else { dev_dbg(hdev->dev, "closing kernel context\n"); hdev->asic_funcs->ctx_fini(ctx); @@ -267,7 +268,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) hl_encaps_sig_mgr_init(&ctx->sig_mgr); - dev_dbg(hdev->dev, "create user context %d\n", ctx->asid); + mutex_init(&ctx->ts_reg_lock); + + dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n", + current->comm, ctx->asid); } return 0; diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 945c0e6758ca..5f0820b19ccb 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -6,6 +6,7 @@ */ #include "habanalabs.h" +#include "hldio.h" #include "../include/hw_ip/mmu/mmu_general.h" #include <linux/pci.h> @@ -18,8 +19,6 @@ #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) #define I2C_MAX_TRANSACTION_LEN 8 -static struct dentry *hl_debug_root; - static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, u8 i2c_len, u64 *val) { @@ -44,9 +43,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; pkt.i2c_len = i2c_len; - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, val); - if (rc) + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, val); + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); return rc; @@ -77,10 +75,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_len = i2c_len; pkt.value = cpu_to_le64(val); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); - - if (rc) + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc && rc != -EAGAIN) dev_err(hdev->dev, 
"Failed to write to I2C, error %d\n", rc); return rc; @@ -101,10 +97,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) pkt.led_index = cpu_to_le32(led); pkt.value = cpu_to_le64(state); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); - - if (rc) + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); } @@ -255,10 +249,7 @@ static int vm_show(struct seq_file *s, void *data) u64 j; int i; - if (!dev_entry->hdev->mmu_enable) - return 0; - - spin_lock(&dev_entry->ctx_mem_hash_spinlock); + mutex_lock(&dev_entry->ctx_mem_hash_mutex); list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) { once = false; @@ -329,7 +320,7 @@ static int vm_show(struct seq_file *s, void *data) } - spin_unlock(&dev_entry->ctx_mem_hash_spinlock); + mutex_unlock(&dev_entry->ctx_mem_hash_mutex); ctx = hl_get_compute_ctx(dev_entry->hdev); if (ctx) { @@ -436,9 +427,6 @@ static int mmu_show(struct seq_file *s, void *data) u64 virt_addr = dev_entry->mmu_addr, phys_addr; int i; - if (!hdev->mmu_enable) - return 0; - if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) ctx = hdev->kernel_ctx; else @@ -492,13 +480,10 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf, struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE]; + char kbuf[MMU_KBUF_SIZE] = {0}; char *c; ssize_t rc; - if (!hdev->mmu_enable) - return count; - if (count > sizeof(kbuf) - 1) goto err; if (copy_from_user(kbuf, buf, count)) @@ -535,9 +520,6 @@ static int mmu_ack_error(struct seq_file *s, void *data) struct hl_device *hdev = dev_entry->hdev; int rc; - if (!hdev->mmu_enable) - return 0; - if (!dev_entry->mmu_cap_mask) { dev_err(hdev->dev, "mmu_cap_mask is not set\n"); goto err; } @@ -560,12 +542,9 @@ static ssize_t mmu_ack_error_value_write(struct file *file, struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE]; + char kbuf[MMU_KBUF_SIZE] = {0}; ssize_t rc; - if (!hdev->mmu_enable) - return count; - if (count > sizeof(kbuf) - 1) goto err; @@ -624,6 +603,198 @@ static int engines_show(struct seq_file *s, void *data) return 0; } +#ifdef CONFIG_HL_HLDIO +/* DIO debugfs functions following the standard pattern */ +static int dio_ssd2hl_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (!hdev->asic_prop.supports_nvme) { + seq_puts(s, "NVMe Direct I/O not supported\n"); + return 0; + } + + seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n"); + seq_printf(s, "Last transfer: %zu bytes\n", dev_entry->dio_stats.last_len_read); + seq_puts(s, "Note: All parameters must be page-aligned (4KB)\n"); + + return 0; +}
+ +static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct hl_ctx *ctx = hdev->kernel_ctx; + char kbuf[128]; + u64 device_va = 0, off_bytes = 0, len_bytes = 0; + u32 fd = 0; + size_t len_read = 0; + int rc, parsed; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + if (count >= sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count] = 0; + + /* Parse: fd=N va=0xADDR off=N len=N */ + parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu", + &fd, &device_va, &off_bytes, &len_bytes); + if (parsed != 4) { + dev_err(hdev->dev, "Invalid format. Expected: fd=N va=0xADDR off=N len=N\n"); + return -EINVAL; + } + + /* Validate file descriptor */ + if (fd == 0) { + dev_err(hdev->dev, "Invalid file descriptor: %u\n", fd); + return -EINVAL; + } + + /* Validate alignment requirements */ + if (!IS_ALIGNED(device_va, PAGE_SIZE) || + !IS_ALIGNED(off_bytes, PAGE_SIZE) || + !IS_ALIGNED(len_bytes, PAGE_SIZE)) { + dev_err(hdev->dev, + "All parameters must be page-aligned (4KB)\n"); + return -EINVAL; + } + + /* Validate transfer size */ + if (len_bytes == 0 || len_bytes > SZ_1G) { + dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\n", + len_bytes); + return -EINVAL; + } + + dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\n", + fd, device_va, off_bytes, len_bytes); + + rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read); + if (rc < 0) { + dev_entry->dio_stats.failed_ops++; + dev_err(hdev->dev, "SSD2HL operation failed: %d\n", rc); + return rc; + } + + /* Update statistics */ + dev_entry->dio_stats.total_ops++; + dev_entry->dio_stats.successful_ops++; + dev_entry->dio_stats.bytes_transferred += len_read; + dev_entry->dio_stats.last_len_read = len_read; + + dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\n", len_read); + + return count; +} + +static int dio_hl2ssd_show(struct seq_file *s, void *data) +{ + seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\n"); + return 0; +} + +static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + dev_dbg(hdev->dev, "HL2SSD operation not implemented\n"); + return -EOPNOTSUPP; +} + +static int dio_stats_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct hl_dio_stats *stats = &dev_entry->dio_stats; + u64 avg_bytes_per_op = 0, success_rate = 0; + + if (!hdev->asic_prop.supports_nvme) { + seq_puts(s, "NVMe Direct I/O not supported\n"); + return 0; + } + + if (stats->successful_ops > 0) + avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops; + + if (stats->total_ops > 0) + success_rate = (stats->successful_ops * 100) / stats->total_ops; + + seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\n"); + seq_printf(s, "Total operations: %llu\n", stats->total_ops); + seq_printf(s, "Successful ops: %llu\n", stats->successful_ops); + seq_printf(s, "Failed ops: %llu\n", stats->failed_ops); + seq_printf(s, "Success rate: %llu%%\n", success_rate); + seq_printf(s, "Total bytes: %llu\n", stats->bytes_transferred); + seq_printf(s, "Avg bytes per op: %llu\n", avg_bytes_per_op); + seq_printf(s, "Last transfer: %zu bytes\n", stats->last_len_read); + + return 0; +}
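Whether a command is accepted hinges on the exact sscanf() pattern in dio_ssd2hl_write() above, including the literal "0x" before the address. A stand-alone check of that format, with made-up inputs:

/* fmt_check.c - standalone test of the "fd=N va=0xADDR off=N len=N" format
 * parsed by the write handler; both input strings are invented examples. */
#include <stdio.h>

int main(void)
{
	const char *good = "fd=3 va=0x7f0000000000 off=4096 len=8192";
	const char *bad = "fd=3 va=7f0000000000 off=4096 len=8192"; /* no 0x */
	unsigned long long va, off, len;
	unsigned int fd;
	int n;

	n = sscanf(good, "fd=%u va=0x%llx off=%llu len=%llu", &fd, &va, &off, &len);
	printf("good: %d of 4 fields\n", n); /* 4: would be accepted */

	n = sscanf(bad, "fd=%u va=0x%llx off=%llu len=%llu", &fd, &va, &off, &len);
	printf("bad: %d of 4 fields\n", n); /* 1: matching stops at "va=0x" */

	return 0;
}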
+ +static int dio_reset_show(struct seq_file *s, void *data) +{ + seq_puts(s, "Write '1' to reset DIO statistics\n"); + return 0; +} + +static ssize_t dio_reset_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + char kbuf[8]; + unsigned long val; + int rc; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + if (count >= sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count] = 0; + + rc = kstrtoul(kbuf, 0, &val); + if (rc) + return rc; + + if (val == 1) { + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); + dev_dbg(hdev->dev, "DIO statistics reset\n"); + } else { + dev_err(hdev->dev, "Write '1' to reset statistics\n"); + return -EINVAL; + } + + return count; +} +#endif + static ssize_t hl_memory_scrub(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { @@ -661,9 +832,6 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) { struct asic_fixed_properties *prop = &hdev->asic_prop; - if (!hdev->mmu_enable) - goto out; - if (prop->dram_supports_virtual_memory && (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) return true; @@ -675,7 +843,7 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) if (addr >= prop->pmmu_huge.start_addr && addr < prop->pmmu_huge.end_addr) return true; -out: + return false; } @@ -685,9 +853,6 @@ static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr, struct asic_fixed_properties *prop = &hdev->asic_prop; u64 dram_start_addr, dram_end_addr; - if (!hdev->mmu_enable) - return false; - if (prop->dram_supports_virtual_memory) { dram_start_addr = prop->dmmu.start_addr; dram_end_addr = prop->dmmu.end_addr; @@ -816,6 +981,113 @@ static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val, } } +static void dump_cfg_access_entry(struct hl_device *hdev, + struct hl_debugfs_cfg_access_entry *entry) +{ + char *access_type = ""; + struct tm tm; + + switch (entry->debugfs_type) { + case DEBUGFS_READ32: + access_type = "READ32 from"; + break; + case DEBUGFS_WRITE32: + access_type = "WRITE32 to"; + break; + case DEBUGFS_READ64: + access_type = "READ64 from"; + break; + case DEBUGFS_WRITE64: + access_type = "WRITE64 to"; + break; + default: + dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type); + return; + } + + time64_to_tm(entry->seconds_since_epoch, 0, &tm); + dev_info(hdev->dev, + "%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1, + tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr); +} + +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ + struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses; + u32 i, head, count = 0; + time64_t entry_time, now; + unsigned long flags; + + now = ktime_get_real_seconds(); + + spin_lock_irqsave(&dbgfs->lock, flags); + head = dbgfs->head; + if (head == 0) + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; + else + i = head - 1; + + /* Walk back until timeout or invalid entry */ + while (dbgfs->cfg_access_list[i].valid) { + entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch; + /* Stop when entry is older than timeout */ + if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC) + break; + + /* print single entry under lock */ + { + struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i]; + /* + * We copy the entry out under lock and then print after + * releasing the lock to minimize time under lock. + */ + spin_unlock_irqrestore(&dbgfs->lock, flags); + dump_cfg_access_entry(hdev, &entry); + spin_lock_irqsave(&dbgfs->lock, flags); + } + + /* mark consumed */ + dbgfs->cfg_access_list[i].valid = false; + + if (i == 0) + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; + else + i--; + count++; + if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN) + break; + } + spin_unlock_irqrestore(&dbgfs->lock, flags); +}
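hl_debugfs_cfg_access_history_dump() above drains a fixed-size ring: the writer advances head, the dumper walks backwards from head-1 and invalidates what it prints. The same pattern in miniature, as a self-contained user-space sketch (length and names invented):

/* toy_ring.c - toy model of the cfg-access history ring, not driver code. */
#include <stdbool.h>
#include <stdio.h>

#define HIST_LEN 8

struct entry { bool valid; unsigned long long addr; };

static struct entry ring[HIST_LEN];
static unsigned int head;

static void log_access(unsigned long long addr)
{
	ring[head].addr = addr;
	ring[head].valid = true;
	head = (head + 1) % HIST_LEN;	/* oldest slot gets overwritten */
}

static void dump_history(void)
{
	unsigned int i = (head == 0) ? HIST_LEN - 1 : head - 1;
	unsigned int count = 0;

	/* newest first; stop at an invalid slot or after one full lap */
	while (ring[i].valid && count < HIST_LEN) {
		printf("access to %#llx\n", ring[i].addr);
		ring[i].valid = false;	/* mark consumed, like the driver does */
		i = (i == 0) ? HIST_LEN - 1 : i - 1;
		count++;
	}
}

int main(void)
{
	for (unsigned long long a = 0; a < 10; a++)
		log_access(0x1000 + a * 4);
	dump_history();	/* prints at most HIST_LEN entries, newest first */
	return 0;
}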
+ +static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size, + enum debugfs_access_type access_type) +{ + struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses; + struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG]; + struct hl_debugfs_cfg_access_entry *new_entry; + unsigned long flags; + + /* Check if address is in config memory */ + if (addr >= mem_reg->region_base && + mem_reg->region_size >= access_size && + addr <= mem_reg->region_base + mem_reg->region_size - access_size) { + + spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags); + + new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head]; + new_entry->seconds_since_epoch = ktime_get_real_seconds(); + new_entry->addr = addr; + new_entry->debugfs_type = access_type; + new_entry->valid = true; + dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1) + % HL_DBGFS_CFG_ACCESS_HIST_LEN; + + spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags); + + } +} + static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type) { @@ -833,6 +1105,7 @@ static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, return rc; } + check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type); rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found); if (rc) { dev_err(hdev->dev, @@ -1431,7 +1704,7 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf, return rc; if (value) - hdev->timeout_jiffies = msecs_to_jiffies(value * 1000); + hdev->timeout_jiffies = secs_to_jiffies(value); else hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; @@ -1553,6 +1826,13 @@ static const struct hl_info_list hl_debugfs_list[] = { {"mmu", mmu_show, mmu_asid_va_write}, {"mmu_error", mmu_ack_error, mmu_ack_error_value_write}, {"engines", engines_show, NULL}, +#ifdef CONFIG_HL_HLDIO + /* DIO entries - only created if NVMe is supported */ + {"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write}, + {"dio_stats", dio_stats_show, NULL}, + {"dio_reset", dio_reset_show, dio_reset_write}, + {"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write}, +#endif }; static int hl_debugfs_open(struct inode *inode, struct file *file) @@ -1583,218 +1863,235 @@ static const struct file_operations hl_debugfs_fops = { .release = single_release, }; -static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry) +static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry, struct dentry *root) { debugfs_create_u8("i2c_bus", 0644, - dev_entry->root, + root, &dev_entry->i2c_bus); debugfs_create_u8("i2c_addr", 0644, - dev_entry->root, + root, &dev_entry->i2c_addr); debugfs_create_u8("i2c_reg", 0644, - dev_entry->root, + root, &dev_entry->i2c_reg); debugfs_create_u8("i2c_len", 0644, - dev_entry->root, + root, &dev_entry->i2c_len); debugfs_create_file("i2c_data", 0644, - dev_entry->root, + root, dev_entry, &hl_i2c_data_fops); debugfs_create_file("led0", 0200, - dev_entry->root, + root, dev_entry, &hl_led0_fops); debugfs_create_file("led1", 0200, - dev_entry->root, + root, 
dev_entry, &hl_led1_fops); debugfs_create_file("led2", 0200, - dev_entry->root, + root, dev_entry, &hl_led2_fops); } -void hl_debugfs_add_device(struct hl_device *hdev) +static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_entry *dev_entry, + struct dentry *root) { - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; int count = ARRAY_SIZE(hl_debugfs_list); struct hl_debugfs_entry *entry; int i; - dev_entry->hdev = hdev; - dev_entry->entry_arr = kmalloc_array(count, - sizeof(struct hl_debugfs_entry), - GFP_KERNEL); - if (!dev_entry->entry_arr) - return; - - dev_entry->data_dma_blob_desc.size = 0; - dev_entry->data_dma_blob_desc.data = NULL; - dev_entry->mon_dump_blob_desc.size = 0; - dev_entry->mon_dump_blob_desc.data = NULL; - - INIT_LIST_HEAD(&dev_entry->file_list); - INIT_LIST_HEAD(&dev_entry->cb_list); - INIT_LIST_HEAD(&dev_entry->cs_list); - INIT_LIST_HEAD(&dev_entry->cs_job_list); - INIT_LIST_HEAD(&dev_entry->userptr_list); - INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); - mutex_init(&dev_entry->file_mutex); - init_rwsem(&dev_entry->state_dump_sem); - spin_lock_init(&dev_entry->cb_spinlock); - spin_lock_init(&dev_entry->cs_spinlock); - spin_lock_init(&dev_entry->cs_job_spinlock); - spin_lock_init(&dev_entry->userptr_spinlock); - spin_lock_init(&dev_entry->ctx_mem_hash_spinlock); - - dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), - hl_debug_root); - debugfs_create_x64("memory_scrub_val", 0644, - dev_entry->root, + root, &hdev->memory_scrub_val); debugfs_create_file("memory_scrub", 0200, - dev_entry->root, + root, dev_entry, &hl_mem_scrub_fops); debugfs_create_x64("addr", 0644, - dev_entry->root, + root, &dev_entry->addr); debugfs_create_file("data32", 0644, - dev_entry->root, + root, dev_entry, &hl_data32b_fops); debugfs_create_file("data64", 0644, - dev_entry->root, + root, dev_entry, &hl_data64b_fops); debugfs_create_file("set_power_state", - 0200, - dev_entry->root, + 0644, + root, dev_entry, &hl_power_fops); debugfs_create_file("device", - 0200, - dev_entry->root, + 0644, + root, dev_entry, &hl_device_fops); debugfs_create_file("clk_gate", - 0200, - dev_entry->root, + 0644, + root, dev_entry, &hl_clk_gate_fops); debugfs_create_file("stop_on_err", 0644, - dev_entry->root, + root, dev_entry, &hl_stop_on_err_fops); debugfs_create_file("dump_security_violations", - 0644, - dev_entry->root, + 0400, + root, dev_entry, &hl_security_violations_fops); debugfs_create_file("dump_razwi_events", - 0644, - dev_entry->root, + 0400, + root, dev_entry, &hl_razwi_check_fops); debugfs_create_file("dma_size", 0200, - dev_entry->root, + root, dev_entry, &hl_dma_size_fops); debugfs_create_blob("data_dma", 0400, - dev_entry->root, + root, &dev_entry->data_dma_blob_desc); debugfs_create_file("monitor_dump_trig", 0200, - dev_entry->root, + root, dev_entry, &hl_monitor_dump_fops); debugfs_create_blob("monitor_dump", 0400, - dev_entry->root, + root, &dev_entry->mon_dump_blob_desc); debugfs_create_x8("skip_reset_on_timeout", 0644, - dev_entry->root, + root, &hdev->reset_info.skip_reset_on_timeout); debugfs_create_file("state_dump", - 0600, - dev_entry->root, + 0644, + root, dev_entry, &hl_state_dump_fops); debugfs_create_file("timeout_locked", 0644, - dev_entry->root, + root, dev_entry, &hl_timeout_locked_fops); debugfs_create_u32("device_release_watchdog_timeout", 0644, - dev_entry->root, + root, &hdev->device_release_watchdog_timeout_sec); + debugfs_create_u16("server_type", + 0444, + root, + &hdev->asic_prop.server_type); + for (i = 0, entry = 
dev_entry->entry_arr ; i < count ; i++, entry++) { + /* Skip DIO entries if NVMe is not supported */ + if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 && + !hdev->asic_prop.supports_nvme) + continue; + debugfs_create_file(hl_debugfs_list[i].name, - 0444, - dev_entry->root, + 0644, + root, entry, &hl_debugfs_fops); entry->info_ent = &hl_debugfs_list[i]; entry->dev_entry = dev_entry; } +} - if (!hdev->asic_prop.fw_security_enabled) - add_secured_nodes(dev_entry); +int hl_debugfs_device_init(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + int count = ARRAY_SIZE(hl_debugfs_list); + + dev_entry->hdev = hdev; + dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL); + if (!dev_entry->entry_arr) + return -ENOMEM; + + dev_entry->data_dma_blob_desc.size = 0; + dev_entry->data_dma_blob_desc.data = NULL; + dev_entry->mon_dump_blob_desc.size = 0; + dev_entry->mon_dump_blob_desc.data = NULL; + + INIT_LIST_HEAD(&dev_entry->file_list); + INIT_LIST_HEAD(&dev_entry->cb_list); + INIT_LIST_HEAD(&dev_entry->cs_list); + INIT_LIST_HEAD(&dev_entry->cs_job_list); + INIT_LIST_HEAD(&dev_entry->userptr_list); + INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); + mutex_init(&dev_entry->file_mutex); + init_rwsem(&dev_entry->state_dump_sem); + spin_lock_init(&dev_entry->cb_spinlock); + spin_lock_init(&dev_entry->cs_spinlock); + spin_lock_init(&dev_entry->cs_job_spinlock); + spin_lock_init(&dev_entry->userptr_spinlock); + mutex_init(&dev_entry->ctx_mem_hash_mutex); + + spin_lock_init(&hdev->debugfs_cfg_accesses.lock); + hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */ + +#ifdef CONFIG_HL_HLDIO + /* Initialize DIO statistics */ + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); +#endif + + return 0; } -void hl_debugfs_remove_device(struct hl_device *hdev) +void hl_debugfs_device_fini(struct hl_device *hdev) { struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; int i; - debugfs_remove_recursive(entry->root); - + mutex_destroy(&entry->ctx_mem_hash_mutex); mutex_destroy(&entry->file_mutex); vfree(entry->data_dma_blob_desc.data); @@ -1804,6 +2101,20 @@ void hl_debugfs_remove_device(struct hl_device *hdev) vfree(entry->state_dump[i]); kfree(entry->entry_arr); + +} + +void hl_debugfs_add_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + dev_entry->root = hdev->drm.accel->debugfs_root; + + add_files_to_device(hdev, dev_entry, dev_entry->root); + + if (!hdev->asic_prop.fw_security_enabled) + add_secured_nodes(dev_entry, dev_entry->root); + } void hl_debugfs_add_file(struct hl_fpriv *hpriv) @@ -1901,18 +2212,18 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) { struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - spin_lock(&dev_entry->ctx_mem_hash_spinlock); + mutex_lock(&dev_entry->ctx_mem_hash_mutex); list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list); - spin_unlock(&dev_entry->ctx_mem_hash_spinlock); + mutex_unlock(&dev_entry->ctx_mem_hash_mutex); } void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) { struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - spin_lock(&dev_entry->ctx_mem_hash_spinlock); + mutex_lock(&dev_entry->ctx_mem_hash_mutex); list_del(&ctx->debugfs_list); - spin_unlock(&dev_entry->ctx_mem_hash_spinlock); + mutex_unlock(&dev_entry->ctx_mem_hash_mutex); } /** @@ -1937,12 +2248,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char 
*data, up_write(&dev_entry->state_dump_sem); } -void __init hl_debugfs_init(void) -{ - hl_debug_root = debugfs_create_dir("habanalabs", NULL); -} - -void hl_debugfs_fini(void) -{ - debugfs_remove_recursive(hl_debug_root); -} diff --git a/drivers/accel/habanalabs/common/decoder.c b/drivers/accel/habanalabs/common/decoder.c index 2aab14d74b53..c03a6da45d00 100644 --- a/drivers/accel/habanalabs/common/decoder.c +++ b/drivers/accel/habanalabs/common/decoder.c @@ -43,36 +43,44 @@ static void dec_print_abnrm_intr_source(struct hl_device *hdev, u32 irq_status) intr_source[2], intr_source[3], intr_source[4], intr_source[5]); } -static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_id) +static void dec_abnrm_intr_work(struct work_struct *work) { + struct hl_dec *dec = container_of(work, struct hl_dec, abnrm_intr_work); + struct hl_device *hdev = dec->hdev; + u32 irq_status, event_mask = 0; bool reset_required = false; - u32 irq_status; - irq_status = RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET); + irq_status = RREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET); - dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %d\n", irq_status, core_id); + dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %d\n", irq_status, dec->core_id); dec_print_abnrm_intr_source(hdev, irq_status); - if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) - reset_required = true; - /* Clear the interrupt */ - WREG32(base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status); + WREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status); /* Flush the interrupt clear */ - RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET); - - if (reset_required) - hl_device_reset(hdev, HL_DRV_RESET_HARD); -} + RREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET); -static void dec_completion_abnrm(struct work_struct *work) -{ - struct hl_dec *dec = container_of(work, struct hl_dec, completion_abnrm_work); - struct hl_device *hdev = dec->hdev; + if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) { + reset_required = true; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + } - dec_error_intr_work(hdev, dec->base_addr, dec->core_id); + if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK) + event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; + + if (irq_status & (VCMD_IRQ_STATUS_ENDCMD_MASK | + VCMD_IRQ_STATUS_BUSERR_MASK | + VCMD_IRQ_STATUS_ABORT_MASK)) + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + + if (reset_required) { + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; + hl_device_cond_reset(hdev, 0, event_mask); + } else if (event_mask) { + hl_notifier_event_send_all(hdev, event_mask); + } } void hl_dec_fini(struct hl_device *hdev) @@ -98,7 +106,7 @@ int hl_dec_init(struct hl_device *hdev) dec = hdev->dec + j; dec->hdev = hdev; - INIT_WORK(&dec->completion_abnrm_work, dec_completion_abnrm); + INIT_WORK(&dec->abnrm_intr_work, dec_abnrm_intr_work); dec->core_id = j; dec->base_addr = hdev->asic_funcs->get_dec_base_addr(hdev, j); if (!dec->base_addr) { diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 9933e5858a36..999c92d7036e 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -14,20 +14,24 @@ #include <linux/hwmon.h> #include <linux/vmalloc.h> +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> + #include <trace/events/habanalabs.h> #define HL_RESET_DELAY_USEC 10000 /* 10ms */ -#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5 +#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30 enum dma_alloc_type { DMA_ALLOC_COHERENT, - DMA_ALLOC_CPU_ACCESSIBLE, 
DMA_ALLOC_POOL, }; #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788 +static void hl_device_heartbeat(struct work_struct *work); + /* * hl_set_dram_bar- sets the bar to allow later access to address * @@ -53,7 +57,8 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi if (is_power_of_2(prop->dram_pci_bar_size)) bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); else - bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * + bar_base_addr = region->region_base + + div64_u64((addr - region->region_base), prop->dram_pci_bar_size) * prop->dram_pci_bar_size; old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); @@ -121,17 +126,14 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t case DMA_ALLOC_COHERENT: ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag); break; - case DMA_ALLOC_CPU_ACCESSIBLE: - ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); - break; case DMA_ALLOC_POOL: ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle); break; } if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) - trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, - caller); + trace_habanalabs_dma_alloc(&(hdev)->pdev->dev, (u64) (uintptr_t) ptr, *dma_handle, + size, caller); return ptr; } @@ -147,15 +149,12 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c case DMA_ALLOC_COHERENT: hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle); break; - case DMA_ALLOC_CPU_ACCESSIBLE: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, cpu_addr); - break; case DMA_ALLOC_POOL: hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); break; } - trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); + trace_habanalabs_dma_free(&(hdev)->pdev->dev, store_cpu_addr, dma_handle, size, caller); } void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, @@ -170,18 +169,6 @@ void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); } -void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, - dma_addr_t *dma_handle, const char *caller) -{ - return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); -} - -void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, - const char *caller) -{ - hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); -} - void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, dma_addr_t *dma_handle, const char *caller) { @@ -194,7 +181,46 @@ void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_ hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); } -int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) +{ + return hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); +} + +void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) +{ + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr); +} + +int hl_dma_map_sgtable_caller(struct hl_device *hdev, 
struct sg_table *sgt, + enum dma_data_direction dir, const char *caller) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int rc, i; + + rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir); + if (rc) + return rc; + + if (!trace_habanalabs_dma_map_page_enabled()) + return 0; + + for_each_sgtable_dma_sg(sgt, sg, i) + trace_habanalabs_dma_map_page(&(hdev)->pdev->dev, + page_to_phys(sg_page(sg)), + sg->dma_address - prop->device_dma_offset_for_host_access, +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length, +#else + sg->length, +#endif + dir, caller); + + return 0; +} + +int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct scatterlist *sg; @@ -212,7 +238,31 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da return 0; } -void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int i; + + hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir); + + if (trace_habanalabs_dma_unmap_page_enabled()) { + for_each_sgtable_dma_sg(sgt, sg, i) + trace_habanalabs_dma_unmap_page(&(hdev)->pdev->dev, + page_to_phys(sg_page(sg)), + sg->dma_address - prop->device_dma_offset_for_host_access, +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length, +#else + sg->length, +#endif + dir, caller); + } +} + +void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct scatterlist *sg; @@ -324,7 +374,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (hdev->reset_info.in_reset) { + if (hdev->device_fini_pending) { + status = HL_DEVICE_STATUS_MALFUNCTION; + } else if (hdev->reset_info.in_reset) { if (hdev->reset_info.in_compute_reset) status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE; else @@ -352,9 +404,9 @@ bool hl_device_operational(struct hl_device *hdev, *status = current_status; switch (current_status) { + case HL_DEVICE_STATUS_MALFUNCTION: case HL_DEVICE_STATUS_IN_RESET: case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: - case HL_DEVICE_STATUS_MALFUNCTION: case HL_DEVICE_STATUS_NEEDS_RESET: return false; case HL_DEVICE_STATUS_OPERATIONAL: @@ -389,18 +441,20 @@ bool hl_ctrl_device_operational(struct hl_device *hdev, static void print_idle_status_mask(struct hl_device *hdev, const char *message, u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE]) { - u32 pad_width[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {}; - - BUILD_BUG_ON(HL_BUSY_ENGINES_MASK_EXT_SIZE != 4); - - pad_width[3] = idle_mask[3] ? 16 : 0; - pad_width[2] = idle_mask[2] || pad_width[3] ? 16 : 0; - pad_width[1] = idle_mask[1] || pad_width[2] ? 16 : 0; - pad_width[0] = idle_mask[0] || pad_width[1] ? 
16 : 0; - - dev_err(hdev->dev, "%s (mask %0*llx_%0*llx_%0*llx_%0*llx)\n", - message, pad_width[3], idle_mask[3], pad_width[2], idle_mask[2], - pad_width[1], idle_mask[1], pad_width[0], idle_mask[0]); + if (idle_mask[3]) + dev_err(hdev->dev, "%s %s (mask %#llx_%016llx_%016llx_%016llx)\n", + dev_name(&hdev->pdev->dev), message, + idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); + else if (idle_mask[2]) + dev_err(hdev->dev, "%s %s (mask %#llx_%016llx_%016llx)\n", + dev_name(&hdev->pdev->dev), message, + idle_mask[2], idle_mask[1], idle_mask[0]); + else if (idle_mask[1]) + dev_err(hdev->dev, "%s %s (mask %#llx_%016llx)\n", + dev_name(&hdev->pdev->dev), message, idle_mask[1], idle_mask[0]); + else + dev_err(hdev->dev, "%s %s (mask %#llx)\n", dev_name(&hdev->pdev->dev), message, + idle_mask[0]); } static void hpriv_release(struct kref *ref) @@ -416,13 +470,14 @@ static void hpriv_release(struct kref *ref) hdev->asic_funcs->send_device_activity(hdev, false); - put_pid(hpriv->taskpid); - hl_debugfs_remove_file(hpriv); mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); + /* There should be no memory buffers at this point and handles IDR can be destroyed */ + hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); + /* Device should be reset if reset-upon-device-release is enabled, or if there is a pending * reset that waits for device release. */ @@ -431,7 +486,7 @@ static void hpriv_release(struct kref *ref) /* Check the device idle status and reset if not idle. * Skip it if already in reset, or if device is going to be reset in any case. */ - if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm) + if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm) device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); if (!device_is_idle) { @@ -453,14 +508,18 @@ static void hpriv_release(struct kref *ref) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); + put_pid(hpriv->taskpid); + if (reset_device) { hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE); } else { /* Scrubbing is handled within hl_device_reset(), so here need to do it directly */ int rc = hdev->asic_funcs->scrub_device_mem(hdev); - if (rc) + if (rc) { dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc); + hl_device_reset(hdev, HL_DRV_RESET_HARD); + } } /* Now we can mark the compute_ctx as not active. 
Even if a reset is running in a different @@ -492,41 +551,77 @@ int hl_hpriv_put(struct hl_fpriv *hpriv) return kref_put(&hpriv->refcount, hpriv_release); } +static void print_device_in_use_info(struct hl_device *hdev, + struct hl_mem_mgr_fini_stats *mm_fini_stats, const char *message) +{ + u32 active_cs_num, dmabuf_export_cnt; + bool unknown_reason = true; + char buf[128]; + size_t size; + int offset; + + size = sizeof(buf); + offset = 0; + + active_cs_num = hl_get_active_cs_num(hdev); + if (active_cs_num) { + unknown_reason = false; + offset += scnprintf(buf + offset, size - offset, " [%u active CS]", active_cs_num); + } + + dmabuf_export_cnt = atomic_read(&hdev->dmabuf_export_cnt); + if (dmabuf_export_cnt) { + unknown_reason = false; + offset += scnprintf(buf + offset, size - offset, " [%u exported dma-buf]", + dmabuf_export_cnt); + } + + if (mm_fini_stats->n_busy_cb) { + unknown_reason = false; + offset += scnprintf(buf + offset, size - offset, " [%u live CB handles]", + mm_fini_stats->n_busy_cb); + } + + if (unknown_reason) + scnprintf(buf + offset, size - offset, " [unknown reason]"); + + dev_notice(hdev->dev, "%s%s\n", message, buf); +} + /* - * hl_device_release - release function for habanalabs device - * - * @inode: pointer to inode structure - * @filp: pointer to file structure + * hl_device_release() - release function for habanalabs device. + * @ddev: pointer to DRM device structure. + * @file: pointer to DRM file private data structure. * * Called when process closes an habanalabs device */ -static int hl_device_release(struct inode *inode, struct file *filp) +void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv) { - struct hl_fpriv *hpriv = filp->private_data; - struct hl_device *hdev = hpriv->hdev; - - filp->private_data = NULL; + struct hl_fpriv *hpriv = file_priv->driver_priv; + struct hl_device *hdev = to_hl_device(ddev); + struct hl_mem_mgr_fini_stats mm_fini_stats; if (!hdev) { pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); put_pid(hpriv->taskpid); - return 0; } hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); - hl_mem_mgr_fini(&hpriv->mem_mgr); + + /* Memory buffers might be still in use at this point and thus the handles IDR destruction + * is postponed to hpriv_release(). + */ + hl_mem_mgr_fini(&hpriv->mem_mgr, &mm_fini_stats); hdev->compute_ctx_in_release = 1; if (!hl_hpriv_put(hpriv)) { - dev_notice(hdev->dev, "User process closed FD but device still in use\n"); + print_device_in_use_info(hdev, &mm_fini_stats, + "User process closed FD but device still in use"); hl_device_reset(hdev, HL_DRV_RESET_HARD); } - hdev->last_open_session_duration_jif = - jiffies - hdev->last_successful_open_jif; - - return 0; + hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; } static int hl_device_release_ctrl(struct inode *inode, struct file *filp) @@ -545,11 +640,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_ctrl_list_lock); out: - /* release the eventfd */ - if (hpriv->notifier_event.eventfd) - eventfd_ctx_put(hpriv->notifier_event.eventfd); - - mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid); kfree(hpriv); @@ -557,18 +647,8 @@ out: return 0; } -/* - * hl_mmap - mmap function for habanalabs device - * - * @*filp: pointer to file structure - * @*vma: pointer to vm_area_struct of the process - * - * Called when process does an mmap on habanalabs device. 
Call the relevant mmap - * function at the end of the common code. - */ -static int hl_mmap(struct file *filp, struct vm_area_struct *vma) +static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) { - struct hl_fpriv *hpriv = filp->private_data; struct hl_device *hdev = hpriv->hdev; unsigned long vm_pgoff; @@ -591,14 +671,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } -static const struct file_operations hl_ops = { - .owner = THIS_MODULE, - .open = hl_device_open, - .release = hl_device_release, - .mmap = hl_mmap, - .unlocked_ioctl = hl_ioctl, - .compat_ioctl = hl_ioctl -}; +/* + * hl_mmap - mmap function for habanalabs device + * + * @*filp: pointer to file structure + * @*vma: pointer to vm_area_struct of the process + * + * Called when process does an mmap on habanalabs device. Call the relevant mmap + * function at the end of the common code. + */ +int hl_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file_priv = filp->private_data; + struct hl_fpriv *hpriv = file_priv->driver_priv; + + return __hl_mmap(hpriv, vma); +} static const struct file_operations hl_ctrl_ops = { .owner = THIS_MODULE, @@ -617,16 +705,16 @@ static void device_release_func(struct device *dev) * device_init_cdev - Initialize cdev and device for habanalabs device * * @hdev: pointer to habanalabs device structure - * @hclass: pointer to the class object of the device + * @class: pointer to the class object of the device * @minor: minor number of the specific device - * @fpos: file operations to install for this device + * @fops: file operations to install for this device * @name: name of the device as it will appear in the filesystem * @cdev: pointer to the char device object that will be initialized * @dev: pointer to the device object that will be initialized * * Initialize a cdev and a Linux device for habanalabs's device. 
*/ -static int device_init_cdev(struct hl_device *hdev, struct class *hclass, +static int device_init_cdev(struct hl_device *hdev, const struct class *class, int minor, const struct file_operations *fops, char *name, struct cdev *cdev, struct device **dev) @@ -640,7 +728,7 @@ static int device_init_cdev(struct hl_device *hdev, struct class *hclass, device_initialize(*dev); (*dev)->devt = MKDEV(hdev->major, minor); - (*dev)->class = hclass; + (*dev)->class = class; (*dev)->release = device_release_func; dev_set_drvdata(*dev, hdev); dev_set_name(*dev, "%s", name); @@ -648,53 +736,55 @@ static int device_init_cdev(struct hl_device *hdev, struct class *hclass, return 0; } -static int device_cdev_sysfs_add(struct hl_device *hdev) +static int cdev_sysfs_debugfs_add(struct hl_device *hdev) { + const struct class *accel_class = hdev->drm.accel->kdev->class; + char name[32]; int rc; - rc = cdev_device_add(&hdev->cdev, hdev->dev); - if (rc) { - dev_err(hdev->dev, - "failed to add a char device to the system\n"); + hdev->cdev_idx = hdev->drm.accel->index; + + /* Initialize cdev and device structures for the control device */ + snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx); + rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name, + &hdev->cdev_ctrl, &hdev->dev_ctrl); + if (rc) return rc; - } rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); if (rc) { - dev_err(hdev->dev, - "failed to add a control char device to the system\n"); - goto delete_cdev_device; + dev_err(hdev->dev_ctrl, + "failed to add an accel control char device to the system\n"); + goto free_ctrl_device; } - /* hl_sysfs_init() must be done after adding the device to the system */ rc = hl_sysfs_init(hdev); if (rc) { dev_err(hdev->dev, "failed to initialize sysfs\n"); goto delete_ctrl_cdev_device; } - hdev->cdev_sysfs_created = true; + hl_debugfs_add_device(hdev); + + hdev->cdev_sysfs_debugfs_created = true; return 0; delete_ctrl_cdev_device: cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); -delete_cdev_device: - cdev_device_del(&hdev->cdev, hdev->dev); +free_ctrl_device: + put_device(hdev->dev_ctrl); return rc; } -static void device_cdev_sysfs_del(struct hl_device *hdev) +static void cdev_sysfs_debugfs_remove(struct hl_device *hdev) { - if (!hdev->cdev_sysfs_created) - goto put_devices; + if (!hdev->cdev_sysfs_debugfs_created) + return; hl_sysfs_fini(hdev); - cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); - cdev_device_del(&hdev->cdev, hdev->dev); -put_devices: - put_device(hdev->dev); + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); put_device(hdev->dev_ctrl); } @@ -727,20 +817,20 @@ static void device_hard_reset_pending(struct work_struct *work) } queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work, - msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); + secs_to_jiffies(HL_PENDING_RESET_PER_SEC)); } } static void device_release_watchdog_func(struct work_struct *work) { - struct hl_device_reset_work *device_release_watchdog_work = - container_of(work, struct hl_device_reset_work, reset_work.work); - struct hl_device *hdev = device_release_watchdog_work->hdev; + struct hl_device_reset_work *watchdog_work = + container_of(work, struct hl_device_reset_work, reset_work.work); + struct hl_device *hdev = watchdog_work->hdev; u32 flags; - dev_dbg(hdev->dev, "Device wasn't released in time. Initiate device reset.\n"); + dev_dbg(hdev->dev, "Device wasn't released in time. 
Initiate hard-reset.\n"); - flags = device_release_watchdog_work->flags | HL_DRV_RESET_FROM_WD_THR; + flags = watchdog_work->flags | HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_WD_THR; hl_device_reset(hdev, flags); } @@ -779,6 +869,13 @@ static int device_early_init(struct hl_device *hdev) gaudi2_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name)); break; + case ASIC_GAUDI2C: + gaudi2_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name)); + break; + case ASIC_GAUDI2D: + gaudi2_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI2D", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -805,7 +902,7 @@ static int device_early_init(struct hl_device *hdev) } for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { - snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i); + snprintf(workq_name, 32, "hl%u-free-jobs-%u", hdev->cdev_idx, (u32) i); hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); if (hdev->cq_wq[i] == NULL) { dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); @@ -814,14 +911,16 @@ static int device_early_init(struct hl_device *hdev) } } - hdev->eq_wq = create_singlethread_workqueue("hl-events"); + snprintf(workq_name, 32, "hl%u-events", hdev->cdev_idx); + hdev->eq_wq = create_singlethread_workqueue(workq_name); if (hdev->eq_wq == NULL) { dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); rc = -ENOMEM; goto free_cq_wq; } - hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0); + snprintf(workq_name, 32, "hl%u-cs-completions", hdev->cdev_idx); + hdev->cs_cmplt_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); if (!hdev->cs_cmplt_wq) { dev_err(hdev->dev, "Failed to allocate CS completions workqueue\n"); @@ -829,7 +928,8 @@ static int device_early_init(struct hl_device *hdev) goto free_eq_wq; } - hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0); + snprintf(workq_name, 32, "hl%u-ts-free-obj", hdev->cdev_idx); + hdev->ts_free_obj_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); if (!hdev->ts_free_obj_wq) { dev_err(hdev->dev, "Failed to allocate Timestamp registration free workqueue\n"); @@ -837,15 +937,15 @@ static int device_early_init(struct hl_device *hdev) goto free_cs_cmplt_wq; } - hdev->prefetch_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); + snprintf(workq_name, 32, "hl%u-prefetch", hdev->cdev_idx); + hdev->prefetch_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); if (!hdev->prefetch_wq) { dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); rc = -ENOMEM; goto free_ts_free_wq; } - hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), - GFP_KERNEL); + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; goto free_prefetch_wq; @@ -857,13 +957,16 @@ static int device_early_init(struct hl_device *hdev) hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); - hdev->reset_wq = create_singlethread_workqueue("hl_device_reset"); + snprintf(workq_name, 32, "hl%u_device_reset", hdev->cdev_idx); + hdev->reset_wq = create_singlethread_workqueue(workq_name); if (!hdev->reset_wq) { rc = -ENOMEM; dev_err(hdev->dev, "Failed to create device reset WQ\n"); goto free_cb_mgr; } + INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); + INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending); hdev->device_reset_work.hdev = hdev; hdev->device_fini_pending = 0; @@ -886,7 +989,8 @@ static int device_early_init(struct 
hl_device *hdev) return 0; free_cb_mgr: - hl_mem_mgr_fini(&hdev->kernel_mem_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr, NULL); + hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr); free_chip_info: kfree(hdev->hl_chip_info); free_prefetch_wq: @@ -929,7 +1033,8 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->clk_throttling.lock); - hl_mem_mgr_fini(&hdev->kernel_mem_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr, NULL); + hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr); kfree(hdev->hl_chip_info); @@ -949,21 +1054,77 @@ static void device_early_fini(struct hl_device *hdev) hdev->asic_funcs->early_fini(hdev); } +static bool is_pci_link_healthy(struct hl_device *hdev) +{ + u16 device_id; + + if (!hdev->pdev) + return false; + + pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id); + + return (device_id == hdev->pdev->device); +} + +static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) +{ + struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info; + u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1; + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (!prop->cpucp_info.eq_health_check_supported) + return true; + + if (!hdev->eq_heartbeat_received) { + dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); + + dev_err(hdev->dev, + "EQ: {CI %u, HB counter %u, last HB time: %ptTs}, PQ: {PI: %u, CI: %u (%u), last HB time: %ptTs}\n", + hdev->event_queue.ci, + heartbeat_debug_info->heartbeat_event_counter, + &hdev->heartbeat_debug_info.last_eq_heartbeat_ts, + hdev->kernel_queues[cpu_q_id].pi, + atomic_read(&hdev->kernel_queues[cpu_q_id].ci), + atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask, + &hdev->heartbeat_debug_info.last_pq_heartbeat_ts); + + hl_eq_dump(hdev, &hdev->event_queue); + + return false; + } + + hdev->eq_heartbeat_received = false; + + return true; +} + static void hl_device_heartbeat(struct work_struct *work) { struct hl_device *hdev = container_of(work, struct hl_device, work_heartbeat.work); + struct hl_info_fw_err_info info = {0}; + u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; - if (!hl_device_operational(hdev, NULL)) + /* Start heartbeat checks only after driver has enabled events from FW */ + if (!hl_device_operational(hdev, NULL) || !hdev->init_done) goto reschedule; - if (!hdev->asic_funcs->send_heartbeat(hdev)) + /* + * For EQ health check need to check if driver received the heartbeat eq event + * in order to validate the eq is working. + * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule. + */ + if (hl_device_eq_heartbeat_received(hdev) && (!hdev->asic_funcs->send_heartbeat(hdev))) goto reschedule; if (hl_device_operational(hdev, NULL)) - dev_err(hdev->dev, "Device heartbeat failed!\n"); + dev_err(hdev->dev, "Device heartbeat failed! PCI link is %s\n", + is_pci_link_healthy(hdev) ? 
"healthy" : "broken"); - hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT); + info.err_type = HL_INFO_FW_HEARTBEAT_ERR; + info.event_mask = &event_mask; + hl_handle_fw_err(hdev, &info); + hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT, event_mask); return; @@ -1007,13 +1168,6 @@ static int device_late_init(struct hl_device *hdev) } hdev->high_pll = hdev->asic_prop.high_pll; - - if (hdev->heartbeat) { - INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); - schedule_delayed_work(&hdev->work_heartbeat, - usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); - } - hdev->late_init_done = true; return 0; @@ -1030,9 +1184,6 @@ static void device_late_fini(struct hl_device *hdev) if (!hdev->late_init_done) return; - if (hdev->heartbeat) - cancel_delayed_work_sync(&hdev->work_heartbeat); - if (hdev->asic_funcs->late_fini) hdev->asic_funcs->late_fini(hdev); @@ -1120,11 +1271,25 @@ static void take_release_locks(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_ctrl_list_lock); } +static void hl_abort_waiting_for_completions(struct hl_device *hdev) +{ + hl_abort_waiting_for_cs_completions(hdev); + + /* Release all pending user interrupts, each pending user interrupt + * holds a reference to a user context. + */ + hl_release_pending_user_interrupts(hdev); +} + static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, bool skip_wq_flush) { - if (hard_reset) + if (hard_reset) { + if (hdev->heartbeat) + cancel_delayed_work_sync(&hdev->work_heartbeat); + device_late_fini(hdev); + } /* * Halt the engines and disable interrupts so we won't get any more @@ -1139,10 +1304,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r /* flush the MMU prefetch workqueue */ flush_workqueue(hdev->prefetch_wq); - /* Release all pending user interrupts, each pending user interrupt - * holds a reference to user context - */ - hl_release_pending_user_interrupts(hdev); + hl_abort_waiting_for_completions(hdev); } /* @@ -1234,7 +1396,6 @@ int hl_device_resume(struct hl_device *hdev) return 0; disable_device: - pci_clear_master(hdev->pdev); pci_disable_device(hdev->pdev); return rc; @@ -1243,18 +1404,18 @@ disable_device: static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) { struct task_struct *task = NULL; - struct list_head *fd_list; - struct hl_fpriv *hpriv; - struct mutex *fd_lock; + struct list_head *hpriv_list; + struct hl_fpriv *hpriv; + struct mutex *hpriv_lock; u32 pending_cnt; - fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; - fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; + hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + hpriv_list = control_dev ? 
&hdev->fpriv_ctrl_list : &hdev->fpriv_list; /* Giving time for user to close FD, and for processes that are inside * hl_device_open to finish */ - if (!list_empty(fd_list)) + if (!list_empty(hpriv_list)) ssleep(1); if (timeout) { @@ -1270,12 +1431,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool } } - mutex_lock(fd_lock); + mutex_lock(hpriv_lock); /* This section must be protected because we are dereferencing * pointers that are freed if the process exits */ - list_for_each_entry(hpriv, fd_list, dev_node) { + list_for_each_entry(hpriv, hpriv_list, dev_node) { task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); if (task) { dev_info(hdev->dev, "Killing user process pid=%d\n", @@ -1285,17 +1446,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool put_task_struct(task); } else { - /* - * If we got here, it means that process was killed from outside the driver - * right after it started looping on fd_list and before get_pid_task, thus - * we don't need to kill it. - */ dev_dbg(hdev->dev, - "Can't get task struct for user process, assuming process was killed from outside the driver\n"); + "Can't get task struct for user process %d, process was killed from outside the driver\n", + pid_nr(hpriv->taskpid)); } } - mutex_unlock(fd_lock); + mutex_unlock(hpriv_lock); /* * We killed the open users, but that doesn't mean they are closed. @@ -1307,7 +1464,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool */ wait_for_processes: - while ((!list_empty(fd_list)) && (pending_cnt)) { + while ((!list_empty(hpriv_list)) && (pending_cnt)) { dev_dbg(hdev->dev, "Waiting for all unmap operations to finish before hard reset\n"); @@ -1317,7 +1474,7 @@ wait_for_processes: } /* All processes exited successfully */ - if (list_empty(fd_list)) + if (list_empty(hpriv_list)) return 0; /* Give up waiting for processes to exit */ @@ -1331,17 +1488,44 @@ wait_for_processes: static void device_disable_open_processes(struct hl_device *hdev, bool control_dev) { - struct list_head *fd_list; + struct list_head *hpriv_list; struct hl_fpriv *hpriv; - struct mutex *fd_lock; + struct mutex *hpriv_lock; - fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; - fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; + hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; - mutex_lock(fd_lock); - list_for_each_entry(hpriv, fd_list, dev_node) + mutex_lock(hpriv_lock); + list_for_each_entry(hpriv, hpriv_list, dev_node) hpriv->hdev = NULL; - mutex_unlock(fd_lock); + mutex_unlock(hpriv_lock); +} + +static void send_disable_pci_access(struct hl_device *hdev, u32 flags) +{ + /* If the reset is due to a heartbeat failure, the device CPU is not + * responsive, in which case there is no point sending it the PCI-access-disable message. + */ + if ((flags & HL_DRV_RESET_HARD) && + !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) { + /* Disable PCI access from the device F/W so it won't send + * us additional interrupts. We disable MSI/MSI-X at + * the halt_engines function and we can't have the F/W + * sending us interrupts after that. We need to disable + * the access here because if the device is marked + * disabled, the message won't be sent. Also, in case + * of a heartbeat failure, the device CPU is marked as + * disabled, so this message won't be sent either. + */
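The reset-flags guard above reduces to a small predicate; restated for readability (editor's sketch: the helper name is invented, the driver open-codes this condition):

static inline bool should_send_pci_access_disable(u32 flags)
{
	/* Only a plain hard reset qualifies: on a heartbeat-triggered reset
	 * the device CPU is unresponsive, and when the FW performs the
	 * reset itself (bypass) the message is unnecessary.
	 */
	return (flags & HL_DRV_RESET_HARD) &&
	       !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW));
}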
+ if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) + return; + + /* disable_irq also generates a sync irq; this verifies that the last EQs are handled + * before 'disabled' is set. The IRQ will be enabled again in the request_irq call. + */ + if (hdev->cpu_queues_enable) + disable_irq(pci_irq_vector(hdev->pdev, hdev->asic_prop.eq_interrupt_id)); + } } static void handle_reset_trigger(struct hl_device *hdev, u32 flags) @@ -1382,28 +1566,31 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) } else { hdev->reset_info.reset_trigger_repeated = 1; } +} - /* If reset is due to heartbeat, device CPU is no responsive in - * which case no point sending PCI disable message to it. - * - * If F/W is performing the reset, no need to send it a message to disable - * PCI access +static void reset_heartbeat_debug_info(struct hl_device *hdev) +{ + hdev->heartbeat_debug_info.last_pq_heartbeat_ts = 0; + hdev->heartbeat_debug_info.last_eq_heartbeat_ts = 0; + hdev->heartbeat_debug_info.heartbeat_event_counter = 0; +} + +static inline void device_heartbeat_schedule(struct hl_device *hdev) +{ + if (!hdev->heartbeat) + return; + + reset_heartbeat_debug_info(hdev); + + /* + * Before scheduling the next heartbeat, the driver checks whether an EQ heartbeat + * event has been received. For the first schedule, set the indication to true; + * from then on it is true only if the FW actually sent the event. */ - if ((flags & HL_DRV_RESET_HARD) && - !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) { - /* Disable PCI access from device F/W so he won't send - * us additional interrupts. We disable MSI/MSI-X at - * the halt_engines function and we can't have the F/W - * sending us interrupts after that. We need to disable - * the access here because if the device is marked - * disable, the message won't be send. Also, in case - * of heartbeat, the device CPU is marked as disable - * so this message won't be sent - */ - if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) - dev_warn(hdev->dev, - "Failed to disable PCI access by F/W\n"); - } + hdev->eq_heartbeat_received = true; + + schedule_delayed_work(&hdev->work_heartbeat, + usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); } /* @@ -1424,12 +1611,11 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) */ int hl_device_reset(struct hl_device *hdev, u32 flags) { - bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, - reset_upon_device_release = false, schedule_hard_reset = false, - delay_reset, from_dev_release, from_watchdog_thread; + bool hard_reset, from_hard_reset_thread, fw_reset, reset_upon_device_release, + schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; - int i, rc; + int i, rc, hw_fini_rc; if (!hdev->init_done) { dev_err(hdev->dev, "Can't reset before initialization is done\n"); @@ -1442,6 +1628,12 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE); delay_reset = !!(flags & HL_DRV_RESET_DELAY); from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); + reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; + + if (hdev->cpld_shutdown) { + dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! 
Device is NOT usable\n"); + return -EIO; + } if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); @@ -1449,30 +1641,26 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) } if (!hard_reset && !hdev->asic_prop.supports_compute_reset) { - hard_instead_soft = true; + dev_dbg(hdev->dev, "asic doesn't support compute reset - do hard-reset instead\n"); hard_reset = true; } - if (hdev->reset_upon_device_release && from_dev_release) { + if (reset_upon_device_release) { if (hard_reset) { dev_crit(hdev->dev, "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n"); return -EINVAL; } - reset_upon_device_release = true; - goto do_reset; } if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) { - hard_instead_soft = true; + dev_dbg(hdev->dev, + "asic doesn't allow inference soft reset - do hard-reset instead\n"); hard_reset = true; } - if (hard_instead_soft) - dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n"); - do_reset: /* Re-entry of reset thread */ if (from_hard_reset_thread && hdev->process_kill_trial_cnt) @@ -1480,14 +1668,14 @@ do_reset: /* * Prevent concurrency in this function - only one reset should be - * done at any given time. Only need to perform this if we didn't - * get from the dedicated hard reset thread + * done at any given time. We need to perform this only if we didn't + * get here from a dedicated hard reset thread. */ if (!from_hard_reset_thread) { /* Block future CS/VM/JOB completion operations */ spin_lock(&hdev->reset_info.lock); if (hdev->reset_info.in_reset) { - /* We only allow scheduling of a hard reset during compute reset */ + /* We allow scheduling of a hard reset only during a compute reset */ if (hard_reset && hdev->reset_info.in_compute_reset) hdev->reset_info.hard_reset_schedule_flags = flags; spin_unlock(&hdev->reset_info.lock); @@ -1505,15 +1693,17 @@ do_reset: /* Cancel the device release watchdog work if required. * In case of reset-upon-device-release while the release watchdog work is - * scheduled, do hard-reset instead of compute-reset. + * scheduled due to a hard-reset, do hard-reset instead of compute-reset. */ if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) { + struct hl_device_reset_work *watchdog_work = + &hdev->device_release_watchdog_work; + hdev->reset_info.watchdog_active = 0; if (!from_watchdog_thread) - cancel_delayed_work_sync( - &hdev->device_release_watchdog_work.reset_work); + cancel_delayed_work_sync(&watchdog_work->reset_work); - if (from_dev_release) { + if (from_dev_release && (watchdog_work->flags & HL_DRV_RESET_HARD)) { hdev->reset_info.in_compute_reset = 0; flags |= HL_DRV_RESET_HARD; flags &= ~HL_DRV_RESET_DEV_RELEASE; @@ -1524,7 +1714,9 @@ do_reset: if (delay_reset) usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); +escalate_reset_flow: handle_reset_trigger(hdev, flags); + send_disable_pci_access(hdev, flags); /* This also blocks future CS/VM/JOB completion operations */ hdev->disabled = true; @@ -1539,7 +1731,6 @@ do_reset: dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); } -again: if ((hard_reset) && (!from_hard_reset_thread)) { hdev->reset_info.hard_reset_pending = true; @@ -1592,7 +1783,7 @@ kill_processes: } /* Reset the H/W. 
It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); + hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); if (hard_reset) { hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; @@ -1619,20 +1810,26 @@ kill_processes: hl_ctx_put(ctx); } + if (hw_fini_rc) { + rc = hw_fini_rc; + goto out_err; + } /* Finished tear-down, starting to re-initialize */ if (hard_reset) { hdev->device_cpu_disabled = false; hdev->reset_info.hard_reset_pending = false; + /* + * Put the device in an unusable state if there are 2 back to back resets due to + * fatal errors. + */ if (hdev->reset_info.reset_trigger_repeated && - (hdev->reset_info.prev_reset_trigger == - HL_DRV_RESET_FW_FATAL_ERR)) { - /* if there 2 back to back resets from FW, - * ensure driver puts the driver in a unusable state - */ + (hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR || + hdev->reset_info.prev_reset_trigger == + HL_DRV_RESET_HEARTBEAT)) { dev_crit(hdev->dev, - "%s Consecutive FW fatal errors received, stopping hard reset\n", + "%s Consecutive fatal errors, stopping hard reset\n", dev_name(&(hdev)->pdev->dev)); rc = -EIO; goto out_err; @@ -1770,6 +1967,8 @@ kill_processes: if (hard_reset) { hdev->reset_info.hard_reset_cnt++; + device_heartbeat_schedule(hdev); + /* After reset is done, we are ready to receive events from * the F/W. We can't do it before because we will ignore events * and if those events are fatal, we won't know about it and @@ -1784,10 +1983,8 @@ kill_processes: dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); flags = hdev->reset_info.hard_reset_schedule_flags; hdev->reset_info.hard_reset_schedule_flags = 0; - hdev->disabled = true; hard_reset = true; - handle_reset_trigger(hdev, flags); - goto again; + goto escalate_reset_flow; } } @@ -1804,20 +2001,19 @@ out_err: "%s Failed to reset! Device is NOT usable\n", dev_name(&(hdev)->pdev->dev)); hdev->reset_info.hard_reset_cnt++; - } else if (reset_upon_device_release) { - spin_unlock(&hdev->reset_info.lock); - dev_err(hdev->dev, "Failed to reset device after user release\n"); - flags |= HL_DRV_RESET_HARD; - flags &= ~HL_DRV_RESET_DEV_RELEASE; - hard_reset = true; - goto again; } else { + if (reset_upon_device_release) { + dev_err(hdev->dev, "Failed to reset device after user release\n"); + flags &= ~HL_DRV_RESET_DEV_RELEASE; + } else { + dev_err(hdev->dev, "Failed to do compute reset\n"); + hdev->reset_info.compute_reset_cnt++; + } + spin_unlock(&hdev->reset_info.lock); - dev_err(hdev->dev, "Failed to do compute reset\n"); - hdev->reset_info.compute_reset_cnt++; flags |= HL_DRV_RESET_HARD; hard_reset = true; - goto again; + goto escalate_reset_flow; } hdev->reset_info.in_reset = 0; @@ -1840,10 +2036,6 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) { struct hl_ctx *ctx = NULL; - /* Device release watchdog is only for hard reset */ - if (!(flags & HL_DRV_RESET_HARD) && hdev->asic_prop.allow_inference_soft_reset) - goto device_reset; - /* F/W reset cannot be postponed */ if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW) goto device_reset; @@ -1855,7 +2047,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) } ctx = hl_get_compute_ctx(hdev); - if (!ctx || !ctx->hpriv->notifier_event.eventfd) + if (!ctx) + goto device_reset; + + /* + * There is no point in postponing the reset if user is not registered for events. 
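The postpone-or-reset decision described in this comment reduces to one condition; a sketch under the same assumptions (hypothetical helper, not part of the patch):

static bool reset_can_be_postponed(struct hl_ctx *ctx, bool watchdog_active)
{
	/* Postpone only if someone will act on the notification: either the
	 * user still listens on the eventfd, or a release watchdog is
	 * already pending from a previous event.
	 */
	return ctx->hpriv->notifier_event.eventfd || watchdog_active;
}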
+ * However if no eventfd_ctx exists but the device release watchdog is already scheduled, it + * just implies that user has unregistered as part of handling a previous event. In this + * case an immediate reset is not required. + */ + if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active) goto device_reset; /* Schedule the device release watchdog work unless reset is already in progress or if the @@ -1867,14 +2068,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) goto device_reset; } - if (hdev->reset_info.watchdog_active) + if (hdev->reset_info.watchdog_active) { + hdev->device_release_watchdog_work.flags |= flags; goto out; + } hdev->device_release_watchdog_work.flags = flags; - dev_dbg(hdev->dev, "Device is going to be reset in %u sec unless being released\n", + dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n", hdev->device_release_watchdog_timeout_sec); schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work, - msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000)); + secs_to_jiffies(hdev->device_release_watchdog_timeout_sec)); hdev->reset_info.watchdog_active = 1; out: spin_unlock(&hdev->reset_info.lock); @@ -1883,7 +2086,7 @@ out: hl_ctx_put(ctx); - hl_abort_waitings_for_completion(hdev); + hl_abort_waiting_for_completions(hdev); return 0; @@ -1893,7 +2096,7 @@ device_reset: if (ctx) hl_ctx_put(ctx); - return hl_device_reset(hdev, flags); + return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD); } static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask) @@ -1902,7 +2105,7 @@ static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 notifier_event->events_mask |= event_mask; if (notifier_event->eventfd) - eventfd_signal(notifier_event->eventfd, 1); + eventfd_signal(notifier_event->eventfd); mutex_unlock(¬ifier_event->lock); } @@ -1929,14 +2132,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) hl_notifier_event_send(&hpriv->notifier_event, event_mask); mutex_unlock(&hdev->fpriv_list_lock); - - /* control device */ - mutex_lock(&hdev->fpriv_ctrl_list_lock); - - list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node) - hl_notifier_event_send(&hpriv->notifier_event, event_mask); - - mutex_unlock(&hdev->fpriv_ctrl_list_lock); } /* @@ -1948,48 +2143,17 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) * ASIC specific initialization functions. 
Finally, create the cdev and the * Linux device to expose it to the user */ -int hl_device_init(struct hl_device *hdev, struct class *hclass) +int hl_device_init(struct hl_device *hdev) { int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; - char *name; - bool add_cdev_sysfs_on_err = false; - - hdev->cdev_idx = hdev->id / 2; - - name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx); - if (!name) { - rc = -ENOMEM; - goto out_disabled; - } - - /* Initialize cdev and device structures */ - rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, - &hdev->cdev, &hdev->dev); - - kfree(name); - - if (rc) - goto out_disabled; - - name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx); - if (!name) { - rc = -ENOMEM; - goto free_dev; - } - - /* Initialize cdev and device structures for control device */ - rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, - name, &hdev->cdev_ctrl, &hdev->dev_ctrl); - - kfree(name); - - if (rc) - goto free_dev; + struct hl_ts_free_jobs *free_jobs_data; + bool expose_interfaces_on_err = false; + void *p; /* Initialize ASIC function pointers and perform early init */ rc = device_early_init(hdev); if (rc) - goto free_dev_ctrl; + goto out_disabled; user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + hdev->asic_prop.user_interrupt_count; @@ -2001,15 +2165,43 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) rc = -ENOMEM; goto early_fini; } + + /* Timestamp records supported only if CQ supported in device */ + if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) { + for (i = 0 ; i < user_interrupt_cnt ; i++) { + p = vzalloc(TIMESTAMP_FREE_NODES_NUM * + sizeof(struct timestamp_reg_free_node)); + if (!p) { + rc = -ENOMEM; + goto free_usr_intr_mem; + } + free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data; + free_jobs_data->free_nodes_pool = p; + free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM; + free_jobs_data->next_avail_free_node_idx = 0; + } + } + } + + free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data; + p = vzalloc(TIMESTAMP_FREE_NODES_NUM * + sizeof(struct timestamp_reg_free_node)); + if (!p) { + rc = -ENOMEM; + goto free_usr_intr_mem; } + free_jobs_data->free_nodes_pool = p; + free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM; + free_jobs_data->next_avail_free_node_idx = 0; + /* * Start calling ASIC initialization. First S/W then H/W and finally * late init */ rc = hdev->asic_funcs->sw_init(hdev); if (rc) - goto free_usr_intr_mem; + goto free_common_usr_intr_mem; /* initialize completion structure for multi CS wait */ @@ -2096,16 +2288,22 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC; hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL; - hl_debugfs_add_device(hdev); - /* debugfs nodes are created in hl_ctx_init so it must be called after - * hl_debugfs_add_device. + rc = hl_debugfs_device_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize debugfs entry structure\n"); + kfree(hdev->kernel_ctx); + goto mmu_fini; + } + + /* The debugfs entry structure is accessed in hl_ctx_init(), so it must be called after + * hl_debugfs_device_init(). 
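The ordering constraint spelled out here is the usual two-phase init pattern: allocate bookkeeping first, expose user-visible files last, unwind in reverse on error. A self-contained sketch of just these two steps (hypothetical wrapper, using the calls shown in this hunk):

static int init_debugfs_then_ctx(struct hl_device *hdev)
{
	int rc;

	rc = hl_debugfs_device_init(hdev);	/* bookkeeping only, no files yet */
	if (rc)
		return rc;

	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);	/* may use that bookkeeping */
	if (rc)
		hl_debugfs_device_fini(hdev);	/* unwind in reverse order */

	return rc;
}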
*/ rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); if (rc) { dev_err(hdev->dev, "failed to initialize kernel context\n"); kfree(hdev->kernel_ctx); - goto remove_device_from_debugfs; + goto debugfs_device_fini; } rc = hl_cb_pool_init(hdev); @@ -2121,11 +2319,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) } /* - * From this point, override rc (=0) in case of an error to allow - * debugging (by adding char devices and create sysfs nodes as part of - * the error flow). + * From this point, override rc (=0) in case of an error to allow debugging + * (by adding char devices and creating sysfs/debugfs files as part of the error flow). */ - add_cdev_sysfs_on_err = true; + expose_interfaces_on_err = true; /* Device is now enabled as part of the initialization requires * communication with the device firmware to get information that @@ -2167,15 +2364,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) } /* - * Expose devices and sysfs nodes to user. - * From here there is no need to add char devices and create sysfs nodes - * in case of an error. + * Expose devices and sysfs/debugfs files to user. + * From here there is no need to expose them in case of an error. */ - add_cdev_sysfs_on_err = false; - rc = device_cdev_sysfs_add(hdev); + expose_interfaces_on_err = false; + + rc = drm_dev_register(&hdev->drm, 0); if (rc) { - dev_err(hdev->dev, - "Failed to add char devices and sysfs nodes\n"); + dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc); + rc = 0; + goto out_disabled; + } + + rc = cdev_sysfs_debugfs_add(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n"); rc = 0; goto out_disabled; } @@ -2200,12 +2403,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) goto out_disabled; } + /* Scheduling the EQ heartbeat thread must come after driver is done with all + * initializations, as we want to make sure the FW gets enough time to be prepared + * to respond to heartbeat packets. + */ + device_heartbeat_schedule(hdev); + dev_notice(hdev->dev, "Successfully added device %s to habanalabs driver\n", dev_name(&(hdev)->pdev->dev)); - hdev->init_done = true; - /* After initialization is done, we are ready to receive events from * the F/W. 
We can't do it before because we will ignore events and if * those events are fatal, we won't know about it and the device will * @@ -2213,6 +2420,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) */ hdev->asic_funcs->enable_events_from_fw(hdev); + hdev->init_done = true; + return 0; cb_pool_fini: @@ -2221,8 +2430,8 @@ release_ctx: if (hl_ctx_put(hdev->kernel_ctx) != 1) dev_err(hdev->dev, "kernel ctx is still alive on initialization failure\n"); -remove_device_from_debugfs: - hl_debugfs_remove_device(hdev); +debugfs_device_fini: + hl_debugfs_device_fini(hdev); mmu_fini: hl_mmu_fini(hdev); eq_fini: @@ -2237,25 +2446,28 @@ hw_queues_destroy: hl_hw_queues_destroy(hdev); sw_fini: hdev->asic_funcs->sw_fini(hdev); +free_common_usr_intr_mem: + vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool); free_usr_intr_mem: - kfree(hdev->user_interrupt); + if (user_interrupt_cnt) { + for (i = 0 ; i < user_interrupt_cnt ; i++) { + if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool) + break; + vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool); + } + kfree(hdev->user_interrupt); + } early_fini: device_early_fini(hdev); -free_dev_ctrl: - put_device(hdev->dev_ctrl); -free_dev: - put_device(hdev->dev); out_disabled: hdev->disabled = true; - if (add_cdev_sysfs_on_err) - device_cdev_sysfs_add(hdev); - if (hdev->pdev) - dev_err(&hdev->pdev->dev, - "Failed to initialize hl%d. Device %s is NOT usable !\n", - hdev->cdev_idx, dev_name(&(hdev)->pdev->dev)); - else - pr_err("Failed to initialize hl%d. Device %s is NOT usable !\n", - hdev->cdev_idx, dev_name(&(hdev)->pdev->dev)); + if (expose_interfaces_on_err) { + drm_dev_register(&hdev->drm, 0); + cdev_sysfs_debugfs_add(hdev); + } + + pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n", + hdev->cdev_idx, dev_name(&hdev->pdev->dev)); return rc; } @@ -2269,12 +2481,13 @@ out_disabled: */ void hl_device_fini(struct hl_device *hdev) { + u32 user_interrupt_cnt; bool device_in_reset; ktime_t timeout; u64 reset_sec; int i, rc; - dev_info(hdev->dev, "Removing device\n"); + dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev)); hdev->device_fini_pending = 1; flush_delayed_work(&hdev->device_reset_work.reset_work); @@ -2350,21 +2563,31 @@ void hl_device_fini(struct hl_device *hdev) hdev->process_kill_trial_cnt = 0; rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false); if (rc) { - dev_crit(hdev->dev, "Failed to kill all open processes\n"); + dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc); device_disable_open_processes(hdev, false); } hdev->process_kill_trial_cnt = 0; rc = device_kill_open_processes(hdev, 0, true); if (rc) { - dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); + dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc); device_disable_open_processes(hdev, true); } hl_cb_pool_fini(hdev); /* Reset the H/W. It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) + dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); + + /* Reset the H/W (if it is accessible). 
It will be in idle state after this returns */ + if (!hdev->cpld_shutdown) { + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) + dev_err(hdev->dev, + "hw_fini failed in device fini while removing device %d\n", rc); + } hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; @@ -2372,8 +2595,6 @@ void hl_device_fini(struct hl_device *hdev) if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) dev_err(hdev->dev, "kernel ctx is still alive\n"); - hl_debugfs_remove_device(hdev); - hl_dec_fini(hdev); hl_vm_fini(hdev); @@ -2389,7 +2610,20 @@ void hl_device_fini(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_fini(hdev, &hdev->completion_queue[i]); kfree(hdev->completion_queue); - kfree(hdev->user_interrupt); + + user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + + hdev->asic_prop.user_interrupt_count; + + if (user_interrupt_cnt) { + if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) { + for (i = 0 ; i < user_interrupt_cnt ; i++) + vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool); + } + + kfree(hdev->user_interrupt); + } + + vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool); hl_hw_queues_destroy(hdev); @@ -2398,8 +2632,11 @@ void hl_device_fini(struct hl_device *hdev) device_early_fini(hdev); - /* Hide devices and sysfs nodes from user */ - device_cdev_sysfs_del(hdev); + /* Hide devices and sysfs/debugfs files from user */ + cdev_sysfs_debugfs_remove(hdev); + drm_dev_unregister(&hdev->drm); + + hl_debugfs_device_fini(hdev); pr_info("removed device successfully\n"); } @@ -2422,7 +2659,7 @@ inline u32 hl_rreg(struct hl_device *hdev, u32 reg) u32 val = readl(hdev->rmmio + reg); if (unlikely(trace_habanalabs_rreg32_enabled())) - trace_habanalabs_rreg32(hdev->dev, reg, val); + trace_habanalabs_rreg32(&(hdev)->pdev->dev, reg, val); return val; } @@ -2440,7 +2677,7 @@ inline u32 hl_rreg(struct hl_device *hdev, u32 reg) inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) { if (unlikely(trace_habanalabs_wreg32_enabled())) - trace_habanalabs_wreg32(hdev->dev, reg, val); + trace_habanalabs_wreg32(&(hdev)->pdev->dev, reg, val); writel(val, hdev->rmmio + reg); } @@ -2566,3 +2803,166 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_ if (event_mask) *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; } + +static void hl_capture_hw_err(struct hl_device *hdev, u16 event_id) +{ + struct hw_err_info *info = &hdev->captured_err_info.hw_err; + + /* Capture only the first HW err */ + if (atomic_cmpxchg(&info->event_detected, 0, 1)) + return; + + info->event.timestamp = ktime_to_ns(ktime_get()); + info->event.event_id = event_id; + + info->event_info_available = true; +} + +void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask) +{ + hl_capture_hw_err(hdev, event_id); + + if (event_mask) + *event_mask |= HL_NOTIFIER_EVENT_CRITICL_HW_ERR; +} + +static void hl_capture_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *fw_info) +{ + struct fw_err_info *info = &hdev->captured_err_info.fw_err; + + /* Capture only the first FW error */ + if (atomic_cmpxchg(&info->event_detected, 0, 1)) + return; + + info->event.timestamp = ktime_to_ns(ktime_get()); + info->event.err_type = fw_info->err_type; + if (fw_info->err_type == HL_INFO_FW_REPORTED_ERR) + info->event.event_id = fw_info->event_id; + + info->event_info_available = true; +} + +void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info) +{ + hl_capture_fw_err(hdev, info); 
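hl_capture_hw_err(), hl_capture_fw_err() and hl_capture_engine_err() all share the same lock-free "latch the first error" idiom built on atomic_cmpxchg(). Its generic shape (illustrative struct and helper, not driver API):

struct first_err_latch {
	atomic_t detected;	/* 0 = armed, 1 = an error was captured */
	u64 timestamp;
};

static bool first_err_latch_try(struct first_err_latch *latch)
{
	/* atomic_cmpxchg() returns the old value: non-zero means another
	 * error already won the race, so this one is dropped until the
	 * latch is re-armed (cf. hl_enable_err_info_capture() below).
	 */
	if (atomic_cmpxchg(&latch->detected, 0, 1))
		return false;

	latch->timestamp = ktime_to_ns(ktime_get());
	return true;
}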
+ + if (info->event_mask) + *info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; +} + +void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count) +{ + struct engine_err_info *info = &hdev->captured_err_info.engine_err; + + /* Capture only the first engine error */ + if (atomic_cmpxchg(&info->event_detected, 0, 1)) + return; + + info->event.timestamp = ktime_to_ns(ktime_get()); + info->event.engine_id = engine_id; + info->event.error_count = error_count; + info->event_info_available = true; +} + +void hl_enable_err_info_capture(struct hl_error_info *captured_err_info) +{ + vfree(captured_err_info->page_fault_info.user_mappings); + memset(captured_err_info, 0, sizeof(struct hl_error_info)); + atomic_set(&captured_err_info->cs_timeout.write_enable, 1); + captured_err_info->undef_opcode.write_enable = true; +} + +void hl_init_cpu_for_irq(struct hl_device *hdev) +{ +#ifdef CONFIG_NUMA + struct cpumask *available_mask = &hdev->irq_affinity_mask; + int numa_node = hdev->pdev->dev.numa_node, i; + static struct cpumask cpu_mask; + + if (numa_node < 0) + return; + + if (!cpumask_and(&cpu_mask, cpumask_of_node(numa_node), cpu_online_mask)) { + dev_err(hdev->dev, "No available affinities in current numa node\n"); + return; + } + + /* Remove HT siblings */ + for_each_cpu(i, &cpu_mask) + cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask); +#endif +} + +void hl_set_irq_affinity(struct hl_device *hdev, int irq) +{ + if (cpumask_empty(&hdev->irq_affinity_mask)) { + dev_dbg(hdev->dev, "affinity mask is empty\n"); + return; + } + + if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask)) + dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq); +} + +void hl_eq_heartbeat_event_handle(struct hl_device *hdev) +{ + hdev->heartbeat_debug_info.heartbeat_event_counter++; + hdev->heartbeat_debug_info.last_eq_heartbeat_ts = ktime_get_real_seconds(); + hdev->eq_heartbeat_received = true; +} + +void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask) +{ + struct hl_clk_throttle *clk_throttle = &hdev->clk_throttling; + ktime_t zero_time = ktime_set(0, 0); + + mutex_lock(&clk_throttle->lock); + + switch (event_type) { + case EQ_EVENT_POWER_EVT_START: + clk_throttle->current_reason |= HL_CLK_THROTTLE_POWER; + clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_POWER; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; + dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); + break; + + case EQ_EVENT_POWER_EVT_END: + clk_throttle->current_reason &= ~HL_CLK_THROTTLE_POWER; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); + dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); + break; + + case EQ_EVENT_THERMAL_EVT_START: + clk_throttle->current_reason |= HL_CLK_THROTTLE_THERMAL; + clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_THERMAL; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; + *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); + break; + + case EQ_EVENT_THERMAL_EVT_END: + clk_throttle->current_reason &= ~HL_CLK_THROTTLE_THERMAL; + clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); + *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + 
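Each throttle reason keeps an open interval: the *_EVT_START cases record .start and zero out .end, the *_EVT_END cases fill .end. A duration helper consistent with that bookkeeping (editor's sketch, not part of the patch):

static s64 throttle_duration_ms(ktime_t start, ktime_t end)
{
	/* A zero 'end' means the throttle interval is still open */
	if (!ktime_compare(end, ktime_set(0, 0)))
		end = ktime_get();

	return ktime_ms_delta(end, start);
}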
dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); + break; + + default: + dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); + break; + } + + mutex_unlock(&clk_throttle->lock); +} + +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask) +{ + hl_handle_critical_hw_err(hdev, event_id, event_mask); + *event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + + /* Avoid any new accesses to the H/W */ + hdev->disabled = true; + hdev->cpld_shutdown = true; +} diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index da892d8fb3d6..eeb6b2a80fc7 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -6,8 +6,9 @@ */ #include "habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include <linux/habanalabs/hl_boot_if.h> +#include <linux/pci.h> #include <linux/firmware.h> #include <linux/crc32.h> #include <linux/slab.h> @@ -40,6 +41,31 @@ static char *comms_sts_str_arr[COMMS_STS_INVLD_LAST] = { [COMMS_STS_TIMEOUT_ERR] = __stringify(COMMS_STS_TIMEOUT_ERR), }; +/** + * hl_fw_version_cmp() - compares the FW version to a specific version + * + * @hdev: pointer to hl_device structure + * @major: major number of a reference version + * @minor: minor number of a reference version + * @subminor: sub-minor number of a reference version + * + * Return 1 if FW version greater than the reference version, -1 if it's + * smaller and 0 if versions are identical. + */ +int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor) +{ + if (hdev->fw_sw_major_ver != major) + return (hdev->fw_sw_major_ver > major) ? 1 : -1; + + if (hdev->fw_sw_minor_ver != minor) + return (hdev->fw_sw_minor_ver > minor) ? 1 : -1; + + if (hdev->fw_sw_sub_minor_ver != subminor) + return (hdev->fw_sw_sub_minor_ver > subminor) ? 1 : -1; + + return 0; +} + static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; @@ -71,41 +97,124 @@ free_fw_ver: return NULL; } -static int extract_fw_sub_versions(struct hl_device *hdev, char *preboot_ver) +/** + * extract_u32_until_given_char() - given a string of the format "<u32><char>*", extract the u32. + * @str: the given string + * @ver_num: the pointer to the extracted u32 to be returned to the caller. + * @given_char: the given char at the end of the u32 in the string + * + * Return: Upon success, return a pointer to the given_char in the string. 
Upon failure, return NULL + */ +static char *extract_u32_until_given_char(char *str, u32 *ver_num, char given_char) { - char major[8], minor[8], *first_dot, *second_dot; - int rc; + char num_str[8] = {}, *ch; - first_dot = strnstr(preboot_ver, ".", 10); - if (first_dot) { - strscpy(major, preboot_ver, first_dot - preboot_ver + 1); - rc = kstrtou32(major, 10, &hdev->fw_major_version); - } else { - rc = -EINVAL; - } + ch = strchrnul(str, given_char); + if (*ch == '\0' || ch == str || ch - str >= sizeof(num_str)) + return NULL; - if (rc) { - dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc); - goto out; + memcpy(num_str, str, ch - str); + if (kstrtou32(num_str, 10, ver_num)) + return NULL; + return ch; +} + +/** + * hl_get_sw_major_minor_subminor() - extract the FW's SW version major, minor, sub-minor + * from the version string + * @hdev: pointer to the hl_device + * @fw_str: the FW's version string + * + * The extracted version is set in the hdev fields: fw_sw_{major/minor/sub_minor}_ver. + * + * fw_str is expected to have one of two possible formats, examples: + * 1) 'Preboot version hl-gaudi2-1.9.0-fw-42.0.1-sec-3' + * 2) 'Preboot version hl-gaudi2-1.9.0-rc-fw-42.0.1-sec-3' + * In those examples, the SW major,minor,subminor are correspondingly: 1,9,0. + * + * Return: 0 for success or a negative error code for failure. + */ +static int hl_get_sw_major_minor_subminor(struct hl_device *hdev, const char *fw_str) +{ + char *end, *start; + + end = strnstr(fw_str, "-rc-", VERSION_MAX_LEN); + if (end == fw_str) + return -EINVAL; + + if (!end) + end = strnstr(fw_str, "-fw-", VERSION_MAX_LEN); + + if (end == fw_str) + return -EINVAL; + + if (!end) + return -EINVAL; + + for (start = end - 1; start != fw_str; start--) { + if (*start == '-') + break; } - /* skip the first dot */ - first_dot++; + if (start == fw_str) + return -EINVAL; - second_dot = strnstr(first_dot, ".", 10); - if (second_dot) { - strscpy(minor, first_dot, second_dot - first_dot + 1); - rc = kstrtou32(minor, 10, &hdev->fw_minor_version); - } else { - rc = -EINVAL; + /* start/end point each to the starting and ending hyphen of the sw version e.g. -1.9.0- */ + start++; + start = extract_u32_until_given_char(start, &hdev->fw_sw_major_ver, '.'); + if (!start) + goto err_zero_ver; + + start++; + start = extract_u32_until_given_char(start, &hdev->fw_sw_minor_ver, '.'); + if (!start) + goto err_zero_ver; + + start++; + start = extract_u32_until_given_char(start, &hdev->fw_sw_sub_minor_ver, '-'); + if (!start) + goto err_zero_ver; + + return 0; + +err_zero_ver: + hdev->fw_sw_major_ver = 0; + hdev->fw_sw_minor_ver = 0; + hdev->fw_sw_sub_minor_ver = 0; + return -EINVAL; +} + +/** + * hl_get_preboot_major_minor() - extract the FW's version major, minor from the version string. + * @hdev: pointer to the hl_device + * @preboot_ver: the FW's version string + * + * preboot_ver is expected to be the format of <major>.<minor>.<sub minor>*, e.g: 42.0.1-sec-3 + * The extracted version is set in the hdev fields: fw_inner_{major/minor}_ver. + * + * Return: 0 on success, negative error code for failure. 
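A character-level walk of extract_u32_until_given_char() over the sample sub-string "42.0.1-sec-3", mirroring how hl_get_sw_major_minor_subminor() and hl_get_preboot_major_minor() consume it (editor's sketch):

static int parse_42_0_1_example(void)
{
	char buf[] = "42.0.1-sec-3";
	u32 major, minor, subminor;
	char *p = buf;

	p = extract_u32_until_given_char(p, &major, '.');	/* major = 42, p -> ".0.1-sec-3" */
	if (!p)
		return -EINVAL;

	p = extract_u32_until_given_char(p + 1, &minor, '.');	/* minor = 0, p -> ".1-sec-3" */
	if (!p)
		return -EINVAL;

	p = extract_u32_until_given_char(p + 1, &subminor, '-');	/* subminor = 1, p -> "-sec-3" */
	return p ? 0 : -EINVAL;
}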
+ */ +static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver) +{ + preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_inner_major_ver, '.'); + if (!preboot_ver) { + dev_err(hdev->dev, "Error parsing preboot major version\n"); + goto err_zero_ver; } - if (rc) - dev_err(hdev->dev, "Error %d parsing preboot minor version\n", rc); + preboot_ver++; -out: - kfree(preboot_ver); - return rc; + preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_inner_minor_ver, '.'); + if (!preboot_ver) { + dev_err(hdev->dev, "Error parsing preboot minor version\n"); + goto err_zero_ver; + } + return 0; + +err_zero_ver: + hdev->fw_inner_major_ver = 0; + hdev->fw_inner_minor_ver = 0; + return -EINVAL; } static int hl_request_fw(struct hl_device *hdev, @@ -262,43 +371,63 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value) { struct cpucp_packet pkt = {}; + int rc; pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(value); - return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "Failed to disable FW's PCI access\n"); + + return rc; } +/** + * hl_fw_send_cpu_message() - send CPU message to the device. + * + * @hdev: pointer to hl_device structure. + * @hw_queue_id: HW queue ID + * @msg: raw data of the message/packet + * @size: size of @msg in bytes + * @timeout_us: timeout in usec to wait for CPU reply on the message + * @result: return code reported by FW + * + * send message to the device CPU. + * + * Return: 0 on success, non-zero for failure. + * -ENOMEM: memory allocation failure + * -EAGAIN: CPU is disabled (try again when enabled) + * -ETIMEDOUT: timeout waiting for FW response + * -EIO: protocol error + */ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, - u16 len, u32 timeout, u64 *result) + u16 size, u32 timeout_us, u64 *result) { struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id]; struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 tmp, expected_ack_val, pi, opcode; struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; struct hl_bd *sent_bd; - u32 tmp, expected_ack_val, pi, opcode; - int rc; + int rc = 0, fw_rc; - pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); + pkt = hl_cpu_accessible_dma_pool_alloc(hdev, size, &pkt_dma_addr); if (!pkt) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for packet to CPU\n"); + dev_err(hdev->dev, "Failed to allocate DMA memory for packet to CPU\n"); return -ENOMEM; } - memcpy(pkt, msg, len); + memcpy(pkt, msg, size); mutex_lock(&hdev->send_cpu_message_lock); /* CPU-CP messages can be sent during soft-reset */ - if (hdev->disabled && !hdev->reset_info.in_compute_reset) { - rc = 0; + if (hdev->disabled && !hdev->reset_info.in_compute_reset) goto out; - } if (hdev->device_cpu_disabled) { - rc = -EIO; + rc = -EAGAIN; goto out; } @@ -314,7 +443,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, * Which means that we don't need to lock the access to the entire H/W * queues module when submitting a JOB to the CPU queue. 
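Given the contract documented above, the canonical caller shape looks like this (editor's sketch; MY_OPCODE is a placeholder, not a real CPUCP opcode):

static int send_simple_cpucp_pkt(struct hl_device *hdev, u64 value, u64 *result)
{
	struct cpucp_packet pkt = {};

	pkt.ctl = cpu_to_le32(MY_OPCODE << CPUCP_PKT_CTL_OPCODE_SHIFT);	/* placeholder opcode */
	pkt.value = cpu_to_le64(value);

	/* 0 on success; -EAGAIN while the device CPU is disabled,
	 * -ETIMEDOUT if FW never acks, -EIO on a protocol error.
	 */
	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt,
						  sizeof(pkt), 0, result);
}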
*/ - hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), len, pkt_dma_addr); + hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), size, pkt_dma_addr); if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) expected_ack_val = queue->pi; @@ -323,7 +452,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, (tmp == expected_ack_val), 1000, - timeout, true); + timeout_us, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); @@ -331,19 +460,27 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, /* If FW performed reset just before sending it a packet, we will get a timeout. * This is expected behavior, hence no need for error message. */ - if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) + if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) { dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n", tmp); - else - dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n", tmp); + } else { + struct hl_bd *bd = queue->kernel_address; + + bd += hl_pi_2_offset(pi); + + dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n" + "Pkt info[%u]: dma_addr: 0x%llx, kernel_addr: %p, len:0x%x, ctl: 0x%x, ptr:0x%llx, dram_bd:%u\n", + tmp, pi, pkt_dma_addr, (void *)pkt, bd->len, bd->ctl, bd->ptr, + queue->dram_bd); + } hdev->device_cpu_disabled = true; goto out; } tmp = le32_to_cpu(pkt->ctl); - rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; - if (rc) { + fw_rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; + if (fw_rc) { opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT; if (!prop->supports_advanced_cpucp_rc) { @@ -352,7 +489,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, goto scrub_descriptor; } - switch (rc) { + switch (fw_rc) { case cpucp_packet_invalid: dev_err(hdev->dev, "CPU packet %d is not supported by F/W\n", opcode); @@ -377,7 +514,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, /* propagate the return code from the f/w to the callers who want to check it */ if (result) - *result = rc; + *result = fw_rc; rc = -EIO; @@ -397,7 +534,7 @@ scrub_descriptor: out: mutex_unlock(&hdev->send_cpu_message_lock); - hl_cpu_accessible_dma_pool_free(hdev, len, pkt); + hl_cpu_accessible_dma_pool_free(hdev, size, pkt); return rc; } @@ -418,7 +555,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) 0, &result); if (rc) - dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); + dev_err(hdev->dev, "failed to unmask event %d", event_type); return rc; } @@ -457,7 +594,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, total_pkt_size, 0, &result); if (rc) - dev_err(hdev->dev, "failed to unmask IRQ array\n"); + dev_err(hdev->dev, "failed to unmask event array\n"); kfree(pkt); @@ -467,7 +604,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, int hl_fw_test_cpu_queue(struct hl_device *hdev) { struct cpucp_packet test_pkt = {}; - u64 result; + u64 result = 0; int rc; test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << @@ -508,6 +645,20 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, size); } +int hl_fw_send_soft_reset(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_SOFT_RESET << 
CPUCP_PKT_CTL_OPCODE_SHIFT); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "failed to send soft-reset msg (err = %d)\n", rc); + + return rc; +} + int hl_fw_send_device_activity(struct hl_device *hdev, bool open) { struct cpucp_packet pkt; @@ -526,16 +677,14 @@ int hl_fw_send_device_activity(struct hl_device *hdev, bool open) int hl_fw_send_heartbeat(struct hl_device *hdev) { struct cpucp_packet hb_pkt; - u64 result; + u64 result = 0; int rc; memset(&hb_pkt, 0, sizeof(hb_pkt)); - hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << - CPUCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << CPUCP_PKT_CTL_OPCODE_SHIFT); hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), 0, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, sizeof(hb_pkt), 0, &result); if ((rc) || (result != CPUCP_PACKET_FENCE_VAL)) return -EIO; @@ -546,42 +695,32 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) rc = -EIO; } + hdev->heartbeat_debug_info.last_pq_heartbeat_ts = ktime_get_real_seconds(); + return rc; } -static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, - u32 sts_val) +static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, u32 sts_val) { bool err_exists = false; if (!(err_val & CPU_BOOT_ERR0_ENABLED)) return false; - if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) { - dev_err(hdev->dev, - "Device boot error - DRAM initialization failed\n"); - err_exists = true; - } + if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) + dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) { + if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) dev_err(hdev->dev, "Device boot error - FIT image corrupted\n"); - err_exists = true; - } - if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) { - dev_err(hdev->dev, - "Device boot error - Thermal Sensor initialization failed\n"); - err_exists = true; - } + if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) + dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n"); if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) { if (hdev->bmc_enable) { - dev_err(hdev->dev, - "Device boot error - Skipped waiting for BMC\n"); - err_exists = true; + dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n"); } else { - dev_info(hdev->dev, - "Device boot message - Skipped waiting for BMC\n"); + dev_info(hdev->dev, "Device boot message - Skipped waiting for BMC\n"); /* This is an info so we don't want it to disable the * device */ @@ -589,43 +728,29 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, } } - if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) { - dev_err(hdev->dev, - "Device boot error - Serdes data from BMC not available\n"); - err_exists = true; - } + if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) + dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n"); - if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) { - dev_err(hdev->dev, - "Device boot error - NIC F/W initialization failed\n"); - err_exists = true; - } + if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) + dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) { - dev_err(hdev->dev, - "Device boot warning - security not ready\n"); - err_exists = true; - } + if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) + dev_err(hdev->dev, "Device boot warning - security not ready\n"); - if 
(err_val & CPU_BOOT_ERR0_SECURITY_FAIL) { + if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) dev_err(hdev->dev, "Device boot error - security failure\n"); - err_exists = true; - } - if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) { + if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) dev_err(hdev->dev, "Device boot error - eFuse failure\n"); - err_exists = true; - } - if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) { + if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n"); - err_exists = true; - } - if (err_val & CPU_BOOT_ERR0_PLL_FAIL) { + if (err_val & CPU_BOOT_ERR0_PLL_FAIL) dev_err(hdev->dev, "Device boot error - PLL failure\n"); - err_exists = true; - } + + if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) + dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n"); if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) { /* Ignore this bit, don't prevent driver loading */ @@ -633,52 +758,32 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL; } - if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) { + if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) dev_err(hdev->dev, "Device boot error - binning failure\n"); - err_exists = true; - } if (sts_val & CPU_BOOT_DEV_STS0_ENABLED) dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) + dev_err(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); + + if (err_val & CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) + dev_err(hdev->dev, "Device boot error - ARC memory scrub failed\n"); + + /* All warnings should go here in order not to reach the unknown error validation */ if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) { dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n"); err_exists = true; } - /* All warnings should go here in order not to reach the unknown error validation */ - if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { - dev_warn(hdev->dev, - "Device boot warning - Skipped DRAM initialization\n"); - /* This is a warning so we don't want it to disable the - * device - */ - err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED; - } + if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) + dev_warn(hdev->dev, "Device boot warning - Failed to load preboot primary image\n"); - if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) { - dev_warn(hdev->dev, - "Device boot warning - Failed to load preboot primary image\n"); - /* This is a warning so we don't want it to disable the - * device as we have a secondary preboot image - */ - err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL; - } - - if (err_val & CPU_BOOT_ERR0_TPM_FAIL) { - dev_warn(hdev->dev, - "Device boot warning - TPM failure\n"); - /* This is a warning so we don't want it to disable the - * device - */ - err_val &= ~CPU_BOOT_ERR0_TPM_FAIL; - } + if (err_val & CPU_BOOT_ERR0_TPM_FAIL) + dev_warn(hdev->dev, "Device boot warning - TPM failure\n"); - if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) { - dev_err(hdev->dev, - "Device boot error - unknown ERR0 error 0x%08x\n", err_val); + if (err_val & CPU_BOOT_ERR_FATAL_MASK) err_exists = true; - } /* return error only if it's in the predefined mask */ if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) & @@ -834,7 +939,7 @@ static int hl_fw_send_msi_info_msg(struct hl_device *hdev) { struct cpucp_array_data_packet *pkt; size_t total_pkt_size, data_size; - u64 result; + u64 result = 0; int rc; /* skip sending this info for unsupported ASICs */ @@ -925,11 +1030,10 @@ int 
hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_EEPROM_TIMEOUT_USEC, &result); - if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP EEPROM packet, error %d\n", - rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP EEPROM packet, error %d\n", rc); goto out; } @@ -970,7 +1074,9 @@ int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc); goto out; } @@ -1004,8 +1110,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->rx_throughput = result; @@ -1019,8 +1126,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->tx_throughput = result; @@ -1033,8 +1141,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->replay_cnt = (u32) result; @@ -1054,9 +1163,9 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CpuCP total energy pkt, error %d\n", - rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CpuCP total energy pkt, error %d\n", rc); return rc; } @@ -1132,7 +1241,8 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); return rc; } @@ -1159,7 +1269,8 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, "Failed to read power, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to read power, error %d\n", rc); return rc; } @@ -1196,8 +1307,9 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc); + if (rc 
!= -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc); goto out; } @@ -1222,7 +1334,8 @@ int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num) rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); if (rc) { - dev_err(hdev->dev, + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to handle CPU-CP pending rows info pkt, error %d\n", rc); goto out; } @@ -1263,7 +1376,7 @@ void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev) COMMS_RST_DEV, 0, false, hdev->fw_loader.cpu_timeout); if (rc) - dev_warn(hdev->dev, "Failed sending COMMS_RST_DEV\n"); + dev_err(hdev->dev, "Failed sending COMMS_RST_DEV\n"); } else { WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_RST_DEV); } @@ -1271,8 +1384,10 @@ void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev) void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev) { - struct static_fw_load_mgr *static_loader = - &hdev->fw_loader.static_loader; + struct fw_load_mgr *fw_loader = &hdev->fw_loader; + u32 status, cpu_boot_status_reg, cpu_timeout; + struct static_fw_load_mgr *static_loader; + struct pre_fw_load_props *pre_fw_load; int rc; if (hdev->device_cpu_is_halted) @@ -1280,12 +1395,28 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev) /* Stop device CPU to make sure nothing bad happens */ if (hdev->asic_prop.dynamic_fw_load) { + pre_fw_load = &fw_loader->pre_fw_load; + cpu_timeout = fw_loader->cpu_timeout; + cpu_boot_status_reg = pre_fw_load->cpu_boot_status_reg; + rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader, - COMMS_GOTO_WFE, 0, true, - hdev->fw_loader.cpu_timeout); - if (rc) - dev_warn(hdev->dev, "Failed sending COMMS_GOTO_WFE\n"); + COMMS_GOTO_WFE, 0, false, cpu_timeout); + if (rc) { + dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n"); + } else { + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + status == CPU_BOOT_STATUS_IN_WFE, + hdev->fw_poll_interval_usec, + cpu_timeout); + if (rc) + dev_err(hdev->dev, "Current status=%u. 
Timed-out updating to WFE\n", + status); + } } else { + static_loader = &hdev->fw_loader.static_loader; WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE); msleep(static_loader->cpu_reset_wait_msec); @@ -1344,6 +1475,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) dev_err(hdev->dev, "Device boot progress - Stuck in preboot after security initialization\n"); break; + case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP: + dev_err(hdev->dev, + "Device boot progress - Stuck in preparation for shutdown\n"); + break; default: dev_err(hdev->dev, "Device boot progress - Invalid or unexpected status code %d\n", status); @@ -1354,8 +1489,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) int hl_fw_wait_preboot_ready(struct hl_device *hdev) { struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; - u32 status; - int rc; + u32 status = 0, timeout; + int rc, tries = 1, fw_err = 0; + bool preboot_still_runs; /* Need to check two possible scenarios: * @@ -1365,6 +1501,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) * All other status values - for older firmwares where the uboot was * loaded from the FLASH */ + timeout = pre_fw_load->wait_for_preboot_timeout; +retry: rc = hl_poll_timeout( hdev, pre_fw_load->cpu_boot_status_reg, @@ -1373,20 +1511,37 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) (status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), hdev->fw_poll_interval_usec, - pre_fw_load->wait_for_preboot_timeout); + timeout); + /* + * if F/W reports "security-ready" it means preboot might take longer. + * If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again + * with that timeout + */ + preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY || + status == CPU_BOOT_STATUS_IN_PREBOOT || + status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP || + status == CPU_BOOT_STATUS_DRAM_RDY); + + if (rc && tries && preboot_still_runs) { + tries--; + if (pre_fw_load->wait_for_preboot_extended_timeout) { + timeout = pre_fw_load->wait_for_preboot_extended_timeout; + goto retry; + } + } - if (rc) { + /* If we read all FF, then something is totally wrong, no point + * of reading specific errors + */ + if (status != -1) + fw_err = fw_read_errors(hdev, pre_fw_load->boot_err0_reg, + pre_fw_load->boot_err1_reg, + pre_fw_load->sts_boot_dev_sts0_reg, + pre_fw_load->sts_boot_dev_sts1_reg); + if (rc || fw_err) { detect_cpu_boot_status(hdev, status); - dev_err(hdev->dev, "CPU boot ready timeout (status = %d)\n", status); - - /* If we read all FF, then something is totally wrong, no point - * of reading specific errors - */ - if (status != -1) - fw_read_errors(hdev, pre_fw_load->boot_err0_reg, - pre_fw_load->boot_err1_reg, - pre_fw_load->sts_boot_dev_sts0_reg, - pre_fw_load->sts_boot_dev_sts1_reg); + dev_err(hdev->dev, "CPU boot %s (status = %d)\n", + fw_err ? 
"failed due to an error" : "ready timeout", status); return -EIO; } @@ -1657,7 +1812,7 @@ static void hl_fw_dynamic_send_cmd(struct hl_device *hdev, val = FIELD_PREP(COMMS_COMMAND_CMD_MASK, cmd); val |= FIELD_PREP(COMMS_COMMAND_SIZE_MASK, size); - trace_habanalabs_comms_send_cmd(hdev->dev, comms_cmd_str_arr[cmd]); + trace_habanalabs_comms_send_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]); WREG32(le32_to_cpu(dyn_regs->kmd_msg_to_cpu), val); } @@ -1715,7 +1870,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; - trace_habanalabs_comms_wait_status(hdev->dev, comms_sts_str_arr[expected_status]); + trace_habanalabs_comms_wait_status(&hdev->pdev->dev, comms_sts_str_arr[expected_status]); /* Wait for expected status */ rc = hl_poll_timeout( @@ -1732,7 +1887,8 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, return -EIO; } - trace_habanalabs_comms_wait_status_done(hdev->dev, comms_sts_str_arr[expected_status]); + trace_habanalabs_comms_wait_status_done(&hdev->pdev->dev, + comms_sts_str_arr[expected_status]); /* * skip storing FW response for NOOP to preserve the actual desired @@ -1806,7 +1962,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, { int rc; - trace_habanalabs_comms_protocol_cmd(hdev->dev, comms_cmd_str_arr[cmd]); + trace_habanalabs_comms_protocol_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]); /* first send clear command to clean former commands */ rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader); @@ -1945,7 +2101,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, * note that no alignment/stride address issues here as all structures * are 64 bit padded. */ - data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header); + data_ptr = (u8 *)fw_desc + sizeof(struct comms_msg_header); data_size = le16_to_cpu(fw_desc->header.size); data_crc32 = hl_fw_compat_crc32(data_ptr, data_size); @@ -2099,11 +2255,11 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev, memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc)); fw_data_size = le16_to_cpu(fw_desc->header.size); - temp_fw_desc = vzalloc(sizeof(struct comms_desc_header) + fw_data_size); + temp_fw_desc = vzalloc(sizeof(struct comms_msg_header) + fw_data_size); if (!temp_fw_desc) return -ENOMEM; - memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_desc_header) + fw_data_size); + memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_msg_header) + fw_data_size); rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader, (struct lkd_fw_comms_desc *) temp_fw_desc); @@ -2154,6 +2310,7 @@ static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, struct asic_fixed_properties *prop = &hdev->asic_prop; char *preboot_ver, *boot_ver; char btl_ver[32]; + int rc; switch (fwc) { case FW_COMP_BOOT_FIT: @@ -2167,22 +2324,22 @@ static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, break; case FW_COMP_PREBOOT: strscpy(prop->preboot_ver, fw_version, VERSION_MAX_LEN); - preboot_ver = strnstr(prop->preboot_ver, "Preboot", - VERSION_MAX_LEN); + preboot_ver = strnstr(prop->preboot_ver, "Preboot", VERSION_MAX_LEN); + dev_info(hdev->dev, "preboot full version: '%s'\n", preboot_ver); + if (preboot_ver && preboot_ver != prop->preboot_ver) { strscpy(btl_ver, prop->preboot_ver, min((int) (preboot_ver - prop->preboot_ver), 31)); dev_info(hdev->dev, "%s\n", btl_ver); } + rc = hl_get_sw_major_minor_subminor(hdev, preboot_ver); + if (rc) + return rc; preboot_ver = 
extract_fw_ver_from_str(prop->preboot_ver); if (preboot_ver) { - int rc; - - dev_info(hdev->dev, "preboot version %s\n", preboot_ver); - - /* This function takes care of freeing preboot_ver */ - rc = extract_fw_sub_versions(hdev, preboot_ver); + rc = hl_get_preboot_major_minor(hdev, preboot_ver); + kfree(preboot_ver); if (rc) return rc; } @@ -2370,16 +2527,6 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev, if (rc) goto release_fw; - /* update state according to boot stage */ - if (cur_fwc == FW_COMP_BOOT_FIT) { - struct cpu_dyn_regs *dyn_regs; - - dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; - hl_fw_boot_fit_update_state(hdev, - le32_to_cpu(dyn_regs->cpu_boot_dev_sts0), - le32_to_cpu(dyn_regs->cpu_boot_dev_sts1)); - } - /* copy boot fit to space allocated by FW */ rc = hl_fw_dynamic_copy_image(hdev, fw, fw_loader); if (rc) @@ -2634,17 +2781,20 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; } + rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms)); + if (rc) + goto protocol_err; + + if (hdev->asic_prop.support_dynamic_resereved_fw_size) + hdev->asic_prop.reserved_fw_mem_size = + le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M; + if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) { struct lkd_fw_binning_info *binning_info; - rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0); - if (rc) - goto protocol_err; - /* read preboot version */ rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT, fw_loader->dynamic_loader.comm_desc.cur_fw_ver); - if (rc) return rc; @@ -2682,6 +2832,14 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, goto protocol_err; } + rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader); + if (rc) + goto protocol_err; + + hl_fw_boot_fit_update_state(hdev, + le32_to_cpu(dyn_regs->cpu_boot_dev_sts0), + le32_to_cpu(dyn_regs->cpu_boot_dev_sts1)); + /* * when testing FW load (without Linux) on PLDM we don't want to * wait until boot fit is active as it may take several hours. 
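The recurring firmware_if.c change above and below follows a single pattern: a CPU-CP request can legitimately fail with -EAGAIN while the firmware is temporarily busy (for instance around a reset), and that case should be returned to the caller without an error print. A minimal sketch of the pattern, using the send_cpu_message() signature visible in these hunks; the wrapper name and the desc argument are hypothetical, not driver symbols:

/*
 * Send a CPU-CP packet and log real failures only. -EAGAIN is the
 * firmware's "retry later" answer, so it is propagated silently.
 */
static int hl_fw_send_cpu_msg_logged(struct hl_device *hdev, struct cpucp_packet *pkt,
				     size_t len, u32 timeout_usec, u64 *result,
				     const char *desc)
{
	int rc;

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt, len,
						timeout_usec, result);
	if (rc && rc != -EAGAIN)
		dev_err(hdev->dev, "Failed to handle %s, error %d\n", desc, rc);

	return rc;
}

The patch open-codes this test at every call site instead of sharing a helper, which keeps each packet's specific error string grep-able.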
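In the hl_fw_ask_halt_machine_without_linux() hunk above, COMMS_GOTO_WFE appears to be sent without waiting for the protocol's OK status (the boolean argument flips from true to false), and the halt is instead confirmed by polling the boot-status register. A condensed sketch of that flow, reusing the hl_poll_timeout() macro arguments shown in the hunk rather than the verbatim function body:

	rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
					     COMMS_GOTO_WFE, 0, false, cpu_timeout);
	if (rc) {
		dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
	} else {
		/*
		 * The command being accepted is not enough: the hard-reset
		 * path must not proceed until the device CPU has actually
		 * parked in WFE, so wait for the terminal boot status.
		 */
		rc = hl_poll_timeout(hdev, cpu_boot_status_reg, status,
				     status == CPU_BOOT_STATUS_IN_WFE,
				     hdev->fw_poll_interval_usec, cpu_timeout);
		if (rc)
			dev_err(hdev->dev,
				"Current status=%u. Timed-out updating to WFE\n",
				status);
	}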
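The hl_fw_wait_preboot_ready() hunk above adds one retry with a larger budget: if the first poll times out while the status register still shows a state in which preboot is known to be alive and working, and the firmware supplied wait_for_preboot_extended_timeout, the poll is repeated once with that value. A sketch of the control flow; preboot_ready() and preboot_still_runs() are hypothetical predicates standing in for the status comparisons spelled out in the hunk:

	u32 timeout = pre_fw_load->wait_for_preboot_timeout;
	int tries = 1;

retry:
	rc = hl_poll_timeout(hdev, pre_fw_load->cpu_boot_status_reg, status,
			     preboot_ready(status),
			     hdev->fw_poll_interval_usec, timeout);

	/*
	 * SECURITY_READY, IN_PREBOOT, FW_SHUTDOWN_PREP and DRAM_RDY all mean
	 * preboot is still running, so one longer wait is worthwhile.
	 */
	if (rc && tries-- && preboot_still_runs(status) &&
	    pre_fw_load->wait_for_preboot_extended_timeout) {
		timeout = pre_fw_load->wait_for_preboot_extended_timeout;
		goto retry;
	}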
@@ -2691,17 +2849,13 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX)) return 0; - rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader); - if (rc) - goto protocol_err; - /* Enable DRAM scrambling before Linux boot and after successful * UBoot */ hdev->asic_funcs->init_cpu_scrambler_dram(hdev); if (!(hdev->fw_components & FW_TYPE_LINUX)) { - dev_info(hdev->dev, "Skip loading Linux F/W\n"); + dev_dbg(hdev->dev, "Skip loading Linux F/W\n"); return 0; } @@ -2728,7 +2882,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, if (rc) goto protocol_err; - hl_fw_linux_update_state(hdev, le32_to_cpu(dyn_regs->cpu_boot_dev_sts0), + hl_fw_linux_update_state(hdev, + le32_to_cpu(dyn_regs->cpu_boot_dev_sts0), le32_to_cpu(dyn_regs->cpu_boot_dev_sts1)); hl_fw_dynamic_update_linux_interrupt_if(hdev); @@ -3030,10 +3185,10 @@ long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) pkt.pll_index = cpu_to_le32((u32)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); if (rc) { - dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", - used_pll_idx, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", + used_pll_idx, rc); return rc; } @@ -3057,8 +3212,7 @@ void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", used_pll_idx, rc); } @@ -3074,9 +3228,9 @@ long hl_fw_get_max_power(struct hl_device *hdev) pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); if (rc) { - dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); return rc; } @@ -3098,8 +3252,7 @@ void hl_fw_set_max_power(struct hl_device *hdev) pkt.value = cpu_to_le64(hdev->max_power); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) + if (rc && rc != -EAGAIN) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); } @@ -3125,11 +3278,11 @@ static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void pkt.data_max_size = cpu_to_le32(size); pkt.nonce = cpu_to_le32(nonce); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - timeout, NULL); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), timeout, NULL); if (rc) { - dev_err(hdev->dev, - "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); goto out; } @@ -3149,10 +3302,18 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC); } +int hl_fw_get_dev_info_signed(struct hl_device *hdev, + struct cpucp_dev_info_signed *dev_info_signed, u32 nonce) +{ + return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_INFO_SIGNED_GET, dev_info_signed, + sizeof(struct cpucp_dev_info_signed), nonce, + HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC); +} + int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode, dma_addr_t buff, u32 *size) { - struct
cpucp_packet pkt = {0}; + struct cpucp_packet pkt = {}; u64 result; int rc = 0; @@ -3163,10 +3324,12 @@ int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); - if (rc) - dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n"); - else + if (rc) { + if (rc != -EAGAIN) + dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n"); + } else { dev_dbg(hdev->dev, "generic pkt was successful, result: 0x%llx\n", result); + } *size = (u32)result; diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index fa05e76d3d21..d94c2ba22a6a 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2022 HabanaLabs, Ltd. + * Copyright 2016-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -8,7 +8,7 @@ #ifndef HABANALABSP_H_ #define HABANALABSP_H_ -#include "../include/common/cpucp_if.h" +#include <linux/habanalabs/cpucp_if.h> #include "../include/common/qman_if.h" #include "../include/hw_ip/mmu/mmu_general.h" #include <uapi/drm/habanalabs_accel.h> @@ -29,6 +29,9 @@ #include <linux/coresight.h> #include <linux/dma-buf.h> +#include <drm/drm_device.h> +#include <drm/drm_file.h> + #include "security.h" #define HL_NAME "habanalabs" @@ -36,6 +39,8 @@ struct hl_device; struct hl_fpriv; +#define PCI_VENDOR_ID_HABANALABS 0x1da3 + /* Use upper bits of mmap offset to store habana driver specific information. * bits[63:59] - Encode mmap type * bits[45:0] - mmap offset value @@ -66,7 +71,7 @@ struct hl_fpriv; #define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ -#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ +#define HL_HEARTBEAT_PER_USEC 10000000 /* 10 s */ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ @@ -80,14 +85,14 @@ struct hl_fpriv; #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ -#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */ - #define HL_INVALID_QUEUE UINT_MAX #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF #define HL_COMMON_DEC_INTERRUPT_ID 0xFFE -#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_DBGFS_CFG_ACCESS_HIST_LEN 20 +#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 2 /* 2s */ /* Default value for device reset trigger , an invalid value */ #define HL_RESET_TRIGGER_DEFAULT 0xFF @@ -101,6 +106,8 @@ struct hl_fpriv; /* MMU */ #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ +#define TIMESTAMP_FREE_NODES_NUM 512 + /** * enum hl_mmu_page_table_location - mmu page table location * @MMU_DR_PGT: page-table is located on device DRAM. @@ -113,18 +120,6 @@ enum hl_mmu_page_table_location { MMU_NUM_PGT_LOCATIONS /* num of PGT locations */ }; -/** - * enum hl_mmu_enablement - what mmu modules to enable - * @MMU_EN_NONE: mmu disabled. - * @MMU_EN_ALL: enable all. - * @MMU_EN_PMMU_ONLY: Enable only the PMMU leaving the DMMU disabled. 
- */ -enum hl_mmu_enablement { - MMU_EN_NONE = 0, - MMU_EN_ALL = 1, - MMU_EN_PMMU_ONLY = 3, /* N/A for Goya/Gaudi */ -}; - /* * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream @@ -155,21 +150,20 @@ enum hl_mmu_enablement { #define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \ hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__) -#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \ - hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__) - #define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \ hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__) #define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \ hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__) -#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \ - hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__) - #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) +#define hl_dma_map_sgtable(hdev, sgt, dir) \ + hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__) +#define hl_dma_unmap_sgtable(hdev, sgt, dir) \ + hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__) + /* * Reset Flags * @@ -378,6 +372,7 @@ enum hl_cs_type { CS_RESERVE_SIGNALS, CS_UNRESERVE_SIGNALS, CS_TYPE_ENGINE_CORE, + CS_TYPE_ENGINES, CS_TYPE_FLUSH_PCI_HBW_WRITES, }; @@ -450,18 +445,22 @@ enum hl_collective_mode { * a CB handle can be provided for jobs on this queue. * Otherwise, a CB address must be provided. * @collective_mode: collective mode of current queue + * @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM. * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. * @binned: True if the queue is binned out and should not be used * @supports_sync_stream: True if queue supports sync stream + * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram */ struct hw_queue_properties { enum hl_queue_type type; enum queue_cb_alloc_flags cb_alloc_flags; enum hl_collective_mode collective_mode; + u64 q_dram_bd_address; u8 driver_only; u8 binned; u8 supports_sync_stream; + u8 dram_bd; }; /** @@ -560,8 +559,7 @@ struct hl_hints_range { * allocated with huge pages. * @hints_dram_reserved_va_range: dram hint addresses reserved range. * @hints_host_reserved_va_range: host hint addresses reserved range. - * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved - * range. + * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -592,13 +590,13 @@ struct hl_hints_range { * @host_base_address: host physical start address for host DMA from device * @host_end_address: host physical end address for host DMA from device * @max_freq_value: current max clk frequency. + * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use + * in order to raise events toward FW. * @clk_pll_index: clock PLL index that specify which PLL determines the clock * we display to the user * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. - * @mmu_hop_table_size: MMU hop table size. 
- * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. - * @dram_page_size: page size for MMU DRAM allocation. + * @dram_page_size: The DRAM physical page size. * @cfg_size: configuration space size on SRAM. * @sram_size: total size of SRAM. * @max_asid: maximum number of open contexts (ASIDs). @@ -612,8 +610,8 @@ struct hl_hints_range { * @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool. * @decoder_enabled_mask: which decoders are enabled. - * @decoder_binning_mask: which decoders are binned, 0 means usable and 1 - * means binned (at most one binned decoder per dcore). + * @decoder_binning_mask: which decoders are binned, 0 means usable and 1 means binned. + * @rotator_enabled_mask: which rotators are enabled. * @edma_enabled_mask: which EDMAs are enabled. * @edma_binning_mask: which EDMAs are binned, 0 means usable and 1 means * binned (at most one binned DMA). @@ -648,11 +646,15 @@ struct hl_hints_range { * which the property supports_user_set_page_size is true * (i.e. the DRAM supports multiple page sizes), otherwise * it will shall be equal to dram_page_size. - * @num_engine_cores: number of engine cpu cores + * @num_engine_cores: number of engine cpu cores. + * @max_num_of_engines: maximum number of all engines in the ASIC. * @num_of_special_blocks: special_blocks array size. - * @glbl_err_cause_num: global err cause number. + * @glbl_err_max_cause_num: global err max cause number. * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is * not supported. + * @reserved_fw_mem_size: size of dram memory reserved for FW. + * @fw_event_queue_size: queue size for events from CPU-CP. + * A value of 0 means using the default HL_EQ_SIZE_IN_BYTES value. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -663,6 +665,8 @@ struct hl_hints_range { * @first_available_cq: first available CQ for the user. * @user_interrupt_count: number of user interrupts. * @user_dec_intr_count: number of decoder interrupts exposed to user. + * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host. + * @eq_interrupt_id: interrupt id for EQ, uses to synchronize EQ interrupts in hard-reset. * @cache_line_size: device cache line size. * @server_type: Server type that the ASIC is currently installed in. * The value is according to enum hl_server_type in uapi file. @@ -696,8 +700,11 @@ struct hl_hints_range { * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. * @set_max_power_on_device_init: true if need to set max power in F/W on device init. * @supports_user_set_page_size: true if user can set the allocation page size. - * @dma_mask: the dma mask to be set for this device + * @dma_mask: the dma mask to be set for this device. * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. + * @supports_engine_modes: true if changing engines/engine_cores modes is supported. + * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw. + * @supports_nvme: indicates whether the asic supports NVMe P2P DMA. 
*/ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -739,11 +746,10 @@ struct asic_fixed_properties { u64 host_base_address; u64 host_end_address; u64 max_freq_value; + u64 engine_core_interrupt_reg_addr; u32 clk_pll_index; u32 mmu_pgt_size; u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; u32 dram_page_size; u32 cfg_size; u32 sram_size; @@ -759,6 +765,7 @@ struct asic_fixed_properties { u32 cb_pool_cb_size; u32 decoder_enabled_mask; u32 decoder_binning_mask; + u32 rotator_enabled_mask; u32 edma_enabled_mask; u32 edma_binning_mask; u32 max_pending_cs; @@ -775,9 +782,12 @@ struct asic_fixed_properties { u32 xbar_edge_enabled_mask; u32 device_mem_alloc_default_page_size; u32 num_engine_cores; + u32 max_num_of_engines; u32 num_of_special_blocks; - u32 glbl_err_cause_num; + u32 glbl_err_max_cause_num; u32 hbw_flush_reg; + u32 reserved_fw_mem_size; + u32 fw_event_queue_size; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -788,6 +798,8 @@ struct asic_fixed_properties { u16 first_available_cq[HL_MAX_DCORES]; u16 user_interrupt_count; u16 user_dec_intr_count; + u16 tpc_interrupt_id; + u16 eq_interrupt_id; u16 cache_line_size; u16 server_type; u8 completion_queues_count; @@ -811,6 +823,9 @@ struct asic_fixed_properties { u8 supports_user_set_page_size; u8 dma_mask; u8 supports_advanced_cpucp_rc; + u8 supports_engine_modes; + u8 support_dynamic_resereved_fw_size; + u8 supports_nvme; }; /** @@ -894,6 +909,18 @@ struct hl_mem_mgr { }; /** + * struct hl_mem_mgr_fini_stats - describes statistics returned during memory manager teardown. + * @n_busy_cb: the amount of CB handles that could not be removed + * @n_busy_ts: the amount of TS handles that could not be removed + * @n_busy_other: the amount of any other type of handles that could not be removed + */ +struct hl_mem_mgr_fini_stats { + u32 n_busy_cb; + u32 n_busy_ts; + u32 n_busy_other; +}; + +/** * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior * @topic: string identifier used for logging * @mem_id: memory type identifier, embedded in the handle and used to identify @@ -1044,6 +1071,8 @@ struct hl_encaps_signals_mgr { * @collective_mode: collective mode of current queue * @kernel_address: holds the queue's kernel virtual address. * @bus_address: holds the queue's DMA address. + * @pq_dram_address: hold the dram address when the PQ is allocated, used when dram_bd is true in + * queue properites. * @pi: holds the queue's pi value. * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci). * @hw_queue_id: the id of the H/W queue. @@ -1053,6 +1082,7 @@ struct hl_encaps_signals_mgr { * @valid: is the queue valid (we have array of 32 queues, not all of them * exist). 
* @supports_sync_stream: True if queue supports sync stream + * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram */ struct hl_hw_queue { struct hl_cs_job **shadow_queue; @@ -1061,6 +1091,7 @@ struct hl_hw_queue { enum hl_collective_mode collective_mode; void *kernel_address; dma_addr_t bus_address; + u64 pq_dram_address; u32 pi; atomic_t ci; u32 hw_queue_id; @@ -1069,6 +1100,7 @@ struct hl_hw_queue { u16 int_queue_len; u8 valid; u8 supports_sync_stream; + u8 dram_bd; }; /** @@ -1096,21 +1128,47 @@ struct hl_cq { enum hl_user_interrupt_type { HL_USR_INTERRUPT_CQ = 0, HL_USR_INTERRUPT_DECODER, + HL_USR_INTERRUPT_TPC, + HL_USR_INTERRUPT_UNEXPECTED +}; + +/** + * struct hl_ts_free_jobs - holds user interrupt ts free nodes related data + * @free_nodes_pool: pool of nodes to be used for free timestamp jobs + * @free_nodes_length: number of nodes in free_nodes_pool + * @next_avail_free_node_idx: index of the next free node in the pool + * + * the free nodes pool must be protected by the user interrupt lock + * to avoid race between different interrupts which are using the same + * ts buffer with different offsets. + */ +struct hl_ts_free_jobs { + struct timestamp_reg_free_node *free_nodes_pool; + u32 free_nodes_length; + u32 next_avail_free_node_idx; }; /** * struct hl_user_interrupt - holds user interrupt information * @hdev: pointer to the device structure + * @ts_free_jobs_data: timestamp free jobs related data * @type: user interrupt type * @wait_list_head: head to the list of user threads pending on this interrupt + * @ts_list_head: head to the list of timestamp records * @wait_list_lock: protects wait_list_head + * @ts_list_lock: protects ts_list_head + * @timestamp: last timestamp taken upon interrupt * @interrupt_id: msix interrupt id */ struct hl_user_interrupt { struct hl_device *hdev; + struct hl_ts_free_jobs ts_free_jobs_data; enum hl_user_interrupt_type type; struct list_head wait_list_head; + struct list_head ts_list_head; spinlock_t wait_list_lock; + spinlock_t ts_list_lock; + ktime_t timestamp; u32 interrupt_id; }; @@ -1119,11 +1177,15 @@ struct hl_user_interrupt { * @free_objects_node: node in the list free_obj_jobs * @cq_cb: pointer to cq command buffer to be freed * @buf: pointer to timestamp buffer to be freed + * @in_use: indicates whether the node still in use in workqueue thread. + * @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler */ struct timestamp_reg_free_node { struct list_head free_objects_node; struct hl_cb *cq_cb; struct hl_mmap_mem_buf *buf; + atomic_t in_use; + u8 dynamic_alloc; }; /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job @@ -1132,17 +1194,21 @@ struct timestamp_reg_free_node { * @free_obj: workqueue object to free timestamp registration node objects * @hdev: pointer to the device structure * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node) + * @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the + * interrupt handler. */ struct timestamp_reg_work_obj { struct work_struct free_obj; struct hl_device *hdev; struct list_head *free_obj_head; + struct list_head *dynamic_alloc_free_obj_head; }; /* struct timestamp_reg_info - holds the timestamp registration related data. * @buf: pointer to the timestamp buffer which include both user/kernel buffers. * relevant only when doing timestamps records registration. * @cq_cb: pointer to CQ counter CB. 
+ * @interrupt: interrupt that the node hanged on it's wait list. * @timestamp_kernel_addr: timestamp handle address, where to set timestamp * relevant only when doing timestamps records * registration. @@ -1152,17 +1218,18 @@ struct timestamp_reg_work_obj { * allocating records dynamically. */ struct timestamp_reg_info { - struct hl_mmap_mem_buf *buf; - struct hl_cb *cq_cb; - u64 *timestamp_kernel_addr; - u8 in_use; + struct hl_mmap_mem_buf *buf; + struct hl_cb *cq_cb; + struct hl_user_interrupt *interrupt; + u64 *timestamp_kernel_addr; + bool in_use; }; /** * struct hl_user_pending_interrupt - holds a context to a user thread * pending on an interrupt * @ts_reg_info: holds the timestamps registration nodes info - * @wait_list_node: node in the list of user threads pending on an interrupt + * @list_node: node in the list of user threads pending on an interrupt or timestamp * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt @@ -1170,7 +1237,7 @@ struct timestamp_reg_info { */ struct hl_user_pending_interrupt { struct timestamp_reg_info ts_reg_info; - struct list_head wait_list_node; + struct list_head list_node; struct hl_fence fence; u64 cq_target_value; u64 *cq_kernel_addr; @@ -1181,6 +1248,7 @@ struct hl_user_pending_interrupt { * @hdev: pointer to the device structure * @kernel_address: holds the queue's kernel virtual address * @bus_address: holds the queue's DMA address + * @size: the event queue size * @ci: ci inside the queue * @prev_eqe_index: the index of the previous event queue entry. The index of * the current entry's index must be +1 of the previous one. @@ -1192,6 +1260,7 @@ struct hl_eq { struct hl_device *hdev; void *kernel_address; dma_addr_t bus_address; + u32 size; u32 ci; u32 prev_eqe_index; bool check_eqe_index; @@ -1200,15 +1269,15 @@ struct hl_eq { /** * struct hl_dec - describes a decoder sw instance. * @hdev: pointer to the device structure. - * @completion_abnrm_work: workqueue object to run when decoder generates an error interrupt + * @abnrm_intr_work: workqueue work item to run when decoder generates an error interrupt. * @core_id: ID of the decoder. * @base_addr: base address of the decoder. */ struct hl_dec { - struct hl_device *hdev; - struct work_struct completion_abnrm_work; - u32 core_id; - u32 base_addr; + struct hl_device *hdev; + struct work_struct abnrm_intr_work; + u32 core_id; + u32 base_addr; }; /** @@ -1219,14 +1288,19 @@ struct hl_dec { * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). * @ASIC_GAUDI2: Gaudi2 device. * @ASIC_GAUDI2B: Gaudi2B device. + * @ASIC_GAUDI2C: Gaudi2C device. + * @ASIC_GAUDI2D: Gaudi2D device. */ enum hl_asic_type { ASIC_INVALID, + ASIC_GOYA, ASIC_GAUDI, ASIC_GAUDI_SEC, ASIC_GAUDI2, ASIC_GAUDI2B, + ASIC_GAUDI2C, + ASIC_GAUDI2D, }; struct hl_cs_parser; @@ -1369,6 +1443,8 @@ struct dynamic_fw_load_mgr { * @boot_err0_reg: boot_err0 register address * @boot_err1_reg: boot_err1 register address * @wait_for_preboot_timeout: timeout to poll for preboot ready + * @wait_for_preboot_extended_timeout: timeout to pull for preboot ready in case where we know + * preboot needs longer time. */ struct pre_fw_load_props { u32 cpu_boot_status_reg; @@ -1377,6 +1453,7 @@ struct pre_fw_load_props { u32 boot_err0_reg; u32 boot_err1_reg; u32 wait_for_preboot_timeout; + u32 wait_for_preboot_extended_timeout; }; /** @@ -1476,11 +1553,9 @@ struct engines_data { * @asic_dma_pool_free: free small DMA allocation from pool. 
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. - * @asic_dma_unmap_single: unmap a single DMA buffer - * @asic_dma_map_single: map a single buffer to a DMA - * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table. + * @dma_unmap_sgtable: DMA unmap scatter-gather table. + * @dma_map_sgtable: DMA map scatter-gather table. * @cs_parser: parse Command Submission. - * @asic_dma_map_sgtable: DMA map scatter-gather table. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. @@ -1562,6 +1637,7 @@ struct engines_data { * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR * @set_engine_cores: set a config command to engine cores + * @set_engines: set a config command to user engines * @send_device_activity: indication to FW about device availability * @set_dram_properties: set DRAM related properties. * @set_binning_masks: set binning/enable masks for all relevant components. @@ -1574,7 +1650,7 @@ struct hl_asic_funcs { int (*sw_init)(struct hl_device *hdev); int (*sw_fini)(struct hl_device *hdev); int (*hw_init)(struct hl_device *hdev); - void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset); + int (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset); void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset); int (*suspend)(struct hl_device *hdev); int (*resume)(struct hl_device *hdev); @@ -1600,18 +1676,11 @@ struct hl_asic_funcs { size_t size, dma_addr_t *dma_handle); void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, size_t size, void *vaddr); - void (*asic_dma_unmap_single)(struct hl_device *hdev, - dma_addr_t dma_addr, int len, - enum dma_data_direction dir); - dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev, - void *addr, int len, + void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); - void (*hl_dma_unmap_sgtable)(struct hl_device *hdev, - struct sg_table *sgt, + int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); - int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, - enum dma_data_direction dir); void (*add_end_of_cb_packets)(struct hl_device *hdev, void *kernel_address, u32 len, u32 original_len, @@ -1701,6 +1770,8 @@ struct hl_asic_funcs { u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, u32 num_cores, u32 core_command); + int (*set_engines)(struct hl_device *hdev, u32 *engine_ids, + u32 num_engines, u32 engine_command); int (*send_device_activity)(struct hl_device *hdev, bool open); int (*set_dram_properties)(struct hl_device *hdev); int (*set_binning_masks)(struct hl_device *hdev); @@ -1767,16 +1838,19 @@ struct hl_cs_counters_atomic { * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported * where virtual memory is supported. * @memhash_hnode: pointer to the memhash node. this object holds the export count. - * @device_address: physical address of the device's memory. Relevant only - * if phys_pg_pack is NULL (dma-buf was exported from address). - * The total size can be taken from the dmabuf object. + * @offset: the offset into the buffer from which the memory is exported. 
+ * Relevant only if virtual memory is supported and phys_pg_pack is being used. + * device_phys_addr: physical address of the device's memory. Relevant only + * if phys_pg_pack is NULL (dma-buf was exported from address). + * The total size can be taken from the dmabuf object. */ struct hl_dmabuf_priv { struct dma_buf *dmabuf; struct hl_ctx *ctx; struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_hash_node *memhash_hnode; - uint64_t device_address; + u64 offset; + u64 device_phys_addr; }; #define HL_CS_OUTCOME_HISTORY_LEN 256 @@ -1824,13 +1898,14 @@ struct hl_cs_outcome_store { * @hpriv: pointer to the private (Kernel Driver) data of the process (fd). * @hdev: pointer to the device structure. * @refcount: reference counter for the context. Context is released only when - * this hits 0l. It is incremented on CS and CS_WAIT. + * this hits 0. It is incremented on CS and CS_WAIT. * @cs_pending: array of hl fence objects representing pending CS. * @outcome_store: storage data structure used to remember outcomes of completed * command submissions for a long time after CS id wraparound. * @va_range: holds available virtual addresses for host and dram mappings. * @mem_hash_lock: protects the mem_hash. * @hw_block_list_lock: protects the HW block memory list. + * @ts_reg_lock: timestamp registration ioctls lock. * @debugfs_list: node in debugfs list of contexts. * @hw_block_mem_list: list of HW block virtual mapped addresses. * @cs_counters: context command submission counters. @@ -1867,6 +1942,7 @@ struct hl_ctx { struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; struct mutex hw_block_list_lock; + struct mutex ts_reg_lock; struct list_head debugfs_list; struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; @@ -1913,17 +1989,17 @@ struct hl_ctx_mgr { * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. */ struct hl_userptr { - enum vm_type vm_type; /* must be first */ - struct list_head job_node; - struct page **pages; - unsigned int npages; - struct sg_table *sgt; - enum dma_data_direction dir; - struct list_head debugfs_list; - pid_t pid; - u64 addr; - u64 size; - u8 dma_mapped; + enum vm_type vm_type; /* must be first */ + struct list_head job_node; + struct page **pages; + unsigned int npages; + struct sg_table *sgt; + enum dma_data_direction dir; + struct list_head debugfs_list; + pid_t pid; + u64 addr; + u64 size; + u8 dma_mapped; }; /** @@ -2144,7 +2220,6 @@ struct hl_vm_hw_block_list_node { * @pages: the physical page array. * @npages: num physical pages in the pack. * @total_size: total size of all the pages in this list. - * @exported_size: buffer exported size. * @node: used to attach to deletion list that is used when all the allocations are cleared * at the teardown of the context. * @mapping_cnt: number of shared mappings. @@ -2161,7 +2236,6 @@ struct hl_vm_phys_pg_pack { u64 *pages; u64 npages; u64 total_size; - u64 exported_size; struct list_head node; atomic_t mapping_cnt; u32 asid; @@ -2204,6 +2278,9 @@ struct hl_vm { u8 init_done; }; +#ifdef CONFIG_HL_HLDIO +#include "hldio.h" +#endif /* * DEBUG, PROFILING STRUCTURE @@ -2246,7 +2323,7 @@ struct hl_notifier_event { /** * struct hl_fpriv - process information stored in FD private data. * @hdev: habanalabs device structure. - * @filp: pointer to the given file structure. + * @file_priv: pointer to the DRM file private data structure. * @taskpid: current process ID. * @ctx: current executing context. 
TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. @@ -2261,7 +2338,7 @@ struct hl_notifier_event { */ struct hl_fpriv { struct hl_device *hdev; - struct file *filp; + struct drm_file *file_priv; struct pid *taskpid; struct hl_ctx *ctx; struct hl_ctx_mgr ctx_mgr; @@ -2274,7 +2351,6 @@ struct hl_fpriv { struct mutex ctx_lock; }; - /* * DebugFS */ @@ -2302,6 +2378,7 @@ struct hl_debugfs_entry { struct hl_dbg_device_entry *dev_entry; }; + /** * struct hl_dbg_device_entry - ASIC specific debugfs manager. * @root: root dentry. @@ -2318,7 +2395,7 @@ struct hl_debugfs_entry { * @userptr_list: list of available userptrs (virtual memory chunk descriptor). * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. - * @ctx_mem_hash_spinlock: protects cb_list. + * @ctx_mem_hash_mutex: protects list of available contexts with MMU mappings. * @data_dma_blob_desc: data DMA descriptor of blob. * @mon_dump_blob_desc: monitor dump descriptor of blob. * @state_dump: data of the system states in case of a bad cs. @@ -2333,6 +2410,7 @@ struct hl_debugfs_entry { * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read. + * @dio_stats: Direct I/O statistics */ struct hl_dbg_device_entry { struct dentry *root; @@ -2349,7 +2427,7 @@ struct hl_dbg_device_entry { struct list_head userptr_list; spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; - spinlock_t ctx_mem_hash_spinlock; + struct mutex ctx_mem_hash_mutex; struct debugfs_blob_wrapper data_dma_blob_desc; struct debugfs_blob_wrapper mon_dump_blob_desc; char *state_dump[HL_STATE_DUMP_HIST_LEN]; @@ -2364,6 +2442,35 @@ struct hl_dbg_device_entry { u8 i2c_addr; u8 i2c_reg; u8 i2c_len; +#ifdef CONFIG_HL_HLDIO + struct hl_dio_stats dio_stats; +#endif +}; + +/** + * struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of + * hl_debugfs_cfg_access. + * @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons. + * @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64. + * @addr: the requested address to access. + * @valid: if set, this entry has valid data for dumping at interrupt time. + */ +struct hl_debugfs_cfg_access_entry { + ktime_t seconds_since_epoch; + enum debugfs_access_type debugfs_type; + u64 addr; + bool valid; +}; + +/** + * struct hl_debugfs_cfg_access - saves debugfs config region access requests history. + * @cfg_access_list: list of objects describing config region access requests. + * @head: next valid index to add new entry to in cfg_access_list. 
+ */ +struct hl_debugfs_cfg_access { + struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN]; + u32 head; + spinlock_t lock; /* protects head and entries */ }; /** @@ -2502,7 +2609,7 @@ struct hl_state_dump_specs { * DEVICES */ -#define HL_STR_MAX 32 +#define HL_STR_MAX 64 #define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1) @@ -2554,12 +2661,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); ktime_t __timeout; \ u32 __elbi_read; \ int __rc = 0; \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min((u64)(timeout_us * 10), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ might_sleep_if(sleep_us); \ for (;;) { \ if (elbi) { \ @@ -2611,13 +2713,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); u8 __arr_idx; \ int __rc = 0; \ \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min(((u64)timeout_us * 10), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ - \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ might_sleep_if(sleep_us); \ if (arr_size >= 64) \ __rc = -EINVAL; \ @@ -2670,17 +2766,18 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); * updated directly by the device. If false, the host memory being polled will * be updated by host CPU. Required so host knows whether or not the memory * might need to be byte-swapped before returning value to caller. + * + * On the first 4 polling iterations the macro goes to sleep for short period of + * time that gradually increases and reaches sleep_us on the fifth iteration. */ #define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ mem_written_by_device) \ ({ \ + u64 __sleep_step_us; \ ktime_t __timeout; \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min((u64)(timeout_us * 100), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ + u8 __step = 8; \ + \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ might_sleep_if(sleep_us); \ for (;;) { \ /* Verify we read updates done by other cores or by device */ \ @@ -2696,8 +2793,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); (val) = le32_to_cpu(*(__le32 *) &(val)); \ break; \ } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + __sleep_step_us = sleep_us >> __step; \ + if (__sleep_step_us) \ + usleep_range((__sleep_step_us >> 2) + 1, __sleep_step_us); \ + __step >>= 1; \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) @@ -2717,6 +2816,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); usr_intr.type = intr_type; \ INIT_LIST_HEAD(&usr_intr.wait_list_head); \ spin_lock_init(&usr_intr.wait_list_lock); \ + INIT_LIST_HEAD(&usr_intr.ts_list_head); \ + spin_lock_init(&usr_intr.ts_list_lock); \ }) struct hwmon_chip_info; @@ -2974,8 +3075,8 @@ struct cs_timeout_info { * @cq_addr: the address of the current handled command buffer * @cq_size: the size of the current handled command buffer * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. - * should be equal to 1 incase of undefined opcode - * in Upper-CP (specific stream) and equal to 4 incase + * should be equal to 1 in case of undefined opcode + * in Upper-CP (specific stream) and equal to 4 in case * of undefined opcode in Lower-CP. * @engine_id: engine-id that the error occurred on * @stream_id: the stream id the error occurred on. 
In case the stream equals to @@ -3032,17 +3133,71 @@ struct razwi_info { }; /** + * struct hw_err_info - HW error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then a HW event was discovered for the + * first time after the driver has finished booting-up. + * currently we assume that only fatal events (that require hard-reset) are + * reported so we don't care of the others that might follow it. + * so once changed to 1, it will remain that way. + * TODO: support multiple events. + * @event_info_available: indicates that a HW event info is now available. + */ +struct hw_err_info { + struct hl_info_hw_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + +/** + * struct fw_err_info - FW error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then a FW event was discovered for the + * first time after the driver has finished booting-up. + * currently we assume that only fatal events (that require hard-reset) are + * reported so we don't care of the others that might follow it. + * so once changed to 1, it will remain that way. + * TODO: support multiple events. + * @event_info_available: indicates that a HW event info is now available. + */ +struct fw_err_info { + struct hl_info_fw_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + +/** + * struct engine_err_info - engine error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then an engine event was discovered for the + * first time after the driver has finished booting-up. + * @event_info_available: indicates that an engine event info is now available. + */ +struct engine_err_info { + struct hl_info_engine_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + + +/** * struct hl_error_info - holds information collected during an error. * @cs_timeout: CS timeout error information. * @razwi_info: RAZWI information. * @undef_opcode: undefined opcode information. * @page_fault_info: page fault information. + * @hw_err: (fatal) hardware error information. + * @fw_err: firmware error information. + * @engine_err: engine error information. */ struct hl_error_info { struct cs_timeout_info cs_timeout; struct razwi_info razwi_info; struct undefined_opcode_info undef_opcode; struct page_fault_info page_fault_info; + struct hw_err_info hw_err; + struct fw_err_info fw_err; + struct engine_err_info engine_err; }; /** @@ -3084,13 +3239,28 @@ struct hl_reset_info { }; /** + * struct eq_heartbeat_debug_info - stores debug info to be used upon heartbeat failure. + * @last_pq_heartbeat_ts: timestamp of the last test packet that was sent to FW. + * This packet is the trigger in FW to send the EQ heartbeat event. + * @last_eq_heartbeat_ts: timestamp of the last EQ heartbeat event that was received from FW. + * @heartbeat_event_counter: number of heartbeat events received. + * @cpu_queue_id: used to read the queue pi/ci + */ +struct eq_heartbeat_debug_info { + time64_t last_pq_heartbeat_ts; + time64_t last_eq_heartbeat_ts; + u32 heartbeat_event_counter; + u32 cpu_queue_id; +}; + +/** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pcie_bar_phys: array of available PCIe bars physical addresses. * (required only for PCI address match mode) * @pcie_bar: array of available PCIe bars virtual addresses. * @rmmio: configuration area address on SRAM. - * @cdev: related char device. 
+ * @drm: related DRM device. * @cdev_ctrl: char device for control operations only (INFO IOCTL) * @dev: related kernel basic device structure. * @dev_ctrl: related kernel device structure for the control device @@ -3104,6 +3274,8 @@ struct hl_reset_info { * @user_interrupt: array of hl_user_interrupt. upon the corresponding user * interrupt, driver will monitor the list of fences * registered to this interrupt. + * @tpc_interrupt: single TPC interrupt for all TPCs. + * @unexpected_error_interrupt: single interrupt for unexpected user error indication. * @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts. * upon any user CQ interrupt, driver will monitor the * list of fences registered to this common structure. @@ -3146,6 +3318,7 @@ struct hl_reset_info { * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. + * @debugfs_cfg_accesses: list of last debugfs config region accesses. * @cb_pool: list of pre allocated CBs. * @cb_pool_lock: protects the CB pool. * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. @@ -3169,9 +3342,15 @@ struct hl_reset_info { * @clk_throttling: holds information about current/previous clock throttling events * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. + * @heartbeat_debug_info: counters used to debug heartbeat failures. + * @hldio: describes habanalabs direct storage interaction interface. + * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling. * @stream_master_qid_arr: pointer to array with QIDs of master streams. - * @fw_major_version: major version of current loaded preboot. - * @fw_minor_version: minor version of current loaded preboot. + * @fw_inner_major_ver: the major of current loaded preboot inner version. + * @fw_inner_minor_ver: the minor of current loaded preboot inner version. + * @fw_sw_major_ver: the major of current loaded preboot SW version. + * @fw_sw_minor_ver: the minor of current loaded preboot SW version. + * @fw_sw_sub_minor_ver: the sub-minor of current loaded preboot SW version. * @dram_used_mem: current DRAM memory consumption. * @memory_scrub_val: the value to which the dram will be scrubbed to using cb scrub_device_dram * @timeout_jiffies: device CS timeout value. @@ -3199,6 +3378,7 @@ struct hl_reset_info { * drams are binned-out * @tpc_binning: contains mask of tpc engines that is received from the f/w which indicates which * tpc engines are binned-out + * @dmabuf_export_cnt: number of dma-buf exporting. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. * @major: habanalabs kernel driver major. @@ -3211,12 +3391,12 @@ struct hl_reset_info { * @rotator_binning: contains mask of rotators engines that is received from the f/w * which indicates which rotator engines are binned-out(Gaudi3 and above). * @id: device minor. - * @id_control: minor of the control device. - * @cdev_idx: char device index. Used for setting its name. + * @cdev_idx: char device index. * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. * @disabled: is device disabled. + * @cpld_shutdown: is cpld shutdown. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. 
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false @@ -3231,7 +3411,7 @@ struct hl_reset_info { * @in_debug: whether the device is in a state where the profiling/tracing infrastructure * can be used. This indication is needed because in some ASICs we need to do * specific operations to enable that infrastructure. - * @cdev_sysfs_created: were char devices and sysfs nodes created. + * @cdev_sysfs_debugfs_created: were char devices and sysfs/debugfs files created. * @stop_on_err: true if engines should stop on error. * @supports_sync_stream: is sync stream supported. * @sync_stream_queue_idx: helper index for sync stream queues initialization. @@ -3253,10 +3433,13 @@ struct hl_reset_info { * @supports_mmu_prefetch: true if prefetch is supported, otherwise false. * @reset_upon_device_release: reset the device when the user closes the file descriptor of the * device. + * @supports_ctx_switch: true if a ctx switch is required upon first submission. + * @support_preboot_binning: true if we support read binning info from preboot. + * @eq_heartbeat_received: indication that eq heartbeat event has received from FW. * @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing. * @fw_components: Controls which f/w components to load to the device. There are multiple f/w * stages and sometimes we want to stop at a certain stage. Used only for testing. - * @mmu_enable: Whether to enable or disable the device MMU(s). Used only for testing. + * @mmu_disable: Disable the device MMU(s). Used only for testing. * @cpu_queues_enable: Whether to enable queues communication vs. the f/w. Used only for testing. * @pldm: Whether we are running in Palladium environment. Used only for testing. * @hard_reset_on_fw_events: Whether to do device hard-reset when a fatal event is received from @@ -3266,15 +3449,13 @@ struct hl_reset_info { * Used only for testing. * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies * that the f/w is always alive. Used only for testing. - * @supports_ctx_switch: true if a ctx switch is required upon first submission. - * @support_preboot_binning: true if we support read binning info from preboot. 
*/ struct hl_device { struct pci_dev *pdev; u64 pcie_bar_phys[HL_PCI_NUM_BARS]; void __iomem *pcie_bar[HL_PCI_NUM_BARS]; void __iomem *rmmio; - struct cdev cdev; + struct drm_device drm; struct cdev cdev_ctrl; struct device *dev; struct device *dev_ctrl; @@ -3286,6 +3467,8 @@ struct hl_device { enum hl_asic_type asic_type; struct hl_cq *completion_queue; struct hl_user_interrupt *user_interrupt; + struct hl_user_interrupt tpc_interrupt; + struct hl_user_interrupt unexpected_error_interrupt; struct hl_user_interrupt common_user_cq_interrupt; struct hl_user_interrupt common_decoder_interrupt; struct hl_cs **shadow_cs_queue; @@ -3318,6 +3501,7 @@ struct hl_device { struct hwmon_chip_info *hl_chip_info; struct hl_dbg_device_entry hl_debugfs; + struct hl_debugfs_cfg_access debugfs_cfg_accesses; struct list_head cb_pool; spinlock_t cb_pool_lock; @@ -3352,9 +3536,18 @@ struct hl_device { struct hl_reset_info reset_info; + struct eq_heartbeat_debug_info heartbeat_debug_info; +#ifdef CONFIG_HL_HLDIO + struct hl_dio hldio; +#endif + cpumask_t irq_affinity_mask; + u32 *stream_master_qid_arr; - u32 fw_major_version; - u32 fw_minor_version; + u32 fw_inner_major_ver; + u32 fw_inner_minor_ver; + u32 fw_sw_major_ver; + u32 fw_sw_minor_ver; + u32 fw_sw_sub_minor_ver; atomic64_t dram_used_mem; u64 memory_scrub_val; u64 timeout_jiffies; @@ -3369,7 +3562,7 @@ struct hl_device { u64 fw_comms_poll_interval_usec; u64 dram_binning; u64 tpc_binning; - + atomic_t dmabuf_export_cnt; enum cpucp_card_types card_type; u32 major; u32 high_pll; @@ -3378,11 +3571,11 @@ struct hl_device { u32 device_release_watchdog_timeout_sec; u32 rotator_binning; u16 id; - u16 id_control; u16 cdev_idx; u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; u8 disabled; + u8 cpld_shutdown; u8 late_init_done; u8 hwmon_initialized; u8 reset_on_lockup; @@ -3392,7 +3585,7 @@ struct hl_device { u8 init_done; u8 device_cpu_disabled; u8 in_debug; - u8 cdev_sysfs_created; + u8 cdev_sysfs_debugfs_created; u8 stop_on_err; u8 supports_sync_stream; u8 sync_stream_queue_idx; @@ -3411,11 +3604,12 @@ struct hl_device { u8 reset_upon_device_release; u8 supports_ctx_switch; u8 support_preboot_binning; + u8 eq_heartbeat_received; - /* Parameters for bring-up */ + /* Parameters for bring-up to be upstreamed */ u64 nic_ports_mask; u64 fw_components; - u8 mmu_enable; + u8 mmu_disable; u8 cpu_queues_enable; u8 pldm; u8 hard_reset_on_fw_events; @@ -3424,6 +3618,9 @@ struct hl_device { u8 heartbeat; }; +/* Retrieve PCI device name in case of a PCI device or dev name in simulator */ +#define HL_DEV_NAME(hdev) \ + ((hdev)->pdev ? dev_name(&(hdev)->pdev->dev) : "NA-DEVICE") /** * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure @@ -3450,6 +3647,20 @@ struct hl_cs_encaps_sig_handle { u32 count; }; +/** + * struct hl_info_fw_err_info - firmware error information structure + * @err_type: The type of error detected (or reported). + * @event_mask: Pointer to the event mask to be modified with the detected error flag + * (can be NULL) + * @event_id: The id of the event that reported the error + * (applicable when err_type is HL_INFO_FW_REPORTED_ERR). 
+ */ +struct hl_info_fw_err_info { + enum hl_info_fw_err_type err_type; + u64 *event_mask; + u16 event_id; +}; + /* * IOCTLs */ @@ -3474,7 +3685,6 @@ struct hl_ioctl_desc { hl_ioctl_t *func; }; - /* * Kernel module functions that can be accessed by entire module */ @@ -3518,6 +3728,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size, return false; } +static inline struct hl_device *to_hl_device(struct drm_device *ddev) +{ + return container_of(ddev, struct hl_device, drm); +} + /** * hl_mem_area_crosses_range() - Checks whether address+size crossing a range. * @address: The start address of the area we want to validate. @@ -3537,20 +3752,23 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, } uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); +void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle); +void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, gfp_t flag, const char *caller); void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, dma_addr_t dma_handle, const char *caller); -void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, - dma_addr_t *dma_handle, const char *caller); -void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, - const char *caller); void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, dma_addr_t *dma_handle, const char *caller); void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, const char *caller); -int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); -void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, +int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller); +void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller); +int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir); +void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar); @@ -3558,7 +3776,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type); int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); -int hl_device_open(struct inode *inode, struct file *filp); + +int hl_mmap(struct file *filp, struct vm_area_struct *vma); + +int hl_device_open(struct drm_device *drm, struct drm_file *file_priv); +void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv); + int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, enum hl_device_status *status); @@ -3587,11 +3810,13 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); +void hl_eq_dump(struct hl_device 
*hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); -irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg); -irqreturn_t hl_irq_handler_default(int irq, void *arg); +irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg); +irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg); +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); @@ -3612,7 +3837,7 @@ int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr, void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); -int hl_device_init(struct hl_device *hdev, struct class *hclass); +int hl_device_init(struct hl_device *hdev); void hl_device_fini(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev); @@ -3662,6 +3887,7 @@ bool cs_needs_timeout(struct hl_cs *cs); bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs); struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq); void hl_multi_cs_completion_init(struct hl_device *hdev); +u32 hl_get_active_cs_num(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev); @@ -3737,10 +3963,9 @@ struct pgt_info *hl_mmu_hr_get_alloc_next_hop(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop); int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops, struct hl_hr_mmu_funcs *hr_func); -void hl_mmu_swap_out(struct hl_ctx *ctx); -void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); +void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, @@ -3748,7 +3973,24 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr); u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr); bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); - +struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr); +void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr); +void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info); +u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx); +u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx); +void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val); +void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val); +void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr); +u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); +void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr); +int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr); +u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop); +u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx); +void hl_mmu_dr_flush(struct hl_ctx *ctx); +int hl_mmu_dr_init(struct hl_device *hdev); +void hl_mmu_dr_fini(struct hl_device *hdev); + +int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor); int 
hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value); @@ -3796,6 +4038,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); int hl_fw_send_device_activity(struct hl_device *hdev, bool open); +int hl_fw_send_soft_reset(struct hl_device *hdev); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); @@ -3821,6 +4064,8 @@ long hl_fw_get_max_power(struct hl_device *hdev); void hl_fw_set_max_power(struct hl_device *hdev); int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, u32 nonce); +int hl_fw_get_dev_info_signed(struct hl_device *hdev, + struct cpucp_dev_info_signed *dev_info_signed, u32 nonce); int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); @@ -3845,7 +4090,7 @@ void hl_dec_fini(struct hl_device *hdev); void hl_dec_ctx_fini(struct hl_ctx *ctx); void hl_release_pending_user_interrupts(struct hl_device *hdev); -void hl_abort_waitings_for_completion(struct hl_device *hdev); +void hl_abort_waiting_for_cs_completions(struct hl_device *hdev); int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig); @@ -3860,7 +4105,8 @@ char *hl_format_as_binary(char *buf, size_t buf_len, u32 n); const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg); -void hl_mem_mgr_fini(struct hl_mem_mgr *mmg); +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats); +void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg); int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, void *args); struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, @@ -3879,13 +4125,21 @@ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_o void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu); void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, u64 *event_mask); +void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask); +void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info); +void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count); +void hl_enable_err_info_capture(struct hl_error_info *captured_err_info); +void hl_init_cpu_for_irq(struct hl_device *hdev); +void hl_set_irq_affinity(struct hl_device *hdev, int irq); +void hl_eq_heartbeat_event_handle(struct hl_device *hdev); +void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask); #ifdef CONFIG_DEBUG_FS -void hl_debugfs_init(void); -void hl_debugfs_fini(void); +int hl_debugfs_device_init(struct hl_device *hdev); +void hl_debugfs_device_fini(struct hl_device *hdev); void hl_debugfs_add_device(struct hl_device *hdev); -void hl_debugfs_remove_device(struct 
hl_device *hdev); void hl_debugfs_add_file(struct hl_fpriv *hpriv); void hl_debugfs_remove_file(struct hl_fpriv *hpriv); void hl_debugfs_add_cb(struct hl_cb *cb); @@ -3901,14 +4155,16 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, unsigned long length); +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev); #else -static inline void __init hl_debugfs_init(void) +static inline int hl_debugfs_device_init(struct hl_device *hdev) { + return 0; } -static inline void hl_debugfs_fini(void) +static inline void hl_debugfs_device_fini(struct hl_device *hdev) { } @@ -3916,10 +4172,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev) { } -static inline void hl_debugfs_remove_device(struct hl_device *hdev) -{ -} - static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv) { } @@ -3979,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev, { } +static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ +} + #endif /* Security */ @@ -4031,11 +4287,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset, const u32 pb_blocks[], u32 blocks_array_size); /* IOCTLs */ -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); #endif /* HABANALABSP_H_ */ diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c index 03dae57dc838..0035748f3228 100644 --- a/drivers/accel/habanalabs/common/habanalabs_drv.c +++ b/drivers/accel/habanalabs/common/habanalabs_drv.c @@ -12,8 +12,13 @@ #include "../include/hw_ip/pci/pci_general.h" #include <linux/pci.h> -#include <linux/aer.h> #include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/version.h> + +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> +#include <drm/drm_ioctl.h> #define CREATE_TRACE_POINTS #include <trace/events/habanalabs.h> @@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC); MODULE_LICENSE("GPL v2"); static int hl_major; -static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); @@ -55,8 +59,6 @@ module_param(boot_error_status_mask, ulong, 0444); MODULE_PARM_DESC(boot_error_status_mask, "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. 
Default all 1's)"); -#define PCI_VENDOR_ID_HABANALABS 0x1da3 - #define PCI_IDS_GOYA 0x0001 #define PCI_IDS_GAUDI 0x1000 #define PCI_IDS_GAUDI_SEC 0x1010 @@ -72,6 +74,41 @@ static const struct pci_device_id ids[] = { }; MODULE_DEVICE_TABLE(pci, ids); +static const struct drm_ioctl_desc hl_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0), +}; + +static const struct file_operations hl_fops = { + .owner = THIS_MODULE, + .open = accel_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, + .mmap = hl_mmap +}; + +static const struct drm_driver hl_driver = { + .driver_features = DRIVER_COMPUTE_ACCEL, + + .name = HL_NAME, + .desc = HL_DRIVER_DESC, + .major = LINUX_VERSION_MAJOR, + .minor = LINUX_VERSION_PATCHLEVEL, + .patchlevel = LINUX_VERSION_SUBLEVEL, + + .fops = &hl_fops, + .open = hl_device_open, + .postclose = hl_device_release, + .ioctls = hl_drm_ioctls, + .num_ioctls = ARRAY_SIZE(hl_drm_ioctls) +}; + /* * get_asic_type - translate device id to asic type * @@ -103,6 +140,12 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev) case REV_ID_B: asic_type = ASIC_GAUDI2B; break; + case REV_ID_C: + asic_type = ASIC_GAUDI2C; + break; + case REV_ID_D: + asic_type = ASIC_GAUDI2D; + break; default: break; } @@ -125,43 +168,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type) } /* - * hl_device_open - open function for habanalabs device - * - * @inode: pointer to inode structure - * @filp: pointer to file structure + * hl_device_open() - open function for habanalabs device. + * @ddev: pointer to DRM device structure. + * @file_priv: pointer to DRM file private data structure. * * Called when process opens an habanalabs device. 
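The three tables above carry the whole char-dev-to-DRM-accel conversion: DRM now dispatches the driver's ioctls, and the device node is opened through accel_open(). For readers new to the accel framework, a minimal, self-contained sketch of the same registration pattern follows; the foo_* names are hypothetical and stand in for a driver's own types:

#include <linux/module.h>
#include <linux/pci.h>

#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>

struct foo_device {
	struct drm_device drm;	/* must be embedded; devm_drm_dev_alloc() relies on it */
};

static const struct file_operations foo_fops = {
	.owner = THIS_MODULE,
	.open = accel_open,	/* accel nodes are opened via accel_open(), not drm_open() */
	.release = drm_release,
	.unlocked_ioctl = drm_ioctl,
};

static const struct drm_driver foo_drm_driver = {
	.driver_features = DRIVER_COMPUTE_ACCEL,	/* exposes /dev/accel/accelN, no render node */
	.name = "foo",
	.fops = &foo_fops,
};

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct foo_device *fdev;

	/* Device lifetime is tied to the DRM refcount; no manual kfree() on error paths */
	fdev = devm_drm_dev_alloc(&pdev->dev, &foo_drm_driver, struct foo_device, drm);
	if (IS_ERR(fdev))
		return PTR_ERR(fdev);

	/* hardware bring-up would go here */

	return drm_dev_register(&fdev->drm, 0);
}

This is exactly the shape create_hdev() takes further down: devm_drm_dev_alloc() replaces the kzalloc()/kfree() pair, which is why the error paths below lose their free_hdev label.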
*/ -int hl_device_open(struct inode *inode, struct file *filp) +int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv) { + struct hl_device *hdev = to_hl_device(ddev); enum hl_device_status status; - struct hl_device *hdev; struct hl_fpriv *hpriv; int rc; - mutex_lock(&hl_devs_idr_lock); - hdev = idr_find(&hl_devs_idr, iminor(inode)); - mutex_unlock(&hl_devs_idr_lock); - - if (!hdev) { - pr_err("Couldn't find device %d:%d\n", - imajor(inode), iminor(inode)); - return -ENXIO; - } - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); if (!hpriv) return -ENOMEM; hpriv->hdev = hdev; - filp->private_data = hpriv; - hpriv->filp = filp; - mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->restore_phase_mutex); mutex_init(&hpriv->ctx_lock); kref_init(&hpriv->refcount); - nonseekable_open(inode, filp); hl_ctx_mgr_init(&hpriv->ctx_mgr); hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); @@ -221,24 +249,22 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); - atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); - atomic_set(&hdev->captured_err_info.razwi_info.razwi_detected, 0); - atomic_set(&hdev->captured_err_info.page_fault_info.page_fault_detected, 0); - hdev->captured_err_info.undef_opcode.write_enable = true; - hdev->captured_err_info.razwi_info.razwi_info_available = false; - hdev->captured_err_info.page_fault_info.page_fault_info_available = false; + hl_enable_err_info_capture(&hdev->captured_err_info); hdev->open_counter++; hdev->last_successful_open_jif = jiffies; hdev->last_successful_open_ktime = ktime_get(); + file_priv->driver_priv = hpriv; + hpriv->file_priv = file_priv; + return 0; out_err: mutex_unlock(&hdev->fpriv_list_lock); - hl_mem_mgr_fini(&hpriv->mem_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr, NULL); + hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - filp->private_data = NULL; mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); mutex_destroy(&hpriv->notifier_event.lock); @@ -274,9 +300,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) */ hpriv->hdev = hdev; filp->private_data = hpriv; - hpriv->filp = filp; - mutex_init(&hpriv->notifier_event.lock); nonseekable_open(inode, filp); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); @@ -310,7 +334,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) { hdev->nic_ports_mask = 0; hdev->fw_components = FW_TYPE_ALL_TYPES; - hdev->mmu_enable = MMU_EN_ALL; hdev->cpu_queues_enable = 1; hdev->pldm = 0; hdev->hard_reset_on_fw_events = 1; @@ -338,8 +361,7 @@ static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) * a different default timeout for Gaudi */ if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) - hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * - MSEC_PER_SEC); + hdev->timeout_jiffies = secs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED); hdev->reset_upon_device_release = 0; break; @@ -364,7 +386,7 @@ static int fixup_device_params(struct hl_device *hdev) hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; if (tmp_timeout) - hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); + hdev->timeout_jiffies = secs_to_jiffies(tmp_timeout); else hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; @@ -384,12 +406,36 @@ static int fixup_device_params(struct hl_device *hdev) /* If CPU queues not enabled, no way to do heartbeat */ if (!hdev->cpu_queues_enable) hdev->heartbeat = 0; - fixup_device_params_per_asic(hdev, 
tmp_timeout); return 0; } +static int allocate_device_id(struct hl_device *hdev) +{ + int id; + + mutex_lock(&hl_devs_idr_lock); + id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); + mutex_unlock(&hl_devs_idr_lock); + + if (id < 0) { + if (id == -ENOSPC) + pr_err("too many devices in the system\n"); + return -EBUSY; + } + + hdev->id = id; + + /* + * Initially set to the internal device ID; updated after the DRM device + * registration to hold the minor ID. + */ + hdev->cdev_idx = hdev->id; + + return 0; +} + /** * create_hdev - create habanalabs device instance * @@ -402,27 +448,29 @@ static int fixup_device_params(struct hl_device *hdev) */ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) { - int main_id, ctrl_id = 0, rc = 0; struct hl_device *hdev; + int rc; *dev = NULL; - hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); - if (!hdev) - return -ENOMEM; + hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm); + if (IS_ERR(hdev)) + return PTR_ERR(hdev); + + hdev->dev = hdev->drm.dev; /* Will be NULL in case of simulator device */ hdev->pdev = pdev; /* Assign status description string */ - strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], - "in device creation", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], - "in reset after device release", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], + "in device creation", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], + "in reset after device release", HL_STR_MAX); /* First, we must find out which ASIC are we handling. This is needed @@ -432,7 +480,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) if (hdev->asic_type == ASIC_INVALID) { dev_err(&pdev->dev, "Unsupported ASIC\n"); rc = -ENODEV; - goto free_hdev; + goto out_err; } copy_kernel_module_params_to_device(hdev); @@ -441,42 +489,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) fixup_device_params(hdev); - mutex_lock(&hl_devs_idr_lock); - - /* Always save 2 numbers, 1 for main device and 1 for control. 
- * They must be consecutive - */ - main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); - - if (main_id >= 0) - ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, - main_id + 2, GFP_KERNEL); - - mutex_unlock(&hl_devs_idr_lock); - - if ((main_id < 0) || (ctrl_id < 0)) { - if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) - pr_err("too many devices in the system\n"); - - if (main_id >= 0) { - mutex_lock(&hl_devs_idr_lock); - idr_remove(&hl_devs_idr, main_id); - mutex_unlock(&hl_devs_idr_lock); - } - - rc = -EBUSY; - goto free_hdev; - } - - hdev->id = main_id; - hdev->id_control = ctrl_id; + rc = allocate_device_id(hdev); + if (rc) + goto out_err; *dev = hdev; return 0; -free_hdev: - kfree(hdev); +out_err: return rc; } @@ -491,10 +512,8 @@ static void destroy_hdev(struct hl_device *hdev) { /* Remove device from the device list */ mutex_lock(&hl_devs_idr_lock); idr_remove(&hl_devs_idr, hdev->id); - idr_remove(&hl_devs_idr, hdev->id_control); mutex_unlock(&hl_devs_idr_lock); - kfree(hdev); } static int hl_pmops_suspend(struct device *dev) @@ -550,9 +569,7 @@ static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, hdev); - pci_enable_pcie_error_reporting(pdev); - - rc = hl_device_init(hdev, hl_class); + rc = hl_device_init(hdev); if (rc) { dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); rc = -ENODEV; @@ -562,7 +579,6 @@ static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; disable_device: - pci_disable_pcie_error_reporting(pdev); pci_set_drvdata(pdev, NULL); destroy_hdev(hdev); @@ -585,7 +601,6 @@ static void hl_pci_remove(struct pci_dev *pdev) return; hl_device_fini(hdev); - pci_disable_pcie_error_reporting(pdev); pci_set_drvdata(pdev, NULL); destroy_hdev(hdev); } @@ -659,6 +674,38 @@ static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } +static void hl_pci_reset_prepare(struct pci_dev *pdev) +{ + struct hl_device *hdev; + + hdev = pci_get_drvdata(pdev); + if (!hdev) + return; + + hdev->disabled = true; +} + +static void hl_pci_reset_done(struct pci_dev *pdev) +{ + struct hl_device *hdev; + u32 flags; + + hdev = pci_get_drvdata(pdev); + if (!hdev) + return; + + /* + * Schedule a thread to trigger hard reset. + * The reason for this handler is the rare case where the driver is up + * and an FLR occurs. This is valid only when working with no VM, so FW handles the FLR + * and resets the device. FW will go back to the preboot stage, so the driver needs to + * perform a hard reset in order to load the FW FIT again. 
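One behavioral note on the strncpy() to strscpy() switch in create_hdev() above: strscpy() always NUL-terminates the destination and reports truncation instead of silently zero-padding. A tiny freestanding illustration (the buffer size here is made up):

#include <linux/errno.h>
#include <linux/string.h>

static void strscpy_example(void)
{
	char buf[8];
	ssize_t n;

	n = strscpy(buf, "operational", sizeof(buf));
	/* n == -E2BIG: the source was truncated, buf now holds "operati" + NUL */
	(void)n;
}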
+ */ + flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW; + + hl_device_reset(hdev, flags); +} + static const struct dev_pm_ops hl_pm_ops = { .suspend = hl_pmops_suspend, .resume = hl_pmops_resume, @@ -668,6 +715,8 @@ static const struct pci_error_handlers hl_pci_err_handler = { .error_detected = hl_pci_err_detected, .slot_reset = hl_pci_err_slot_reset, .resume = hl_pci_err_resume, + .reset_prepare = hl_pci_reset_prepare, + .reset_done = hl_pci_reset_done, }; static struct pci_driver hl_pci_driver = { @@ -702,28 +751,16 @@ static int __init hl_init(void) hl_major = MAJOR(dev); - hl_class = class_create(THIS_MODULE, HL_NAME); - if (IS_ERR(hl_class)) { - pr_err("failed to allocate class\n"); - rc = PTR_ERR(hl_class); - goto remove_major; - } - - hl_debugfs_init(); - rc = pci_register_driver(&hl_pci_driver); if (rc) { pr_err("failed to register pci device\n"); - goto remove_debugfs; + goto remove_major; } pr_debug("driver loaded\n"); return 0; -remove_debugfs: - hl_debugfs_fini(); - class_destroy(hl_class); remove_major: unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); return rc; @@ -736,14 +773,6 @@ static void __exit hl_exit(void) { pci_unregister_driver(&hl_pci_driver); - /* - * Removing debugfs must be after all devices or simulator devices - * have been removed because otherwise we get a bug in the - * debugfs module for referencing NULL objects - */ - hl_debugfs_fini(); - - class_destroy(hl_class); unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); idr_destroy(&hl_devs_idr); diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index 5005e6fca691..fdfdabc85e54 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -17,6 +17,9 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> +/* make sure there is space for all the signed info */ +static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ); + static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), @@ -62,7 +65,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev); hw_ip.sram_base_address = prop->sram_user_base_address; hw_ip.dram_base_address = - hdev->mmu_enable && prop->dram_supports_virtual_memory ? + prop->dram_supports_virtual_memory ? 
prop->dmmu.start_addr : prop->dram_user_base_address; hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask & 0xFF; hw_ip.tpc_enabled_mask_ext = prop->tpc_enabled_mask; @@ -71,11 +74,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) dram_available_size = prop->dram_size - dram_kmd_size; - if (hdev->mmu_enable == MMU_EN_ALL) - hw_ip.dram_size = DIV_ROUND_DOWN_ULL(dram_available_size, - prop->dram_page_size) * prop->dram_page_size; - else - hw_ip.dram_size = dram_available_size; + hw_ip.dram_size = DIV_ROUND_DOWN_ULL(dram_available_size, prop->dram_page_size) * + prop->dram_page_size; if (hw_ip.dram_size > PAGE_SIZE) hw_ip.dram_enabled = 1; @@ -102,11 +102,15 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.mme_master_slave_mode = prop->mme_master_slave_mode; hw_ip.first_available_interrupt_id = prop->first_available_user_interrupt; hw_ip.number_of_user_interrupts = prop->user_interrupt_count; + hw_ip.tpc_interrupt_id = prop->tpc_interrupt_id; hw_ip.edma_enabled_mask = prop->edma_enabled_mask; hw_ip.server_type = prop->server_type; hw_ip.security_enabled = prop->fw_security_enabled; hw_ip.revision_id = hdev->pdev->revision; + hw_ip.rotator_enabled_mask = prop->rotator_enabled_mask; + hw_ip.engine_core_interrupt_reg_addr = prop->engine_core_interrupt_reg_addr; + hw_ip.reserved_dram_size = dram_kmd_size; return copy_to_user(out, &hw_ip, min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0; @@ -319,6 +323,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) time_sync.device_time = hdev->asic_funcs->get_device_time(hdev); time_sync.host_time = ktime_get_raw_ns(); + time_sync.tsc_time = rdtsc(); return copy_to_user(out, &time_sync, min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; @@ -681,7 +686,7 @@ static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if (!sec_attest_info) return -ENOMEM; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { rc = -ENOMEM; goto free_sec_attest_info; @@ -715,6 +720,53 @@ free_sec_attest_info: return rc; } +static int dev_info_signed(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct cpucp_dev_info_signed *dev_info_signed; + struct hl_info_signed *info; + u32 max_size = args->return_size; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + dev_info_signed = kzalloc(sizeof(*dev_info_signed), GFP_KERNEL); + if (!dev_info_signed) + return -ENOMEM; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto free_dev_info_signed; + } + + rc = hl_fw_get_dev_info_signed(hpriv->hdev, + dev_info_signed, args->sec_attest_nonce); + if (rc) + goto free_info; + + info->nonce = le32_to_cpu(dev_info_signed->nonce); + info->info_sig_len = dev_info_signed->info_sig_len; + info->pub_data_len = le16_to_cpu(dev_info_signed->pub_data_len); + info->certificate_len = le16_to_cpu(dev_info_signed->certificate_len); + info->dev_info_len = sizeof(struct cpucp_info); + memcpy(&info->info_sig, &dev_info_signed->info_sig, sizeof(info->info_sig)); + memcpy(&info->public_data, &dev_info_signed->public_data, sizeof(info->public_data)); + memcpy(&info->certificate, &dev_info_signed->certificate, sizeof(info->certificate)); + memcpy(&info->dev_info, &dev_info_signed->info, info->dev_info_len); + + rc = copy_to_user(out, info, min_t(size_t, max_size, sizeof(*info))) ? 
-EFAULT : 0; + +free_info: + kfree(info); +free_dev_info_signed: + kfree(dev_info_signed); + + return rc; +} + + static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args) { int rc; @@ -830,6 +882,72 @@ static int user_mappings_info(struct hl_fpriv *hpriv, struct hl_info_args *args) return copy_to_user(out, pgf_info->user_mappings, actual_size) ? -EFAULT : 0; } +static int hw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer; + struct hl_device *hdev = hpriv->hdev; + u32 user_buf_size = args->return_size; + struct hw_err_info *info; + int rc; + + if (!user_buf) + return -EINVAL; + + info = &hdev->captured_err_info.hw_err; + if (!info->event_info_available) + return 0; + + if (user_buf_size < sizeof(struct hl_info_hw_err_event)) + return -ENOMEM; + + rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_hw_err_event)); + return rc ? -EFAULT : 0; +} + +static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer; + struct hl_device *hdev = hpriv->hdev; + u32 user_buf_size = args->return_size; + struct fw_err_info *info; + int rc; + + if (!user_buf) + return -EINVAL; + + info = &hdev->captured_err_info.fw_err; + if (!info->event_info_available) + return 0; + + if (user_buf_size < sizeof(struct hl_info_fw_err_event)) + return -ENOMEM; + + rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_fw_err_event)); + return rc ? -EFAULT : 0; +} + +static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer; + struct hl_device *hdev = hpriv->hdev; + u32 user_buf_size = args->return_size; + struct engine_err_info *info; + int rc; + + if (!user_buf) + return -EINVAL; + + info = &hdev->captured_err_info.engine_err; + if (!info->event_info_available) + return 0; + + if (user_buf_size < sizeof(struct hl_info_engine_err_event)) + return -ENOMEM; + + rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event)); + return rc ? 
-EFAULT : 0; +} + static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args) { void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer; @@ -843,6 +961,12 @@ static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args * case HL_PASSTHROUGH_VERSIONS: need_input_buff = false; break; + case HL_GET_ERR_COUNTERS_CMD: + need_input_buff = true; + break; + case HL_GET_P_STATE: + need_input_buff = false; + break; default: return -EINVAL; } @@ -950,6 +1074,17 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_UNREGISTER_EVENTFD: return eventfd_unregister(hpriv, args); + case HL_INFO_HW_ERR_EVENT: + return hw_err_info(hpriv, args); + + case HL_INFO_FW_ERR_EVENT: + return fw_err_info(hpriv, args); + + case HL_INFO_USER_ENGINE_ERR_EVENT: + return engine_err_info(hpriv, args); + + case HL_INFO_DRAM_USAGE: + return dram_usage_info(hpriv, args); default: break; } @@ -962,10 +1097,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, } switch (args->op) { - case HL_INFO_DRAM_USAGE: - rc = dram_usage_info(hpriv, args); - break; - case HL_INFO_HW_IDLE: rc = hw_idle(hdev, args); break; @@ -1012,6 +1143,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_FW_GENERIC_REQ: return send_fw_generic_request(hdev, args); + case HL_INFO_DEV_SIGNED: + return dev_info_signed(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -EINVAL; @@ -1021,20 +1155,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, return rc; } -static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); } static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) { + struct hl_info_args *args = data; + + switch (args->op) { + case HL_INFO_GET_EVENTS: + case HL_INFO_UNREGISTER_EVENTFD: + case HL_INFO_REGISTER_EVENTFD: + return -EOPNOTSUPP; + default: + break; + } + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); } -static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { - struct hl_debug_args *args = data; + struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = hpriv->hdev; + struct hl_debug_args *args = data; enum hl_device_status status; int rc = 0; @@ -1077,25 +1225,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) } #define HL_IOCTL_DEF(ioctl, _func) \ - [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} - -static const struct hl_ioctl_desc hl_ioctls[] = { - HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), - HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), - HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), - HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl), - HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), - HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) -}; + [_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func} static const struct hl_ioctl_desc hl_ioctls_control[] = { - HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) + HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control) }; -static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, - const struct hl_ioctl_desc *ioctl, struct device *dev) +static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg, + const struct hl_ioctl_desc *ioctl, struct device 
*dev) { - struct hl_fpriv *hpriv = filep->private_data; unsigned int nr = _IOC_NR(cmd); char stack_kdata[128] = {0}; char *kdata = NULL; @@ -1146,8 +1284,9 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, out_err: if (retcode) - dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", - task_pid_nr(current), cmd, nr); + dev_dbg_ratelimited(dev, + "error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n", + task_pid_nr(current), current->comm, cmd, nr); if (kdata != stack_kdata) kfree(kdata); @@ -1155,29 +1294,6 @@ out_err: return retcode; } -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) -{ - struct hl_fpriv *hpriv = filep->private_data; - struct hl_device *hdev = hpriv->hdev; - const struct hl_ioctl_desc *ioctl = NULL; - unsigned int nr = _IOC_NR(cmd); - - if (!hdev) { - pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); - return -ENODEV; - } - - if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { - ioctl = &hl_ioctls[nr]; - } else { - dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); - return -ENOTTY; - } - - return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev); -} - long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) { struct hl_fpriv *hpriv = filep->private_data; @@ -1190,13 +1306,14 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) return -ENODEV; } - if (nr == _IOC_NR(HL_IOCTL_INFO)) { - ioctl = &hl_ioctls_control[nr]; + if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) { + ioctl = &hl_ioctls_control[nr - HL_COMMAND_START]; } else { - dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); + dev_dbg_ratelimited(hdev->dev_ctrl, + "invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n", + task_pid_nr(current), current->comm, cmd, nr); return -ENOTTY; } - return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); + return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl); } diff --git a/drivers/accel/habanalabs/common/hldio.c b/drivers/accel/habanalabs/common/hldio.c new file mode 100644 index 000000000000..083ae5610875 --- /dev/null +++ b/drivers/accel/habanalabs/common/hldio.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2024 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "hldio.h" +#include <generated/uapi/linux/version.h> +#include <linux/pci-p2pdma.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> + +/* + * NVMe Direct I/O implementation for habanalabs driver + * + * ASSUMPTIONS + * =========== + * 1. No IOMMU (well, technically it can work with an IOMMU, but then it is almost useless). + * 2. Only READ operations (can extend in the future). + * 3. No sparse files (can overcome this in the future). + * 4. Kernel version >= 6.9 + * 5. Requiring page alignment is OK (I don't see a solution to this one right + * now; how do we read partial pages?) + * 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel. + * Theoretically I have a slight idea of how this could be solved, but it + * is probably unacceptable for upstream. Also may not work in the end. + * 7. Either make sure our cards and disks are under the same PCI bridge, or + * compile a custom kernel to hack around this. + */ + +#define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */ + +/* + * This struct contains all the useful data I could milk out of the file handle + * provided by the user. 
+ * @TODO: right now it is retrieved on each IO, but can be done once with some + * dedicated IOCTL, call it for example HL_REGISTER_HANDLE. + */ +struct hl_dio_fd { + /* Back pointer in case we need it in async completion */ + struct hl_ctx *ctx; + /* Associated fd struct */ + struct file *filp; +}; + +/* + * This is a single IO descriptor + */ +struct hl_direct_io { + struct hl_dio_fd f; + struct kiocb kio; + struct bio_vec *bv; + struct iov_iter iter; + u64 device_va; + u64 off_bytes; + u64 len_bytes; + u32 type; +}; + +bool hl_device_supports_nvme(struct hl_device *hdev) +{ + return hdev->asic_prop.supports_nvme; +} + +static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f) +{ + struct hl_device *hdev = ctx->hdev; + struct block_device *bd; + struct super_block *sb; + struct inode *inode; + struct gendisk *gd; + struct device *disk_dev; + int rc; + + f->filp = fget(fd); + if (!f->filp) { + rc = -ENOENT; + goto out; + } + + if (!(f->filp->f_flags & O_DIRECT)) { + dev_err(hdev->dev, "file is not open in O_DIRECT mode\n"); + rc = -EINVAL; + goto fput; + } + + if (!f->filp->f_op->read_iter) { + dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n"); + rc = -EINVAL; + goto fput; + } + + inode = file_inode(f->filp); + sb = inode->i_sb; + bd = sb->s_bdev; + gd = bd ? bd->bd_disk : NULL; + + /* Validate the block device before dereferencing it any further */ + if (!bd || !gd) { + dev_err(hdev->dev, "invalid block device\n"); + rc = -ENODEV; + goto fput; + } + + if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) { + dev_err(hdev->dev, "sparse files are not currently supported\n"); + rc = -EINVAL; + goto fput; + } + + /* Get the underlying device from the block device */ + disk_dev = disk_to_dev(gd); + if (!dma_pci_p2pdma_supported(disk_dev)) { + dev_err(hdev->dev, "device does not support PCI P2P DMA\n"); + rc = -EOPNOTSUPP; + goto fput; + } + + /* + * @TODO: Maybe we need additional checks here + */ + + f->ctx = ctx; + rc = 0; + + goto out; +fput: + fput(f->filp); +out: + return rc; +} + +static void hl_dio_fd_unregister(struct hl_dio_fd *f) +{ + fput(f->filp); +} + +static long hl_dio_count_io(struct hl_device *hdev) +{ + s64 sum = 0; + int i; + + for_each_possible_cpu(i) + sum += per_cpu(*hdev->hldio.inflight_ios, i); + + return sum; +} + +static bool hl_dio_get_iopath(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->hldio.io_enabled) { + this_cpu_inc(*hdev->hldio.inflight_ios); + + /* Re-check after the increment to close the race with hl_dio_stop() */ + if (!hdev->hldio.io_enabled) { + this_cpu_dec(*hdev->hldio.inflight_ios); + return false; + } + + hl_ctx_get(ctx); + + return true; + } + + return false; +} + +static void hl_dio_put_iopath(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + hl_ctx_put(ctx); + this_cpu_dec(*hdev->hldio.inflight_ios); +} + +static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled) +{ + hdev->hldio.io_enabled = enabled; +} + +static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io) +{ + if ((u64)io->device_va & ~PAGE_MASK) { + dev_dbg(hdev->dev, "device address must be 4K aligned\n"); + return false; + } + + if (io->len_bytes & ~PAGE_MASK) { + dev_dbg(hdev->dev, "IO length must be 4K aligned\n"); + return false; + } + + if (io->off_bytes & ~PAGE_MASK) { + dev_dbg(hdev->dev, "IO offset must be 4K aligned\n"); + return false; + } + + return true; +} + +static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va) +{ + struct hl_dio *hldio = &hdev->hldio; + u64 device_pa; + int rc, i; + + rc = hl_mmu_va_to_pa(ctx, 
device_va, &device_pa); + if (rc) { + dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)", + device_va, rc); + return NULL; + } + + for (i = 0 ; i < hldio->np2prs ; ++i) { + if (device_pa >= hldio->p2prs[i].device_pa && + device_pa < hldio->p2prs[i].device_pa + hldio->p2prs[i].size) + return hldio->p2prs[i].p2ppages[(device_pa - hldio->p2prs[i].device_pa) >> + PAGE_SHIFT]; + } + + return NULL; +} + +static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io) +{ + u64 npages, device_va; + ssize_t rc; + int i; + + if (!hl_dio_validate_io(hdev, io)) + return -EINVAL; + + if (!hl_dio_get_iopath(io->f.ctx)) { + dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n"); + return -ESHUTDOWN; + } + + init_sync_kiocb(&io->kio, io->f.filp); + io->kio.ki_pos = io->off_bytes; + + npages = (io->len_bytes >> PAGE_SHIFT); + + /* @TODO: this can be implemented smarter, vmalloc in iopath is not + * ideal. Maybe some variation of genpool. Number of pages may differ + * greatly, so maybe even use pools of different sizes and choose the + * closest one. + */ + io->bv = vzalloc(npages * sizeof(struct bio_vec)); + if (!io->bv) { + /* Must drop the iopath reference taken above */ + rc = -ENOMEM; + goto cleanup; + } + + for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) { + io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va); + if (!io->bv[i].bv_page) { + dev_err(hdev->dev, "error getting page struct for device va %#llx", + device_va); + rc = -EFAULT; + goto cleanup; + } + io->bv[i].bv_offset = 0; + io->bv[i].bv_len = PAGE_SIZE; + } + + /* One bvec per page, covering len_bytes in total */ + iov_iter_bvec(&io->iter, io->type, io->bv, npages, io->len_bytes); + if (io->f.filp->f_op && io->f.filp->f_op->read_iter) + rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter); + else + rc = -EINVAL; + +cleanup: + vfree(io->bv); + hl_dio_put_iopath(io->f.ctx); + + dev_dbg(hdev->dev, "IO ended with %ld\n", rc); + + return rc; +} + +/* + * @TODO: This function can be used as a callback for io completion under + * kio->ki_complete in order to implement async IO. + * Note that on more recent kernels there is no ret2. + */ +__maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2) +{ + struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio); + + dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret); + + /* Do something to copy result to user / notify completion */ + + hl_dio_put_iopath(io->f.ctx); + + hl_dio_fd_unregister(&io->f); +} + +/* + * DMA disk to ASIC, wait for results. 
Must be invoked from the user context + */ +int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, + u64 device_va, off_t off_bytes, size_t len_bytes, + size_t *len_read) +{ + struct hl_direct_io *io; + ssize_t rc; + + dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#lx\n", fd, device_va, len_bytes); + + io = kzalloc(sizeof(*io), GFP_KERNEL); + if (!io) { + rc = -ENOMEM; + goto out; + } + + *io = (struct hl_direct_io){ + .device_va = device_va, + .len_bytes = len_bytes, + .off_bytes = off_bytes, + .type = READ, + }; + + rc = hl_dio_fd_register(ctx, fd, &io->f); + if (rc) + goto kfree_io; + + rc = hl_direct_io(hdev, io); + if (rc >= 0) { + *len_read = rc; + rc = 0; + } + + /* This shall be called only in the case of a sync IO */ + hl_dio_fd_unregister(&io->f); +kfree_io: + kfree(io); +out: + return rc; +} + +static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr) +{ + if (p2pr->p2ppages) { + vfree(p2pr->p2ppages); + p2pr->p2ppages = NULL; + } + + if (p2pr->p2pmem) { + dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n", + p2pr->p2pmem, p2pr->size); + pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size); + p2pr->p2pmem = NULL; + } +} + +void hl_p2p_region_fini_all(struct hl_device *hdev) +{ + int i; + + for (i = 0 ; i < hdev->hldio.np2prs ; ++i) + hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]); + + kvfree(hdev->hldio.p2prs); + hdev->hldio.p2prs = NULL; + hdev->hldio.np2prs = 0; +} + +int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr) +{ + void *addr; + int rc, i; + + /* Start by publishing our p2p memory */ + rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, p2pr->bar_offset); + if (rc) { + dev_err(hdev->dev, "error adding p2p resource: %d\n", rc); + goto err; + } + + /* Alloc all p2p mem */ + p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size); + if (!p2pr->p2pmem) { + dev_err(hdev->dev, "error allocating p2p memory\n"); + rc = -ENOMEM; + goto err; + } + + p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *)); + if (!p2pr->p2ppages) { + rc = -ENOMEM; + goto err; + } + + for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) { + p2pr->p2ppages[i] = virt_to_page(addr); + if (!p2pr->p2ppages[i]) { + rc = -EFAULT; + goto err; + } + } + + return 0; +err: + hl_p2p_region_fini(hdev, p2pr); + return rc; +} + +int hl_dio_start(struct hl_device *hdev) +{ + dev_dbg(hdev->dev, "initializing HLDIO\n"); + + /* Initialize the IO counter and enable IO */ + hdev->hldio.inflight_ios = alloc_percpu(s64); + if (!hdev->hldio.inflight_ios) + return -ENOMEM; + + hl_dio_set_io_enabled(hdev, true); + + return 0; +} + +void hl_dio_stop(struct hl_device *hdev) +{ + dev_dbg(hdev->dev, "deinitializing HLDIO\n"); + + if (hdev->hldio.io_enabled) { + /* Wait for all the IO to finish */ + hl_dio_set_io_enabled(hdev, false); + hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT); + } + + if (hdev->hldio.inflight_ios) { + free_percpu(hdev->hldio.inflight_ios); + hdev->hldio.inflight_ios = NULL; + } +} diff --git a/drivers/accel/habanalabs/common/hldio.h b/drivers/accel/habanalabs/common/hldio.h new file mode 100644 index 000000000000..2874388f2851 --- /dev/null +++ b/drivers/accel/habanalabs/common/hldio.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver + * + * This feature requires specific hardware setup and must not be built + * under COMPILE_TEST. 
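hl_dio_start() and hl_dio_stop() above implement a lock-free drain: submitters bump a per-CPU counter only while io_enabled is set, re-checking the flag after the increment, and stop clears the flag and polls the summed counter down to zero. Below is a condensed sketch of that pattern with hypothetical foo_* names; the driver uses plain loads and its hl_poll_timeout_condition() macro where this sketch uses READ_ONCE()/WRITE_ONCE() and an open-coded wait loop:

#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/percpu.h>

struct foo_gate {
	s64 __percpu *inflight;
	bool enabled;
};

static bool foo_gate_enter(struct foo_gate *g)
{
	if (!READ_ONCE(g->enabled))
		return false;

	this_cpu_inc(*g->inflight);

	/* Re-check: a concurrent drain may have cleared the flag between
	 * the first test and our increment becoming visible.
	 */
	if (!READ_ONCE(g->enabled)) {
		this_cpu_dec(*g->inflight);
		return false;
	}

	return true;
}

static void foo_gate_exit(struct foo_gate *g)
{
	this_cpu_dec(*g->inflight);
}

static s64 foo_gate_count(struct foo_gate *g)
{
	s64 sum = 0;
	int cpu;

	/* A snapshot; it is exact only once new entries can no longer start */
	for_each_possible_cpu(cpu)
		sum += per_cpu(*g->inflight, cpu);

	return sum;
}

static void foo_gate_drain(struct foo_gate *g)
{
	WRITE_ONCE(g->enabled, false);

	/* New submitters now fail foo_gate_enter(); wait out the in-flight ones */
	while (foo_gate_count(g) > 0)
		usleep_range(250, 1000);
}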
+ */ + +#ifndef __HL_HLDIO_H__ +#define __HL_HLDIO_H__ + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/ktime.h> /* ktime functions */ +#include <linux/delay.h> /* usleep_range */ +#include <linux/kernel.h> /* might_sleep_if */ +#include <linux/errno.h> /* error codes */ + +/* Forward declarations */ +struct hl_device; +struct file; + +/* Enable only if Kconfig selected */ +#ifdef CONFIG_HL_HLDIO +/** + * struct hl_p2p_region - describes a single P2P memory region + * @p2ppages: array of page structs for the P2P memory + * @p2pmem: virtual address of the P2P memory region + * @device_pa: physical address on the device + * @bar_offset: offset within the BAR + * @size: size of the region in bytes + * @bar: BAR number containing this region + */ +struct hl_p2p_region { + struct page **p2ppages; + void *p2pmem; + u64 device_pa; + u64 bar_offset; + u64 size; + int bar; +}; + +/** + * struct hl_dio_stats - Direct I/O statistics + * @total_ops: total number of operations attempted + * @successful_ops: number of successful operations + * @failed_ops: number of failed operations + * @bytes_transferred: total bytes successfully transferred + * @last_len_read: length of the last read operation + */ +struct hl_dio_stats { + u64 total_ops; + u64 successful_ops; + u64 failed_ops; + u64 bytes_transferred; + size_t last_len_read; +}; + +/** + * struct hl_dio - describes habanalabs direct storage interaction interface + * @p2prs: array of p2p regions + * @inflight_ios: percpu counter for inflight ios + * @np2prs: number of elements in p2prs + * @io_enabled: 1 if io is enabled 0 otherwise + */ +struct hl_dio { + struct hl_p2p_region *p2prs; + s64 __percpu *inflight_ios; + u8 np2prs; + u8 io_enabled; +}; + +int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, + u64 device_va, off_t off_bytes, size_t len_bytes, + size_t *len_read); +void hl_p2p_region_fini_all(struct hl_device *hdev); +int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr); +int hl_dio_start(struct hl_device *hdev); +void hl_dio_stop(struct hl_device *hdev); + +/* Init/teardown */ +int hl_hldio_init(struct hl_device *hdev); +void hl_hldio_fini(struct hl_device *hdev); + +/* File operations */ +long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); + +/* DebugFS hooks */ +#ifdef CONFIG_DEBUG_FS +void hl_hldio_debugfs_init(struct hl_device *hdev); +void hl_hldio_debugfs_fini(struct hl_device *hdev); +#else +static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { } +static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { } +#endif + +#else /* !CONFIG_HL_HLDIO */ + +struct hl_p2p_region; +/* Stubs when HLDIO is disabled */ +static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, + u64 device_va, off_t off_bytes, size_t len_bytes, + size_t *len_read) +{ return -EOPNOTSUPP; } +static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {} +static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr) +{ return -EOPNOTSUPP; } +static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; } +static inline void hl_dio_stop(struct hl_device *hdev) {} + +static inline int hl_hldio_init(struct hl_device *hdev) { return 0; } +static inline void hl_hldio_fini(struct hl_device *hdev) { } +static inline long hl_hldio_ioctl(struct file *f, unsigned int c, + unsigned long a) +{ return -ENOTTY; } +static inline void hl_hldio_debugfs_init(struct hl_device 
*hdev) { } +static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { } + +#endif /* CONFIG_HL_HLDIO */ + +/* Simplified polling macro for HLDIO (no simulator support) */ +#define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \ +({ \ + ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \ + might_sleep_if(sleep_us); \ + (void)(hdev); /* keep signature consistent, hdev unused */ \ + for (;;) { \ + mb(); /* ensure ordering of memory operations */ \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \ + break; \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +#ifdef CONFIG_HL_HLDIO +bool hl_device_supports_nvme(struct hl_device *hdev); +#else +static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; } +#endif + +#endif /* __HL_HLDIO_H__ */ diff --git a/drivers/accel/habanalabs/common/hw_queue.c b/drivers/accel/habanalabs/common/hw_queue.c index d0087c0ec48c..3d04a7507cce 100644 --- a/drivers/accel/habanalabs/common/hw_queue.c +++ b/drivers/accel/habanalabs/common/hw_queue.c @@ -84,6 +84,8 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; + u64 addr; + int i; bd = q->kernel_address; bd += hl_pi_2_offset(q->pi); @@ -91,7 +93,16 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, bd->len = cpu_to_le32(len); bd->ptr = cpu_to_le64(ptr); + if (q->dram_bd) + for (i = 0 ; i < 2 ; i++) { + addr = q->pq_dram_address + + ((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd)) + (i * sizeof(u64))); + hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, addr, + (u64 *)(bd) + i, DEBUGFS_WRITE64); + } + q->pi = hl_queue_inc_ptr(q->pi); + hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); } @@ -1087,12 +1098,18 @@ int hl_hw_queues_create(struct hl_device *hdev) q->supports_sync_stream = asic->hw_queues_props[i].supports_sync_stream; q->collective_mode = asic->hw_queues_props[i].collective_mode; + q->dram_bd = asic->hw_queues_props[i].dram_bd; + rc = queue_init(hdev, q, i); if (rc) { dev_err(hdev->dev, "failed to initialize queue %d\n", i); goto release_queues; } + + /* Set DRAM PQ address for the queue if it should be at DRAM */ + if (q->dram_bd) + q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address; } return 0; diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c index 55eb0203817f..52d1e6bf10dc 100644 --- a/drivers/accel/habanalabs/common/hwmon.c +++ b/drivers/accel/habanalabs/common/hwmon.c @@ -46,7 +46,7 @@ static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types break; default: - dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type); + dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type); flags = cpucp_flags; break; } @@ -134,7 +134,7 @@ static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types ty break; default: - dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type); + dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type); flags = cpucp_flags; break; } @@ -162,7 +162,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen break; if (type >= HWMON_NR_SENSOR_TYPES) { - dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type); + dev_err_ratelimited(hdev->dev, + "Got wrong sensor type %d from device\n", type); return -EINVAL; } @@ -578,19 +579,16 @@ int 
hl_get_temperature(struct hl_device *hdev, CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); - - dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n", - pkt.ctl, pkt.sensor_index, pkt.type); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get temperature from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get temperature from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -613,9 +611,8 @@ int hl_set_temperature(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set temperature of sensor %d, error %d\n", sensor_index, rc); @@ -642,9 +639,10 @@ int hl_get_voltage(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get voltage from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get voltage from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -671,9 +669,10 @@ int hl_get_current(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get current from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get current from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -700,9 +699,10 @@ int hl_get_fan_speed(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get fan speed from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get fan speed from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -729,9 +729,10 @@ int hl_get_pwm_info(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get pwm info from sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get pwm info from sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -754,9 +755,8 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set pwm info to sensor %d, error %d\n", sensor_index, rc); } @@ -777,9 +777,8 @@ int hl_set_voltage(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set voltage of sensor %d, error %d\n", sensor_index, rc); @@ -800,11 +799,9 @@ int hl_set_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); - - if (rc) - dev_err(hdev->dev, + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set current of sensor %d, error %d\n", sensor_index, rc); @@ -833,9 +830,8 @@ int hl_set_power(struct hl_device *hdev, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, 
NULL); - - if (rc) - dev_err(hdev->dev, + if (rc && rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, "Failed to set power of sensor %d, error %d\n", sensor_index, rc); @@ -862,9 +858,10 @@ int hl_get_power(struct hl_device *hdev, *value = (long) result; if (rc) { - dev_err(hdev->dev, - "Failed to get power of sensor %d, error %d\n", - sensor_index, rc); + if (rc != -EAGAIN) + dev_err_ratelimited(hdev->dev, + "Failed to get power of sensor %d, error %d\n", + sensor_index, rc); *value = 0; } @@ -914,7 +911,7 @@ void hl_hwmon_fini(struct hl_device *hdev) void hl_hwmon_release_resources(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; + const struct hwmon_channel_info * const *channel_info_arr; int i = 0; if (!hdev->hl_chip_info->info) diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index 04844e843a7b..7c9f2f6a2870 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work) { struct timestamp_reg_work_obj *job = container_of(work, struct timestamp_reg_work_obj, free_obj); + struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head; struct timestamp_reg_free_node *free_obj, *temp_free_obj; struct list_head *free_list_head = job->free_obj_head; + struct hl_device *hdev = job->hdev; list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) { @@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work) hl_mmap_mem_buf_put(free_obj->buf); hl_cb_put(free_obj->cq_cb); - kfree(free_obj); + atomic_set(&free_obj->in_use, 0); } kfree(free_list_head); + + if (dynamic_alloc_free_list_head) { + list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head, + free_objects_node) { + dev_dbg(hdev->dev, + "Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n", + free_obj->buf, + free_obj->cq_cb); + + hl_mmap_mem_buf_put(free_obj->buf); + hl_cb_put(free_obj->cq_cb); + list_del(&free_obj->free_objects_node); + kfree(free_obj); + } + + kfree(dynamic_alloc_free_list_head); + } + kfree(job); } @@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work) * list to a dedicated workqueue to do the actual put. 
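hl_ts_free_objects() above exists because the final reference drops can sleep, so the interrupt path only links nodes into a list and queues this work item. Reduced to a self-contained sketch (all types and names here are invented, not the driver's):

struct put_node {
	struct list_head link;
	struct kref *ref;
};

struct deferred_put_work {
	struct work_struct work;
	struct list_head head;		/* put_node entries queued from IRQ */
};

static void obj_release(struct kref *ref)
{
	/* object-specific free; may sleep, hence the workqueue deferral */
}

static void deferred_put_fn(struct work_struct *work)
{
	struct deferred_put_work *dpw =
		container_of(work, struct deferred_put_work, work);
	struct put_node *n, *tmp;

	list_for_each_entry_safe(n, tmp, &dpw->head, link) {
		kref_put(n->ref, obj_release);
		list_del(&n->link);
		kfree(n);
	}
	kfree(dpw);
}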
*/ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend, - struct list_head **free_list, ktime_t now) + struct list_head **free_list, + struct list_head **dynamic_alloc_list, + struct hl_user_interrupt *intr) { + struct hl_ts_free_jobs *ts_free_jobs_data; struct timestamp_reg_free_node *free_node; + u32 free_node_index; u64 timestamp; + ts_free_jobs_data = &intr->ts_free_jobs_data; + free_node_index = ts_free_jobs_data->next_avail_free_node_idx; + if (!(*free_list)) { /* Alloc/Init the timestamp registration free objects list */ *free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); @@ -247,40 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi INIT_LIST_HEAD(*free_list); } - free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC); - if (!free_node) - return -ENOMEM; + free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index]; + if (atomic_cmpxchg(&free_node->in_use, 0, 1)) { + dev_dbg(hdev->dev, + "Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n", + pend->ts_reg_info.buf, + pend, + intr->interrupt_id); - timestamp = ktime_to_ns(now); + if (!(*dynamic_alloc_list)) { + *dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); + if (!(*dynamic_alloc_list)) + return -ENOMEM; - *pend->ts_reg_info.timestamp_kernel_addr = timestamp; + INIT_LIST_HEAD(*dynamic_alloc_list); + } - dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n", - pend->ts_reg_info.timestamp_kernel_addr, - *(u64 *)pend->ts_reg_info.timestamp_kernel_addr); + free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC); + if (!free_node) + return -ENOMEM; + + free_node->dynamic_alloc = 1; + } - list_del(&pend->wait_list_node); + timestamp = ktime_to_ns(intr->timestamp); - /* Mark kernel CB node as free */ - pend->ts_reg_info.in_use = 0; + *pend->ts_reg_info.timestamp_kernel_addr = timestamp; + + dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n", + pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id); + + list_del(&pend->list_node); /* Putting the refcount for ts_buff and cq_cb objects will be handled * in workqueue context, just add job to free_list. */ free_node->buf = pend->ts_reg_info.buf; free_node->cq_cb = pend->ts_reg_info.cq_cb; - list_add(&free_node->free_objects_node, *free_list); + + if (free_node->dynamic_alloc) { + list_add(&free_node->free_objects_node, *dynamic_alloc_list); + } else { + ts_free_jobs_data->next_avail_free_node_idx = + (++free_node_index) % ts_free_jobs_data->free_nodes_length; + list_add(&free_node->free_objects_node, *free_list); + } + + /* Mark TS record as free */ + pend->ts_reg_info.in_use = false; return 0; } -static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr) +static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr) { + struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL; struct hl_user_pending_interrupt *pend, *temp_pend; - struct list_head *ts_reg_free_list_head = NULL; struct timestamp_reg_work_obj *job; bool reg_node_handle_fail = false; - ktime_t now = ktime_get(); + unsigned long flags; int rc; /* For registration nodes: @@ -289,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru * or in irq handler context at all (since release functions are long and * might sleep), so we will need to handle that part in workqueue context. 
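handle_registration_node() above claims a preallocated free node with atomic_cmpxchg() and falls back to a GFP_ATOMIC allocation only when the ring is exhausted. Stripped of driver detail, the claim-or-fallback scheme looks roughly like this (names invented for the sketch):

struct slot {
	atomic_t in_use;
	bool dynamic_alloc;
};

static struct slot *claim_slot(struct slot *pool, u32 *next, u32 len)
{
	struct slot *s = &pool[*next];

	/* A 0 -> 1 transition means this CPU now owns the slot */
	if (atomic_cmpxchg(&s->in_use, 0, 1) == 0) {
		*next = (*next + 1) % len;	/* advance the ring index */
		return s;
	}

	/* Pool exhausted: allocate a one-off node; the free path must
	 * kfree() it instead of clearing in_use.
	 */
	s = kmalloc(sizeof(*s), GFP_ATOMIC);
	if (s) {
		atomic_set(&s->in_use, 1);
		s->dynamic_alloc = true;
	}
	return s;
}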
* To avoid handling kmalloc failure which compels us rolling back actions - * and move nodes hanged on the free list back to the interrupt wait list + * and move nodes hanged on the free list back to the interrupt ts list * we always alloc the job of the WQ at the beginning. */ job = kmalloc(sizeof(*job), GFP_ATOMIC); if (!job) return; - spin_lock(&intr->wait_list_lock); - list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) { + spin_lock_irqsave(&intr->ts_list_lock, flags); + list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) { if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || !pend->cq_kernel_addr) { - if (pend->ts_reg_info.buf) { - if (!reg_node_handle_fail) { - rc = handle_registration_node(hdev, pend, - &ts_reg_free_list_head, now); - if (rc) - reg_node_handle_fail = true; - } - } else { - /* Handle wait target value node */ - pend->fence.timestamp = now; - complete_all(&pend->fence.completion); + if (!reg_node_handle_fail) { + rc = handle_registration_node(hdev, pend, + &ts_reg_free_list_head, + &dynamic_alloc_list_head, intr); + if (rc) + reg_node_handle_fail = true; } } } - spin_unlock(&intr->wait_list_lock); + spin_unlock_irqrestore(&intr->ts_list_lock, flags); if (ts_reg_free_list_head) { INIT_WORK(&job->free_obj, hl_ts_free_objects); job->free_obj_head = ts_reg_free_list_head; + job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head; job->hdev = hdev; queue_work(hdev->ts_free_obj_wq, &job->free_obj); } else { @@ -326,30 +374,70 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru } } +static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr) +{ + struct hl_user_pending_interrupt *pend, *temp_pend; + unsigned long flags; + + spin_lock_irqsave(&intr->wait_list_lock, flags); + list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) { + if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || + !pend->cq_kernel_addr) { + /* Handle wait target value node */ + pend->fence.timestamp = intr->timestamp; + complete_all(&pend->fence.completion); + } + } + spin_unlock_irqrestore(&intr->wait_list_lock, flags); +} + +static void handle_tpc_interrupt(struct hl_device *hdev) +{ + u64 event_mask; + u32 flags; + + event_mask = HL_NOTIFIER_EVENT_TPC_ASSERT | + HL_NOTIFIER_EVENT_USER_ENGINE_ERR | + HL_NOTIFIER_EVENT_DEVICE_RESET; + + flags = HL_DRV_RESET_DELAY; + + dev_err_ratelimited(hdev->dev, "Received TPC assert\n"); + hl_device_cond_reset(hdev, flags, event_mask); +} + +static void handle_unexpected_user_interrupt(struct hl_device *hdev) +{ + dev_err_ratelimited(hdev->dev, "Received unexpected user error interrupt\n"); +} + /** - * hl_irq_handler_user_interrupt - irq handler for user interrupts + * hl_irq_user_interrupt_handler - irq handler for user interrupts. 
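The wait-list walk in handle_user_interrupt_wait_list() above completes every waiter whose CQ counter has reached its target (a NULL counter address means complete unconditionally). The same shape as a standalone sketch with invented types:

struct waiter {
	struct list_head node;
	u64 *cq_addr;			/* NULL: complete unconditionally */
	u64 target;
	struct completion done;
};

static void wake_ready_waiters(struct list_head *head, spinlock_t *lock)
{
	struct waiter *w, *tmp;
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_for_each_entry_safe(w, tmp, head, node)
		if (!w->cq_addr || *w->cq_addr >= w->target)
			complete_all(&w->done);
	spin_unlock_irqrestore(lock, flags);
}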
* * @irq: irq number * @arg: pointer to user interrupt structure - * */ -irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg) +irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg) { struct hl_user_interrupt *user_int = arg; struct hl_device *hdev = user_int->hdev; + user_int->timestamp = ktime_get(); switch (user_int->type) { case HL_USR_INTERRUPT_CQ: - handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt); + /* First handle user waiters threads */ + handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt); + handle_user_interrupt_wait_list(hdev, user_int); - /* Handle user cq interrupt registered on this specific irq */ - handle_user_interrupt(hdev, user_int); + /* Second handle user timestamp registrations */ + handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt); + handle_user_interrupt_ts_list(hdev, user_int); break; case HL_USR_INTERRUPT_DECODER: - handle_user_interrupt(hdev, &hdev->common_decoder_interrupt); + handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt); /* Handle decoder interrupt registered on this specific irq */ - handle_user_interrupt(hdev, user_int); + handle_user_interrupt_wait_list(hdev, user_int); break; default: break; @@ -359,19 +447,41 @@ irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg) } /** - * hl_irq_handler_default - default irq handler + * hl_irq_user_interrupt_thread_handler - irq thread handler for user interrupts. + * This function is invoked by threaded irq mechanism * * @irq: irq number * @arg: pointer to user interrupt structure * */ -irqreturn_t hl_irq_handler_default(int irq, void *arg) +irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg) +{ + struct hl_user_interrupt *user_int = arg; + struct hl_device *hdev = user_int->hdev; + + user_int->timestamp = ktime_get(); + switch (user_int->type) { + case HL_USR_INTERRUPT_TPC: + handle_tpc_interrupt(hdev); + break; + case HL_USR_INTERRUPT_UNEXPECTED: + handle_unexpected_user_interrupt(hdev); + break; + default: + break; + } + + return IRQ_HANDLED; +} + +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg) { - struct hl_user_interrupt *user_interrupt = arg; - struct hl_device *hdev = user_interrupt->hdev; - u32 interrupt_id = user_interrupt->interrupt_id; + u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + struct hl_device *hdev = arg; + + dev_err(hdev->dev, "EQ error interrupt received\n"); - dev_err(hdev->dev, "got invalid user interrupt %u", interrupt_id); + hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask); return IRQ_HANDLED; } @@ -391,8 +501,8 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) struct hl_eq_entry *eq_base; struct hl_eqe_work *handle_eqe_work; bool entry_ready; - u32 cur_eqe; - u16 cur_eqe_index; + u32 cur_eqe, ctl; + u16 cur_eqe_index, event_type; eq_base = eq->kernel_address; @@ -405,11 +515,10 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe); if ((hdev->event_queue.check_eqe_index) && - (((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) - != cur_eqe_index)) { - dev_dbg(hdev->dev, - "EQE 0x%x in queue is ready but index does not match %d!=%d", - eq_base[eq->ci].hdr.ctl, + (((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) { + dev_err(hdev->dev, + "EQE %#x in queue is ready but index does not match %d!=%d", + cur_eqe, ((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK), cur_eqe_index); break; @@ -426,7 +535,10 @@ irqreturn_t hl_irq_handler_eq(int irq, 
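hl_irq_user_interrupt_thread_handler() above runs from an IRQ thread, which is why it may call the potentially sleeping reset paths. The registration itself is not part of this hunk; one plausible shape, shown only as a sketch, is a NULL primary handler with IRQF_ONESHOT, which makes the core keep the line masked and run just the thread handler in process context:

static int request_user_err_irq(int irq, struct hl_user_interrupt *user_int)
{
	/* "hl-user-err" is an invented name for this illustration */
	return request_threaded_irq(irq, NULL,
				    hl_irq_user_interrupt_thread_handler,
				    IRQF_ONESHOT, "hl-user-err", user_int);
}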
void *arg) dma_rmb(); if (hdev->disabled && !hdev->reset_info.in_compute_reset) { - dev_warn(hdev->dev, "Device disabled but received an EQ event\n"); + ctl = le32_to_cpu(eq_entry->hdr.ctl); + event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); + dev_warn(hdev->dev, + "Device disabled but received an EQ event (%u)\n", event_type); goto skip_irq; } @@ -463,7 +575,7 @@ irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg) { struct hl_dec *dec = arg; - schedule_work(&dec->completion_abnrm_work); + schedule_work(&dec->abnrm_intr_work); return IRQ_HANDLED; } @@ -540,14 +652,16 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) */ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) { + u32 size = hdev->asic_prop.fw_event_queue_size ? : HL_EQ_SIZE_IN_BYTES; void *p; - p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_EQ_SIZE_IN_BYTES, &q->bus_address); + p = hl_cpu_accessible_dma_pool_alloc(hdev, size, &q->bus_address); if (!p) return -ENOMEM; q->hdev = hdev; q->kernel_address = p; + q->size = size; q->ci = 0; q->prev_eqe_index = 0; @@ -566,7 +680,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) { flush_workqueue(hdev->eq_wq); - hl_cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, q->kernel_address); + hl_cpu_accessible_dma_pool_free(hdev, q->size, q->kernel_address); } void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) @@ -581,5 +695,30 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) * when the device is operational again */ - memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); + memset(q->kernel_address, 0, q->size); +} + +void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q) +{ + u32 eq_length, eqe_size, ctl, ready, mode, type, index; + struct hl_eq_header *hdr; + u8 *ptr; + int i; + + eq_length = HL_EQ_LENGTH; + eqe_size = q->size / HL_EQ_LENGTH; + + dev_info(hdev->dev, "Contents of EQ entries headers:\n"); + + for (i = 0, ptr = q->kernel_address ; i < eq_length ; ++i, ptr += eqe_size) { + hdr = (struct hl_eq_header *) ptr; + ctl = le32_to_cpu(hdr->ctl); + ready = FIELD_GET(EQ_CTL_READY_MASK, ctl); + mode = FIELD_GET(EQ_CTL_EVENT_MODE_MASK, ctl); + type = FIELD_GET(EQ_CTL_EVENT_TYPE_MASK, ctl); + index = FIELD_GET(EQ_CTL_INDEX_MASK, ctl); + + dev_info(hdev->dev, "%02u: %#010x [ready: %u, mode %u, type %04u, index %05u]\n", + i, ctl, ready, mode, type, index); + } } diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index e6474d38afc4..633db4bff46f 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -14,7 +14,7 @@ #include <linux/vmalloc.h> #include <linux/pci-p2pdma.h> -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); #define HL_MMU_DEBUG 0 @@ -235,10 +235,8 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, } rc = hl_pin_host_memory(hdev, addr, size, userptr); - if (rc) { - dev_err(hdev->dev, "Failed to pin host memory\n"); + if (rc) goto pin_err; - } userptr->dma_mapped = true; userptr->dir = DMA_BIDIRECTIONAL; @@ -246,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, *p_userptr = userptr; - rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); + rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto dma_map_err; @@ -607,6 +605,7 @@ static u64 get_va_block(struct hl_device *hdev, bool is_align_pow_2 = is_power_of_2(va_range->page_size); bool 
is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr); bool force_hint = flags & HL_MEM_FORCE_HINT; + int rc; if (is_align_pow_2) align_mask = ~((u64)va_block_align - 1); @@ -724,9 +723,13 @@ static u64 get_va_block(struct hl_device *hdev, kfree(new_va_block); } - if (add_prev) - add_va_block_locked(hdev, &va_range->list, prev_start, - prev_end); + if (add_prev) { + rc = add_va_block_locked(hdev, &va_range->list, prev_start, prev_end); + if (rc) { + reserved_valid_start = 0; + goto out; + } + } print_va_list_locked(hdev, &va_range->list); out: @@ -829,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, * physical pages * * This function does the following: - * - Pin the physical pages related to the given virtual block. * - Create a physical page pack from the physical pages related to the given * virtual block. */ @@ -953,8 +955,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, (i + 1) == phys_pg_pack->npages); if (rc) { dev_err(hdev->dev, - "map failed for handle %u, npages: %llu, mapped: %llu", - phys_pg_pack->handle, phys_pg_pack->npages, + "map failed (%d) for handle %u, npages: %llu, mapped: %llu\n", + rc, phys_pg_pack->handle, phys_pg_pack->npages, mapped_pg_cnt); goto err; } @@ -1031,30 +1033,6 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, } } -static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, - u64 *paddr) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_pack; - u32 handle; - - handle = lower_32_bits(args->map_device.handle); - spin_lock(&vm->idr_lock); - phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); - if (!phys_pg_pack) { - spin_unlock(&vm->idr_lock); - dev_err(hdev->dev, "no match for handle %u\n", handle); - return -EINVAL; - } - - *paddr = phys_pg_pack->pages[0]; - - spin_unlock(&vm->idr_lock); - - return 0; -} - /** * map_device_va() - map the given memory. * @ctx: pointer to the context structure. @@ -1097,10 +1075,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device huge_page_size = hdev->asic_prop.pmmu_huge.page_size; rc = dma_map_host_va(hdev, addr, size, &userptr); - if (rc) { - dev_err(hdev->dev, "failed to get userptr from va\n"); + if (rc) return rc; - } rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack, false); @@ -1210,7 +1186,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { - dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); + dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n", + rc, handle); mutex_unlock(&hdev->mmu_lock); goto map_err; } @@ -1270,6 +1247,18 @@ init_page_pack_err: return rc; } +/* Should be called while the context's mem_hash_lock is taken */ +static struct hl_vm_hash_node *get_vm_hash_node_locked(struct hl_ctx *ctx, u64 vaddr) +{ + struct hl_vm_hash_node *hnode; + + hash_for_each_possible(ctx->mem_hash, hnode, node, vaddr) + if (vaddr == hnode->vaddr) + return hnode; + + return NULL; +} + /** * unmap_device_va() - unmap the given device virtual address. * @ctx: pointer to the context structure. 
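get_vm_hash_node_locked() above is a thin wrapper around hash_for_each_possible(), which walks only the one bucket that the key hashes to; the caller holds mem_hash_lock, so no per-bucket locking is needed. The generic shape, as a sketch with an invented node type:

struct vnode {
	struct hlist_node node;
	u64 vaddr;
};

static DEFINE_HASHTABLE(vmap, 6);	/* 2^6 = 64 buckets */

static struct vnode *vmap_find(u64 vaddr)
{
	struct vnode *v;

	hash_for_each_possible(vmap, v, node, vaddr)
		if (v->vaddr == vaddr)
			return v;	/* first match in this bucket */

	return NULL;
}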
@@ -1285,10 +1274,10 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, { struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; u64 vaddr = args->unmap.device_virt_addr; - struct hl_vm_hash_node *hnode = NULL; struct asic_fixed_properties *prop; struct hl_device *hdev = ctx->hdev; struct hl_userptr *userptr = NULL; + struct hl_vm_hash_node *hnode; struct hl_va_range *va_range; enum vm_type *vm_type; bool is_userptr; @@ -1298,15 +1287,10 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, /* protect from double entrance */ mutex_lock(&ctx->mem_hash_lock); - hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) - if (vaddr == hnode->vaddr) - break; - + hnode = get_vm_hash_node_locked(ctx, vaddr); if (!hnode) { mutex_unlock(&ctx->mem_hash_lock); - dev_err(hdev->dev, - "unmap failed, no mem hnode for vaddr 0x%llx\n", - vaddr); + dev_err(hdev->dev, "unmap failed, no mem hnode for vaddr 0x%llx\n", vaddr); return -EINVAL; } @@ -1548,24 +1532,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size, } static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages, - u64 page_size, u64 exported_size, + u64 page_size, u64 exported_size, u64 offset, struct device *dev, enum dma_data_direction dir) { - u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages; - struct asic_fixed_properties *prop; - int rc, i, j, nents, cur_page; + u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page, + left_size_in_dma_seg, device_address, bar_address, start_page; + struct asic_fixed_properties *prop = &hdev->asic_prop; struct scatterlist *sg; + unsigned int nents, i; struct sg_table *sgt; + bool next_sg_entry; + int rc; - prop = &hdev->asic_prop; - - dma_max_seg_size = dma_get_max_seg_size(dev); - - /* We would like to align the max segment size to PAGE_SIZE, so the - * SGL will contain aligned addresses that can be easily mapped to - * an MMU - */ - dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE); + /* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */ + dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE); if (dma_max_seg_size < PAGE_SIZE) { dev_err_ratelimited(hdev->dev, "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n", @@ -1577,121 +1557,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 if (!sgt) return ERR_PTR(-ENOMEM); - /* remove export size restrictions in case not explicitly defined */ - cur_size_to_export = exported_size ? 
exported_size : (npages * page_size); - - /* If the size of each page is larger than the dma max segment size, - * then we can't combine pages and the number of entries in the SGL - * will just be the - * <number of pages> * <chunks of max segment size in each page> - */ - if (page_size > dma_max_seg_size) { - /* we should limit number of pages according to the exported size */ - cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size); - nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size); - } else { - cur_npages = npages; - - /* Get number of non-contiguous chunks */ - for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) { - if (pages[i - 1] + page_size != pages[i] || - chunk_size + page_size > dma_max_seg_size) { - nents++; - chunk_size = page_size; - continue; - } + /* Use the offset to move to the actual first page that is exported */ + for (start_page = 0 ; start_page < npages ; ++start_page) { + if (offset < page_size) + break; - chunk_size += page_size; - } + /* The offset value was validated so there can't be an underflow */ + offset -= page_size; } - rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); - if (rc) - goto error_free; + /* Calculate the required number of entries for the SG table */ + curr_page = start_page; + nents = 1; + left_size_to_export = exported_size; + left_size_in_page = page_size - offset; + left_size_in_dma_seg = dma_max_seg_size; + next_sg_entry = false; - cur_page = 0; + while (true) { + size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg); + left_size_to_export -= size; + left_size_in_page -= size; + left_size_in_dma_seg -= size; - if (page_size > dma_max_seg_size) { - u64 size_left, cur_device_address = 0; + if (!left_size_to_export) + break; - size_left = page_size; + if (!left_size_in_page) { + /* left_size_to_export is not zero so there must be another page */ + if (pages[curr_page] + page_size != pages[curr_page + 1]) + next_sg_entry = true; - /* Need to split each page into the number of chunks of - * dma_max_seg_size - */ - for_each_sgtable_dma_sg(sgt, sg, i) { - if (size_left == page_size) - cur_device_address = - pages[cur_page] - prop->dram_base_address; - else - cur_device_address += dma_max_seg_size; + ++curr_page; + left_size_in_page = page_size; + } - /* make sure not to export over exported size */ - chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export); + if (!left_size_in_dma_seg) { + next_sg_entry = true; + left_size_in_dma_seg = dma_max_seg_size; + } - bar_address = hdev->dram_pci_bar_start + cur_device_address; + if (next_sg_entry) { + ++nents; + next_sg_entry = false; + } + } - rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); - if (rc) - goto error_unmap; + rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); + if (rc) + goto err_free_sgt; - cur_size_to_export -= chunk_size; + /* Prepare the SG table entries */ + curr_page = start_page; + device_address = pages[curr_page] + offset; + left_size_to_export = exported_size; + left_size_in_page = page_size - offset; + left_size_in_dma_seg = dma_max_seg_size; + next_sg_entry = false; - if (size_left > dma_max_seg_size) { - size_left -= dma_max_seg_size; - } else { - cur_page++; - size_left = page_size; + for_each_sgtable_dma_sg(sgt, sg, i) { + bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address); + chunk_size = 0; + + for ( ; curr_page < npages ; ++curr_page) { + size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg); + chunk_size += size; + 
left_size_to_export -= size; + left_size_in_page -= size; + left_size_in_dma_seg -= size; + + if (!left_size_to_export) + break; + + if (!left_size_in_page) { + /* left_size_to_export is not zero so there must be another page */ + if (pages[curr_page] + page_size != pages[curr_page + 1]) { + device_address = pages[curr_page + 1]; + next_sg_entry = true; + } + + left_size_in_page = page_size; } - } - } else { - /* Merge pages and put them into the scatterlist */ - for_each_sgtable_dma_sg(sgt, sg, i) { - chunk_size = page_size; - for (j = cur_page + 1 ; j < cur_npages ; j++) { - if (pages[j - 1] + page_size != pages[j] || - chunk_size + page_size > dma_max_seg_size) - break; - - chunk_size += page_size; + + if (!left_size_in_dma_seg) { + /* + * Skip setting a new device address if already moving to a page + * which is not contiguous with the current page. + */ + if (!next_sg_entry) { + device_address += chunk_size; + next_sg_entry = true; + } + + left_size_in_dma_seg = dma_max_seg_size; } - bar_address = hdev->dram_pci_bar_start + - (pages[cur_page] - prop->dram_base_address); + if (next_sg_entry) { + next_sg_entry = false; + break; + } + } - /* make sure not to export over exported size */ - chunk_size = min(chunk_size, cur_size_to_export); - rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); - if (rc) - goto error_unmap; + rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); + if (rc) + goto err_unmap; + } - cur_size_to_export -= chunk_size; - cur_page = j; - } + /* There should be nothing left to export exactly after looping over all SG elements */ + if (left_size_to_export) { + dev_err(hdev->dev, + "left size to export %#llx after initializing %u SG elements\n", + left_size_to_export, sgt->nents); + rc = -ENOMEM; + goto err_unmap; } - /* Because we are not going to include a CPU list we want to have some - * chance that other users will detect this by setting the orig_nents - * to 0 and using only nents (length of DMA list) when going over the - * sgl + /* + * Because we are not going to include a CPU list, we want to have some chance that other + * users will detect this when going over SG table, by setting the orig_nents to 0 and using + * only nents (length of DMA list). 
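The rewritten alloc_sgt_from_device_pages() above makes two passes with identical walk logic: one to count SG entries, one to fill them. The counting pass in isolation looks like this (a sketch; assumes export_size > 0 and a zero offset into the first page):

static unsigned int count_sg_entries(const u64 *pages, u64 page_size,
				     u64 export_size, u64 max_seg)
{
	u64 left = export_size, in_page = page_size, in_seg = max_seg;
	u64 curr = 0, size;
	unsigned int nents = 1;
	bool new_entry = false;

	while (left) {
		size = min3(left, in_page, in_seg);
		left -= size;
		in_page -= size;
		in_seg -= size;
		if (!left)
			break;
		if (!in_page) {			/* crossed a device page */
			if (pages[curr] + page_size != pages[curr + 1])
				new_entry = true;	/* non-contiguous */
			curr++;
			in_page = page_size;
		}
		if (!in_seg) {			/* hit the DMA segment limit */
			new_entry = true;
			in_seg = max_seg;
		}
		if (new_entry) {
			nents++;
			new_entry = false;
		}
	}
	return nents;
}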
*/ sgt->orig_nents = 0; + dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n", + nents, dev_name(dev)); + for_each_sgtable_dma_sg(sgt, sg, i) + dev_dbg(hdev->dev, + "SG entry %d: address %#llx, length %#x\n", + i, sg_dma_address(sg), sg_dma_len(sg)); + return sgt; -error_unmap: +err_unmap: for_each_sgtable_dma_sg(sgt, sg, i) { if (!sg_dma_len(sg)) continue; - dma_unmap_resource(dev, sg_dma_address(sg), - sg_dma_len(sg), dir, + dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir, DMA_ATTR_SKIP_CPU_SYNC); } sg_free_table(sgt); -error_free: +err_free_sgt: kfree(sgt); return ERR_PTR(rc); } @@ -1716,6 +1724,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf, static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, enum dma_data_direction dir) { + u64 *pages, npages, page_size, exported_size, offset; struct dma_buf *dma_buf = attachment->dmabuf; struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_dmabuf_priv *hl_dmabuf; @@ -1724,30 +1733,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, hl_dmabuf = dma_buf->priv; hdev = hl_dmabuf->ctx->hdev; - phys_pg_pack = hl_dmabuf->phys_pg_pack; if (!attachment->peer2peer) { dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); return ERR_PTR(-EPERM); } - if (phys_pg_pack) - sgt = alloc_sgt_from_device_pages(hdev, - phys_pg_pack->pages, - phys_pg_pack->npages, - phys_pg_pack->page_size, - phys_pg_pack->exported_size, - attachment->dev, - dir); - else - sgt = alloc_sgt_from_device_pages(hdev, - &hl_dmabuf->device_address, - 1, - hl_dmabuf->dmabuf->size, - 0, - attachment->dev, - dir); + exported_size = hl_dmabuf->dmabuf->size; + offset = hl_dmabuf->offset; + phys_pg_pack = hl_dmabuf->phys_pg_pack; + + if (phys_pg_pack) { + pages = phys_pg_pack->pages; + npages = phys_pg_pack->npages; + page_size = phys_pg_pack->page_size; + } else { + pages = &hl_dmabuf->device_phys_addr; + npages = 1; + page_size = hl_dmabuf->dmabuf->size; + } + sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset, + attachment->dev, dir); if (IS_ERR(sgt)) dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt)); @@ -1779,23 +1786,65 @@ static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment, kfree(sgt); } +static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_hash_node *hnode; + + /* get the memory handle */ + mutex_lock(&ctx->mem_hash_lock); + hnode = get_vm_hash_node_locked(ctx, addr); + if (!hnode) { + mutex_unlock(&ctx->mem_hash_lock); + dev_dbg(hdev->dev, "map address %#llx not found\n", addr); + return ERR_PTR(-EINVAL); + } + + if (upper_32_bits(hnode->handle)) { + mutex_unlock(&ctx->mem_hash_lock); + dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n", + hnode->handle, addr); + return ERR_PTR(-EINVAL); + } + + /* + * node found, increase export count so this memory cannot be unmapped + * and the hash node cannot be deleted. 
+ */ + hnode->export_cnt++; + mutex_unlock(&ctx->mem_hash_lock); + + return hnode; +} + +static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode) +{ + mutex_lock(&ctx->mem_hash_lock); + hnode->export_cnt--; + mutex_unlock(&ctx->mem_hash_lock); +} + static void hl_release_dmabuf(struct dma_buf *dmabuf) { struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; struct hl_ctx *ctx; - if (!hl_dmabuf) - return; - ctx = hl_dmabuf->ctx; - if (hl_dmabuf->memhash_hnode) { - mutex_lock(&ctx->mem_hash_lock); - hl_dmabuf->memhash_hnode->export_cnt--; - mutex_unlock(&ctx->mem_hash_lock); - } + if (hl_dmabuf->memhash_hnode) + memhash_node_export_put(ctx, hl_dmabuf->memhash_hnode); + atomic_dec(&ctx->hdev->dmabuf_export_cnt); hl_ctx_put(ctx); + + /* + * Paired with get_file() in export_dmabuf(). + * 'ctx' can be still used here to get the file pointer, even after hl_ctx_put() was called, + * because releasing the compute device file involves another reference decrement, and it + * would be possible only after calling fput(). + */ + fput(ctx->hpriv->file_priv->filp); + kfree(hl_dmabuf); } @@ -1812,7 +1861,12 @@ static int export_dmabuf(struct hl_ctx *ctx, { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct hl_device *hdev = ctx->hdev; - int rc, fd; + CLASS(get_unused_fd, fd)(flags); + + if (fd < 0) { + dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); + return fd; + } exp_info.ops = &habanalabs_dmabuf_ops; exp_info.size = total_size; @@ -1825,42 +1879,45 @@ static int export_dmabuf(struct hl_ctx *ctx, return PTR_ERR(hl_dmabuf->dmabuf); } - fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); - if (fd < 0) { - dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); - rc = fd; - goto err_dma_buf_put; - } - hl_dmabuf->ctx = ctx; hl_ctx_get(hl_dmabuf->ctx); + atomic_inc(&ctx->hdev->dmabuf_export_cnt); + + /* Get compute device file to enforce release order, such that all exported dma-buf will be + * released first and only then the compute device. + * Paired with fput() in hl_release_dmabuf(). 
+ */ + get_file(ctx->hpriv->file_priv->filp); *dmabuf_fd = fd; + fd_install(take_fd(fd), hl_dmabuf->dmabuf->file); return 0; - -err_dma_buf_put: - hl_dmabuf->dmabuf->priv = NULL; - dma_buf_put(hl_dmabuf->dmabuf); - return rc; } -static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size) +static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset) { - if (!IS_ALIGNED(device_addr, PAGE_SIZE)) { + if (!PAGE_ALIGNED(addr)) { dev_dbg(hdev->dev, - "exported device memory address 0x%llx should be aligned to 0x%lx\n", - device_addr, PAGE_SIZE); + "exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n", + addr, PAGE_SIZE); return -EINVAL; } - if (size < PAGE_SIZE) { + if (!size || !PAGE_ALIGNED(size)) { dev_dbg(hdev->dev, - "exported device memory size %llu should be equal to or greater than %lu\n", + "exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n", size, PAGE_SIZE); return -EINVAL; } + if (!PAGE_ALIGNED(offset)) { + dev_dbg(hdev->dev, + "exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n", + offset, PAGE_SIZE); + return -EINVAL; + } + return 0; } @@ -1870,13 +1927,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr u64 bar_address; int rc; - rc = validate_export_params_common(hdev, device_addr, size); + rc = validate_export_params_common(hdev, device_addr, size, 0); if (rc) return rc; if (device_addr < prop->dram_user_base_address || - (device_addr + size) > prop->dram_end_address || - (device_addr + size) < device_addr) { + (device_addr + size) > prop->dram_end_address || + (device_addr + size) < device_addr) { dev_dbg(hdev->dev, "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n", device_addr, size); @@ -1903,29 +1960,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s u64 bar_address; int i, rc; - rc = validate_export_params_common(hdev, device_addr, size); + rc = validate_export_params_common(hdev, device_addr, size, offset); if (rc) return rc; if ((offset + size) > phys_pg_pack->total_size) { dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n", - offset, size, phys_pg_pack->total_size); + offset, size, phys_pg_pack->total_size); return -EINVAL; } for (i = 0 ; i < phys_pg_pack->npages ; i++) { - bar_address = hdev->dram_pci_bar_start + - (phys_pg_pack->pages[i] - prop->dram_base_address); + (phys_pg_pack->pages[i] - prop->dram_base_address); if ((bar_address + phys_pg_pack->page_size) > (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || (bar_address + phys_pg_pack->page_size) < bar_address) { dev_dbg(hdev->dev, "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n", - phys_pg_pack->pages[i], - phys_pg_pack->page_size); - + phys_pg_pack->pages[i], phys_pg_pack->page_size); return -EINVAL; } } @@ -1933,47 +1987,6 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s return 0; } -static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm_hash_node *hnode; - - /* get the memory handle */ - mutex_lock(&ctx->mem_hash_lock); - hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)addr) - if (addr == hnode->vaddr) - break; - - if (!hnode) { - mutex_unlock(&ctx->mem_hash_lock); - dev_dbg(hdev->dev, "map address %#llx not found\n", addr); - return ERR_PTR(-EINVAL); - } - - if (upper_32_bits(hnode->handle)) { 
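export_dmabuf() above uses the scope-based CLASS(get_unused_fd, ...) guard: the descriptor is put back automatically on any early return, and take_fd() disarms the guard once fd_install() takes ownership. A minimal model of that control flow (sketch, invented function name):

static int install_file_fd(struct file *filp, int flags, int *fd_out)
{
	CLASS(get_unused_fd, fd)(flags);

	if (fd < 0)
		return fd;		/* allocation failed, nothing to undo */

	if (!filp)
		return -EINVAL;		/* guard releases the unused fd here */

	*fd_out = fd;			/* publish the number before handing off */
	fd_install(take_fd(fd), filp);	/* ownership moved; guard disarmed */
	return 0;
}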
- mutex_unlock(&ctx->mem_hash_lock); - dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n", - hnode->handle, addr); - return ERR_PTR(-EINVAL); - } - - /* - * node found, increase export count so this memory cannot be unmapped - * and the hash node cannot be deleted. - */ - hnode->export_cnt++; - mutex_unlock(&ctx->mem_hash_lock); - - return hnode; -} - -static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode) -{ - mutex_lock(&ctx->mem_hash_lock); - hnode->export_cnt--; - mutex_unlock(&ctx->mem_hash_lock); -} - static struct hl_vm_phys_pg_pack *get_phys_pg_pack_from_hash_node(struct hl_device *hdev, struct hl_vm_hash_node *hnode) { @@ -2022,7 +2035,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o struct asic_fixed_properties *prop; struct hl_dmabuf_priv *hl_dmabuf; struct hl_device *hdev; - u64 export_addr; int rc; hdev = ctx->hdev; @@ -2034,8 +2046,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o return -EINVAL; } - export_addr = addr + offset; - hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); if (!hl_dmabuf) return -ENOMEM; @@ -2051,20 +2061,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o rc = PTR_ERR(phys_pg_pack); goto dec_memhash_export_cnt; } - rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack); + rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack); if (rc) goto dec_memhash_export_cnt; - phys_pg_pack->exported_size = size; hl_dmabuf->phys_pg_pack = phys_pg_pack; hl_dmabuf->memhash_hnode = hnode; + hl_dmabuf->offset = offset; } else { - rc = validate_export_params_no_mmu(hdev, export_addr, size); + rc = validate_export_params_no_mmu(hdev, addr, size); if (rc) goto err_free_dmabuf_wrapper; - } - hl_dmabuf->device_address = export_addr; + hl_dmabuf->device_phys_addr = addr; + } rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd); if (rc) @@ -2080,76 +2090,6 @@ err_free_dmabuf_wrapper: return rc; } -static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) -{ - struct hl_device *hdev = hpriv->hdev; - u64 block_handle, device_addr = 0; - struct hl_ctx *ctx = hpriv->ctx; - u32 handle = 0, block_size; - int rc; - - switch (args->in.op) { - case HL_MEM_OP_ALLOC: - if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, "alloc size must be larger than 0\n"); - rc = -EINVAL; - goto out; - } - - /* Force contiguous as there are no real MMU - * translations to overcome physical memory gaps - */ - args->in.flags |= HL_MEM_CONTIGUOUS; - rc = alloc_device_memory(ctx, &args->in, &handle); - - memset(args, 0, sizeof(*args)); - args->out.handle = (__u64) handle; - break; - - case HL_MEM_OP_FREE: - rc = free_device_memory(ctx, &args->in); - break; - - case HL_MEM_OP_MAP: - if (args->in.flags & HL_MEM_USERPTR) { - dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n"); - rc = -EPERM; - } else { - rc = get_paddr_from_handle(ctx, &args->in, &device_addr); - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; - } - - break; - - case HL_MEM_OP_UNMAP: - rc = 0; - break; - - case HL_MEM_OP_MAP_BLOCK: - rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size); - args->out.block_handle = block_handle; - args->out.block_size = block_size; - break; - - case HL_MEM_OP_EXPORT_DMABUF_FD: - dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n"); - rc = -EPERM; - break; - - case HL_MEM_OP_TS_ALLOC: - rc = 
allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); - break; - default: - dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -EINVAL; - break; - } - -out: - return rc; -} - static void ts_buff_release(struct hl_mmap_mem_buf *buf) { struct hl_ts_buff *ts_buff = buf->private; @@ -2163,7 +2103,7 @@ static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, v { struct hl_ts_buff *ts_buff = buf->private; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE); return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0); } @@ -2221,11 +2161,11 @@ static struct hl_mmap_mem_buf_behavior hl_ts_behavior = { * allocate_timestamps_buffers() - allocate timestamps buffers * This function will allocate ts buffer that will later on be mapped to the user * in order to be able to read the timestamp. - * in additon it'll allocate an extra buffer for registration management. + * in addition it'll allocate an extra buffer for registration management. * since we cannot fail during registration for out-of-memory situation, so * we'll prepare a pool which will be used as user interrupt nodes and instead * of dynamically allocating nodes while registration we'll pick the node from - * this pool. in addtion it'll add node to the mapping hash which will be used + * this pool. in addition it'll add node to the mapping hash which will be used * to map user ts buffer to the internal kernel ts buffer. * @hpriv: pointer to the private data of the fd * @args: ioctl input @@ -2251,8 +2191,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in return 0; } -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; enum hl_device_status status; union hl_mem_args *args = data; struct hl_device *hdev = hpriv->hdev; @@ -2268,9 +2209,6 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) return -EBUSY; } - if (!hdev->mmu_enable) - return mem_ioctl_no_mmu(hpriv, args); - switch (args->in.op) { case HL_MEM_OP_ALLOC: if (args->in.alloc.mem_size == 0) { @@ -2399,7 +2337,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, if (rc < 0) goto destroy_pages; npages = rc; - rc = -EFAULT; + rc = -ENOMEM; goto put_pages; } userptr->npages = npages; @@ -2503,7 +2441,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) hl_debugfs_remove_userptr(hdev, userptr); if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); + hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); kvfree(userptr->pages); @@ -2765,13 +2703,10 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) atomic64_set(&ctx->dram_phys_mem, 0); /* - * - If MMU is enabled, init the ranges as usual. - * - If MMU is disabled, in case of host mapping, the returned address - * is the given one. * In case of DRAM mapping, the returned address is the physical * address of the memory related to the given handle. 
*/ - if (!ctx->hdev->mmu_enable) + if (ctx->hdev->mmu_disable) return 0; dram_range_start = prop->dmmu.start_addr; @@ -2821,7 +2756,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) struct hl_mem_in args; int i; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return; hl_debugfs_remove_ctx_mem_hash(hdev, ctx); diff --git a/drivers/accel/habanalabs/common/memory_mgr.c b/drivers/accel/habanalabs/common/memory_mgr.c index 0f2759e26547..4401beb99e42 100644 --- a/drivers/accel/habanalabs/common/memory_mgr.c +++ b/drivers/accel/habanalabs/common/memory_mgr.c @@ -259,13 +259,8 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, goto put_mem; } -#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK - if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start, - user_mem_size)) { -#else if (!access_ok((void __user *)(uintptr_t)vma->vm_start, user_mem_size)) { -#endif dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n", buf->behavior->topic, vma->vm_start); @@ -275,7 +270,7 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, if (atomic_cmpxchg(&buf->mmap, 0, 1)) { dev_err(mmg->dev, - "%s, Memory mmap failed, already mmaped to user\n", + "%s, Memory mmap failed, already mapped to user\n", buf->behavior->topic); rc = -EINVAL; goto put_mem; @@ -318,31 +313,75 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg) idr_init(&mmg->handles); } +static void hl_mem_mgr_fini_stats_reset(struct hl_mem_mgr_fini_stats *stats) +{ + if (!stats) + return; + + memset(stats, 0, sizeof(*stats)); +} + +static void hl_mem_mgr_fini_stats_inc(u64 mem_id, struct hl_mem_mgr_fini_stats *stats) +{ + if (!stats) + return; + + switch (mem_id) { + case HL_MMAP_TYPE_CB: + ++stats->n_busy_cb; + break; + case HL_MMAP_TYPE_TS_BUFF: + ++stats->n_busy_ts; + break; + default: + /* we currently store only CB/TS so this shouldn't happen */ + ++stats->n_busy_other; + } +} + /** * hl_mem_mgr_fini - release unified memory manager * * @mmg: parent unified memory manager + * @stats: if non-NULL, will return some counters for handles that could not be removed. * * Release the unified memory manager. Shall be called from an interrupt context. */ -void hl_mem_mgr_fini(struct hl_mem_mgr *mmg) +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats) { struct hl_mmap_mem_buf *buf; struct idr *idp; const char *topic; + u64 mem_id; u32 id; + hl_mem_mgr_fini_stats_reset(stats); + idp = &mmg->handles; idr_for_each_entry(idp, buf, id) { topic = buf->behavior->topic; - if (hl_mmap_mem_buf_put(buf) != 1) + mem_id = buf->behavior->mem_id; + if (hl_mmap_mem_buf_put(buf) != 1) { dev_err(mmg->dev, "%s: Buff handle %u for CTX is still alive\n", topic, id); + hl_mem_mgr_fini_stats_inc(mem_id, stats); + } } +} - /* TODO: can it happen that some buffer is still in use at this point? */ +/** + * hl_mem_mgr_idr_destroy() - destroy memory manager IDR. + * @mmg: parent unified memory manager + * + * Destroy the memory manager IDR. + * Shall be called when IDR is empty and no memory buffers are in use. 
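Splitting hl_mem_mgr_fini() from hl_mem_mgr_idr_destroy() above enforces the usual IDR teardown order: release every live handle first, destroy the IDR only once it is empty. As a generic sketch:

static void mgr_teardown(struct idr *idp)
{
	void *entry;
	u32 id;

	idr_for_each_entry(idp, entry, id)	/* drop each live handle */
		idr_remove(idp, id);

	WARN_ON(!idr_is_empty(idp));		/* nothing may survive this */
	idr_destroy(idp);
}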
+ */ +void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg) +{ + if (!idr_is_empty(&mmg->handles)) + dev_crit(mmg->dev, "memory manager IDR is destroyed while it is not empty!\n"); idr_destroy(&mmg->handles); } diff --git a/drivers/accel/habanalabs/common/mmu/Makefile b/drivers/accel/habanalabs/common/mmu/Makefile index 1806c524e04a..f4b815bf4f7d 100644 --- a/drivers/accel/habanalabs/common/mmu/Makefile +++ b/drivers/accel/habanalabs/common/mmu/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \ - common/mmu/mmu_v2_hr.o + common/mmu/mmu_v2.o common/mmu/mmu_v2_hr.o diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c index a42ae8bc61e8..79823facce7f 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu.c +++ b/drivers/accel/habanalabs/common/mmu/mmu.c @@ -6,6 +6,7 @@ */ #include <linux/slab.h> +#include <linux/pci.h> #include "../habanalabs.h" @@ -44,7 +45,7 @@ int hl_mmu_init(struct hl_device *hdev) { int rc = -EOPNOTSUPP; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return 0; mutex_init(&hdev->mmu_lock); @@ -82,7 +83,7 @@ fini_dr_mmu: */ void hl_mmu_fini(struct hl_device *hdev) { - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return; if (hdev->mmu_func[MMU_DR_PGT].fini != NULL) @@ -107,7 +108,7 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) struct hl_device *hdev = ctx->hdev; int rc = -EOPNOTSUPP; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return 0; if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { @@ -145,7 +146,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return; if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL) @@ -233,7 +234,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu u64 real_virt_addr; bool is_dram_addr; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return 0; is_dram_addr = hl_is_dram_va(hdev, virt_addr); @@ -262,7 +263,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu mmu_funcs->flush(ctx); if (trace_habanalabs_mmu_unmap_enabled() && !rc) - trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte); + trace_habanalabs_mmu_unmap(&hdev->pdev->dev, virt_addr, 0, page_size, flush_pte); return rc; } @@ -301,7 +302,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s bool is_dram_addr; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return 0; is_dram_addr = hl_is_dram_va(hdev, virt_addr); @@ -349,7 +350,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s if (flush_pte) mmu_funcs->flush(ctx); - trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte); + trace_habanalabs_mmu_map(&hdev->pdev->dev, virt_addr, phys_addr, page_size, flush_pte); return 0; @@ -472,46 +473,6 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size) return rc; } -/* - * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out - * - * @ctx: pointer to the context structure - * - */ -void hl_mmu_swap_out(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - - if (!hdev->mmu_enable) - return; - - if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL) - hdev->mmu_func[MMU_DR_PGT].swap_out(ctx); - - if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL) - hdev->mmu_func[MMU_HR_PGT].swap_out(ctx); -} - -/* - * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in - * - * @ctx: 
pointer to the context structure - * - */ -void hl_mmu_swap_in(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - - if (!hdev->mmu_enable) - return; - - if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL) - hdev->mmu_func[MMU_DR_PGT].swap_in(ctx); - - if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL) - hdev->mmu_func[MMU_HR_PGT].swap_in(ctx); -} - static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops, u64 *phys_addr) @@ -540,8 +501,8 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, u32 page_off; /* - * Bit arithmetics cannot be used for non power of two page - * sizes. In addition, since bit arithmetics is not used, + * Bit arithmetic cannot be used for non power of two page + * sizes. In addition, since bit arithmetic is not used, * we cannot ignore dram base. All that shall be considered. */ @@ -594,7 +555,7 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, int pgt_residency, rc; bool is_dram_addr; - if (!hdev->mmu_enable) + if (hdev->mmu_disable) return -EOPNOTSUPP; prop = &hdev->asic_prop; @@ -625,7 +586,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, int hl_mmu_if_set_funcs(struct hl_device *hdev) { - if (!hdev->mmu_enable) + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (hdev->mmu_disable) return 0; switch (hdev->asic_type) { @@ -636,8 +599,11 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) break; case ASIC_GAUDI2: case ASIC_GAUDI2B: - /* MMUs in Gaudi2 are always host resident */ - hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); + case ASIC_GAUDI2C: + case ASIC_GAUDI2D: + hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); + if (prop->pmmu.host_resident) + hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -679,7 +645,10 @@ int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); if (rc) - dev_err_ratelimited(hdev->dev, "MMU cache invalidation failed\n"); + dev_err_ratelimited(hdev->dev, + "%s: %s cache invalidation failed, rc=%d\n", + dev_name(&hdev->pdev->dev), + flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc); return rc; } @@ -692,7 +661,10 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags, asid, va, size); if (rc) - dev_err_ratelimited(hdev->dev, "MMU cache range invalidation failed\n"); + dev_err_ratelimited(hdev->dev, + "%s: %s cache range invalidation failed: va=%#llx, size=%llu, rc=%d", + dev_name(&hdev->pdev->dev), flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", + va, size, rc); return rc; } @@ -757,7 +729,7 @@ u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) * @mmu_prop: MMU properties. * @hop_idx: HOP index. * @hop_addr: HOP address. - * @virt_addr: virtual address fro the translation. + * @virt_addr: virtual address for the translation. * * @return the matching PTE value on success, otherwise U64_MAX. 
*/ @@ -1244,3 +1216,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_ return 0; } +struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = NULL; + + hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, + (unsigned long) hop_addr) + if (hop_addr == pgt_info->shadow_addr) + break; + + return pgt_info; +} + +void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr); + + hl_mmu_dr_free_pgt_node(ctx, pgt_info); +} + +void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info) +{ + struct hl_device *hdev = ctx->hdev; + + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, + hdev->asic_prop.dmmu.hop_table_size); + hash_del(&pgt_info->node); + kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); + kfree(pgt_info); +} + +u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx) +{ + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size); +} + +u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx) +{ + return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + + (ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size); +} + +u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) +{ + u64 page_mask = ctx->hdev->asic_prop.dmmu.hop_table_size - 1; + u64 shadow_hop_addr = shadow_addr & (~page_mask); + u64 pte_offset = shadow_addr & page_mask; + u64 phys_hop_addr; + + if (shadow_hop_addr != hl_mmu_dr_get_hop0_addr(ctx)) + phys_hop_addr = hl_mmu_dr_get_pgt_info(ctx, shadow_hop_addr)->phys_addr; + else + phys_hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx); + + return phys_hop_addr + pte_offset; +} + +void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val); + + ctx->hdev->asic_funcs->write_pte(ctx->hdev, hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), + phys_val); + + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), val); + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr) +{ + hl_mmu_dr_write_final_pte(ctx, pte_addr, 0); +} + +void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + hl_mmu_dr_get_pgt_info(ctx, hop_addr)->num_of_ptes++; +} + +int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr); + int num_of_ptes_left; + + pgt_info->num_of_ptes--; + + /* + * Need to save the number of ptes left because hl_mmu_free_hop might free + * the pgt_info + */ + num_of_ptes_left = pgt_info->num_of_ptes; + if (!num_of_ptes_left) + hl_mmu_dr_free_pgt_node(ctx, pgt_info); + + return num_of_ptes_left; +} + +u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pgt_info *pgt_info; + u64 phys_addr, shadow_addr; + + pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); + if (!pgt_info) + return ULLONG_MAX; + + phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, + prop->dmmu.hop_table_size); + if (!phys_addr) { + dev_err(hdev->dev, "failed to allocate page\n"); + goto pool_add_err; + } + + shadow_addr = (u64) (uintptr_t) kzalloc(prop->dmmu.hop_table_size, + GFP_KERNEL); + if (!shadow_addr) + goto 
shadow_err; + + pgt_info->phys_addr = phys_addr; + pgt_info->shadow_addr = shadow_addr; + pgt_info->ctx = ctx; + pgt_info->num_of_ptes = 0; + hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); + + return shadow_addr; + +shadow_err: + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, + phys_addr, prop->dmmu.hop_table_size); +pool_add_err: + kfree(pgt_info); + + return ULLONG_MAX; +} + +u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop) +{ + u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); + + if (hop_addr == ULLONG_MAX) { + hop_addr = hl_mmu_dr_alloc_hop(ctx); + *is_new_hop = (hop_addr != ULLONG_MAX); + } + + return hop_addr; +} + +void hl_mmu_dr_flush(struct hl_ctx *ctx) +{ + /* flush all writes from all cores to reach PCI */ + mb(); + ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx)); +} + +int hl_mmu_dr_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + hdev->mmu_priv.dr.mmu_pgt_pool = + gen_pool_create(__ffs(prop->dmmu.hop_table_size), -1); + + if (!hdev->mmu_priv.dr.mmu_pgt_pool) { + dev_err(hdev->dev, "Failed to create page gen pool\n"); + return -ENOMEM; + } + + rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + + prop->dmmu.hop0_tables_total_size, + prop->dmmu.pgt_size - prop->dmmu.hop0_tables_total_size, + -1); + if (rc) { + dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); + goto err_pool_add; + } + + hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, + prop->dmmu.hop_table_size, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { + rc = -ENOMEM; + goto err_pool_add; + } + + /* MMU H/W init will be done in device hw_init() */ + + return 0; + +err_pool_add: + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + + return rc; +} + +void hl_mmu_dr_fini(struct hl_device *hdev) +{ + /* MMU H/W fini was already done in device hw_fini() */ + + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) + return; + + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + + /* Make sure that if we arrive here again without init was + * called we won't cause kernel panic. 
This can happen for + * example if we fail during hard reset code at certain points + */ + hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; +} diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v1.c b/drivers/accel/habanalabs/common/mmu/mmu_v1.c index d925dc4dd097..845d16aaa637 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/accel/habanalabs/common/mmu/mmu_v1.c @@ -12,166 +12,6 @@ #define MMU_V1_MAX_HOPS (MMU_HOP4 + 1) -static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); - -static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = NULL; - - hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, - (unsigned long) hop_addr) - if (hop_addr == pgt_info->shadow_addr) - break; - - return pgt_info; -} - -static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) -{ - struct hl_device *hdev = ctx->hdev; - - gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, - hdev->asic_prop.mmu_hop_table_size); - hash_del(&pgt_info->node); - kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); - kfree(pgt_info); -} - -static void free_hop(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - - _free_hop(ctx, pgt_info); -} - -static u64 alloc_hop(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct pgt_info *pgt_info; - u64 phys_addr, shadow_addr; - - pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); - if (!pgt_info) - return ULLONG_MAX; - - phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, - prop->mmu_hop_table_size); - if (!phys_addr) { - dev_err(hdev->dev, "failed to allocate page\n"); - goto pool_add_err; - } - - shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size, - GFP_KERNEL); - if (!shadow_addr) - goto shadow_err; - - pgt_info->phys_addr = phys_addr; - pgt_info->shadow_addr = shadow_addr; - pgt_info->ctx = ctx; - pgt_info->num_of_ptes = 0; - hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); - - return shadow_addr; - -shadow_err: - gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr, - prop->mmu_hop_table_size); -pool_add_err: - kfree(pgt_info); - - return ULLONG_MAX; -} - -static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static void flush(struct hl_ctx *ctx) -{ - /* flush all writes from all cores to reach PCI */ - mb(); - ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx)); -} - -/* transform the value to physical address when writing to H/W */ -static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) -{ - /* - * The value to write is actually the address of the next shadow hop + - * flags at the 12 LSBs. - * Hence in order to get the value to write to the physical PTE, we - * clear the 12 LSBs and translate the shadow hop to its associated - * physical hop, and add back the original 12 LSBs. 
- */ - u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | - (val & FLAGS_MASK); - - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - get_phys_addr(ctx, shadow_pte_addr), - phys_val); - - *(u64 *) (uintptr_t) shadow_pte_addr = val; -} - -/* do not transform the value to physical address when writing to H/W */ -static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, - u64 val) -{ - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - get_phys_addr(ctx, shadow_pte_addr), - val); - *(u64 *) (uintptr_t) shadow_pte_addr = val; -} - -/* clear the last and present bits */ -static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr) -{ - /* no need to transform the value to physical address */ - write_final_pte(ctx, pte_addr, 0); -} - -static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr) -{ - get_pgt_info(ctx, hop_addr)->num_of_ptes++; -} - -/* - * put_pte - decrement the num of ptes and free the hop if possible - * - * @ctx: pointer to the context structure - * @hop_addr: addr of the hop - * - * This function returns the number of ptes left on this hop. If the number is - * 0, it means the pte was freed. - */ -static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - int num_of_ptes_left; - - pgt_info->num_of_ptes--; - - /* - * Need to save the number of ptes left because free_hop might free - * the pgt_info - */ - num_of_ptes_left = pgt_info->num_of_ptes; - if (!num_of_ptes_left) - _free_hop(ctx, pgt_info); - - return num_of_ptes_left; -} - static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx) { @@ -183,35 +23,6 @@ static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); } -static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, - bool *is_new_hop) -{ - u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop_addr == ULLONG_MAX) { - hop_addr = alloc_hop(ctx); - *is_new_hop = (hop_addr != ULLONG_MAX); - } - - return hop_addr; -} - -/* translates shadow address inside hop to a physical address */ -static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) -{ - u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1); - u64 shadow_hop_addr = shadow_addr & ~page_mask; - u64 pte_offset = shadow_addr & page_mask; - u64 phys_hop_addr; - - if (shadow_hop_addr != get_hop0_addr(ctx)) - phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr; - else - phys_hop_addr = get_phys_hop0_addr(ctx); - - return phys_hop_addr + pte_offset; -} - static int dram_default_mapping_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -232,13 +43,13 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) /* add hop1 and hop2 */ total_hops = num_of_hop3 + 2; - ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL); + ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE, GFP_KERNEL); if (!ctx->dram_default_hops) return -ENOMEM; - hop0_addr = get_hop0_addr(ctx); + hop0_addr = hl_mmu_dr_get_hop0_addr(ctx); - hop1_addr = alloc_hop(ctx); + hop1_addr = hl_mmu_dr_alloc_hop(ctx); if (hop1_addr == ULLONG_MAX) { dev_err(hdev->dev, "failed to alloc hop 1\n"); rc = -ENOMEM; @@ -247,7 +58,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) ctx->dram_default_hops[total_hops - 1] = hop1_addr; - hop2_addr = alloc_hop(ctx); + hop2_addr = hl_mmu_dr_alloc_hop(ctx); 
if (hop2_addr == ULLONG_MAX) { dev_err(hdev->dev, "failed to alloc hop 2\n"); rc = -ENOMEM; @@ -257,7 +68,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) ctx->dram_default_hops[total_hops - 2] = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - ctx->dram_default_hops[i] = alloc_hop(ctx); + ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx); if (ctx->dram_default_hops[i] == ULLONG_MAX) { dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i); rc = -ENOMEM; @@ -268,18 +79,18 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) /* need only pte 0 in hops 0 and 1 */ pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop0_addr, pte_val); + hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val); pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop1_addr, pte_val); - get_pte(ctx, hop1_addr); + hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val); + hl_mmu_dr_get_pte(ctx, hop1_addr); hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop2_pte_addr, pte_val); - get_pte(ctx, hop2_addr); + hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val); + hl_mmu_dr_get_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } @@ -289,23 +100,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) { - write_final_pte(ctx, hop3_pte_addr, pte_val); - get_pte(ctx, ctx->dram_default_hops[i]); + hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val); + hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; } } - flush(ctx); + hl_mmu_dr_flush(ctx); return 0; hop3_err: for (i = 0 ; i < hop3_allocated ; i++) - free_hop(ctx, ctx->dram_default_hops[i]); + hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]); - free_hop(ctx, hop2_addr); + hl_mmu_dr_free_hop(ctx, hop2_addr); hop2_err: - free_hop(ctx, hop1_addr); + hl_mmu_dr_free_hop(ctx, hop1_addr); hop1_err: kfree(ctx->dram_default_hops); @@ -329,7 +140,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) do_div(num_of_hop3, prop->dram_page_size); do_div(num_of_hop3, HOP_PTE_ENTRIES_512); - hop0_addr = get_hop0_addr(ctx); + hop0_addr = hl_mmu_dr_get_hop0_addr(ctx); /* add hop1 and hop2 */ total_hops = num_of_hop3 + 2; hop1_addr = ctx->dram_default_hops[total_hops - 1]; @@ -338,101 +149,26 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) { - clear_pte(ctx, hop3_pte_addr); - put_pte(ctx, ctx->dram_default_hops[i]); + hl_mmu_dr_clear_pte(ctx, hop3_pte_addr); + hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; } } hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - clear_pte(ctx, hop2_pte_addr); - put_pte(ctx, hop2_addr); + hl_mmu_dr_clear_pte(ctx, hop2_pte_addr); + hl_mmu_dr_put_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } - clear_pte(ctx, hop1_addr); - put_pte(ctx, hop1_addr); - clear_pte(ctx, hop0_addr); + hl_mmu_dr_clear_pte(ctx, hop1_addr); + hl_mmu_dr_put_pte(ctx, hop1_addr); + hl_mmu_dr_clear_pte(ctx, hop0_addr); kfree(ctx->dram_default_hops); - flush(ctx); -} - -/** - * hl_mmu_v1_init() - initialize the MMU module. - * @hdev: habanalabs device structure. - * - * This function does the following: - * - Create a pool of pages for pgt_infos. 
- * - Create a shadow table for pgt - * - * Return: 0 for success, non-zero for failure. - */ -static int hl_mmu_v1_init(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - int rc; - - hdev->mmu_priv.dr.mmu_pgt_pool = - gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - - if (!hdev->mmu_priv.dr.mmu_pgt_pool) { - dev_err(hdev->dev, "Failed to create page gen pool\n"); - return -ENOMEM; - } - - rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + - prop->mmu_hop0_tables_total_size, - prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, - -1); - if (rc) { - dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); - goto err_pool_add; - } - - hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size, - GFP_KERNEL); - if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { - rc = -ENOMEM; - goto err_pool_add; - } - - /* MMU H/W init will be done in device hw_init() */ - - return 0; - -err_pool_add: - gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); - - return rc; -} - -/** - * hl_mmu_v1_fini() - release the MMU module. - * @hdev: habanalabs device structure. - * - * This function does the following: - * - Disable MMU in H/W. - * - Free the pgt_infos pool. - * - * All contexts should be freed before calling this function. - */ -static void hl_mmu_v1_fini(struct hl_device *hdev) -{ - /* MMU H/W fini was already done in device hw_fini() */ - - if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { - kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); - - /* Make sure that if we arrive here again without init was - * called we won't cause kernel panic. This can happen for - * example if we fail during hard reset code at certain points - */ - hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; - } + hl_mmu_dr_flush(ctx); } /** @@ -476,7 +212,7 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) dev_err_ratelimited(hdev->dev, "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); - _free_hop(ctx, pgt_info); + hl_mmu_dr_free_pgt_node(ctx, pgt_info); } } @@ -495,7 +231,7 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx, for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) { if (hop_idx == MMU_HOP0) { - hop_addr[hop_idx] = get_hop0_addr(ctx); + hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx); } else { hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop_addr[hop_idx] == ULLONG_MAX) @@ -546,30 +282,30 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx, } hop_idx = MMU_HOP3; - write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); - put_pte(ctx, hop_addr[hop_idx]); + hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); + hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]); } else { if (!(curr_pte & PAGE_PRESENT_MASK)) goto not_mapped; if (hop_addr[MMU_HOP4]) - clear_pte(ctx, hop_pte_addr[MMU_HOP4]); + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]); else - clear_pte(ctx, hop_pte_addr[MMU_HOP3]); + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]); - if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4])) + if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4])) clear_hop3 = true; if (!clear_hop3) goto mapped; for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) { - clear_pte(ctx, hop_pte_addr[hop_idx]); + hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]); if (hop_idx == MMU_HOP0) break; - if (put_pte(ctx, hop_addr[hop_idx])) + if (hl_mmu_dr_put_pte(ctx, 
hop_addr[hop_idx]))
 				goto mapped;
 		}
 	}
@@ -616,10 +352,10 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
 		if (hop_idx == MMU_HOP0) {
-			hop_addr[hop_idx] = get_hop0_addr(ctx);
+			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
 		} else {
 			hop_addr[hop_idx] =
-				get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
+				hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
 			if (hop_addr[hop_idx] == ULLONG_MAX)
 				goto err;
 		}
@@ -666,27 +402,27 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
 			| PAGE_PRESENT_MASK;
 
-	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
+	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
 
 	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
 		prev_hop = hop_idx - 1;
 
 		if (hop_new[hop_idx]) {
 			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
+			hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
 			if (hop_idx != MMU_HOP1)
-				get_pte(ctx, hop_addr[prev_hop]);
+				hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]);
 		}
 	}
 
-	get_pte(ctx, hop_addr[num_hops - 1]);
+	hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]);
 
 	return 0;
 
 err:
 	for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
 		if (hop_new[hop_idx])
-			free_hop(ctx, hop_addr[hop_idx]);
+			hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]);
 	}
 
 	return rc;
@@ -752,7 +488,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 	if (is_huge)
 		used_hops--;
 
-	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
+	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
 	hops->hop_info[0].hop_pte_addr =
 			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
 					hops->hop_info[0].hop_addr, virt_addr);
@@ -801,13 +537,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
  */
 void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
 {
-	mmu->init = hl_mmu_v1_init;
-	mmu->fini = hl_mmu_v1_fini;
+	mmu->init = hl_mmu_dr_init;
+	mmu->fini = hl_mmu_dr_fini;
 	mmu->ctx_init = hl_mmu_v1_ctx_init;
 	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
 	mmu->map = hl_mmu_v1_map;
 	mmu->unmap = hl_mmu_v1_unmap;
-	mmu->flush = flush;
+	mmu->flush = hl_mmu_dr_flush;
 	mmu->swap_out = hl_mmu_v1_swap_out;
 	mmu->swap_in = hl_mmu_v1_swap_in;
 	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2.c b/drivers/accel/habanalabs/common/mmu/mmu_v2.c
new file mode 100644
index 000000000000..4bc0268fff1c
--- /dev/null
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v2.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "../habanalabs.h"
+#include "../../include/hw_ip/mmu/mmu_general.h"
+#include "../../include/hw_ip/mmu/mmu_v2_0.h"
+
+#include <linux/slab.h>
+
+/**
+ * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a hash to hold all page-table hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */
+static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
+{
+	hash_init(ctx->mmu_shadow_hash);
+
+	return 0;
+}
+
+/*
+ * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function frees any pgts which were not freed yet, warning about each
+ * of them, since at this point they should no longer be in use.
+ */
+static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct pgt_info *pgt_info;
+	struct hlist_node *tmp;
+	int i;
+
+	if (!hash_empty(ctx->mmu_shadow_hash))
+		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+			ctx->asid);
+
+	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
+		dev_err_ratelimited(hdev->dev,
+			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
+		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+	}
+}
+
+static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
+{
+	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
+		scrambled_virt_addr;
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_mmu_properties *mmu_prop;
+	bool is_huge = false;
+	int i, hop_last;
+
+	/* MMU v2 handles only device-resident (HMMU) addresses */
+	if (!is_dram_addr)
+		return -EINVAL;
+
+	mmu_prop = &prop->dmmu;
+
+	hop_last = mmu_prop->num_hops - 1;
+
+	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+
+	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
+	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+						hop_addr[0], scrambled_virt_addr);
+	if (hop_pte_addr[0] == U64_MAX)
+		return -EFAULT;
+
+	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
+
+	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
+		hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+		if (hop_addr[i] == ULLONG_MAX)
+			goto not_mapped;
+
+		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+							hop_addr[i], scrambled_virt_addr);
+		if (hop_pte_addr[i] == U64_MAX)
+			return -EFAULT;
+
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
+
+		if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
+			hop_last = i;
+			is_huge = true;
+			break;
+		}
+	}
+
+	if (is_dram_addr && !is_huge) {
+		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
+		return -EFAULT;
+	}
+
+	if (!(curr_pte & PAGE_PRESENT_MASK))
+		goto not_mapped;
+
+	for (i = hop_last ; i > 0 ; i--) {
+		hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
+		if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
+			goto mapped;
+	}
+	hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);
+
+mapped:
+	return 0;
+
+not_mapped:
+	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+		virt_addr);
+
+	return -EINVAL;
+}
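The walk in hl_mmu_v2_unmap() above consumes one slice of the scrambled virtual address per hop, with the per-hop mask and shift supplied by hl_mmu_get_hop_pte_phys_addr(). A schematic, self-contained version of that index calculation (bit widths are hypothetical; the driver takes the real ones from hl_mmu_properties):

	#include <stdint.h>

	/* hypothetical layout: 9 VA bits per hop, 12-bit page offset, 8-byte PTEs */
	#define DEMO_HOPS	6
	#define DEMO_HOP_BITS	9
	#define DEMO_PAGE_SHIFT	12
	#define DEMO_PTE_SIZE	8

	/* byte address of the PTE that 'va' selects inside hop 'h' of the walk */
	static uint64_t demo_hop_pte_addr(uint64_t hop_table_base, uint64_t va, int h)
	{
		int shift = DEMO_PAGE_SHIFT + DEMO_HOP_BITS * (DEMO_HOPS - 1 - h);
		uint64_t idx = (va >> shift) & ((1ULL << DEMO_HOP_BITS) - 1);

		return hop_table_base + DEMO_PTE_SIZE * idx;
	}

+
+static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+			u32 page_size, bool is_dram_addr)
+{
+	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
+		curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	bool hop_new[MMU_ARCH_6_HOPS] = { false };
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_mmu_properties *mmu_prop;
+	int rc, i, hop_last;
+
+	/* MMU v2 handles only device-resident (HMMU) addresses */
+	if (!is_dram_addr)
+		return -EINVAL;
+
+	mmu_prop = &prop->dmmu;
+
+	hop_last = mmu_prop->num_hops - 1;
+
+	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);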
+	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);
+
+	/* First hop is preallocated therefore it is treated differently */
+	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
+	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+						hop_addr[0], scrambled_virt_addr);
+	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
+
+	/* Handle hop1 to hop_last */
+	for (i = 1 ; i <= hop_last ; i++) {
+		hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
+		if (hop_addr[i] == ULLONG_MAX) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+							hop_addr[i], scrambled_virt_addr);
+		if (hop_pte_addr[i] == U64_MAX) {
+			rc = -EINVAL;
+			goto err;
+		}
+
+		if (!hop_pte_addr[i]) {
+			rc = -EINVAL;
+			goto err;
+		}
+
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
+	}
+
+	if (curr_pte & PAGE_PRESENT_MASK) {
+		dev_err(hdev->dev,
+			"mapping already exists for virt_addr 0x%llx\n",
+			virt_addr);
+
+		for (i = 0 ; i <= hop_last ; i++)
+			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
+				i, *(u64 *) (uintptr_t) hop_pte_addr[i],
+				hop_pte_addr[i]);
+
+		rc = -EINVAL;
+		goto err;
+	}
+
+	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
+			| mmu_prop->last_mask | PAGE_PRESENT_MASK;
+
+	/* Write the PTEs */
+	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);
+
+	/* for each new hop, add its address to the table of the previous hop */
+	for (i = 1 ; i <= hop_last ; i++) {
+		if (hop_new[i]) {
+			curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);
+
+			if (i - 1)
+				hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
+		}
+	}
+	hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);
+
+	return 0;
+
+err:
+	for (i = 1 ; i <= hop_last ; i++)
+		if (hop_new[i] && (hop_addr[i] != U64_MAX))
+			hl_mmu_dr_free_hop(ctx, hop_addr[i]);
+
+	return rc;
+}
+
+/*
+ * hl_mmu_v2_swap_out - marks all mapping of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_v2_swap_in - marks all mapping of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
+{
+
+}
+
+static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
+{
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_mmu_properties *mmu_prop;
+	bool is_dram_addr;
+	int i;
+
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
+
+	/* MMU v2 handles only device-resident (HMMU) addresses */
+	if (!is_dram_addr)
+		return -EINVAL;
+
+	mmu_prop = &prop->dmmu;
+	hops->range_type = HL_VA_RANGE_TYPE_DRAM;
+
+	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+
+	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
+	hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+							hops->hop_info[0].hop_addr,
+							hops->scrambled_vaddr);
+	if (hops->hop_info[0].hop_pte_addr == U64_MAX)
+		return -EFAULT;
+
+	hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
+							hops->hop_info[0].hop_pte_addr);
+	if (hops->hop_info[0].hop_pte_val == U64_MAX)
+		return -EFAULT;
+
+	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
+		hops->hop_info[i].hop_addr =
+			hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
+		if
(hops->hop_info[i].hop_addr == ULLONG_MAX) + return -EFAULT; + + hops->hop_info[i].hop_pte_addr = + hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i, + hops->hop_info[i].hop_addr, + hops->scrambled_vaddr); + if (hops->hop_info[i].hop_pte_addr == U64_MAX) + return -EFAULT; + + hops->hop_info[i].hop_pte_val = + hdev->asic_funcs->read_pte(hdev, + hops->hop_info[i].hop_pte_addr); + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask) + break; + } + + /* if passed over all hops then no last hop was found */ + if (i == mmu_prop->num_hops) + return -EFAULT; + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + if (hops->scrambled_vaddr != virt_addr) + hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr + (hdev, hops->hop_info[i].hop_pte_val); + else + hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val; + + hops->used_hops = i + 1; + + return 0; +} + +/* + * hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2 + * + * @hdev: pointer to the device structure + * @mmu_if: pointer to the mmu interface structure + */ +void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) +{ + mmu->init = hl_mmu_dr_init; + mmu->fini = hl_mmu_dr_fini; + mmu->ctx_init = hl_mmu_v2_ctx_init; + mmu->ctx_fini = hl_mmu_v2_ctx_fini; + mmu->map = hl_mmu_v2_map; + mmu->unmap = hl_mmu_v2_unmap; + mmu->flush = hl_mmu_dr_flush; + mmu->swap_out = hl_mmu_v2_swap_out; + mmu->swap_in = hl_mmu_v2_swap_in; + mmu->get_tlb_info = hl_mmu_v2_get_tlb_info; +} diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c index afe7ef964f82..31507b2a431b 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c +++ b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c @@ -47,7 +47,7 @@ static inline int hl_mmu_v2_hr_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size, + return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size, prop->mmu_pgt_size); } @@ -65,7 +65,7 @@ static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size); + hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size); } /** @@ -108,7 +108,7 @@ static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx) "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr, - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); } } @@ -150,7 +150,7 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx, curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) { hop_last = i; @@ -169,14 +169,14 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx, for (i = hop_last ; i > 0 ; i--) { hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr, - ctx->hdev->asic_prop.mmu_hop_table_size)) + ctx->hdev->asic_prop.pmmu.hop_table_size)) 
goto mapped; } hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); mapped: return 0; @@ -255,7 +255,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, scrambled_virt_addr); curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); } if (curr_pte & PAGE_PRESENT_MASK) { @@ -268,7 +268,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], hop_pte_phys_addr[i], - ctx->hdev->asic_prop.mmu_hop_table_size), + ctx->hdev->asic_prop.pmmu.hop_table_size), hop_pte_phys_addr[i]); rc = -EINVAL; goto err; @@ -279,7 +279,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, /* Write the PTEs */ hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte, - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); /* for each new hop, add its address to the table of previous-hop */ for (i = 1 ; i <= hop_last ; i++) { @@ -287,7 +287,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx, curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1], - curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size); + curr_pte, ctx->hdev->asic_prop.pmmu.hop_table_size); if (i - 1) hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs, hops_pgt_info[i - 1]->phys_addr); @@ -303,7 +303,7 @@ err: for (i = 1 ; i <= hop_last ; i++) if (hop_new[i] && hops_pgt_info[i]) hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr, - ctx->hdev->asic_prop.mmu_hop_table_size); + ctx->hdev->asic_prop.pmmu.hop_table_size); return rc; } diff --git a/drivers/accel/habanalabs/common/pci/pci.c b/drivers/accel/habanalabs/common/pci/pci.c index d1f4c695baf2..81cbd8697d4c 100644 --- a/drivers/accel/habanalabs/common/pci/pci.c +++ b/drivers/accel/habanalabs/common/pci/pci.c @@ -123,7 +123,7 @@ int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data) pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); if (unlikely(trace_habanalabs_elbi_read_enabled())) - trace_habanalabs_elbi_read(hdev->dev, (u32) addr, val); + trace_habanalabs_elbi_read(&hdev->pdev->dev, (u32) addr, val); return 0; } @@ -186,7 +186,7 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) { if (unlikely(trace_habanalabs_elbi_write_enabled())) - trace_habanalabs_elbi_write(hdev->dev, (u32) addr, val); + trace_habanalabs_elbi_write(&hdev->pdev->dev, (u32) addr, val); return 0; } @@ -420,7 +420,6 @@ int hl_pci_init(struct hl_device *hdev) unmap_pci_bars: hl_pci_bars_unmap(hdev); disable_device: - pci_clear_master(pdev); pci_disable_device(pdev); return rc; @@ -436,6 +435,5 @@ void hl_pci_fini(struct hl_device *hdev) { hl_pci_bars_unmap(hdev); - pci_clear_master(hdev->pdev); pci_disable_device(hdev->pdev); } diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c index 5f03ade07ead..5402a3cd0491 100644 --- a/drivers/accel/habanalabs/common/security.c +++ b/drivers/accel/habanalabs/common/security.c @@ -7,15 +7,31 @@ #include "habanalabs.h" -static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = { +static const char * const 
hl_glbl_error_cause[] = { "Error due to un-priv read", "Error due to un-secure read", "Error due to read from unmapped reg", "Error due to un-priv write", "Error due to un-secure write", "Error due to write to unmapped reg", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", "External I/F write sec violation", "External I/F write to un-mapped reg", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", "Read to write only", "Write to read only" }; @@ -284,14 +300,14 @@ void hl_secure_block(struct hl_device *hdev, * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_array: register array - * @regs_array_size: register array size + * @user_regs_array: unsecured register array + * @user_regs_array_size: unsecured register array size * @mask: enabled instances mask: 1- enabled, 0- disabled */ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const u32 *regs_array, u32 regs_array_size, u64 mask) + const u32 *user_regs_array, u32 user_regs_array_size, u64 mask) { int i, j; struct hl_block_glbl_sec *glbl_sec; @@ -303,8 +319,8 @@ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, return -ENOMEM; hl_secure_block(hdev, glbl_sec, blocks_array_size); - hl_unsecure_registers(hdev, regs_array, regs_array_size, 0, pb_blocks, - glbl_sec, blocks_array_size); + hl_unsecure_registers(hdev, user_regs_array, user_regs_array_size, 0, + pb_blocks, glbl_sec, blocks_array_size); /* Fill all blocks with the same configuration */ for (i = 0 ; i < num_dcores ; i++) { @@ -336,19 +352,19 @@ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_array: register array - * @regs_array_size: register array size + * @user_regs_array: unsecured register array + * @user_regs_array_size: unsecured register array size * */ int hl_init_pb(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const u32 *regs_array, u32 regs_array_size) + const u32 *user_regs_array, u32 user_regs_array_size) { return hl_init_pb_with_mask(hdev, num_dcores, dcore_offset, num_instances, instance_offset, pb_blocks, - blocks_array_size, regs_array, regs_array_size, - ULLONG_MAX); + blocks_array_size, user_regs_array, + user_regs_array_size, ULLONG_MAX); } /** @@ -364,15 +380,15 @@ int hl_init_pb(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_range_array: register range array - * @regs_range_array_size: register range array size + * @user_regs_range_array: unsecured register range array + * @user_regs_range_array_size: unsecured register range array size * @mask: enabled instances mask: 1- enabled, 0- disabled */ int hl_init_pb_ranges_with_mask(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const struct range *regs_range_array, u32 regs_range_array_size, - u64 mask) + const struct range *user_regs_range_array, + u32 user_regs_range_array_size, u64 mask) { int i, j, rc = 0; struct hl_block_glbl_sec *glbl_sec; @@ -384,8 +400,8 @@ int hl_init_pb_ranges_with_mask(struct 
hl_device *hdev, u32 num_dcores, return -ENOMEM; hl_secure_block(hdev, glbl_sec, blocks_array_size); - rc = hl_unsecure_registers_range(hdev, regs_range_array, - regs_range_array_size, 0, pb_blocks, glbl_sec, + rc = hl_unsecure_registers_range(hdev, user_regs_range_array, + user_regs_range_array_size, 0, pb_blocks, glbl_sec, blocks_array_size); if (rc) goto free_glbl_sec; @@ -422,19 +438,20 @@ free_glbl_sec: * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_range_array: register range array - * @regs_range_array_size: register range array size + * @user_regs_range_array: unsecured register range array + * @user_regs_range_array_size: unsecured register range array size * */ int hl_init_pb_ranges(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const struct range *regs_range_array, u32 regs_range_array_size) + const struct range *user_regs_range_array, + u32 user_regs_range_array_size) { return hl_init_pb_ranges_with_mask(hdev, num_dcores, dcore_offset, num_instances, instance_offset, pb_blocks, - blocks_array_size, regs_range_array, - regs_range_array_size, ULLONG_MAX); + blocks_array_size, user_regs_range_array, + user_regs_range_array_size, ULLONG_MAX); } /** @@ -447,14 +464,14 @@ int hl_init_pb_ranges(struct hl_device *hdev, u32 num_dcores, * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_array: register array - * @regs_array_size: register array size + * @user_regs_array: unsecured register array + * @user_regs_array_size: unsecured register array size * */ int hl_init_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const u32 *regs_array, u32 regs_array_size) + const u32 *user_regs_array, u32 user_regs_array_size) { int i, rc = 0; struct hl_block_glbl_sec *glbl_sec; @@ -466,8 +483,8 @@ int hl_init_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset, return -ENOMEM; hl_secure_block(hdev, glbl_sec, blocks_array_size); - rc = hl_unsecure_registers(hdev, regs_array, regs_array_size, 0, - pb_blocks, glbl_sec, blocks_array_size); + rc = hl_unsecure_registers(hdev, user_regs_array, user_regs_array_size, + 0, pb_blocks, glbl_sec, blocks_array_size); if (rc) goto free_glbl_sec; @@ -495,14 +512,14 @@ free_glbl_sec: * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_range_array: register range array - * @regs_range_array_size: register range array size + * @user_regs_range_array: unsecured register range array + * @user_regs_range_array_size: unsecured register range array size * */ int hl_init_pb_ranges_single_dcore(struct hl_device *hdev, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const struct range *regs_range_array, u32 regs_range_array_size) + const struct range *user_regs_range_array, u32 user_regs_range_array_size) { int i; struct hl_block_glbl_sec *glbl_sec; @@ -514,8 +531,8 @@ int hl_init_pb_ranges_single_dcore(struct hl_device *hdev, u32 dcore_offset, return -ENOMEM; hl_secure_block(hdev, glbl_sec, blocks_array_size); - hl_unsecure_registers_range(hdev, regs_range_array, - regs_range_array_size, 0, pb_blocks, glbl_sec, + hl_unsecure_registers_range(hdev, user_regs_range_array, + user_regs_range_array_size, 0, 
pb_blocks, glbl_sec, blocks_array_size); /* Fill all blocks with the same configuration */ @@ -670,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev, static int hl_read_glbl_errors(struct hl_device *hdev, u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data) { - struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_special_block_info *special_blocks = prop->special_blocks; struct hl_special_block_info *current_block = &special_blocks[blk_idx]; u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base, - base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address); + base = current_block->base_addr - lower_32_bits(prop->cfg_base_address); int i; block_base = base + major * current_block->major_offset + @@ -688,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev, glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET; addr_val = RREG32(glbl_err_addr); - for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) { + for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) { if (cause_val & BIT(i)) dev_err_ratelimited(hdev->dev, - "%s, addr %#llx\n", - hl_glbl_error_cause[i], - hdev->asic_prop.cfg_base_address + block_base + - FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val)); + "%s, addr %#llx\n", + hl_glbl_error_cause[i], + prop->cfg_base_address + block_base + + FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val)); } WREG32(glbl_err_cause, cause_val); diff --git a/drivers/accel/habanalabs/common/security.h b/drivers/accel/habanalabs/common/security.h index 234b4a6ed8bc..476f70687c09 100644 --- a/drivers/accel/habanalabs/common/security.h +++ b/drivers/accel/habanalabs/common/security.h @@ -10,11 +10,10 @@ #include <linux/io-64-nonatomic-lo-hi.h> -extern struct hl_device *hdev; +struct hl_device; /* special blocks */ -#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10 -#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0) +#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0) /* GLBL_ERR_ADDR register offset from the start of the block */ #define HL_GLBL_ERR_ADDR_OFFSET 0xF44 /* GLBL_ERR_CAUSE register offset from the start of the block */ diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index 735d8bed0066..8f55ba3b4e73 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -8,6 +8,7 @@ #include "habanalabs.h" #include <linux/pci.h> +#include <linux/types.h> static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -80,14 +81,36 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c { struct hl_device *hdev = dev_get_drvdata(dev); struct cpucp_info *cpucp_info; + u32 infineon_second_stage_version; + u32 infineon_second_stage_first_instance; + u32 infineon_second_stage_second_instance; + u32 infineon_second_stage_third_instance; + u32 mask = 0xff; cpucp_info = &hdev->asic_prop.cpucp_info; - if (cpucp_info->infineon_second_stage_version) - return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version), - le32_to_cpu(cpucp_info->infineon_second_stage_version)); - else + infineon_second_stage_version = le32_to_cpu(cpucp_info->infineon_second_stage_version); + infineon_second_stage_first_instance = infineon_second_stage_version & mask; + infineon_second_stage_second_instance = + (infineon_second_stage_version >> 8) & mask; + infineon_second_stage_third_instance = + 
(infineon_second_stage_version >> 16) & mask; + + if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version) + return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n", + le32_to_cpu(cpucp_info->infineon_version), + infineon_second_stage_first_instance, + infineon_second_stage_second_instance, + infineon_second_stage_third_instance); + else if (cpucp_info->infineon_second_stage_version) + return sprintf(buf, "%#04x:%#04x:%#04x\n", + infineon_second_stage_first_instance, + infineon_second_stage_second_instance, + infineon_second_stage_third_instance); + else if (cpucp_info->infineon_version) return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); + + return 0; } static DEVICE_ATTR_RO(vrm_ver); @@ -126,8 +149,9 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "0x%08x\n", - le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); + return sprintf(buf, "0x%08x%08x\n", + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_timestamp), + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); } static ssize_t cpucp_kernel_ver_show(struct device *dev, @@ -251,6 +275,12 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI2B: str = "GAUDI2B"; break; + case ASIC_GAUDI2C: + str = "GAUDI2C"; + break; + case ASIC_GAUDI2D: + str = "GAUDI2D"; + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); @@ -345,7 +375,7 @@ out: } static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t offset, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t max_size) { struct device *dev = kobj_to_dev(kobj); @@ -383,6 +413,21 @@ static ssize_t security_enabled_show(struct device *dev, return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled); } +static ssize_t module_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location)); +} + +static ssize_t parent_device_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", HL_DEV_NAME(hdev)); +} + static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(cpld_ver); @@ -402,8 +447,10 @@ static DEVICE_ATTR_RO(thermal_ver); static DEVICE_ATTR_RO(uboot_ver); static DEVICE_ATTR_RO(fw_os_ver); static DEVICE_ATTR_RO(security_enabled); +static DEVICE_ATTR_RO(module_id); +static DEVICE_ATTR_RO(parent_device); -static struct bin_attribute bin_attr_eeprom = { +static const struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, .size = PAGE_SIZE, .read = eeprom_read_handler @@ -427,10 +474,12 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_uboot_ver.attr, &dev_attr_fw_os_ver.attr, &dev_attr_security_enabled.attr, + &dev_attr_module_id.attr, + &dev_attr_parent_device.attr, NULL, }; -static struct bin_attribute *hl_dev_bin_attrs[] = { +static const struct bin_attribute *const hl_dev_bin_attrs[] = { &bin_attr_eeprom, NULL }; @@ -497,10 +546,14 @@ int hl_sysfs_init(struct hl_device *hdev) if (rc) { dev_err(hdev->dev, "Failed to add groups to device, error %d\n", rc); - return rc; + goto remove_groups; } return 0; + +remove_groups: + device_remove_groups(hdev->dev, hl_dev_attr_groups); + return rc; } void 
hl_sysfs_fini(struct hl_device *hdev) diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index 71debe862c86..34771d75da9d 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -63,6 +63,10 @@ #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" +MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE); +MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE); +MODULE_FIRMWARE(GAUDI_TPC_FW_FILE); + #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ @@ -114,13 +118,6 @@ static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = { GAUDI_QUEUE_ID_DMA_1_3 }; -static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { - "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", - "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", - "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", - "gaudi cpu eq" -}; - static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, @@ -617,8 +614,6 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) else prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; - prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = false; @@ -640,8 +635,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; /* TODO: will be duplicated until implementing per-MMU props */ - prop->pmmu.hop_table_size = prop->mmu_hop_table_size; - prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -652,10 +647,12 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); prop->dmmu.end_addr = VA_HOST_SPACE_END; prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->dmmu.pgt_size = prop->mmu_pgt_size; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GAUDI_EVENT_SIZE; + prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE; prop->tpc_enabled_mask = TPC_ENABLED_MASK; set_default_power_values(hdev); @@ -666,7 +663,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GAUDI_MAX_PENDING_CS; @@ -679,6 +676,10 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) (num_sync_stream_queues * HL_RSVD_MONS); prop->first_available_user_interrupt = USHRT_MAX; + prop->tpc_interrupt_id = USHRT_MAX; + + /* single msi */ + prop->eq_interrupt_id = 0; for (i = 0 ; i < HL_MAX_DCORES ; i++) prop->first_available_cq[i] = USHRT_MAX; @@ -867,13 +868,18 @@ pci_init: rc = hl_fw_read_preboot_status(hdev); if (rc) { if (hdev->reset_on_preboot_fail) + /* we are already on 
failure flow, so don't check if hw_fini fails. */ hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); + goto pci_fini; + } } return 0; @@ -1466,8 +1472,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev, } /* Allocate internal mapped CB for non patched CBs */ - cb = hl_cb_kernel_create(hdev, cb_size, - hdev->mmu_enable && !patched_cb); + cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb); if (!cb) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -1634,10 +1639,8 @@ static int gaudi_late_init(struct hl_device *hdev) } rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); - if (rc) { - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + if (rc) return rc; - } /* Scrub both SRAM and DRAM */ rc = hdev->asic_funcs->scrub_device_mem(hdev); @@ -2010,38 +2013,6 @@ static int gaudi_enable_msi_single(struct hl_device *hdev) return rc; } -static int gaudi_enable_msi_multi(struct hl_device *hdev) -{ - int cq_cnt = hdev->asic_prop.completion_queues_count; - int rc, i, irq_cnt_init, irq; - - for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { - irq = gaudi_pci_irq_vector(hdev, i, false); - rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], - &hdev->completion_queue[i]); - if (rc) { - dev_err(hdev->dev, "Failed to request IRQ %d", irq); - goto free_irqs; - } - } - - irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); - rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], - &hdev->event_queue); - if (rc) { - dev_err(hdev->dev, "Failed to request IRQ %d", irq); - goto free_irqs; - } - - return 0; - -free_irqs: - for (i = 0 ; i < irq_cnt_init ; i++) - free_irq(gaudi_pci_irq_vector(hdev, i, false), - &hdev->completion_queue[i]); - return rc; -} - static int gaudi_enable_msi(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -2056,14 +2027,7 @@ static int gaudi_enable_msi(struct hl_device *hdev) return rc; } - if (rc < NUMBER_OF_INTERRUPTS) { - gaudi->multi_msi_mode = false; - rc = gaudi_enable_msi_single(hdev); - } else { - gaudi->multi_msi_mode = true; - rc = gaudi_enable_msi_multi(hdev); - } - + rc = gaudi_enable_msi_single(hdev); if (rc) goto free_pci_irq_vectors; @@ -2079,47 +2043,23 @@ free_pci_irq_vectors: static void gaudi_sync_irqs(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; - int i, cq_cnt = hdev->asic_prop.completion_queues_count; if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) return; /* Wait for all pending IRQs to be finished */ - if (gaudi->multi_msi_mode) { - for (i = 0 ; i < cq_cnt ; i++) - synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); - - synchronize_irq(gaudi_pci_irq_vector(hdev, - GAUDI_EVENT_QUEUE_MSI_IDX, - true)); - } else { - synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); - } + synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); } static void gaudi_disable_msi(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; - int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) return; gaudi_sync_irqs(hdev); - - if (gaudi->multi_msi_mode) { - irq = 
gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, - true); - free_irq(irq, &hdev->event_queue); - - for (i = 0 ; i < cq_cnt ; i++) { - irq = gaudi_pci_irq_vector(hdev, i, false); - free_irq(irq, &hdev->completion_queue[i]); - } - } else { - free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); - } - + free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); pci_free_irq_vectors(hdev->pdev); gaudi->hw_cap_initialized &= ~HW_CAP_MSI; @@ -3704,21 +3644,18 @@ static int gaudi_mmu_init(struct hl_device *hdev) u64 hop0_addr; int rc, i; - if (!hdev->mmu_enable) - return 0; - if (gaudi->hw_cap_initialized & HW_CAP_MMU) return 0; for (i = 0 ; i < prop->max_asid ; i++) { hop0_addr = prop->mmu_pgt_addr + - (i * prop->mmu_hop_table_size); + (i * prop->dmmu.hop_table_size); rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); if (rc) { dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", i); - goto err; + return rc; } } @@ -3729,7 +3666,9 @@ static int gaudi_mmu_init(struct hl_device *hdev) /* mem cache invalidation */ WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); - hl_mmu_invalidate_cache(hdev, true, 0); + rc = hl_mmu_invalidate_cache(hdev, true, 0); + if (rc) + return rc; WREG32(mmMMU_UP_MMU_ENABLE, 1); WREG32(mmMMU_UP_SPI_MASK, 0xF); @@ -3745,9 +3684,6 @@ static int gaudi_mmu_init(struct hl_device *hdev) gaudi->hw_cap_initialized |= HW_CAP_MMU; return 0; - -err: - return rc; } static int gaudi_load_firmware_to_device(struct hl_device *hdev) @@ -3915,11 +3851,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) WREG32(mmCPU_IF_PF_PQ_PI, 0); - if (gaudi->multi_msi_mode) - WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); - else - WREG32(mmCPU_IF_QUEUE_INIT, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); + WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); irq_handler_offset = prop->gic_interrupts_enable ? 
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
@@ -4068,7 +4000,7 @@ disable_queues:
 	return rc;
 }
 
-static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
+static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	struct cpu_dyn_regs *dyn_regs =
 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
@@ -4078,7 +4010,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
 	if (!hard_reset) {
 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
-		return;
+		return 0;
 	}
 
 	if (hdev->pldm) {
@@ -4199,10 +4131,10 @@ skip_reset:
 		msleep(reset_timeout_ms);
 
 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
-	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
-		dev_err(hdev->dev,
-			"Timeout while waiting for device to reset 0x%x\n",
-			status);
+	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
+		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
+		return -ETIMEDOUT;
+	}
 
 	if (gaudi) {
 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
@@ -4215,17 +4147,12 @@ skip_reset:
 		hdev->device_cpu_is_halted = false;
 	}
+
+	return 0;
 }
 
 static int gaudi_suspend(struct hl_device *hdev)
 {
-	int rc;
-
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
-	if (rc)
-		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
-
-	return rc;
+	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 }
 
 static int gaudi_resume(struct hl_device *hdev)
@@ -4238,13 +4165,32 @@ static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 {
 	int rc;
 
-	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
-			VM_DONTCOPY | VM_NORESERVE;
+	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
+			VM_DONTCOPY | VM_NORESERVE);
+
+#ifdef _HAS_DMA_MMAP_COHERENT
+	/*
+	 * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
+	 * so vm_insert_page() can handle it safely. Without this, the kernel
+	 * may BUG_ON due to VM_PFNMAP.
+	 */
+	if (is_vmalloc_addr(cpu_addr))
+		vm_flags_set(vma, VM_MIXEDMAP);
 
 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
 				(dma_addr - HOST_PHYS_BASE), size);
 	if (rc)
 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
+#else
+
+	rc = remap_pfn_range(vma, vma->vm_start,
+				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
+				size, vma->vm_page_prot);
+	if (rc)
+		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
+
+#endif
+
 	return rc;
 }
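gaudi_mmap() above implements the kernel half of the mapping contract; user space obtains the buffer with an ordinary mmap() on the device file. A hypothetical user-space counterpart (device path, offset scheme, and size are assumptions, not the driver's real ABI):

	#include <fcntl.h>
	#include <sys/mman.h>
	#include <sys/types.h>
	#include <unistd.h>

	static void *demo_map_buf(const char *dev_path, size_t size, off_t offset)
	{
		void *p;
		int fd = open(dev_path, O_RDWR);

		if (fd < 0)
			return NULL;

		/* flags such as VM_DONTCOPY are enforced by the driver side */
		p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
		close(fd);

		return p == MAP_FAILED ? NULL : p;
	}

@@ -4687,8 +4633,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
 static int gaudi_scrub_device_mem(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 wait_to_idle_time = hdev->pdev ?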
HBM_SCRUBBING_TIMEOUT_US : - min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); + u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US; u64 addr, size, val = hdev->memory_scrub_val; ktime_t timeout; int rc = 0; @@ -4972,7 +4917,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); + rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -5595,7 +5540,6 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_add u32 len, u32 original_len, u64 cq_addr, u32 cq_val, u32 msi_vec, bool eb) { - struct gaudi_device *gaudi = hdev->asic_specific; struct packet_msg_prot *cq_pkt; struct packet_nop *cq_padding; u64 msi_addr; @@ -5625,12 +5569,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_add tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); cq_pkt->ctl = cpu_to_le32(tmp); cq_pkt->value = cpu_to_le32(1); - - if (gaudi->multi_msi_mode) - msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4; - else - msi_addr = mmPCIE_CORE_MSI_REQ; - + msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); } @@ -7297,7 +7236,7 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e } static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, - bool razwi, u64 *event_mask) + bool check_razwi, u64 *event_mask) { bool is_read = false, is_write = false; u16 engine_id[2], num_of_razwi_eng = 0; @@ -7316,7 +7255,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", event_type, desc); - if (razwi) { + if (check_razwi) { gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, &is_write); gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); @@ -7333,8 +7272,9 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, num_of_razwi_eng = 1; } - hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags, - event_mask); + if (razwi_flags) + hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, + razwi_flags, event_mask); } } @@ -7633,6 +7573,7 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi_device *gaudi = hdev->asic_specific; + struct hl_info_fw_err_info fw_err_info; u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 fw_fatal_err_flag = 0, flags = 0; @@ -7911,7 +7852,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_FW_ALIVE_S: gaudi_print_irq_info(hdev, event_type, false, &event_mask); gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); - event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; + fw_err_info.event_id = event_type; + fw_err_info.event_mask = &event_mask; + hl_handle_fw_err(hdev, &fw_err_info); goto reset_device; default: @@ -7942,6 +7886,10 @@ reset_device: } if (reset_required) { + /* escalate general hw errors to critical/fatal error */ + if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) + hl_handle_critical_hw_err(hdev, event_type, &event_mask); + 
hl_device_cond_reset(hdev, flags, event_mask); } else { hl_fw_unmask_irq(hdev, event_type); @@ -8065,7 +8013,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) return rc; if (!strlen(prop->cpucp_info.card_name)) - strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); @@ -8403,19 +8351,26 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, } mutex_lock(&hdev->mmu_lock); + rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); - - hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); - mutex_unlock(&hdev->mmu_lock); - if (rc) goto unreserve_internal_cb_pool; + rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); + if (rc) + goto unmap_internal_cb_pool; + + mutex_unlock(&hdev->mmu_lock); + return 0; +unmap_internal_cb_pool: + hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, + HOST_SPACE_INTERNAL_CB_SZ); unreserve_internal_cb_pool: + mutex_unlock(&hdev->mmu_lock); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); destroy_internal_cb_pool: @@ -9198,9 +9153,9 @@ static const struct hl_asic_funcs gaudi_funcs = { .asic_dma_pool_free = gaudi_dma_pool_free, .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, + .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable, .cs_parser = gaudi_cs_parser, - .asic_dma_map_sgtable = hl_dma_map_sgtable, + .dma_map_sgtable = hl_asic_dma_map_sgtable, .add_end_of_cb_packets = gaudi_add_end_of_cb_packets, .update_eq_ci = gaudi_update_eq_ci, .context_switch = gaudi_context_switch, diff --git a/drivers/accel/habanalabs/gaudi/gaudiP.h b/drivers/accel/habanalabs/gaudi/gaudiP.h index 3d88d56c8eb3..831be53bb9d7 100644 --- a/drivers/accel/habanalabs/gaudi/gaudiP.h +++ b/drivers/accel/habanalabs/gaudi/gaudiP.h @@ -10,7 +10,7 @@ #include <uapi/drm/habanalabs_accel.h> #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include <linux/habanalabs/hl_boot_if.h> #include "../include/gaudi/gaudi_packets.h" #include "../include/gaudi/gaudi.h" #include "../include/gaudi/gaudi_async_events.h" @@ -28,20 +28,8 @@ #define NUMBER_OF_COLLECTIVE_QUEUES 12 #define NUMBER_OF_SOBS_IN_GRP 11 -/* - * Number of MSI interrupts IDS: - * Each completion queue has 1 ID - * The event queue has 1 ID - */ -#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \ - NUMBER_OF_CPU_HW_QUEUES) - #define GAUDI_STREAM_MASTER_ARR_SIZE 8 -#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES) -#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES" -#endif - #define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ #define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */ @@ -324,8 +312,6 @@ struct gaudi_internal_qman_info { * signal we can use this engine in later code paths. * Each bit is cleared upon reset of its corresponding H/W * engine. - * @multi_msi_mode: whether we are working in multi MSI single MSI mode. - * Multi MSI is possible only with IOMMU enabled. * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an * 8-bit value so use u8. 
*/ @@ -345,7 +331,6 @@ struct gaudi_device { u32 events_stat[GAUDI_EVENT_SIZE]; u32 events_stat_aggregate[GAUDI_EVENT_SIZE]; u32 hw_cap_initialized; - u8 multi_msi_mode; u8 mmu_cache_inv_pi; }; diff --git a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c index 3455b14554c6..1168fefa33f4 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c @@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev, WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + val = RREG32(base_reg + 0x20); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(base_reg + 0x304); val |= 0x1000; WREG32(base_reg + 0x304, val); @@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); + val = RREG32(mmPSOC_ETR_CTL); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + + + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; WREG32(mmPSOC_ETR_FFCR, val); diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index f1f2a58ee68c..b8c0689dba64 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -23,7 +23,8 @@ #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */ #define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ -#define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */ + +#define GAUDI2_RESET_POLL_TIMEOUT_USEC 500000 /* 500ms */ #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */ #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */ #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */ @@ -56,16 +57,15 @@ #define GAUDI2_NA_EVENT_CAUSE 0xFF #define GAUDI2_NUM_OF_QM_ERR_CAUSE 18 -#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE 25 +#define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE 25 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22 -#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 30 +#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16 -#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19 @@ -86,10 +86,11 @@ #define KDMA_TIMEOUT_USEC USEC_PER_SEC -#define IS_DMA_IDLE(dma_core_idle_ind_mask) \ - (!((dma_core_idle_ind_mask) & \ - ((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \ - (DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK)))) +#define IS_DMA_IDLE(dma_core_sts0) \ + (!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK))) + +#define IS_DMA_HALTED(dma_core_sts1) \ + ((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK)) #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) @@ -132,6 +133,287 @@ #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +/* RAZWI initiator coordinates */ +#define RAZWI_GET_AXUSER_XY(x) \ + ((x & 0xF8001FF0) >> 4) + +#define RAZWI_GET_AXUSER_LOW_XY(x) \ + ((x & 0x00001FF0) >> 4) + +#define RAZWI_INITIATOR_AXUER_L_X_SHIFT 0 +#define RAZWI_INITIATOR_AXUER_L_X_MASK 0x1F +#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT 5 +#define RAZWI_INITIATOR_AXUER_L_Y_MASK 0xF + +#define RAZWI_INITIATOR_AXUER_H_X_SHIFT 23 +#define RAZWI_INITIATOR_AXUER_H_X_MASK 0x1F + 
+#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \ + ((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \ + (((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT)) + +#define RAZWI_INITIATOR_ID_X_HIGH(x) \ + (((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT) + +#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \ + (RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh)) + +#define PSOC_RAZWI_ENG_STR_SIZE 128 +#define PSOC_RAZWI_MAX_ENG_PER_RTR 5 + +/* HW scrambles only bits 0-25 */ +#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26) + +#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17 + +struct gaudi2_razwi_info { + u32 axuser_xy; + u32 rtr_ctrl; + u16 eng_id; + char *eng_name; +}; + +static struct gaudi2_razwi_info common_razwi_info[] = { + {RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"}, + {RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"}, + {RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"}, + {RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"}, + {RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"}, + {RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"}, + {RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"}, + {RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"}, + {RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"}, + {RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"}, + {RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"}, + {RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"}, + {RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"}, + {RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"}, + {RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"}, + {RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"}, + {RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"}, + {RAZWI_INITIATOR_ID_X_Y(3, 
11, 2), mmDCORE2_RTR1_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"}, + {RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"}, + {RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"}, + {RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"}, + {RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"}, + {RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"}, + {RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"}, + {RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC0_0, "NIC0"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC0_1, "NIC1"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC1_0, "NIC2"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC1_1, "NIC3"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC2_0, "NIC4"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC2_1, "NIC5"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC3_0, "NIC6"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC3_1, "NIC7"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC4_0, "NIC8"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC4_1, "NIC9"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC5_0, "NIC10"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_NIC5_1, "NIC11"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "PMMU"}, + {RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "PCIE"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"}, + {RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_KDMA, "KDMA"}, + {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"}, + {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"}, + {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"}, + {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"}, + {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"}, + {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"}, + {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"}, + {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), 
mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"}, + {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU0"}, + {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU1"}, + {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU2"}, + {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU3"}, + {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU4"}, + {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU5"}, + {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU6"}, + {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU7"}, + {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU8"}, + {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU9"}, + {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU10"}, + {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU11"}, + {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU12"}, + {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU13"}, + {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU14"}, + {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_SIZE, "HMMU15"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_ROT_0, "ROT0"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_ROT_1, "ROT1"}, + {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE, + GAUDI2_ENGINE_ID_PSOC, "CPU"}, + {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE, + GAUDI2_ENGINE_ID_PSOC, "PSOC"} +}; + +static struct gaudi2_razwi_info mme_razwi_info[] = { + /* MME X high coordinate is N/A, hence using only low coordinates */ + {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE, + GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE, + 
GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE, + GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE, + GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"}, + {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE, + GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"} +}; + enum hl_pmmu_fatal_cause { LATENCY_RD_OUT_FIFO_OVERRUN, LATENCY_WR_OUT_FIFO_OVERRUN, @@ -446,6 +728,354 @@ static const int gaudi2_dma_core_async_event_id[] = { [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE, }; +const char *gaudi2_engine_id_str[] = { + __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_0), + __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_1), + __stringify(GAUDI2_DCORE0_ENGINE_ID_MME), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_0), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_1), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_2), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_3), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_4), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_5), + __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_0), + __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_1), + __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_0), + __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_1), + __stringify(GAUDI2_DCORE1_ENGINE_ID_MME), + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_0), + 
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_1), + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_2), + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_3), + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_4), + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_5), + __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_0), + __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_1), + __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_0), + __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_1), + __stringify(GAUDI2_DCORE2_ENGINE_ID_MME), + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_0), + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_1), + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_2), + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_3), + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_4), + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_5), + __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_0), + __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_1), + __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_0), + __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_1), + __stringify(GAUDI2_DCORE3_ENGINE_ID_MME), + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_0), + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_1), + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_2), + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_3), + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_4), + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_5), + __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_0), + __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_1), + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_6), + __stringify(GAUDI2_ENGINE_ID_PDMA_0), + __stringify(GAUDI2_ENGINE_ID_PDMA_1), + __stringify(GAUDI2_ENGINE_ID_ROT_0), + __stringify(GAUDI2_ENGINE_ID_ROT_1), + __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_0), + __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_1), + __stringify(GAUDI2_ENGINE_ID_NIC0_0), + __stringify(GAUDI2_ENGINE_ID_NIC0_1), + __stringify(GAUDI2_ENGINE_ID_NIC1_0), + __stringify(GAUDI2_ENGINE_ID_NIC1_1), + __stringify(GAUDI2_ENGINE_ID_NIC2_0), + __stringify(GAUDI2_ENGINE_ID_NIC2_1), + __stringify(GAUDI2_ENGINE_ID_NIC3_0), + __stringify(GAUDI2_ENGINE_ID_NIC3_1), + __stringify(GAUDI2_ENGINE_ID_NIC4_0), + __stringify(GAUDI2_ENGINE_ID_NIC4_1), + __stringify(GAUDI2_ENGINE_ID_NIC5_0), + __stringify(GAUDI2_ENGINE_ID_NIC5_1), + __stringify(GAUDI2_ENGINE_ID_NIC6_0), + __stringify(GAUDI2_ENGINE_ID_NIC6_1), + __stringify(GAUDI2_ENGINE_ID_NIC7_0), + __stringify(GAUDI2_ENGINE_ID_NIC7_1), + __stringify(GAUDI2_ENGINE_ID_NIC8_0), + __stringify(GAUDI2_ENGINE_ID_NIC8_1), + __stringify(GAUDI2_ENGINE_ID_NIC9_0), + __stringify(GAUDI2_ENGINE_ID_NIC9_1), + __stringify(GAUDI2_ENGINE_ID_NIC10_0), + __stringify(GAUDI2_ENGINE_ID_NIC10_1), + __stringify(GAUDI2_ENGINE_ID_NIC11_0), + __stringify(GAUDI2_ENGINE_ID_NIC11_1), + __stringify(GAUDI2_ENGINE_ID_PCIE), + __stringify(GAUDI2_ENGINE_ID_PSOC), + __stringify(GAUDI2_ENGINE_ID_ARC_FARM), + __stringify(GAUDI2_ENGINE_ID_KDMA), + __stringify(GAUDI2_ENGINE_ID_SIZE), +}; + +const char *gaudi2_queue_id_str[] = { + __stringify(GAUDI2_QUEUE_ID_PDMA_0_0), + __stringify(GAUDI2_QUEUE_ID_PDMA_0_1), + __stringify(GAUDI2_QUEUE_ID_PDMA_0_2), + __stringify(GAUDI2_QUEUE_ID_PDMA_0_3), + __stringify(GAUDI2_QUEUE_ID_PDMA_1_0), + __stringify(GAUDI2_QUEUE_ID_PDMA_1_1), + __stringify(GAUDI2_QUEUE_ID_PDMA_1_2), + __stringify(GAUDI2_QUEUE_ID_PDMA_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3), + 
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_3), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_0), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_1), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_2), + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_3), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_0), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_1), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_2), + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2), + 
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_3), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_0), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_1), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_2), + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_3), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_0), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_1), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_2), + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_3), + __stringify(GAUDI2_QUEUE_ID_NIC_0_0), + __stringify(GAUDI2_QUEUE_ID_NIC_0_1), + __stringify(GAUDI2_QUEUE_ID_NIC_0_2), + __stringify(GAUDI2_QUEUE_ID_NIC_0_3), + __stringify(GAUDI2_QUEUE_ID_NIC_1_0), + __stringify(GAUDI2_QUEUE_ID_NIC_1_1), + __stringify(GAUDI2_QUEUE_ID_NIC_1_2), + __stringify(GAUDI2_QUEUE_ID_NIC_1_3), + __stringify(GAUDI2_QUEUE_ID_NIC_2_0), + __stringify(GAUDI2_QUEUE_ID_NIC_2_1), + __stringify(GAUDI2_QUEUE_ID_NIC_2_2), + __stringify(GAUDI2_QUEUE_ID_NIC_2_3), + 
__stringify(GAUDI2_QUEUE_ID_NIC_3_0), + __stringify(GAUDI2_QUEUE_ID_NIC_3_1), + __stringify(GAUDI2_QUEUE_ID_NIC_3_2), + __stringify(GAUDI2_QUEUE_ID_NIC_3_3), + __stringify(GAUDI2_QUEUE_ID_NIC_4_0), + __stringify(GAUDI2_QUEUE_ID_NIC_4_1), + __stringify(GAUDI2_QUEUE_ID_NIC_4_2), + __stringify(GAUDI2_QUEUE_ID_NIC_4_3), + __stringify(GAUDI2_QUEUE_ID_NIC_5_0), + __stringify(GAUDI2_QUEUE_ID_NIC_5_1), + __stringify(GAUDI2_QUEUE_ID_NIC_5_2), + __stringify(GAUDI2_QUEUE_ID_NIC_5_3), + __stringify(GAUDI2_QUEUE_ID_NIC_6_0), + __stringify(GAUDI2_QUEUE_ID_NIC_6_1), + __stringify(GAUDI2_QUEUE_ID_NIC_6_2), + __stringify(GAUDI2_QUEUE_ID_NIC_6_3), + __stringify(GAUDI2_QUEUE_ID_NIC_7_0), + __stringify(GAUDI2_QUEUE_ID_NIC_7_1), + __stringify(GAUDI2_QUEUE_ID_NIC_7_2), + __stringify(GAUDI2_QUEUE_ID_NIC_7_3), + __stringify(GAUDI2_QUEUE_ID_NIC_8_0), + __stringify(GAUDI2_QUEUE_ID_NIC_8_1), + __stringify(GAUDI2_QUEUE_ID_NIC_8_2), + __stringify(GAUDI2_QUEUE_ID_NIC_8_3), + __stringify(GAUDI2_QUEUE_ID_NIC_9_0), + __stringify(GAUDI2_QUEUE_ID_NIC_9_1), + __stringify(GAUDI2_QUEUE_ID_NIC_9_2), + __stringify(GAUDI2_QUEUE_ID_NIC_9_3), + __stringify(GAUDI2_QUEUE_ID_NIC_10_0), + __stringify(GAUDI2_QUEUE_ID_NIC_10_1), + __stringify(GAUDI2_QUEUE_ID_NIC_10_2), + __stringify(GAUDI2_QUEUE_ID_NIC_10_3), + __stringify(GAUDI2_QUEUE_ID_NIC_11_0), + __stringify(GAUDI2_QUEUE_ID_NIC_11_1), + __stringify(GAUDI2_QUEUE_ID_NIC_11_2), + __stringify(GAUDI2_QUEUE_ID_NIC_11_3), + __stringify(GAUDI2_QUEUE_ID_NIC_12_0), + __stringify(GAUDI2_QUEUE_ID_NIC_12_1), + __stringify(GAUDI2_QUEUE_ID_NIC_12_2), + __stringify(GAUDI2_QUEUE_ID_NIC_12_3), + __stringify(GAUDI2_QUEUE_ID_NIC_13_0), + __stringify(GAUDI2_QUEUE_ID_NIC_13_1), + __stringify(GAUDI2_QUEUE_ID_NIC_13_2), + __stringify(GAUDI2_QUEUE_ID_NIC_13_3), + __stringify(GAUDI2_QUEUE_ID_NIC_14_0), + __stringify(GAUDI2_QUEUE_ID_NIC_14_1), + __stringify(GAUDI2_QUEUE_ID_NIC_14_2), + __stringify(GAUDI2_QUEUE_ID_NIC_14_3), + __stringify(GAUDI2_QUEUE_ID_NIC_15_0), + __stringify(GAUDI2_QUEUE_ID_NIC_15_1), + __stringify(GAUDI2_QUEUE_ID_NIC_15_2), + __stringify(GAUDI2_QUEUE_ID_NIC_15_3), + __stringify(GAUDI2_QUEUE_ID_NIC_16_0), + __stringify(GAUDI2_QUEUE_ID_NIC_16_1), + __stringify(GAUDI2_QUEUE_ID_NIC_16_2), + __stringify(GAUDI2_QUEUE_ID_NIC_16_3), + __stringify(GAUDI2_QUEUE_ID_NIC_17_0), + __stringify(GAUDI2_QUEUE_ID_NIC_17_1), + __stringify(GAUDI2_QUEUE_ID_NIC_17_2), + __stringify(GAUDI2_QUEUE_ID_NIC_17_3), + __stringify(GAUDI2_QUEUE_ID_NIC_18_0), + __stringify(GAUDI2_QUEUE_ID_NIC_18_1), + __stringify(GAUDI2_QUEUE_ID_NIC_18_2), + __stringify(GAUDI2_QUEUE_ID_NIC_18_3), + __stringify(GAUDI2_QUEUE_ID_NIC_19_0), + __stringify(GAUDI2_QUEUE_ID_NIC_19_1), + __stringify(GAUDI2_QUEUE_ID_NIC_19_2), + __stringify(GAUDI2_QUEUE_ID_NIC_19_3), + __stringify(GAUDI2_QUEUE_ID_NIC_20_0), + __stringify(GAUDI2_QUEUE_ID_NIC_20_1), + __stringify(GAUDI2_QUEUE_ID_NIC_20_2), + __stringify(GAUDI2_QUEUE_ID_NIC_20_3), + __stringify(GAUDI2_QUEUE_ID_NIC_21_0), + __stringify(GAUDI2_QUEUE_ID_NIC_21_1), + __stringify(GAUDI2_QUEUE_ID_NIC_21_2), + __stringify(GAUDI2_QUEUE_ID_NIC_21_3), + __stringify(GAUDI2_QUEUE_ID_NIC_22_0), + __stringify(GAUDI2_QUEUE_ID_NIC_22_1), + __stringify(GAUDI2_QUEUE_ID_NIC_22_2), + __stringify(GAUDI2_QUEUE_ID_NIC_22_3), + __stringify(GAUDI2_QUEUE_ID_NIC_23_0), + __stringify(GAUDI2_QUEUE_ID_NIC_23_1), + __stringify(GAUDI2_QUEUE_ID_NIC_23_2), + __stringify(GAUDI2_QUEUE_ID_NIC_23_3), + __stringify(GAUDI2_QUEUE_ID_ROT_0_0), + __stringify(GAUDI2_QUEUE_ID_ROT_0_1), + __stringify(GAUDI2_QUEUE_ID_ROT_0_2), + 
__stringify(GAUDI2_QUEUE_ID_ROT_0_3), + __stringify(GAUDI2_QUEUE_ID_ROT_1_0), + __stringify(GAUDI2_QUEUE_ID_ROT_1_1), + __stringify(GAUDI2_QUEUE_ID_ROT_1_2), + __stringify(GAUDI2_QUEUE_ID_ROT_1_3), + __stringify(GAUDI2_QUEUE_ID_CPU_PQ), + __stringify(GAUDI2_QUEUE_ID_SIZE), +}; + static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = { "qman sei intr", "arc sei intr" @@ -523,7 +1153,7 @@ static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = "PQC L2H error" }; -static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = { +static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = { "RSVD0", "CQ AXI HBW error", "CP AXI HBW error", @@ -613,6 +1243,7 @@ static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAU "invalid_lock_access", "LD_L protection violation", "ST_L protection violation", + "D$ L0CS mismatch", }; static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = { @@ -634,14 +1265,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = "sbte_prtn_intr_4", }; -static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = { - "i0", - "i1", - "i2", - "i3", - "i4", -}; - static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = { "WBC ERR RESP_0", "WBC ERR RESP_1", @@ -711,6 +1334,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = { "TLP is blocked by RR" }; +static const int gaudi2_queue_id_to_engine_id[] = { + [GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0, + [GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1, + [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = + GAUDI2_DCORE0_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = + GAUDI2_DCORE0_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = + GAUDI2_DCORE1_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = + GAUDI2_DCORE2_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = + GAUDI2_DCORE3_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + 
[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0, + [GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1, + [GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0, + [GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1, + [GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0, + [GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1, + [GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0, + [GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1, + [GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0, + [GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1, + [GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0, + [GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1, + [GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0, + [GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1, + [GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0, + [GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1, + [GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0, + [GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1, + 
[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0, + [GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1, + [GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0, + [GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1, + [GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0, + [GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1, + [GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0, + [GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1, +}; + const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = { [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE, [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE, @@ -1437,6 +2165,34 @@ static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, }; +static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = { + [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE, + [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE, + [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE, + [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE, + [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE, + [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE, + [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE, + [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE, + [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE, + [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE, + [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE, + [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE, + [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE, + [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE, + [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE, + [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE, + [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE, + [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE, + [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE, + [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE, + [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE, + [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE, + [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE, + [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE, + [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE, +}; + const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { [ROTATOR_ID_0] = mmROT0_BASE, [ROTATOR_ID_1] = mmROT1_BASE @@ -1475,6 +2231,56 @@ static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = { [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0, }; +static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = { + [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0, + [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1, + [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2, + [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3, + [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4, + [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5, + [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0, + [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1, + [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2, + [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3, + [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4, + [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5, + [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0, + [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1, + [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2, + [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3, + 
[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
+ /* the PCI TPC is placed last (mapped like HW) */
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
+};
+
+static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
+ [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
+ [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
+ [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
+ [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
+};
+
+static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
+ [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
+ [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
+ [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
+ [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
+ [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
+ [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
+ [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
+ [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
+ [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
+ [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
+ [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
+};
+
 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
@@ -1499,41 +2305,6 @@ static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRI
 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
 };

-static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
- RTR_ID_X_Y(2, 4),
- RTR_ID_X_Y(3, 4),
- RTR_ID_X_Y(4, 4),
- RTR_ID_X_Y(5, 4),
- RTR_ID_X_Y(6, 4),
- RTR_ID_X_Y(7, 4),
- RTR_ID_X_Y(8, 4),
- RTR_ID_X_Y(9, 4),
- RTR_ID_X_Y(10, 4),
- RTR_ID_X_Y(11, 4),
- RTR_ID_X_Y(12, 4),
- RTR_ID_X_Y(13, 4),
- RTR_ID_X_Y(14, 4),
- RTR_ID_X_Y(15, 4),
- RTR_ID_X_Y(16, 4),
- RTR_ID_X_Y(17, 4),
- RTR_ID_X_Y(2, 11),
- RTR_ID_X_Y(3, 11),
- RTR_ID_X_Y(4, 11),
- RTR_ID_X_Y(5, 11),
- RTR_ID_X_Y(6, 11),
- RTR_ID_X_Y(7, 11),
- RTR_ID_X_Y(8, 11),
- RTR_ID_X_Y(9, 11),
- RTR_ID_X_Y(0, 0),/* 24 no id */
- RTR_ID_X_Y(0, 0),/* 25 no id */
- RTR_ID_X_Y(0, 0),/* 26 no id */
- RTR_ID_X_Y(0, 0),/* 27 no id */
- RTR_ID_X_Y(14, 11),
- RTR_ID_X_Y(15, 11),
- RTR_ID_X_Y(16, 11),
- RTR_ID_X_Y(17, 11)
-};
-
 enum rtr_id {
 DCORE0_RTR0,
 DCORE0_RTR1,
@@ -1676,7 +2447,8 @@ enum razwi_event_sources {
 RAZWI_PDMA,
 RAZWI_NIC,
 RAZWI_DEC,
- RAZWI_ROT
+ RAZWI_ROT,
+ RAZWI_ARC_FARM
 };

 struct hbm_mc_error_causes {
@@ -1784,7 +2556,14 @@ static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
 bool is_memset);
+static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
+static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
+static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
+static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);

 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
 {
@@ -1879,11 +2658,26 @@ static int set_number_of_functional_hbms(struct hl_device *hdev)
 return 0;
 }

+static bool gaudi2_is_edma_queue_id(u32 queue_id)
+{
+
+ switch (queue_id) {
+ case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
+ return true;
+ default:
+ return false;
+ }
+}
+
 static int gaudi2_set_dram_properties(struct hl_device *hdev)
 {
 struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 basic_hbm_page_size;
- int rc;
+ u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
+ u32 basic_hbm_page_size, edma_idx = 0;
+ int rc, i;

 rc = set_number_of_functional_hbms(hdev);
 if (rc)
@@ -1927,9 +2721,35 @@ static int gaudi2_set_dram_properties(struct hl_device *hdev)
 prop->dmmu.start_addr = prop->dram_base_address +
 (prop->dram_page_size *
 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
-
 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

+ /*
+ * The driver can't share a (48MB) HBM page with the F/W, as the F/W could block
+ * the driver's part via a range register, so the driver region must start at the next (48MB) page
+ */
+ hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
+
+ /*
+ * The NIC driver section and the HMMU page tables section in the HBM need
+ * to fit within the remaining size of the first DRAM page, after taking
+ * the F/W image size into account
+ */
+
+ /* Reserve region in HBM for HMMU page tables */
+ prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
+ ((prop->dram_page_size - hbm_drv_base_offset) -
+ (HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
+
+ /* Set EDMA PQs HBM addresses */
+ edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
+
+ for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
+ if (gaudi2_is_edma_queue_id(i)) {
+ prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
+ (edma_idx * HL_QUEUE_SIZE_IN_BYTES);
+ edma_idx++;
+ }
+ }

 return 0;
 }
@@ -1939,7 +2759,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 struct asic_fixed_properties *prop = &hdev->asic_prop;
 struct hw_queue_properties *q_props;
 u32 num_sync_stream_queues = 0;
- int i;
+ int i, rc;

 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
@@ -1962,6 +2782,9 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 }

 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
+
+ if (gaudi2_is_edma_queue_id(i))
+ q_props[i].dram_bd = 1;
 }

 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
@@ -1988,48 +2811,45 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)

 prop->hints_range_reservation = true;

- if (hdev->pldm)
- prop->mmu_pgt_size = 0x800000; /* 8MB */
- else
- prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
+ prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
+
+ prop->max_asid = 2;
+ prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
 prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
- prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
- prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
- prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
 prop->dmmu.page_size = PAGE_SIZE_1GB;
- prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
+ prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
 prop->dmmu.last_mask = LAST_MASK;
- prop->dmmu.host_resident = 1;
- /* TODO: will be duplicated until implementing per-MMU props */
- prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->dmmu.host_resident = 0;
+ prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;

- /*
- * this is done in order to be able to validate FW descriptor (i.e. validating that
- * the addresses and allocated space for FW image does not cross memory bounds).
- * for this reason we set the DRAM size to the minimum possible and later it will
- * be modified according to what reported in the cpucp info packet
+ /* We need to set the pgt address in DRAM for HMMU init, so we cannot
+ * wait for the F/W cpucp info to set the DRAM props, as MMU init comes
+ * before H/W init
 */
- prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
+ rc = hdev->asic_funcs->set_dram_properties(hdev);
+ if (rc)
+ goto free_qprops;
+
+ prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
+ prop->pmmu.pgt_size = prop->mmu_pgt_size;
 hdev->pmmu_huge_range = true;
 prop->pmmu.host_resident = 1;
 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
 prop->pmmu.last_mask = LAST_MASK;
- /* TODO: will be duplicated until implementing per-MMU props */
- prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;

 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
@@ -2084,11 +2904,13 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
 }

+ prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
 prop->num_engine_cores = CPU_ID_MAX;
 prop->cfg_size = CFG_SIZE;
- prop->max_asid = MAX_ASID;
 prop->num_of_events = GAUDI2_EVENT_SIZE;

+ prop->supports_engine_modes = true;
+
 prop->dc_power_default = DC_POWER_DEFAULT;

 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
@@ -2096,7 +2918,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

- strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+ strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);

 prop->mme_master_slave_mode = 1;

@@ -2107,6 +2929,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 (num_sync_stream_queues * HL_RSVD_MONS);

 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
+ prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
+ prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;

 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;

@@ -2125,7 +2949,13 @@ static int gaudi2_set_fixed_properties(struct
hl_device *hdev) prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; + prop->supports_advanced_cpucp_rc = true; + return 0; + +free_qprops: + kfree(prop->hw_queues_props); + return rc; } static int gaudi2_pci_bars_map(struct hl_device *hdev) @@ -2547,7 +3377,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev) } if (!strlen(prop->cpucp_info.card_name)) - strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); + strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); /* Overwrite binning masks with the actual binning values from F/W */ hdev->dram_binning = prop->cpucp_info.dram_binning_mask; @@ -2555,6 +3386,10 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev) hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); + dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n", + hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning, + hdev->decoder_binning); + /* * at this point the DRAM parameters need to be updated according to data obtained * from the FW @@ -2594,6 +3429,25 @@ static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) return 0; } +static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) + return 0; + + if (prop->dmmu.host_resident) + return 0; + + rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0); + if (rc) + dev_err(hdev->dev, "Failed to clear mmu pgt\n"); + + return rc; +} + static int gaudi2_early_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -2650,7 +3504,18 @@ static int gaudi2_early_init(struct hl_device *hdev) if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); + goto pci_fini; + } + + rc = hl_fw_read_preboot_status(hdev); + if (rc) { + if (hdev->reset_on_preboot_fail) + hdev->asic_funcs->hw_fini(hdev, true, false); + goto pci_fini; + } } return 0; @@ -2692,6 +3557,7 @@ static bool gaudi2_is_arc_tpc_owned(u64 arc_id) static void gaudi2_init_arcs(struct hl_device *hdev) { + struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; struct gaudi2_device *gaudi2 = hdev->asic_specific; u64 arc_id; u32 i; @@ -2721,6 +3587,10 @@ static void gaudi2_init_arcs(struct hl_device *hdev) gaudi2_set_arc_id_cap(hdev, arc_id); } + + /* Fetch the engine cores interrupt register address from the ARC scratchpad (dyn regs) */ + hdev->asic_prop.engine_core_interrupt_reg_addr = + CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl); } static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) @@ -2772,16 +3642,21 @@ static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) return 0; } -static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev) +static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev) { u16 arc_id; + int rc; for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { if (!gaudi2_is_arc_enabled(hdev, arc_id)) continue; - gaudi2_scrub_arc_dccm(hdev, arc_id); + rc = gaudi2_scrub_arc_dccm(hdev, arc_id); + if (rc) + return rc; } + +
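+ /* all enabled ARC DCCMs were scrubbed successfully */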
return 0; } static int gaudi2_late_init(struct hl_device *hdev) @@ -2789,14 +3664,10 @@ static int gaudi2_late_init(struct hl_device *hdev) struct gaudi2_device *gaudi2 = hdev->asic_specific; int rc; - hdev->asic_prop.supports_advanced_cpucp_rc = true; - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, gaudi2->virt_msix_db_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + if (rc) return rc; - } rc = gaudi2_fetch_psoc_frequency(hdev); if (rc) { @@ -2804,8 +3675,20 @@ static int gaudi2_late_init(struct hl_device *hdev) goto disable_pci_access; } + rc = gaudi2_mmu_clear_pgt_range(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); + goto disable_pci_access; + } + gaudi2_init_arcs(hdev); - gaudi2_scrub_arcs_dccm(hdev); + + rc = gaudi2_scrub_arcs_dccm(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to scrub arcs DCCM\n"); + goto disable_pci_access; + } + gaudi2_init_security(hdev); return 0; @@ -2989,6 +3872,13 @@ static void gaudi2_user_interrupt_setup(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int i, j, k; + /* Initialize TPC interrupt */ + HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC); + + /* Initialize unexpected error interrupt */ + HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0, + HL_USR_INTERRUPT_UNEXPECTED); + /* Initialize common user CQ interrupt */ HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ); @@ -3051,7 +3941,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev) int i, rc; /* Configure Special blocks */ - prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE; + prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM; prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, sizeof(*prop->special_blocks), GFP_KERNEL); @@ -3115,6 +4005,48 @@ static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev) return gaudi2_special_blocks_config(hdev); } +static void gaudi2_test_queues_msgs_free(struct hl_device *hdev) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info; + int i; + + for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) { + /* bail-out if this is an allocation failure point */ + if (!msg_info[i].kern_addr) + break; + + hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr); + msg_info[i].kern_addr = NULL; + } +} + +static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info; + int i, rc; + + /* allocate a message-short buf for each Q we intend to test */ + for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) { + msg_info[i].kern_addr = + (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short), + GFP_KERNEL, &msg_info[i].dma_addr); + if (!msg_info[i].kern_addr) { + dev_err(hdev->dev, + "Failed to allocate dma memory for H/W queue %d testing\n", i); + rc = -ENOMEM; + goto err_exit; + } + } + + return 0; + +err_exit: + gaudi2_test_queues_msgs_free(hdev); + return rc; +} + static int gaudi2_sw_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -3188,13 +4120,7 @@ static int gaudi2_sw_init(struct hl_device *hdev) spin_lock_init(&gaudi2->hw_queues_lock); - 
gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, - &gaudi2->scratchpad_bus_address, - GFP_KERNEL | __GFP_ZERO); - if (!gaudi2->scratchpad_kernel_address) { - rc = -ENOMEM; - goto free_virt_msix_db_mem; - } + gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE; gaudi2_user_mapped_blocks_init(hdev); @@ -3208,17 +4134,28 @@ static int gaudi2_sw_init(struct hl_device *hdev) prop->supports_compute_reset = true; + /* Event queue sanity check added in FW version 1.11 */ + if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0) + hdev->event_queue.check_eqe_index = false; + else + hdev->event_queue.check_eqe_index = true; + hdev->asic_funcs->set_pci_memory_regions(hdev); rc = gaudi2_special_blocks_iterator_config(hdev); if (rc) - goto free_scratchpad_mem; + goto free_virt_msix_db_mem; + + rc = gaudi2_test_queues_msgs_alloc(hdev); + if (rc) + goto special_blocks_free; + + hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ; return 0; -free_scratchpad_mem: - hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address, - gaudi2->scratchpad_bus_address); +special_blocks_free: + gaudi2_special_blocks_iterator_free(hdev); free_virt_msix_db_mem: hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); free_cpu_accessible_dma_pool: @@ -3238,6 +4175,8 @@ static int gaudi2_sw_fini(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi2_device *gaudi2 = hdev->asic_specific; + gaudi2_test_queues_msgs_free(hdev); + gaudi2_special_blocks_iterator_free(hdev); hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); @@ -3247,9 +4186,6 @@ static int gaudi2_sw_fini(struct hl_device *hdev) hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, hdev->cpu_accessible_dma_address); - hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, - gaudi2->scratchpad_bus_address); - dma_pool_destroy(hdev->dma_pool); kfree(gaudi2); @@ -3646,8 +4582,14 @@ static const char *gaudi2_irq_name(u16 irq_number) return "gaudi2 completion"; case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; + case GAUDI2_IRQ_NUM_TPC_ASSERT: + return "gaudi2 tpc assert"; + case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR: + return "gaudi2 unexpected error"; case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST: return "gaudi2 user completion"; + case GAUDI2_IRQ_NUM_EQ_ERROR: + return "gaudi2 eq error"; default: return "invalid"; } @@ -3677,7 +4619,6 @@ static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) static int gaudi2_dec_enable_msix(struct hl_device *hdev) { int rc, i, irq_init_cnt, irq, relative_idx; - irq_handler_t irq_handler; struct hl_dec *dec; for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; @@ -3687,20 +4628,22 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev) irq = pci_irq_vector(hdev->pdev, i); relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; - irq_handler = (relative_idx % 2) ? - hl_irq_handler_dec_abnrm : - hl_irq_handler_user_interrupt; - - dec = hdev->dec + relative_idx / 2; - /* We pass different structures depending on the irq handler. 
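* (the last request_irq() argument is the dev_id cookie handed back to the chosen handler).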
For the abnormal * interrupt we pass hl_dec and for the regular interrupt we pass the relevant * user_interrupt entry + * + * TODO: change the dec abnrm to threaded irq */ - rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), - ((relative_idx % 2) ? - (void *) dec : - (void *) &hdev->user_interrupt[dec->core_id])); + + dec = hdev->dec + relative_idx / 2; + if (relative_idx % 2) { + rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0, + gaudi2_irq_name(i), (void *) dec); + } else { + rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i), + (void *) &hdev->user_interrupt[dec->core_id]); + } + if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); goto free_dec_irqs; @@ -3719,12 +4662,13 @@ static int gaudi2_enable_msix(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi2_device *gaudi2 = hdev->asic_specific; int rc, irq, i, j, user_irq_init_cnt; - irq_handler_t irq_handler; struct hl_cq *cq; if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) return 0; + hl_init_cpu_for_irq(hdev); + rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, PCI_IRQ_MSIX); if (rc < 0) { @@ -3755,20 +4699,47 @@ static int gaudi2_enable_msix(struct hl_device *hdev) goto free_event_irq; } + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); + rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, + gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), + &hdev->tpc_interrupt); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_dec_irq; + } + + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); + rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, + gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR), + &hdev->unexpected_error_interrupt); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_tpc_irq; + } + for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; user_irq_init_cnt < prop->user_interrupt_count; i++, j++, user_irq_init_cnt++) { irq = pci_irq_vector(hdev->pdev, i); - irq_handler = hl_irq_handler_user_interrupt; - - rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]); + hl_set_irq_affinity(hdev, irq); + rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i), + &hdev->user_interrupt[j]); if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); goto free_user_irq; } } + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR); + rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler, + IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR), + hdev); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_user_irq; + } + gaudi2->hw_cap_initialized |= HW_CAP_MSIX; return 0; @@ -3778,11 +4749,16 @@ free_user_irq: i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { irq = pci_irq_vector(hdev->pdev, i); + irq_set_affinity_and_hint(irq, NULL); free_irq(irq, &hdev->user_interrupt[j]); } - - gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); - + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); + free_irq(irq, &hdev->unexpected_error_interrupt); +free_tpc_irq: + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); + free_irq(irq, &hdev->tpc_interrupt); +free_dec_irq: + gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1); free_event_irq: irq = pci_irq_vector(hdev->pdev, 
GAUDI2_IRQ_NUM_EVENT_QUEUE); free_irq(irq, cq); @@ -3814,6 +4790,9 @@ static void gaudi2_sync_irqs(struct hl_device *hdev) synchronize_irq(irq); } + synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT)); + synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR)); + for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; i++, j++) { irq = pci_irq_vector(hdev->pdev, i); @@ -3821,6 +4800,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev) } synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); + synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR)); } static void gaudi2_disable_msix(struct hl_device *hdev) @@ -3840,10 +4820,17 @@ static void gaudi2_disable_msix(struct hl_device *hdev) gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); + free_irq(irq, &hdev->tpc_interrupt); + + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); + free_irq(irq, &hdev->unexpected_error_interrupt); + for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { irq = pci_irq_vector(hdev->pdev, i); + irq_set_affinity_and_hint(irq, NULL); free_irq(irq, &hdev->user_interrupt[j]); } @@ -3851,6 +4838,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev) cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; free_irq(irq, cq); + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR); + free_irq(irq, hdev); + pci_free_irq_vectors(hdev->pdev); gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; @@ -4037,7 +5027,6 @@ static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, { int i, rc; - for (i = 0 ; i < num_cores ; i++) { if (gaudi2_is_arc_enabled(hdev, core_ids[i])) gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); @@ -4059,6 +5048,139 @@ static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, return 0; } +static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u32 reg_base, reg_addr, reg_val, tpc_id; + + if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) + return 0; + + tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id]; + if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id))) + return 0; + + reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id]; + reg_addr = reg_base + TPC_CFG_STALL_OFFSET; + reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK, + (engine_command == HL_ENGINE_STALL) ? 1 : 0); + WREG32(reg_addr, reg_val); + + if (engine_command == HL_ENGINE_RESUME) { + reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id]; + reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET; + RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK); + } + + return 0; +} + +static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u32 reg_base, reg_addr, reg_val, mme_id; + + mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id]; + if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id))) + return 0; + + reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id]; + reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET; + reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK, + (engine_command == HL_ENGINE_STALL) ? 
1 : 0); + WREG32(reg_addr, reg_val); + + return 0; +} + +static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u32 reg_base, reg_addr, reg_val, edma_id; + + if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) + return 0; + + edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id]; + if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id))) + return 0; + + reg_base = gaudi2_dma_core_blocks_bases[edma_id]; + reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET; + reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, + (engine_command == HL_ENGINE_STALL) ? 1 : 0); + WREG32(reg_addr, reg_val); + + if (engine_command == HL_ENGINE_STALL) { + reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) | + FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1); + WREG32(reg_addr, reg_val); + } + + return 0; +} + +static int gaudi2_set_engine_modes(struct hl_device *hdev, + u32 *engine_ids, u32 num_engines, u32 engine_command) +{ + int i, rc; + + for (i = 0 ; i < num_engines ; ++i) { + switch (engine_ids[i]) { + case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5: + case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5: + case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5: + case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5: + rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command); + if (rc) + return rc; + + break; + case GAUDI2_DCORE0_ENGINE_ID_MME: + case GAUDI2_DCORE1_ENGINE_ID_MME: + case GAUDI2_DCORE2_ENGINE_ID_MME: + case GAUDI2_DCORE3_ENGINE_ID_MME: + rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command); + if (rc) + return rc; + + break; + case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1: + case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1: + case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1: + case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... 
GAUDI2_DCORE3_ENGINE_ID_EDMA_1: + rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command); + if (rc) + return rc; + + break; + default: + dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]); + return -EINVAL; + } + } + + return 0; +} + +static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids, + u32 num_engines, u32 engine_command) +{ + switch (engine_command) { + case HL_ENGINE_CORE_HALT: + case HL_ENGINE_CORE_RUN: + return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command); + + case HL_ENGINE_STALL: + case HL_ENGINE_RESUME: + return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command); + + default: + dev_err(hdev->dev, "failed to execute command id %u\n", engine_command); + return -EINVAL; + } +} + static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) { u32 wait_timeout_ms; @@ -4068,7 +5190,7 @@ static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw else wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC; - if (fw_reset) + if (fw_reset || hdev->cpld_shutdown) goto skip_engines; gaudi2_stop_dma_qmans(hdev); @@ -4121,6 +5243,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev) pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC; + pre_fw_load->wait_for_preboot_extended_timeout = + GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC; } static void gaudi2_init_firmware_loader(struct hl_device *hdev) @@ -4251,10 +5375,17 @@ static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, q = &hdev->kernel_queues[queue_id_base + pq_id]; pq_offset = pq_id * 4; - WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, - lower_32_bits(q->bus_address)); - WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, - upper_32_bits(q->bus_address)); + if (q->dram_bd) { + WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, + lower_32_bits(q->pq_dram_address)); + WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, + upper_32_bits(q->pq_dram_address)); + } else { + WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, + lower_32_bits(q->bus_address)); + WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, + upper_32_bits(q->bus_address)); + } WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); @@ -5141,7 +6272,8 @@ static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_har return rc; } -static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) +static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base, + bool host_resident_pgt) { struct asic_fixed_properties *prop = &hdev->asic_prop; u64 hop0_addr; @@ -5153,7 +6285,11 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) max_asid = min((u32) 8, max_asid); for (asid = 0 ; asid < max_asid ; asid++) { - hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; + if (host_resident_pgt) + hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; + else + hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size); + rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); if (rc) { dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); @@ -5164,7 +6300,8 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) return 0; } -static int 
gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) +static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base, + bool host_resident_pgt) { u32 status, timeout_usec; int rc; @@ -5187,7 +6324,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb if (rc) dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); - rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); + rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt); if (rc) return rc; @@ -5211,6 +6348,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb static int gaudi2_pci_mmu_init(struct hl_device *hdev) { + struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi2_device *gaudi2 = hdev->asic_specific; u32 mmu_base, stlb_base; int rc; @@ -5250,7 +6388,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev) WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); - rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident); if (rc) return rc; @@ -5302,7 +6440,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); - rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident); if (rc) return rc; @@ -5509,11 +6647,10 @@ static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev) * gaudi2_execute_hard_reset - execute hard reset by driver/FW * * @hdev: pointer to the habanalabs device structure - * @reset_sleep_ms: sleep time in msec after reset * * This function executes hard reset based on whether the driver or the FW should do the reset */ -static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms) +static void gaudi2_execute_hard_reset(struct hl_device *hdev) { if (hdev->asic_prop.hard_reset_done_by_fw) { gaudi2_send_hard_reset_cmd(hdev); @@ -5535,27 +6672,16 @@ static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms * gaudi2_execute_soft_reset - execute soft reset by driver/FW * * @hdev: pointer to the habanalabs device structure - * @reset_sleep_ms: sleep time in msec after reset * @driver_performs_reset: true if driver should perform reset instead of f/w. + * @poll_timeout_us: time to wait for response from f/w. * * This function executes soft reset based on whether the driver or the FW should do the reset */ -static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms, - bool driver_performs_reset) +static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset, + u32 poll_timeout_us) { - struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; - - if (!driver_performs_reset) { - /* set SP to indicate reset request sent to FW */ - if (dyn_regs->cpu_rst_status) - WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); - else - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); - - WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), - gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); - return; - } + if (!driver_performs_reset) + return hl_fw_send_soft_reset(hdev); /* Block access to engines, QMANs and SM during reset, these * RRs will be reconfigured after soft reset.
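The soft-reset ownership split above is decided once in gaudi2_hw_fini() and merely consumed here: either the FW is asked to reset via hl_fw_send_soft_reset(), or the driver fences the initiators and writes PSOC_RESET_CONF_SOFT_RST itself. A minimal sketch of that predicate, mirroring the assignment visible in the hw_fini hunk below (gaudi2_driver_owns_soft_reset() is a hypothetical helper, not part of the patch):

/*
 * Sketch: who performs the soft reset. The driver resets only when
 * running with preboot alone and FW security is disabled; in every
 * other configuration the request is handed to the FW via
 * hl_fw_send_soft_reset(). Hypothetical helper, for illustration only.
 */
static bool gaudi2_driver_owns_soft_reset(struct hl_device *hdev)
{
	return hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
	       !hdev->asic_prop.fw_security_enabled;
}

With such a predicate, the caller would reduce to rc = gaudi2_execute_soft_reset(hdev, gaudi2_driver_owns_soft_reset(hdev), poll_timeout_us);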
@@ -5569,17 +6695,14 @@ static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE); WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1); + return 0; } -static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms, - u32 poll_timeout_us) +static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us) { int i, rc = 0; u32 reg_val; - /* without this sleep reset will not work */ - msleep(reset_sleep_ms); - /* We poll the BTM done indication multiple times after reset due to * a HW errata 'GAUDI2_0300' */ @@ -5596,30 +6719,12 @@ static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_m dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val); } -static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us) -{ - int i, rc = 0; - u32 reg_val; - - for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) - rc = hl_poll_timeout( - hdev, - mmCPU_RST_STATUS_TO_HOST, - reg_val, - reg_val == CPU_RST_STATUS_SOFT_RST_DONE, - 1000, - poll_timeout_us); - - if (rc) - dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n", - reg_val); -} - -static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) +static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) { struct gaudi2_device *gaudi2 = hdev->asic_specific; u32 poll_timeout_us, reset_sleep_ms; bool driver_performs_reset = false; + int rc; if (hdev->pldm) { reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC : @@ -5637,7 +6742,7 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_rese if (hard_reset) { driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw; - gaudi2_execute_hard_reset(hdev, reset_sleep_ms); + gaudi2_execute_hard_reset(hdev); } else { /* * As we also have to support working with preboot only (which does not support @@ -5647,11 +6752,13 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_rese */ driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU && !hdev->asic_prop.fw_security_enabled); - gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset); + rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us); + if (rc) + return rc; } skip_reset: - if (driver_performs_reset || hard_reset) + if (driver_performs_reset || hard_reset) { /* * Instead of waiting for BTM indication we should wait for preboot ready: * Consider the below scenario: @@ -5671,17 +6778,18 @@ skip_reset: * communicate with FW while it is still in reset.
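* (which is why the BTM done indication alone cannot be trusted here);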
* to overcome this we will always wait for the preboot ready indication */ - if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) { - msleep(reset_sleep_ms); + + /* without this sleep, reset will not work */ + msleep(reset_sleep_ms); + + if (hdev->fw_components & FW_TYPE_PREBOOT_CPU) hl_fw_wait_preboot_ready(hdev); - } else { - gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); - } - else - gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); + else + gaudi2_poll_btm_indication(hdev, poll_timeout_us); + } if (!gaudi2) - return; + return 0; gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); @@ -5708,17 +6816,12 @@ skip_reset: HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | HW_CAP_ROT_MASK); } + return 0; } static int gaudi2_suspend(struct hl_device *hdev) { - int rc; - - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); - if (rc) - dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); - - return rc; + return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); } static int gaudi2_resume(struct hl_device *hdev) @@ -5731,10 +6834,17 @@ static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, { int rc; - vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | - VM_DONTCOPY | VM_NORESERVE; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE); #ifdef _HAS_DMA_MMAP_COHERENT + /* + * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP + * so vm_insert_page() can handle it safely. Without this, the kernel + * may BUG_ON due to VM_PFNMAP. + */ + if (is_vmalloc_addr(cpu_addr)) + vm_flags_set(vma, VM_MIXEDMAP); rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); if (rc) @@ -6019,31 +7129,14 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } -static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, - enum dma_data_direction dir) -{ - dma_addr_t dma_addr; - - dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); - if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) - return 0; - - return dma_addr; -} - -static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, - enum dma_data_direction dir) -{ - dma_unmap_single(&hdev->pdev->dev, addr, len, dir); -} - static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) { struct asic_fixed_properties *asic_prop = &hdev->asic_prop; struct gaudi2_device *gaudi2 = hdev->asic_specific; if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { - dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); + dev_err(hdev->dev, "h/w queue %s is disabled\n", + GAUDI2_QUEUE_ID_TO_STR(parser->hw_queue_id)); return -EINVAL; } @@ -6259,28 +7352,29 @@ static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, b } } -static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) +static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id) +{ + return hdev->asic_prop.first_available_user_sob[0] + + hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0; +} + +static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id) { - u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; + u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4; u32 sob_addr
= mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; - u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a; - struct packet_msg_short *msg_short_pkt; - dma_addr_t pkt_dma_addr; - size_t pkt_size; - int rc; - if (hdev->pldm) - timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; - else - timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; + /* Reset the SOB value */ + WREG32(sob_addr, 0); +} - pkt_size = sizeof(*msg_short_pkt); - msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr); - if (!msg_short_pkt) { - dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n", - hw_queue_id); - return -ENOMEM; - } +static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val, + struct gaudi2_queues_test_info *msg_info) +{ + u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4; + u32 tmp, sob_base = 1; + struct packet_msg_short *msg_short_pkt = msg_info->kern_addr; + size_t pkt_size = sizeof(struct packet_msg_short); + int rc; tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | (1 << GAUDI2_PKT_CTL_EB_SHIFT) | @@ -6291,15 +7385,26 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) msg_short_pkt->value = cpu_to_le32(sob_val); msg_short_pkt->ctl = cpu_to_le32(tmp); - /* Reset the SOB value */ - WREG32(sob_addr, 0); + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr); + if (rc) + dev_err(hdev->dev, + "Failed to send msg_short packet to H/W queue %s\n", + GAUDI2_QUEUE_ID_TO_STR(hw_queue_id)); - rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n", - hw_queue_id); - goto free_pkt; - } + return rc; +} + +static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val) +{ + u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4; + u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; + u32 timeout_usec, tmp; + int rc; + + if (hdev->pldm) + timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; + else + timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; rc = hl_poll_timeout( hdev, @@ -6310,16 +7415,11 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) timeout_usec); if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", - hw_queue_id, tmp); + dev_err(hdev->dev, "H/W queue %s test failed (SOB_OBJ_0 == 0x%x)\n", + GAUDI2_QUEUE_ID_TO_STR(hw_queue_id), tmp); rc = -EIO; } - /* Reset the SOB value */ - WREG32(sob_addr, 0); - -free_pkt: - hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr); return rc; } @@ -6339,42 +7439,60 @@ static int gaudi2_test_cpu_queue(struct hl_device *hdev) static int gaudi2_test_queues(struct hl_device *hdev) { - int i, rc, ret_val = 0; + struct gaudi2_device *gaudi2 = hdev->asic_specific; + struct gaudi2_queues_test_info *msg_info; + u32 sob_val = 0x5a5a; + int i, rc; + /* send test message on all enabled Qs */ for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { - if (!gaudi2_is_queue_enabled(hdev, i)) + if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i)) continue; + msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0]; gaudi2_qman_set_test_mode(hdev, i, true); - rc = gaudi2_test_queue(hdev, i); - gaudi2_qman_set_test_mode(hdev, i, false); - - if (rc) { - ret_val = -EINVAL; + gaudi2_test_queue_clear(hdev, i); + rc = 
gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info); + if (rc) goto done; - } } rc = gaudi2_test_cpu_queue(hdev); - if (rc) { - ret_val = -EINVAL; + if (rc) goto done; + + /* verify that all messages were processed */ + for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { + if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i)) + continue; + + rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val); + if (rc) + /* chip is not usable, no need for cleanups, just bail-out with error */ + goto done; + + gaudi2_test_queue_clear(hdev, i); + gaudi2_qman_set_test_mode(hdev, i, false); } done: - return ret_val; + return rc; } static int gaudi2_compute_reset_late_init(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; size_t irq_arr_size; + int rc; - /* TODO: missing gaudi2_nic_resume. - * Until implemented nic_hw_cap_initialized will remain zeroed - */ gaudi2_init_arcs(hdev); - gaudi2_scrub_arcs_dccm(hdev); + + rc = gaudi2_scrub_arcs_dccm(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to scrub arcs DCCM\n"); + return rc; + } + gaudi2_init_security(hdev); /* Unmask all IRQs since some could have been received during the soft reset */ @@ -6382,74 +7500,21 @@ static int gaudi2_compute_reset_late_init(struct hl_device *hdev) return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); } -static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, - struct iterate_module_ctx *ctx) +static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { - struct gaudi2_tpc_idle_data *idle_data = ctx->data; - u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; - bool is_eng_idle; - int engine_idx; - - if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) - engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; - else - engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + - dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; - - tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); - qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); - qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); - qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); - - is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && - IS_TPC_IDLE(tpc_cfg_sts); - *(idle_data->is_idle) &= is_eng_idle; - - if (idle_data->mask && !is_eng_idle) - set_bit(engine_idx, idle_data->mask); - - if (idle_data->e) - hl_engine_data_sprintf(idle_data->e, - idle_data->tpc_fmt, dcore, inst, - is_eng_idle ? 
"Y" : "N", - qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); -} - -static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, - struct engines_data *e) -{ - u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, - mme_arch_sts, dec_swreg15, dec_enabled_bit; + u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1; struct asic_fixed_properties *prop = &hdev->asic_prop; - const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n"; unsigned long *mask = (unsigned long *) mask_arr; - const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n"; - const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; - const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; - const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n"; - const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; - const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; + const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n"; bool is_idle = true, is_eng_idle; - u64 offset; - - struct gaudi2_tpc_idle_data tpc_idle_data = { - .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", - .e = e, - .mask = mask, - .is_idle = &is_idle, - }; - struct iterate_module_ctx tpc_iter = { - .fn = &gaudi2_is_tpc_engine_idle, - .data = &tpc_idle_data, - }; - int engine_idx, i, j; + u64 offset; - /* EDMA, Two engines per Dcore */ if (e) hl_engine_data_sprintf(e, - "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" - "---- ---- ------- ------------ ----------------------\n"); + "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n" + "---- ---- ------- ------------ ------------- -------------\n"); for (i = 0; i < NUM_OF_DCORES; i++) { for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { @@ -6462,45 +7527,56 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; - dma_core_idle_ind_mask = - RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset); + dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset); + dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset); qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && - IS_DMA_IDLE(dma_core_idle_ind_mask); + IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1); is_idle &= is_eng_idle; if (mask && !is_eng_idle) set_bit(engine_idx, mask); if (e) - hl_engine_data_sprintf(e, edma_fmt, i, j, - is_eng_idle ? "Y" : "N", - qm_glbl_sts0, - dma_core_idle_ind_mask); + hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? 
"Y" : "N", + qm_glbl_sts0, dma_core_sts0, dma_core_sts1); } } - /* PDMA, Two engines in Full chip */ + return is_idle; +} + +static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1; + unsigned long *mask = (unsigned long *) mask_arr; + const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n"; + bool is_idle = true, is_eng_idle; + int engine_idx, i; + u64 offset; + if (e) hl_engine_data_sprintf(e, - "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" - "---- ------- ------------ ----------------------\n"); + "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n" + "---- ------- ------------ ------------- -------------\n"); for (i = 0 ; i < NUM_OF_PDMA ; i++) { engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; offset = i * PDMA_OFFSET; - dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset); + dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset); + dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset); qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && - IS_DMA_IDLE(dma_core_idle_ind_mask); + IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1); is_idle &= is_eng_idle; if (mask && !is_eng_idle) @@ -6508,9 +7584,22 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask if (e) hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N", - qm_glbl_sts0, dma_core_idle_ind_mask); + qm_glbl_sts0, dma_core_sts0, dma_core_sts1); } + return is_idle; +} + +static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + unsigned long *mask = (unsigned long *) mask_arr; + const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; + u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; + bool is_idle = true, is_eng_idle; + int engine_idx, i; + u64 offset = 0; + /* NIC, twelve macros in Full chip */ if (e && hdev->nic_ports_mask) hl_engine_data_sprintf(e, @@ -6544,6 +7633,19 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask qm_glbl_sts0, qm_cgm_sts); } + return is_idle; +} + +static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts; + unsigned long *mask = (unsigned long *) mask_arr; + const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; + bool is_idle = true, is_eng_idle; + int engine_idx, i; + u64 offset; + if (e) hl_engine_data_sprintf(e, "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" @@ -6574,16 +7676,82 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask set_bit(engine_idx, mask); } - /* - * TPC - */ + return is_idle; +} + +static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, + struct iterate_module_ctx *ctx) +{ + struct gaudi2_tpc_idle_data *idle_data = ctx->data; + u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; + bool is_eng_idle; + int engine_idx; + + if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) + engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; + else + engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + + dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; + + tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); + qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); + 
qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); + qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); + + is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && + IS_TPC_IDLE(tpc_cfg_sts); + *(idle_data->is_idle) &= is_eng_idle; + + if (idle_data->mask && !is_eng_idle) + set_bit(engine_idx, idle_data->mask); + + if (idle_data->e) + hl_engine_data_sprintf(idle_data->e, + idle_data->tpc_fmt, dcore, inst, + is_eng_idle ? "Y" : "N", + qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); +} + +static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + unsigned long *mask = (unsigned long *) mask_arr; + bool is_idle = true; + + struct gaudi2_tpc_idle_data tpc_idle_data = { + .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", + .e = e, + .mask = mask, + .is_idle = &is_idle, + }; + struct iterate_module_ctx tpc_iter = { + .fn = &gaudi2_is_tpc_engine_idle, + .data = &tpc_idle_data, + }; + if (e && prop->tpc_enabled_mask) hl_engine_data_sprintf(e, - "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" - "---- --- -------- ------------ ---------- ----------------------\n"); + "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n" + "---- --- ------- ------------ ---------- ------\n"); gaudi2_iterate_tpcs(hdev, &tpc_iter); + return *tpc_idle_data.is_idle; +} + +static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + unsigned long *mask = (unsigned long *) mask_arr; + const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; + const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; + bool is_idle = true, is_eng_idle; + u32 dec_swreg15, dec_enabled_bit; + int engine_idx, i, j; + u64 offset; + /* Decoders, two each Dcore and two shared PCIe decoders */ if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) hl_engine_data_sprintf(e, @@ -6638,10 +7806,23 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask is_eng_idle ? "Y" : "N", dec_swreg15); } + return is_idle; +} + +static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n"; + unsigned long *mask = (unsigned long *) mask_arr; + u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; + bool is_idle = true, is_eng_idle; + int engine_idx, i; + u64 offset; + if (e) hl_engine_data_sprintf(e, - "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" - "---- ---- ------- ------------ ---------- -------------\n"); + "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n" + "---- --- ------- ------------ ------------ ----------\n"); for (i = 0 ; i < NUM_OF_ROT ; i++) { engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; @@ -6660,12 +7841,28 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask if (e) hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? 
"Y" : "N", - qm_glbl_sts0, qm_cgm_sts, "-"); + qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); } return is_idle; } +static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) +{ + bool is_idle = true; + + is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e); + is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e); + is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e); + is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e); + is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e); + is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e); + is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e); + + return is_idle; +} + static void gaudi2_hw_queues_lock(struct hl_device *hdev) __acquires(&gaudi2->hw_queues_lock) { @@ -6996,11 +8193,13 @@ static inline bool is_info_event(u32 event) switch (event) { case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: + case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3: /* return in case of NIC status event - these events are received periodically and not as * an indication to an error. */ case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: + case GAUDI2_EVENT_ARC_EQ_HEARTBEAT: return true; default: return false; @@ -7032,155 +8231,76 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, struct hl_eq_ecc_data *ecc_data) { - u64 ecc_address = 0, ecc_syndrom = 0; + u64 ecc_address = 0, ecc_syndrome = 0; u8 memory_wrapper_idx = 0; + bool has_block_id = false; + u16 block_id; + + if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0) + has_block_id = true; ecc_address = le64_to_cpu(ecc_data->ecc_address); - ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); + ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom); memory_wrapper_idx = ecc_data->memory_wrapper_idx; - gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, - "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n", - ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); + if (has_block_id) { + block_id = le16_to_cpu(ecc_data->block_id); + gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, + "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.", + ecc_address, ecc_syndrome, memory_wrapper_idx, block_id, + ecc_data->is_critical); + } else { + gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, + "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. 
critical %u.", + ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical); + } return !!ecc_data->is_critical; } -/* - * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap - * - * @idx: the current pi/ci value - * @q_len: the queue length (power of 2) - * - * @return the cyclically decremented index - */ -static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) -{ - u32 mask = q_len - 1; - - /* - * modular decrement is equivalent to adding (queue_size -1) - * later we take LSBs to make sure the value is in the - * range [0, queue_len - 1] - */ - return (idx + q_len - 1) & mask; -} - -/** - * gaudi2_print_sw_config_stream_data - print SW config stream data - * - * @hdev: pointer to the habanalabs device structure - * @stream: the QMAN's stream - * @qman_base: base address of QMAN registers block - */ -static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, - u32 stream, u64 qman_base) +static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id) { - u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; - u32 cq_ptr_lo_off, size; - - cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; - - cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + - stream * cq_ptr_lo_off; - - cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); - - cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); - - cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); - size = RREG32(cq_tsize); - dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", - stream, cq_ptr, size); -} - -/** - * gaudi2_print_last_pqes_on_err - print last PQEs on error - * - * @hdev: pointer to the habanalabs device structure - * @qid_base: first QID of the QMAN (out of 4 streams) - * @stream: the QMAN's stream - * @qman_base: base address of QMAN registers block - * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) - */ -static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, - u64 qman_base, bool pr_sw_conf) -{ - u32 ci, qm_ci_stream_off; - struct hl_hw_queue *q; - u64 pq_ci; - int i; - - q = &hdev->kernel_queues[qid_base + stream]; - - qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; - pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + - stream * qm_ci_stream_off; - - hdev->asic_funcs->hw_queues_lock(hdev); - - if (pr_sw_conf) - gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); + struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; + u64 cq_ptr, cp_current_inst; + u32 lo, hi, cq_size, cp_sts; + bool is_arc_cq; - ci = RREG32(pq_ci); + cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET); + is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */ - /* we should start printing form ci -1 */ - ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); - - for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { - struct hl_bd *bd; - u64 addr; - u32 len; - - bd = q->kernel_address; - bd += ci; - - len = le32_to_cpu(bd->len); - /* len 0 means uninitialized entry- break */ - if (!len) - break; - - addr = le64_to_cpu(bd->ptr); - - dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", - stream, ci, addr, len); - - /* get previous ci, wrap if needed */ - ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); + if (is_arc_cq) { + lo = RREG32(qman_base + 
QM_ARC_CQ_PTR_LO_STS_OFFSET); + hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET); + cq_ptr = ((u64) hi) << 32 | lo; + cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET); + } else { + lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET); + hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET); + cq_ptr = ((u64) hi) << 32 | lo; + cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET); } - hdev->asic_funcs->hw_queues_unlock(hdev); -} + lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET); + hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET); + cp_current_inst = ((u64) hi) << 32 | lo; -/** - * print_qman_data_on_err - extract QMAN data on error - * - * @hdev: pointer to the habanalabs device structure - * @qid_base: first QID of the QMAN (out of 4 streams) - * @stream: the QMAN's stream - * @qman_base: base address of QMAN registers block - * - * This function attempt to extract as much data as possible on QMAN error. - * On upper CP print the SW config stream data and last 8 PQEs. - * On lower CP print SW config data and last PQEs of ALL 4 upper CPs - */ -static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base) -{ - u32 i; + dev_info(hdev->dev, + "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n", + is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst); - if (stream != QMAN_STREAMS) { - gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true); - return; + if (undef_opcode->write_enable) { + memset(undef_opcode, 0, sizeof(*undef_opcode)); + undef_opcode->timestamp = ktime_get(); + undef_opcode->cq_addr = cq_ptr; + undef_opcode->cq_size = cq_size; + undef_opcode->engine_id = engine_id; + undef_opcode->stream_id = QMAN_STREAMS; + undef_opcode->write_enable = 0; } - - gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); - - for (i = 0 ; i < QMAN_STREAMS ; i++) - gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false); } static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type, - u64 qman_base, u32 qid_base) + u64 qman_base, u32 qid_base, u64 *event_mask) { u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0; u64 glbl_sts_addr, arb_err_addr; @@ -7197,8 +8317,8 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type continue; if (i == QMAN_STREAMS) { - snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); - num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE; + snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM"); + num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE; } else { snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE; @@ -7209,12 +8329,18 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type gaudi2_print_event(hdev, event_type, true, "%s. err cause: %s", reg_desc, i == QMAN_STREAMS ? 
- gaudi2_qman_lower_cp_error_cause[j] : + gaudi2_lower_qman_error_cause[j] : gaudi2_qman_error_cause[j]); error_count++; } - print_qman_data_on_err(hdev, qid_base, i, qman_base); + /* Check for undefined opcode error in lower QM */ + if ((i == QMAN_STREAMS) && + (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) { + handle_lower_qman_data_on_err(hdev, qman_base, + gaudi2_queue_id_to_engine_id[qid_base]); + *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; + } } arb_err_val = RREG32(arb_err_addr); @@ -7328,6 +8454,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, case RAZWI_ROT: return GAUDI2_ENGINE_ID_ROT_0 + module_idx; + case RAZWI_ARC_FARM: + return GAUDI2_ENGINE_ID_ARC_FARM; + default: return GAUDI2_ENGINE_ID_SIZE; } @@ -7342,7 +8471,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, u8 module_sub_idx, u64 *event_mask) { bool via_sft = false; - u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id; + u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx; u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr; u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; @@ -7350,17 +8479,15 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, switch (module) { case RAZWI_TPC: - hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; - - /* TODO : remove this check and depend only on tpc routers table - * when SW-118828 is resolved - */ - if (!hdev->asic_prop.fw_security_enabled && - ((module_idx == 0) || (module_idx == 1))) - lbw_rtr_id = DCORE0_RTR0; - else - lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; sprintf(initiator_name, "TPC_%u", module_idx); + if (hdev->tpc_binning) { + binned_idx = __ffs(hdev->tpc_binning); + if (binned_idx == module_idx) + module_idx = TPC_ID_DCORE0_TPC6; + } + + hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; + lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; break; case RAZWI_MME: sprintf(initiator_name, "MME_%u", module_idx); @@ -7419,15 +8546,25 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, sprintf(initiator_name, "NIC_%u", module_idx); break; case RAZWI_DEC: + sprintf(initiator_name, "DEC_%u", module_idx); + if (hdev->decoder_binning) { + binned_idx = __ffs(hdev->decoder_binning); + if (binned_idx == module_idx) + module_idx = DEC_ID_PCIE_VDEC1; + } hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx]; lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx]; - sprintf(initiator_name, "DEC_%u", module_idx); break; case RAZWI_ROT: hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx]; lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx]; sprintf(initiator_name, "ROT_%u", module_idx); break; + case RAZWI_ARC_FARM: + lbw_rtr_id = DCORE1_RTR5; + hbw_rtr_id = DCORE1_RTR7; + sprintf(initiator_name, "ARC_FARM_%u", module_idx); + break; default: return; } @@ -7526,297 +8663,126 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); } -static const char *gaudi2_get_initiators_name(u32 rtr_id) -{ - switch (rtr_id) { - case DCORE0_RTR0: - return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; - case DCORE0_RTR1: - return "TPC0/1"; - case DCORE0_RTR2: - return "TPC2/3"; - case DCORE0_RTR3: - return "TPC4/5"; - case DCORE0_RTR4: - return "MME0_SBTE0/1"; - case DCORE0_RTR5: - return "MME0_WAP0/SBTE2"; - case DCORE0_RTR6: - return 
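/*
 * Sketch of the binning substitution introduced in the TPC branch above
 * (userspace C; SPARE_TPC_IDX and the mask values are hypothetical): when
 * a module is binned out, the spare engine takes its slot, so a RAZWI
 * reported against the binned index is redirected to the spare. __ffs() is
 * modeled here with a portable find-first-set.
 */
#include <stdint.h>
#include <stdio.h>

#define SPARE_TPC_IDX 6 /* stands in for TPC_ID_DCORE0_TPC6 */

static unsigned int first_set_bit(uint64_t mask)
{
	unsigned int i = 0;

	while (!(mask & 1)) {
		mask >>= 1;
		i++;
	}
	return i;
}

static unsigned int razwi_module_idx(uint64_t binning_mask, unsigned int module_idx)
{
	if (binning_mask && first_set_bit(binning_mask) == module_idx)
		return SPARE_TPC_IDX; /* the spare engine took the binned slot */
	return module_idx;
}

int main(void)
{
	printf("%u\n", razwi_module_idx(1ull << 3, 3)); /* binned -> 6 */
	printf("%u\n", razwi_module_idx(1ull << 3, 2)); /* untouched -> 2 */
	return 0;
}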
"MME0_CTRL_WR/SBTE3"; - case DCORE0_RTR7: - return "MME0_WAP1/CTRL_RD/SBTE4"; - case DCORE1_RTR0: - return "MME1_WAP1/CTRL_RD/SBTE4"; - case DCORE1_RTR1: - return "MME1_CTRL_WR/SBTE3"; - case DCORE1_RTR2: - return "MME1_WAP0/SBTE2"; - case DCORE1_RTR3: - return "MME1_SBTE0/1"; - case DCORE1_RTR4: - return "TPC10/11"; - case DCORE1_RTR5: - return "TPC8/9"; - case DCORE1_RTR6: - return "TPC6/7"; - case DCORE1_RTR7: - return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7"; - case DCORE2_RTR0: - return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; - case DCORE2_RTR1: - return "TPC16/17"; - case DCORE2_RTR2: - return "TPC14/15"; - case DCORE2_RTR3: - return "TPC12/13"; - case DCORE2_RTR4: - return "MME2_SBTE0/1"; - case DCORE2_RTR5: - return "MME2_WAP0/SBTE2"; - case DCORE2_RTR6: - return "MME2_CTRL_WR/SBTE3"; - case DCORE2_RTR7: - return "MME2_WAP1/CTRL_RD/SBTE4"; - case DCORE3_RTR0: - return "MME3_WAP1/CTRL_RD/SBTE4"; - case DCORE3_RTR1: - return "MME3_CTRL_WR/SBTE3"; - case DCORE3_RTR2: - return "MME3_WAP0/SBTE2"; - case DCORE3_RTR3: - return "MME3_SBTE0/1"; - case DCORE3_RTR4: - return "TPC18/19"; - case DCORE3_RTR5: - return "TPC20/21"; - case DCORE3_RTR6: - return "TPC22/23"; - case DCORE3_RTR7: - return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; - default: - return "N/A"; - } -} - -static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) -{ - switch (rtr_id) { - case DCORE0_RTR0: - engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0; - engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1; - engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0; - engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1; - engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6; - engines[5] = GAUDI2_ENGINE_ID_PDMA_0; - engines[6] = GAUDI2_ENGINE_ID_PDMA_1; - engines[7] = GAUDI2_ENGINE_ID_PCIE; - engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0; - engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0; - engines[10] = GAUDI2_ENGINE_ID_PSOC; - return 11; - - case DCORE0_RTR1: - engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0; - engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1; - return 2; - - case DCORE0_RTR2: - engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2; - engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3; - return 2; - - case DCORE0_RTR3: - engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4; - engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5; - return 2; - - case DCORE0_RTR4: - case DCORE0_RTR5: - case DCORE0_RTR6: - case DCORE0_RTR7: - engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME; - return 1; - - case DCORE1_RTR0: - case DCORE1_RTR1: - case DCORE1_RTR2: - case DCORE1_RTR3: - engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME; - return 1; - - case DCORE1_RTR4: - engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4; - engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5; - return 2; - - case DCORE1_RTR5: - engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2; - engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3; - return 2; - - case DCORE1_RTR6: - engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0; - engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1; - return 2; - - case DCORE1_RTR7: - engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0; - engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1; - engines[2] = GAUDI2_ENGINE_ID_NIC0_0; - engines[3] = GAUDI2_ENGINE_ID_NIC1_0; - engines[4] = GAUDI2_ENGINE_ID_NIC2_0; - engines[5] = GAUDI2_ENGINE_ID_NIC3_0; - engines[6] = GAUDI2_ENGINE_ID_NIC4_0; - engines[7] = GAUDI2_ENGINE_ID_ARC_FARM; - engines[8] = GAUDI2_ENGINE_ID_KDMA; - engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1; - engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1; - return 11; - - case DCORE2_RTR0: - engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0; - engines[1] 
= GAUDI2_DCORE2_ENGINE_ID_DEC_1; - engines[2] = GAUDI2_ENGINE_ID_NIC5_0; - engines[3] = GAUDI2_ENGINE_ID_NIC6_0; - engines[4] = GAUDI2_ENGINE_ID_NIC7_0; - engines[5] = GAUDI2_ENGINE_ID_NIC8_0; - engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0; - engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0; - engines[8] = GAUDI2_ENGINE_ID_ROT_0; - return 9; - - case DCORE2_RTR1: - engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4; - engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5; - return 2; - - case DCORE2_RTR2: - engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2; - engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3; - return 2; - - case DCORE2_RTR3: - engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0; - engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1; - return 2; - - case DCORE2_RTR4: - case DCORE2_RTR5: - case DCORE2_RTR6: - case DCORE2_RTR7: - engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME; - return 1; - case DCORE3_RTR0: - case DCORE3_RTR1: - case DCORE3_RTR2: - case DCORE3_RTR3: - engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME; - return 1; - case DCORE3_RTR4: - engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0; - engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1; - return 2; - case DCORE3_RTR5: - engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2; - engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3; - return 2; - case DCORE3_RTR6: - engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4; - engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5; - return 2; - case DCORE3_RTR7: - engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0; - engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1; - engines[2] = GAUDI2_ENGINE_ID_NIC9_0; - engines[3] = GAUDI2_ENGINE_ID_NIC10_0; - engines[4] = GAUDI2_ENGINE_ID_NIC11_0; - engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1; - engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1; - engines[7] = GAUDI2_ENGINE_ID_ROT_1; - engines[8] = GAUDI2_ENGINE_ID_ROT_0; - return 9; - default: - return 0; - } -} - -static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, - u64 rtr_ctrl_base_addr, bool is_write, - u64 *event_mask) +static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size, + u32 axuser_xy, u32 *base, u16 *eng_id, + char *eng_name) { - u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; - u32 razwi_hi, razwi_lo; - u8 rd_wr_flag; - num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); - - if (is_write) { - razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); - razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); - rd_wr_flag = HL_RAZWI_WRITE; + int i, num_of_eng = 0; + u16 str_size = 0; - /* Clear set indication */ - WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); - } else { - razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); - razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); - rd_wr_flag = HL_RAZWI_READ; + for (i = 0 ; i < array_size ; i++) { + if (axuser_xy != razwi_info[i].axuser_xy) + continue; - /* Clear set indication */ - WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); + eng_id[num_of_eng] = razwi_info[i].eng_id; + base[num_of_eng] = razwi_info[i].rtr_ctrl; + if (!num_of_eng) + str_size += scnprintf(eng_name + str_size, + PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s", + razwi_info[i].eng_name); + else + str_size += scnprintf(eng_name + str_size, + PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s", + razwi_info[i].eng_name); + num_of_eng++; } - hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, - rd_wr_flag | HL_RAZWI_HBW, event_mask); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped HBW %s error, rtr id %u, address 
%#llx\n", - is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); - - dev_err_ratelimited(hdev->dev, - "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); + return num_of_eng; } -static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, - u64 rtr_ctrl_base_addr, bool is_write, - u64 *event_mask) +static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg, + u64 *event_mask) { - u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; - u64 razwi_addr = CFG_BASE; - u8 rd_wr_flag; + u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0; + u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR]; + u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR]; + char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE]; + bool razwi_happened = false; + u64 addr; + int i; - num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); + num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info), + axuser_xy, base, eng_id, eng_name_str); + + /* If no match for XY coordinates, try to find it in MME razwi table */ + if (!num_of_eng) { + axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg); + num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info, + ARRAY_SIZE(mme_razwi_info), + axuser_xy, base, eng_id, + eng_name_str); + } + + for (i = 0 ; i < num_of_eng ; i++) { + if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) { + addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI); + addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO); + addr = ((u64)addr_hi << 32) + addr_lo; + if (addr) { + dev_err(hdev->dev, + "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n", + eng_name_str, addr); + hl_handle_razwi(hdev, addr, &eng_id[0], + num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask); + razwi_happened = true; + } + } - if (is_write) { - razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); - rd_wr_flag = HL_RAZWI_WRITE; + if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) { + addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI); + addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO); + addr = ((u64)addr_hi << 32) + addr_lo; + if (addr) { + dev_err(hdev->dev, + "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n", + eng_name_str, addr); + hl_handle_razwi(hdev, addr, &eng_id[0], + num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask); + razwi_happened = true; + } + } - /* Clear set indication */ - WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); - } else { - razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); - rd_wr_flag = HL_RAZWI_READ; + if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) { + addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR); + if (addr_lo) { + dev_err(hdev->dev, + "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n", + eng_name_str, addr_lo); + hl_handle_razwi(hdev, addr_lo, &eng_id[0], + num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask); + razwi_happened = true; + } + } - /* Clear set indication */ - WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); + if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) { + addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR); + if (addr_lo) { + dev_err(hdev->dev, + "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n", + eng_name_str, addr_lo); + hl_handle_razwi(hdev, addr_lo, &eng_id[0], + num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask); + razwi_happened = true; + } + } + /* In common case the loop will break, when there is only one engine id, or + * several engines with the same router. 
The exceptional case is with psoc razwi + * from EDMA, where it's possible to get axuser id which fits 2 routers (2 + * interfaces of sft router). In this case, maybe the first router won't hold info + * and we will need to iterate on the other router. + */ + if (razwi_happened) + break; } - hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW, - event_mask); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped LBW %s error, rtr id %u, address 0x%llX\n", - is_write ? "WR" : "RD", rtr_id, razwi_addr); - - dev_err_ratelimited(hdev->dev, - "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); + return razwi_happened; } /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) { - u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy, - razwi_mask_info, razwi_intr = 0, error_count = 0; - int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES; - u64 rtr_ctrl_base_addr; + u32 razwi_mask_info, razwi_intr = 0, error_count = 0; if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); @@ -7825,63 +8791,22 @@ static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *even } razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); - xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); dev_err_ratelimited(hdev->dev, "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), - xy, + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info), FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); - if (xy == 0) { - dev_err_ratelimited(hdev->dev, - "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); - goto clear; - } - - /* Find router id by router coordinates */ - for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++) - if (rtr_coordinates_to_rtr_id[rtr_id] == xy) - break; - - if (rtr_id == rtr_map_arr_len) { + if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask)) + error_count++; + else dev_err_ratelimited(hdev->dev, - "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy); - goto clear; - } - - /* Find router mstr_if register base */ - dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE; - dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE; - rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET + - dcore_rtr_id * DCORE_RTR_OFFSET; - - hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET); - hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET); - lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET); - lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); + "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n", + razwi_mask_info); - if (hbw_aw_set) - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, true, event_mask); - - if (hbw_ar_set) - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, false, event_mask); - - if (lbw_aw_set) - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, true, event_mask); - - if (lbw_ar_set) - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, - 
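/*
 * Sketch of the table-driven matching that replaces the router switch
 * tables above (userspace C; the rows are invented): engines are looked up
 * by their AXUSER coordinates in an array of structs, and several rows may
 * share one key, which is how multiple candidate engines get collected.
 */
#include <stdio.h>

struct razwi_row {
	unsigned int axuser_xy; /* key */
	unsigned int eng_id;
	const char *name;
};

static const struct razwi_row table[] = {
	{ 0x11, 7, "PDMA0" },
	{ 0x11, 8, "PDMA1" }, /* two engines can share one router key */
	{ 0x23, 42, "KDMA" },
};

int main(void)
{
	unsigned int key = 0x11;
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].axuser_xy == key)
			printf("match: %s (eng %u)\n", table[i].name, table[i].eng_id);
	return 0;
}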
rtr_ctrl_base_addr, false, event_mask); - - error_count++; - -clear: /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); @@ -7976,7 +8901,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e { u32 qid_base, error_count = 0; u64 qman_base; - u8 index; + u8 index = 0; switch (event_type) { case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: @@ -8079,7 +9004,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e return 0; } - error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); + error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, + qid_base, event_mask); /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) { @@ -8092,25 +9018,31 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e return error_count; } -static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) +static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) { - u32 i, sts_val, sts_clr_val = 0, error_count = 0; + u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm; - sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS); + for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) { + sts_clr_val = 0; + sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS + + (arc_farm * ARC_FARM_OFFSET)); - for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { - if (sts_val & BIT(i)) { - gaudi2_print_event(hdev, event_type, true, - "err cause: %s", gaudi2_arc_sei_error_cause[i]); - sts_clr_val |= BIT(i); - error_count++; + for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { + if (sts_val & BIT(i)) { + gaudi2_print_event(hdev, event_type, true, + "ARC FARM ARC %u err cause: %s", + arc_farm, gaudi2_arc_sei_error_cause[i]); + sts_clr_val |= BIT(i); + error_count++; + } } + WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET), + sts_clr_val); } + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask); hl_check_for_glbl_errors(hdev); - WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); - return error_count; } @@ -8248,21 +9180,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event return error_count; } -static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, - u64 intr_cause_data) +static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type) { - int i, error_count = 0; - - for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) - if (intr_cause_data & BIT(i)) { - gaudi2_print_event(hdev, event_type, true, - "err cause: %s", guadi2_mme_sbte_error_cause[i]); - error_count++; - } - + /* + * We have a single error cause here but the report mechanism is + * buggy. Hence there is no good reason to fetch the cause so we + * just check for glbl_errors and exit. 
+ */ hl_check_for_glbl_errors(hdev); - return error_count; + return GAUDI2_NA_EVENT_CAUSE; } static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, @@ -8318,14 +9245,13 @@ static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, return error_count; } -static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, - u64 intr_cause_data) +static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause) { u32 error_count = 0; int i; for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) - if (intr_cause_data & BIT(i)) { + if (intr_cause & BIT(i)) { gaudi2_print_event(hdev, event_type, true, "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); error_count++; @@ -8433,7 +9359,7 @@ static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 i static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, u64 *event_mask) { - u32 valid, val, axid_l, axid_h; + u32 valid, val; u64 addr; valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); @@ -8446,14 +9372,18 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool addr <<= 32; addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); - axid_l = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_LSB)); - axid_h = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_MSB)); + if (is_pmmu) { + dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr); + } else { + addr = gaudi2_mmu_descramble_addr(hdev, addr); + addr &= HW_UNSCRAMBLED_BITS_MASK; + dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n", + addr, addr + ~HW_UNSCRAMBLED_BITS_MASK); + } - dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx, transaction id 0x%llX\n", - is_pmmu ? "PMMU" : "HMMU", addr, ((u64)axid_h << 32) + axid_l); hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); - WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); + WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0); } static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) @@ -8471,9 +9401,12 @@ static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, boo addr <<= 32; addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); + if (!is_pmmu) + addr = gaudi2_mmu_descramble_addr(hdev, addr); + dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", is_pmmu ? "PMMU" : "HMMU", addr); - WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0); + WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0); } static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, @@ -8534,7 +9467,7 @@ static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_in continue; gaudi2_print_event(hdev, event_type, true, - "err cause: %s. %s: 0x%X\n", + "err cause: %s. %s: 0x%X", gaudi2_sm_sei_cause[i].cause_name, gaudi2_sm_sei_cause[i].log_name, sei_cause_log); @@ -8565,46 +9498,110 @@ static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_in return error_count; } +static u64 get_hmmu_base(u16 event_type) +{ + u8 dcore, index_in_dcore; + + switch (event_type) { + case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU0_SPI_BASE ... 
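/*
 * Sketch of the HMMU fault-range report above (userspace C; the mask value
 * is invented): the descrambled address only has its high bits defined, so
 * the fault is reported as the whole range [addr, addr + ~mask] it covers.
 */
#include <stdint.h>
#include <stdio.h>

#define UNSCRAMBLED_BITS_MASK 0xfffffffffff00000ull /* hypothetical */

int main(void)
{
	uint64_t va = 0x123456789abcdefull;
	uint64_t lo = va & UNSCRAMBLED_BITS_MASK;
	uint64_t hi = lo + ~UNSCRAMBLED_BITS_MASK;

	printf("fault range %#llx - %#llx\n",
	       (unsigned long long)lo, (unsigned long long)hi);
	return 0;
}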
GAUDI2_EVENT_HMMU0_SECURITY_ERROR: + dcore = 0; + index_in_dcore = 0; + break; + case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR: + dcore = 1; + index_in_dcore = 0; + break; + case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR: + dcore = 0; + index_in_dcore = 1; + break; + case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: + dcore = 1; + index_in_dcore = 1; + break; + case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: + dcore = 3; + index_in_dcore = 2; + break; + case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR: + dcore = 2; + index_in_dcore = 2; + break; + case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR: + dcore = 3; + index_in_dcore = 3; + break; + case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR: + dcore = 2; + index_in_dcore = 3; + break; + case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR: + dcore = 0; + index_in_dcore = 2; + break; + case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR: + dcore = 1; + index_in_dcore = 2; + break; + case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR: + dcore = 0; + index_in_dcore = 3; + break; + case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: + dcore = 1; + index_in_dcore = 3; + break; + case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: + dcore = 3; + index_in_dcore = 0; + break; + case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR: + dcore = 2; + index_in_dcore = 0; + break; + case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR: + dcore = 3; + index_in_dcore = 1; + break; + case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR: + dcore = 2; + index_in_dcore = 1; + break; + default: + return ULONG_MAX; + } + + return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET; +} + static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) { bool is_pmmu = false; u32 error_count = 0; u64 mmu_base; - u8 index; switch (event_type) { - case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: - index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3; - mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: - index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP); - mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: - index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3; - mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... 
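/*
 * Sketch of the address computation that get_hmmu_base() resolves to
 * (userspace C; base and strides are invented): once an event is mapped to
 * a (dcore, index_in_dcore) pair, the MMU block base is plain 2-D strided
 * arithmetic, which is why the long switch only has to fill in two values.
 */
#include <stdint.h>
#include <stdio.h>

#define HMMU0_BASE   0x100000ull /* stands in for mmDCORE0_HMMU0_MMU_BASE */
#define DCORE_STRIDE 0x400000ull /* stands in for DCORE_OFFSET */
#define HMMU_STRIDE  0x010000ull /* stands in for DCORE_HMMU_OFFSET */

static uint64_t hmmu_base(unsigned int dcore, unsigned int idx_in_dcore)
{
	return HMMU0_BASE + dcore * DCORE_STRIDE + idx_in_dcore * HMMU_STRIDE;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)hmmu_base(2, 3));
	return 0;
}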
GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: - index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP); - mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: - index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3; - mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: - index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP); - mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: - index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3; - mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; - break; - case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: - index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP); - mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; + case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: + case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: + mmu_base = get_hmmu_base(event_type); break; + case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: is_pmmu = true; @@ -8614,6 +9611,9 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, return 0; } + if (mmu_base == ULONG_MAX) + return 0; + error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, is_pmmu, event_mask); hl_check_for_glbl_errors(hdev); @@ -8626,8 +9626,8 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) { + bool require_hard_reset = false; u32 addr, beat, beat_shift; - bool rc = false; dev_err_ratelimited(hdev->dev, "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", @@ -8659,7 +9659,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, beat, le32_to_cpu(rd_err_data->dbg_rd_err_dm), le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); - rc |= true; + require_hard_reset = true; } beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT; @@ -8672,7 +9672,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >> (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift)); - rc |= true; + require_hard_reset = true; } dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat); @@ -8682,7 +9682,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1])); } - return rc; + return require_hard_reset; } static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev, @@ -8740,12 +9740,12 @@ static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type, if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) { gaudi2_print_event(hdev, event_type, true, "err cause: %s", - "Invalid HBM SEI event cause (%d) provided by FW\n", cause_idx); + "Invalid HBM SEI event cause (%d) provided by FW", cause_idx); return true; } gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical, - "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n", + "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). 
Error cause: %s", sei_data->hdr.is_critical ? "Critical" : "Non-critical", hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, hbm_mc_sei_cause[cause_idx]); @@ -8869,7 +9869,7 @@ static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; gaudi2_print_event(hdev, event_type, false, - "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", + "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d", le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); } @@ -8883,7 +9883,7 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) if (p2p_intr) { gaudi2_print_event(hdev, event_type, true, - "pcie p2p transaction terminated due to security, req_id(0x%x)\n", + "pcie p2p transaction terminated due to security, req_id(0x%x)", RREG32(mmPCIE_WRAP_P2P_REQ_ID)); WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); @@ -8892,7 +9892,7 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) if (msix_gw_intr) { gaudi2_print_event(hdev, event_type, true, - "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n", + "pcie msi-x gen denied due to vector num check failure, vec(0x%X)", RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); @@ -8905,25 +9905,17 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) static int gaudi2_handle_pcie_drain(struct hl_device *hdev, struct hl_eq_pcie_drain_ind_data *drain_data) { - u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; + u64 cause, error_count = 0; cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); - lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); - lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); - hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); - hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); if (cause & BIT_ULL(0)) { - dev_err_ratelimited(hdev->dev, - "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", - !!lbw_rd, !!lbw_wr); + dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n"); error_count++; } if (cause & BIT_ULL(1)) { - dev_err_ratelimited(hdev->dev, - "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", - hbw_rd, hbw_wr); + dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n"); error_count++; } @@ -8954,7 +9946,7 @@ static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_ struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; gaudi2_print_event(hdev, event_type, false, - "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", + "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d", le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); } @@ -8974,20 +9966,185 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; gaudi2_print_event(hdev, event_type, true, - "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n", - engine_id, intr_type, q->queue_index); + "ARC DCCM Full event: Eng: %s, Intr_type: %u, Qidx: %u", + GAUDI2_ENG_ID_TO_STR(engine_id), intr_type, q->queue_index); return 1; default: - gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n"); + gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type"); return 0; } } +static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type) +{ + enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX; + u16 index; + + 
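/*
 * Sketch of the range-case mapping implemented by event_id_to_engine_id()
 * below (userspace C with the GNU "case A ... B" extension, as used by the
 * driver; the enum values are invented): contiguous event ids map to
 * engine ids by subtracting the base of the matched range.
 */
#include <stdio.h>

enum { EV_TPC0_QM = 100, EV_TPC24_QM = 124 }; /* hypothetical event range */
enum { ENG_TPC0 = 0 };                        /* hypothetical engine base */

static int event_to_engine(int ev)
{
	switch (ev) {
	case EV_TPC0_QM ... EV_TPC24_QM:
		return ENG_TPC0 + (ev - EV_TPC0_QM);
	default:
		return -1;
	}
}

int main(void)
{
	printf("%d\n", event_to_engine(105)); /* -> 5 */
	return 0;
}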
switch (event_type) { + case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: + index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; + type = GAUDI2_BLOCK_TYPE_TPC; + break; + case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM: + index = event_type - GAUDI2_EVENT_TPC0_QM; + type = GAUDI2_BLOCK_TYPE_TPC; + break; + case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME0_QM: + index = 0; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME1_QM: + index = 1; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME2_QM: + index = 2; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME3_QM: + index = 3; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: + case GAUDI2_EVENT_KDMA_BM_SPMU: + case GAUDI2_EVENT_KDMA0_CORE: + return GAUDI2_ENGINE_ID_KDMA; + case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: + case GAUDI2_EVENT_PDMA0_CORE: + case GAUDI2_EVENT_PDMA0_BM_SPMU: + case GAUDI2_EVENT_PDMA0_QM: + return GAUDI2_ENGINE_ID_PDMA_0; + case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: + case GAUDI2_EVENT_PDMA1_CORE: + case GAUDI2_EVENT_PDMA1_BM_SPMU: + case GAUDI2_EVENT_PDMA1_QM: + return GAUDI2_ENGINE_ID_PDMA_1; + case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: + index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; + type = GAUDI2_BLOCK_TYPE_DEC; + break; + case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU: + index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1; + type = GAUDI2_BLOCK_TYPE_DEC; + break; + case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE: + index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE; + return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2); + case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1: + index = event_type - GAUDI2_EVENT_NIC0_QM0; + return GAUDI2_ENGINE_ID_NIC0_0 + index; + case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR: + index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU; + return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2); + case GAUDI2_EVENT_TPC0_BMON_SPMU ... 
GAUDI2_EVENT_TPC24_KERNEL_ERR: + index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1; + type = GAUDI2_BLOCK_TYPE_TPC; + break; + case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_ROTATOR0_BMON_SPMU: + case GAUDI2_EVENT_ROTATOR0_ROT0_QM: + return GAUDI2_ENGINE_ID_ROT_0; + case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_ROTATOR1_BMON_SPMU: + case GAUDI2_EVENT_ROTATOR1_ROT1_QM: + return GAUDI2_ENGINE_ID_ROT_1; + case GAUDI2_EVENT_HDMA0_BM_SPMU: + case GAUDI2_EVENT_HDMA0_QM: + case GAUDI2_EVENT_HDMA0_CORE: + return GAUDI2_DCORE0_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA1_BM_SPMU: + case GAUDI2_EVENT_HDMA1_QM: + case GAUDI2_EVENT_HDMA1_CORE: + return GAUDI2_DCORE0_ENGINE_ID_EDMA_1; + case GAUDI2_EVENT_HDMA2_BM_SPMU: + case GAUDI2_EVENT_HDMA2_QM: + case GAUDI2_EVENT_HDMA2_CORE: + return GAUDI2_DCORE1_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA3_BM_SPMU: + case GAUDI2_EVENT_HDMA3_QM: + case GAUDI2_EVENT_HDMA3_CORE: + return GAUDI2_DCORE1_ENGINE_ID_EDMA_1; + case GAUDI2_EVENT_HDMA4_BM_SPMU: + case GAUDI2_EVENT_HDMA4_QM: + case GAUDI2_EVENT_HDMA4_CORE: + return GAUDI2_DCORE2_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA5_BM_SPMU: + case GAUDI2_EVENT_HDMA5_QM: + case GAUDI2_EVENT_HDMA5_CORE: + return GAUDI2_DCORE2_ENGINE_ID_EDMA_1; + case GAUDI2_EVENT_HDMA6_BM_SPMU: + case GAUDI2_EVENT_HDMA6_QM: + case GAUDI2_EVENT_HDMA6_CORE: + return GAUDI2_DCORE3_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA7_BM_SPMU: + case GAUDI2_EVENT_HDMA7_QM: + case GAUDI2_EVENT_HDMA7_CORE: + return GAUDI2_DCORE3_ENGINE_ID_EDMA_1; + default: + break; + } + + switch (type) { + case GAUDI2_BLOCK_TYPE_TPC: + switch (index) { + case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5: + return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index; + case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5: + return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0; + case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5: + return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0; + case TPC_ID_DCORE3_TPC0 ... 
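/*
 * Sketch of the stride-2 index arithmetic above (userspace C; the values
 * are invented): some blocks raise two events per instance (e.g. SPI and
 * BMON_SPMU), so the instance index is the event offset halved, while NIC
 * engine ids advance by two per port, so an offset is doubled on the way
 * back out.
 */
#include <stdio.h>

int main(void)
{
	int ev_off = 7;             /* 8th event in a two-per-instance range */
	int instance = ev_off >> 1; /* -> instance 3 */
	int eng_base = 100;         /* hypothetical first engine id */
	int eng_id = eng_base + instance * 2;

	printf("instance %d, engine id %d\n", instance, eng_id);
	return 0;
}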
TPC_ID_DCORE3_TPC5: + return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0; + default: + break; + } + break; + case GAUDI2_BLOCK_TYPE_MME: + switch (index) { + case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME; + case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME; + case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME; + case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME; + default: + break; + } + break; + case GAUDI2_BLOCK_TYPE_DEC: + switch (index) { + case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0; + case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1; + case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0; + case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1; + case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0; + case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1; + case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0; + case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1; + case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0; + case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1; + default: + break; + } + break; + default: + break; + } + + return U16_MAX; +} + static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi2_device *gaudi2 = hdev->asic_specific; bool reset_required = false, is_critical = false; - u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0; + u32 index, ctl, reset_flags = 0, error_count = 0; u64 event_mask = 0; u16 event_type; @@ -9024,19 +10181,18 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: - reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; - error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); - event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_CPU_AXI_ERR_RSP: error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); - event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; break; case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: - reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -9149,13 +10305,19 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_KDMA0_CORE: error_count = gaudi2_handle_kdma_core_event(hdev, event_type, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; - case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: + case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE: error_count = gaudi2_handle_dma_core_event(hdev, event_type, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + break; + + case GAUDI2_EVENT_PDMA0_CORE ... 
GAUDI2_EVENT_PDMA1_CORE: + error_count = gaudi2_handle_dma_core_event(hdev, event_type, + le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -9199,6 +10361,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; reset_required = true; + is_critical = eq_entry->sei_data.hdr.is_critical; } error_count++; break; @@ -9217,12 +10380,16 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0) + is_critical = true; break; case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: error_count = gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; @@ -9240,8 +10407,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: - error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + error_count = gaudi2_handle_mme_sbte_err(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: @@ -9251,6 +10417,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: error_count = GAUDI2_NA_EVENT_CAUSE; + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC_PRSTN_FALL: @@ -9264,6 +10431,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case GAUDI2_EVENT_PCIE_FATAL_ERR: error_count = GAUDI2_NA_EVENT_CAUSE; + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_TPC0_BMON_SPMU: @@ -9331,6 +10499,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); error_count = GAUDI2_NA_EVENT_CAUSE; + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; @@ -9366,12 +10535,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", le64_to_cpu(eq_entry->data[0])); error_count = GAUDI2_NA_EVENT_CAUSE; - event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + hl_eq_cpld_shutdown_event_handle(hdev, event_type, &event_mask); break; case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); error_count = GAUDI2_NA_EVENT_CAUSE; + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; @@ -9381,12 +10551,27 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case 
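/*
 * Sketch of the firmware-version gate applied to the PCIE drain event
 * above (userspace C; hl_fw_version_cmp() is modeled, not reproduced): a
 * lexicographic compare over (major, minor, subminor) decides whether the
 * event is escalated to critical, here from FW 1.13.0 onward.
 */
#include <stdio.h>

static int ver_cmp(int amaj, int amin, int asub, int bmaj, int bmin, int bsub)
{
	if (amaj != bmaj)
		return amaj < bmaj ? -1 : 1;
	if (amin != bmin)
		return amin < bmin ? -1 : 1;
	if (asub != bsub)
		return asub < bsub ? -1 : 1;
	return 0;
}

int main(void)
{
	int is_critical = ver_cmp(1, 13, 5, 1, 13, 0) >= 0; /* running FW 1.13.5 */

	printf("critical=%d\n", is_critical);
	return 0;
}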
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: - case GAUDI2_EVENT_DEV_RESET_REQ: + case GAUDI2_EVENT_CPU_DEV_RESET_REQ: event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; error_count = GAUDI2_NA_EVENT_CAUSE; is_critical = true; break; + case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY: + case GAUDI2_EVENT_ARC_PWR_BRK_EXT: + case GAUDI2_EVENT_ARC_PWR_RD_MODE0: + case GAUDI2_EVENT_ARC_PWR_RD_MODE1: + case GAUDI2_EVENT_ARC_PWR_RD_MODE2: + case GAUDI2_EVENT_ARC_PWR_RD_MODE3: + error_count = GAUDI2_NA_EVENT_CAUSE; + dev_info_ratelimited(hdev->dev, "%s event received\n", + gaudi2_irq_map_table[event_type].name); + break; + + case GAUDI2_EVENT_ARC_EQ_HEARTBEAT: + hl_eq_heartbeat_event_handle(hdev); + error_count = GAUDI2_NA_EVENT_CAUSE; + break; default: if (gaudi2_irq_map_table[event_type].valid) { dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", @@ -9395,6 +10580,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent } } + if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR) + hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count); + /* Make sure to dump an error in case no error cause was printed so far. * Note that although we have counted the errors, we use this number as * a boolean. @@ -9403,12 +10591,17 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_print_event(hdev, event_type, true, "%d", event_type); else if (error_count == 0) gaudi2_print_event(hdev, event_type, true, - "No error cause for H/W event %u\n", event_type); + "No error cause for H/W event %u", event_type); - if ((gaudi2_irq_map_table[event_type].reset || reset_required) && - (hdev->hard_reset_on_fw_events || - (hdev->asic_prop.fw_security_enabled && is_critical))) - goto reset_device; + if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) { + if (reset_required || + (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD)) + reset_flags |= HL_DRV_RESET_HARD; + + if (hdev->hard_reset_on_fw_events || + (hdev->asic_prop.fw_security_enabled && is_critical)) + goto reset_device; + } /* Send unmask irq only for interrupts not classified as MSG */ if (!gaudi2_irq_map_table[event_type].msg) @@ -9426,16 +10619,21 @@ reset_device: } else { reset_flags |= HL_DRV_RESET_DELAY; } + /* escalate general hw errors to critical/fatal error */ + if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) + hl_handle_critical_hw_err(hdev, event_type, &event_mask); + + hl_debugfs_cfg_access_history_dump(hdev); event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; hl_device_cond_reset(hdev, reset_flags, event_mask); } static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, - struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, - u32 hw_queue_id, u32 size, u64 addr, u32 val) + struct packet_lin_dma *lin_dma_pkt, + u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val) { u32 ctl, pkt_size; - int rc = 0; + int rc = 0, i; ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); @@ -9449,10 +10647,21 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, pkt_size = sizeof(struct packet_lin_dma); - rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); + for (i = 0; i < 3; i++) { + rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, + phys_addr + (i * sizeof(u64)), + ((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64); + if (rc) { + dev_err(hdev->dev, "Failed to copy lin_dma packet to 
HBM (%#llx)\n", + phys_addr); + return rc; + } + } + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr); if (rc) - dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n", - hw_queue_id); + dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n", + GAUDI2_QUEUE_ID_TO_STR(hw_queue_id)); return rc; } @@ -9464,12 +10673,11 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0}; u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val, - old_mmubp, mmubp, num_of_pkts, busy, pkt_size; + old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len; u64 comp_addr, cur_addr = addr, end_addr = addr + size; struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc = 0, dma_num = 0, i; void *lin_dma_pkts_arr; - dma_addr_t pkt_dma_addr; - int rc = 0, dma_num = 0; if (prop->edma_enabled_mask == 0) { dev_info(hdev->dev, "none of the EDMA engines are enabled - skip dram scrubbing\n"); @@ -9487,9 +10695,19 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz /* Calculate how many lin dma pkts we'll need */ num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G); pkt_size = sizeof(struct packet_lin_dma); + cb_len = pkt_size * num_of_pkts; - lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts, - &pkt_dma_addr, GFP_KERNEL); + /* + * If we're not scrubbing the HMMU or NIC reserved sections in HBM, then this is the + * scrubbing of the user section. As we use the start of the user section to store the + * CB of the EDMA QM, shift the start address of the scrubbing accordingly and scrub + * the CB section before leaving this function. + */ + if ((addr >= prop->dram_user_base_address) && + (addr < prop->dram_user_base_address + cb_len)) + cur_addr += (prop->dram_user_base_address + cb_len) - addr; + + lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL); if (!lin_dma_pkts_arr) return -ENOMEM; @@ -9535,7 +10753,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, - pkt_dma_addr + dma_num * pkt_size, + prop->dram_user_base_address + (dma_num * pkt_size), edma_queues_id[dcore] + edma_idx * 4, chunk_size, cur_addr, val); if (rc) @@ -9544,14 +10762,16 @@ dma_num++; cur_addr += chunk_size; if (cur_addr == end_addr) - break; + goto edma_wait; } } } +edma_wait: rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); if (rc) { - dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); + dev_err(hdev->dev, "DMA Timeout during HBM scrubbing (sob: 0x%x, dma_num: 0x%x)\n", + busy, dma_num); goto end; } end: @@ -9572,8 +10792,16 @@ end: } } + memset(lin_dma_pkts_arr, 0, sizeof(u64)); + + /* Zero the HBM area where we copied the CB */ + for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64)) + rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, + prop->dram_user_base_address + i, + (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64); WREG32(sob_addr, 0); - hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); + + kvfree(lin_dma_pkts_arr); return rc; } @@ -9832,16 +11060,23 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v /* Create mapping on asic side */ mutex_lock(&hdev->mmu_lock); + rc = hl_mmu_map_contiguous(ctx, 
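/*
 * Sketch of the packet-count sizing used by the scrub above (userspace C):
 * the region is covered in 2 GB lin-dma chunks, so the packet count is the
 * size rounded up to a 2 GB multiple, divided by 2 GB; this also sizes the
 * CB that now lives at the start of the user section.
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_2G (1ull << 31)

static uint64_t num_scrub_pkts(uint64_t size)
{
	/* equivalent to div64_u64(round_up(size, SZ_2G), SZ_2G) */
	return (size + SZ_2G - 1) / SZ_2G;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)num_scrub_pkts(5ull << 30)); /* 3 */
	return 0;
}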
reserved_va_base, host_mem_dma_addr, SZ_2M); - hl_mmu_invalidate_cache_range(hdev, false, + if (rc) { + dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); + goto unreserve_va; + } + + rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, reserved_va_base, SZ_2M); - mutex_unlock(&hdev->mmu_lock); if (rc) { - dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); + hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); goto unreserve_va; } + mutex_unlock(&hdev->mmu_lock); + /* Enable MMU on KDMA */ gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); @@ -9870,11 +11105,16 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); mutex_lock(&hdev->mmu_lock); - hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); - hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, + + rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); + if (rc) + goto unreserve_va; + + rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, ctx->asid, reserved_va_base, SZ_2M); - mutex_unlock(&hdev->mmu_lock); + unreserve_va: + mutex_unlock(&hdev->mmu_lock); hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); free_data_buffer: hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr); @@ -9927,17 +11167,24 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c } mutex_lock(&hdev->mmu_lock); + rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); - hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); - mutex_unlock(&hdev->mmu_lock); - if (rc) goto unreserve_internal_cb_pool; + rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); + if (rc) + goto unmap_internal_cb_pool; + + mutex_unlock(&hdev->mmu_lock); + return 0; +unmap_internal_cb_pool: + hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); unreserve_internal_cb_pool: + mutex_unlock(&hdev->mmu_lock); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); destroy_internal_cb_pool: gen_pool_destroy(hdev->internal_cb_pool); @@ -10006,6 +11253,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx) { int rc; + if (ctx->asid == HL_KERNEL_ASID_ID) + return 0; + rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid); if (rc) return rc; @@ -10375,8 +11625,8 @@ static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma, address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar; - vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | - VM_DONTCOPY | VM_NORESERVE; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE); rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, block_size, vma->vm_page_prot); @@ -10497,6 +11747,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 static void gaudi2_get_msi_info(__le32 *table) { table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX); + table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR); } static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx) @@ -10608,7 +11859,7 @@ static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_p return 0; page_size_err: - dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", + dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't 
map\n", page_size, mmu_prop->page_size >> 10); return -EFAULT; } @@ -10628,6 +11879,29 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open) return hl_fw_send_device_activity(hdev, open); } +static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u64 val; + + if (hdev->reset_info.hard_reset_pending) + return U64_MAX; + + val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr)); + + return val; +} + +static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + + if (hdev->reset_info.hard_reset_pending) + return; + + writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr)); +} + static const struct hl_asic_funcs gaudi2_funcs = { .early_init = gaudi2_early_init, .early_fini = gaudi2_early_fini, @@ -10653,11 +11927,9 @@ static const struct hl_asic_funcs gaudi2_funcs = { .asic_dma_pool_free = gaudi2_dma_pool_free, .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free, - .asic_dma_unmap_single = gaudi2_dma_unmap_single, - .asic_dma_map_single = gaudi2_dma_map_single, - .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, + .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable, .cs_parser = gaudi2_cs_parser, - .asic_dma_map_sgtable = hl_dma_map_sgtable, + .dma_map_sgtable = hl_asic_dma_map_sgtable, .add_end_of_cb_packets = NULL, .update_eq_ci = gaudi2_update_eq_ci, .context_switch = gaudi2_context_switch, @@ -10666,8 +11938,8 @@ static const struct hl_asic_funcs gaudi2_funcs = { .add_device_attr = gaudi2_add_device_attr, .handle_eqe = gaudi2_handle_eqe, .get_events_stat = gaudi2_get_events_stat, - .read_pte = NULL, - .write_pte = NULL, + .read_pte = gaudi2_read_pte, + .write_pte = gaudi2_write_pte, .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range, .mmu_prefetch_cache_range = NULL, @@ -10724,6 +11996,7 @@ static const struct hl_asic_funcs gaudi2_funcs = { .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi2_set_hbm_bar_base, .set_engine_cores = gaudi2_set_engine_cores, + .set_engines = gaudi2_set_engines, .send_device_activity = gaudi2_send_device_activity, .set_dram_properties = gaudi2_set_dram_properties, .set_binning_masks = gaudi2_set_binning_masks, diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h index 2687404d9d21..bdf5c1bd2d63 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h @@ -10,7 +10,7 @@ #include <uapi/drm/habanalabs_accel.h> #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include <linux/habanalabs/hl_boot_if.h> #include "../include/gaudi2/gaudi2.h" #include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_fw_if.h" @@ -19,8 +19,6 @@ #define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb" #define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb" -#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */ - #define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */ #define NUMBER_OF_PDMA_QUEUES 2 @@ -84,6 +82,7 @@ #define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ #define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */ +#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */ #define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */ @@ -98,7 +97,7 @@ #define 
GAUDI2_DEFAULT_CARD_NAME "HL225" #define QMAN_STREAMS 4 -#define PQ_FETCHER_CACHE_SIZE 8 + #define NUM_OF_MME_SBTE_PORTS 5 #define NUM_OF_MME_WB_PORTS 2 @@ -108,13 +107,11 @@ /* DRAM Memory Map */ #define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */ - -/* This define should be used only when working in a debug mode without dram. - * When working with dram, the driver size will be calculated dynamically. - */ -#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */ - #define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE +#define PMMU_PAGE_TABLES_SIZE 0x10000000 /* 256MB */ +#define EDMA_PQS_SIZE SZ_2M +#define EDMA_SCRATCHPAD_SIZE SZ_1M +#define HMMU_PAGE_TABLES_SIZE SZ_1M #define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES @@ -240,7 +237,17 @@ #define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \ FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1)) -#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8 +#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0) + + +extern const char *gaudi2_engine_id_str[]; +extern const char *gaudi2_queue_id_str[]; + +#define GAUDI2_ENG_ID_TO_STR(initiator) ((initiator) >= GAUDI2_ENGINE_ID_SIZE ? "not found" : \ + gaudi2_engine_id_str[initiator]) + +#define GAUDI2_QUEUE_ID_TO_STR(initiator) ((initiator) >= GAUDI2_QUEUE_ID_SIZE ? "not found" : \ + gaudi2_queue_id_str[initiator]) enum gaudi2_reserved_sob_id { GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST, @@ -386,7 +393,9 @@ enum gaudi2_edma_id { /* User interrupt count is aligned with HW CQ count. * We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode */ -#define GAUDI2_NUM_USER_INTERRUPTS 255 +#define GAUDI2_NUM_USER_INTERRUPTS 64 +#define GAUDI2_NUM_RESERVED_INTERRUPTS 1 +#define GAUDI2_TOTAL_USER_INTERRUPTS (GAUDI2_NUM_USER_INTERRUPTS + GAUDI2_NUM_RESERVED_INTERRUPTS) enum gaudi2_irq_num { GAUDI2_IRQ_NUM_EVENT_QUEUE = GAUDI2_EVENT_QUEUE_MSIX_IDX, @@ -410,13 +419,17 @@ enum gaudi2_irq_num { GAUDI2_IRQ_NUM_SHARED_DEC0_ABNRM, GAUDI2_IRQ_NUM_SHARED_DEC1_NRM, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM, + GAUDI2_IRQ_NUM_DEC_LAST = GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM, GAUDI2_IRQ_NUM_COMPLETION, GAUDI2_IRQ_NUM_NIC_PORT_FIRST, GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1), - GAUDI2_IRQ_NUM_RESERVED_FIRST, - GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_USER_INTERRUPTS - 1), + GAUDI2_IRQ_NUM_TPC_ASSERT, + GAUDI2_IRQ_NUM_EQ_ERROR, GAUDI2_IRQ_NUM_USER_FIRST, GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1), + GAUDI2_IRQ_NUM_RESERVED_FIRST, + GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_RESERVED_INTERRUPTS - 1), + GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT, GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1) }; @@ -448,6 +461,17 @@ struct dup_block_ctx { }; /** + * struct gaudi2_queues_test_info - Holds the addresses of the messages used for testing the + * device queues. + * @dma_addr: the address used by the HW for accessing the message. + * @kern_addr: the address used by the driver for accessing the message. + */ +struct gaudi2_queues_test_info { + dma_addr_t dma_addr; + void *kern_addr; +}; + +/** * struct gaudi2_device - ASIC specific manage structure. * @cpucp_info_get: get information on device from CPU-CP * @mapped_blocks: array that holds the base address and size of all blocks @@ -505,6 +529,7 @@ struct dup_block_ctx { * @flush_db_fifo: flag to force flush DB FIFO after a write. 
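A minimal usage sketch for the bounds-checked lookup macros added above, assuming gaudi2_queue_id_str[] is populated for every id below GAUDI2_QUEUE_ID_SIZE (the queue id used here is just an example value); out-of-range ids degrade to "not found" instead of indexing past the table, which is what lets the lin_dma error path earlier in this series print queue names safely:

	u32 hw_queue_id = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;	/* example id */

	/* out-of-range values of hw_queue_id would print "not found" */
	dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n",
		GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
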
* @hbm_cfg: HBM subsystem settings * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock. + * @queues_test_info: information used by the driver when testing the HW queues. */ struct gaudi2_device { int (*cpucp_info_get)(struct hl_device *hdev); @@ -532,6 +557,9 @@ struct gaudi2_device { u32 events_stat[GAUDI2_EVENT_SIZE]; u32 events_stat_aggregate[GAUDI2_EVENT_SIZE]; u32 num_of_valid_hw_events; + + /* Queue testing */ + struct gaudi2_queues_test_info queues_test_info[GAUDI2_NUM_TESTED_QS]; }; /* diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c index 1dfbe293ecec..bc3c57bda5cd 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c @@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = { [GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE, [GAUDI2_STM_PCIE] = mmPCIE_STM_BASE, [GAUDI2_STM_PSOC] = mmPSOC_STM_BASE, - [GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE, - [GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE, + [GAUDI2_STM_PSOC_ARC0_CS] = 0, + [GAUDI2_STM_PSOC_ARC1_CS] = 0, [GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE, [GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE, [GAUDI2_STM_CPU] = mmCPU_STM_BASE, @@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = { [GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE, [GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE, [GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE, - [GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE, - [GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE, + [GAUDI2_ETF_PSOC_ARC0_CS] = 0, + [GAUDI2_ETF_PSOC_ARC1_CS] = 0, [GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE, [GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE, [GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE, @@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = { [GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE, [GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE, [GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE, - [GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE, - [GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE, + [GAUDI2_FUNNEL_PSOC_ARC0] = 0, + [GAUDI2_FUNNEL_PSOC_ARC1] = 0, [GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE, [GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE, [GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE, @@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = { [GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE, [GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE, [GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE, - [GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE, - [GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE, - [GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE, - [GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE, + [GAUDI2_BMON_PSOC_ARC0_0] = 0, + [GAUDI2_BMON_PSOC_ARC0_1] = 0, + [GAUDI2_BMON_PSOC_ARC1_0] = 0, + [GAUDI2_BMON_PSOC_ARC1_1] = 0, [GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE, [GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE, [GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE, @@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = { [GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE, [GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE, [GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE, - [GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE, - [GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE, + [GAUDI2_SPMU_PSOC_ARC0_CS] = 0, + [GAUDI2_SPMU_PSOC_ARC1_CS] = 0, [GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE, [GAUDI2_SPMU_PDMA1_CS] = 
mmPDMA1_CS_SPMU_BASE, [GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE, @@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par if (rc) return -EIO; + val = RREG32(base_reg + mmETF_CTL_OFFSET); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(base_reg + mmETF_FFCR_OFFSET); val |= 0x1000; WREG32(base_reg + mmETF_FFCR_OFFSET, val); @@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par if (!input) return -EINVAL; + val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2; + if (val) { + val = ffs(val); + WREG32(base_reg + mmETF_PSCR_OFFSET, val); + } else { + WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10); + } + WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC); WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode); WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001); - WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10); WREG32(base_reg + mmETF_CTL_OFFSET, 1); } else { WREG32(base_reg + mmETF_BUFWM_OFFSET, 0); @@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx, if (rc) return -EIO; + val = RREG32(mmPSOC_ETR_CTL); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; WREG32(mmPSOC_ETR_FFCR, val); @@ -2409,7 +2426,7 @@ static int gaudi2_config_bmon(struct hl_device *hdev, struct hl_debug_params *pa WREG32(base_reg + mmBMON_ADDRH_E3_OFFSET, 0); WREG32(base_reg + mmBMON_REDUCTION_OFFSET, 0); WREG32(base_reg + mmBMON_STM_TRC_OFFSET, 0x7 | (0xA << 8)); - WREG32(base_reg + mmBMON_CR_OFFSET, 0x77 | 0xf << 24); + WREG32(base_reg + mmBMON_CR_OFFSET, 0x41); } return 0; @@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa * set enabled events mask based on input->event_types_num */ event_mask = 0x80000000; - event_mask |= GENMASK(input->event_types_num, 0); + if (input->event_types_num) + event_mask |= GENMASK(input->event_types_num - 1, 0); WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask); } else { @@ -2657,7 +2675,7 @@ int gaudi2_coresight_init(struct hl_device *hdev) /* * Mask out all the disabled binned offsets. 
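The gaudi2_config_spmu() change above fixes an off-by-one in the counter-enable mask: for N requested event types the mask should cover counter bits [N-1:0], with bit 31 presumably the cycle-counter enable (per ARM PMU PMCNTENSET_EL0 semantics, which the register name suggests). A worked example, assuming input->event_types_num == 3:

	u32 event_mask = 0x80000000;	/* bit 31: cycle counter enable */

	/* old: GENMASK(3, 0) == 0xF -> enables 4 counters for 3 event types */
	/* new: GENMASK(2, 0) == 0x7 -> enables exactly 3 counters           */
	if (input->event_types_num)
		event_mask |= GENMASK(input->event_types_num - 1, 0);
	/* the guard also avoids GENMASK(-1, 0), which is undefined, when N == 0 */
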
* so when user request to configure a binned or masked out component, - * driver will ignore programing it ( happens when offset value is set to 0x0 ) + * driver will ignore programming it ( happens when offset value is set to 0x0 ) * this is being set in gaudi2_coresight_set_disabled_components */ diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_masks.h b/drivers/accel/habanalabs/gaudi2/gaudi2_masks.h index e9ac87828221..e6664c4a2cf5 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_masks.h +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_masks.h @@ -79,7 +79,6 @@ DCORE0_MME_CTRL_LO_ARCH_STATUS_QM_RDY_MASK) #define TPC_IDLE_MASK (DCORE0_TPC0_CFG_STATUS_SCALAR_PIPE_EMPTY_MASK | \ - DCORE0_TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK | \ DCORE0_TPC0_CFG_STATUS_IQ_EMPTY_MASK | \ DCORE0_TPC0_CFG_STATUS_SB_EMPTY_MASK | \ DCORE0_TPC0_CFG_STATUS_QM_IDLE_MASK | \ @@ -87,6 +86,8 @@ #define DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK 0x100 +#define DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK 0x40 + /* CGM_IDLE_MASK is valid for all engines CGM idle check */ #define CGM_IDLE_MASK DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index a212f82e6604..307ccb912ccd 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -479,6 +479,8 @@ static const u32 gaudi2_pb_dcr0_edma0_unsecured_regs[] = { mmDCORE0_EDMA0_CORE_CTX_TE_NUMROWS, mmDCORE0_EDMA0_CORE_CTX_IDX, mmDCORE0_EDMA0_CORE_CTX_IDX_INC, + mmDCORE0_EDMA0_CORE_WR_COMP_MAX_OUTSTAND, + mmDCORE0_EDMA0_CORE_RD_LBW_RATE_LIM_CFG, mmDCORE0_EDMA0_QM_CQ_CFG0_0, mmDCORE0_EDMA0_QM_CQ_CFG0_1, mmDCORE0_EDMA0_QM_CQ_CFG0_2, @@ -1533,6 +1535,10 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = { mmDCORE0_TPC0_CFG_QM_KERNEL_CONFIG, mmDCORE0_TPC0_CFG_QM_KERNEL_ID, mmDCORE0_TPC0_CFG_QM_POWER_LOOP, + mmDCORE0_TPC0_CFG_TSB_CFG_MTRR_2_0, + mmDCORE0_TPC0_CFG_TSB_CFG_MTRR_2_1, + mmDCORE0_TPC0_CFG_TSB_CFG_MTRR_2_2, + mmDCORE0_TPC0_CFG_TSB_CFG_MTRR_2_3, mmDCORE0_TPC0_CFG_LUT_FUNC32_BASE2_ADDR_LO, mmDCORE0_TPC0_CFG_LUT_FUNC32_BASE2_ADDR_HI, mmDCORE0_TPC0_CFG_LUT_FUNC64_BASE2_ADDR_LO, @@ -1541,6 +1547,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = { mmDCORE0_TPC0_CFG_LUT_FUNC128_BASE2_ADDR_HI, mmDCORE0_TPC0_CFG_LUT_FUNC256_BASE2_ADDR_LO, mmDCORE0_TPC0_CFG_LUT_FUNC256_BASE2_ADDR_HI, + mmDCORE0_TPC0_CFG_FP8_143_BIAS, mmDCORE0_TPC0_CFG_ROUND_CSR, mmDCORE0_TPC0_CFG_CONV_ROUND_CSR, mmDCORE0_TPC0_CFG_SEMAPHORE, @@ -1595,6 +1602,8 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = { mmDCORE0_TPC0_CFG_KERNEL_SRF_30, mmDCORE0_TPC0_CFG_KERNEL_SRF_31, mmDCORE0_TPC0_CFG_TPC_SB_L0CD, + mmDCORE0_TPC0_CFG_TPC_COUNT, + mmDCORE0_TPC0_CFG_TPC_ID, mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC, mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0, mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_1, @@ -2900,7 +2909,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev) * - range 11: NIC11_CFG + *_DBG (not including TPC_DBG) * * If F/W security is not enabled: - * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP) + * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF) */ u64 lbw_range_min_short[] = { mmNIC0_TX_AXUSER_BASE, @@ -2916,7 +2925,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev) mmNIC10_TX_AXUSER_BASE, mmNIC11_TX_AXUSER_BASE, mmPSOC_I2C_M0_BASE, - mmPSOC_EFUSE_BASE + mmPSOC_GPIO0_BASE }; u64 lbw_range_max_short[] = { 
mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE, @@ -3212,6 +3221,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev) */ static int gaudi2_init_protection_bits(struct hl_device *hdev) { + u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg; struct asic_fixed_properties *prop = &hdev->asic_prop; u32 instance_offset; int rc = 0; @@ -3382,11 +3392,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) /* PSOC. * Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are * protected by privileged RR. + * For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine + * cores to raise events towards F/W. */ + engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE); + if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 && + engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) { + user_regs_array = &engine_core_intr_reg; + user_regs_array_size = 1; + } else { + dev_err(hdev->dev, + "Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n", + engine_core_intr_reg); + } + rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, HL_PB_SINGLE_INSTANCE, HL_PB_NA, gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf), - NULL, HL_PB_NA); + user_regs_array, user_regs_array_size); if (!hdev->asic_prop.fw_security_enabled) rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, @@ -3441,15 +3464,6 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) ARRAY_SIZE(gaudi2_pb_thermal_sensor0), NULL, HL_PB_NA); } - /* HBM */ - /* Temporarily skip until SW-63348 is solved - * instance_offset = mmHBM1_MC0_BASE - mmHBM0_MC0_BASE; - * rc |= hl_init_pb_with_mask(hdev, HL_PB_SHARED, HL_PB_NA, GAUDI2_HBM_NUM, - * instance_offset, gaudi2_pb_hbm, - * ARRAY_SIZE(gaudi2_pb_hbm), NULL, HL_PB_NA, - * prop->dram_enabled_mask); - */ - /* Scheduler ARCs */ instance_offset = mmARC_FARM_ARC1_AUX_BASE - mmARC_FARM_ARC0_AUX_BASE; rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c index 2b135e856607..84768e306269 100644 --- a/drivers/accel/habanalabs/goya/goya.c +++ b/drivers/accel/habanalabs/goya/goya.c @@ -413,8 +413,6 @@ int goya_set_fixed_properties(struct hl_device *hdev) else prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; - prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = true; @@ -435,8 +433,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->dmmu.num_hops = MMU_ARCH_5_HOPS; prop->dmmu.last_mask = LAST_MASK; /* TODO: will be duplicated until implementing per-MMU props */ - prop->dmmu.hop_table_size = prop->mmu_hop_table_size; - prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->dmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); @@ -446,8 +444,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; /* TODO: will be duplicated until implementing per-MMU props */ - prop->pmmu.hop_table_size = prop->mmu_hop_table_size; - 
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; + prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -466,12 +464,14 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GOYA_MAX_PENDING_CS; prop->first_available_user_interrupt = USHRT_MAX; + prop->tpc_interrupt_id = USHRT_MAX; + prop->eq_interrupt_id = GOYA_EVENT_QUEUE_MSIX_IDX; for (i = 0 ; i < HL_MAX_DCORES ; i++) prop->first_available_cq[i] = USHRT_MAX; @@ -668,13 +668,18 @@ pci_init: rc = hl_fw_read_preboot_status(hdev); if (rc) { if (hdev->reset_on_preboot_fail) + /* we are already on failure flow, so don't check if hw_fini fails. */ hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); + goto pci_fini; + } } if (!hdev->pldm) { @@ -888,11 +893,8 @@ int goya_late_init(struct hl_device *hdev) WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size)); rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); - if (rc) { - dev_err(hdev->dev, - "Failed to enable PCI access from CPU %d\n", rc); + if (rc) return rc; - } /* force setting to low frequency */ goya->curr_pll_profile = PLL_LOW; @@ -2664,9 +2666,6 @@ int goya_mmu_init(struct hl_device *hdev) u64 hop0_addr; int rc, i; - if (!hdev->mmu_enable) - return 0; - if (goya->hw_cap_initialized & HW_CAP_MMU) return 0; @@ -2674,7 +2673,7 @@ int goya_mmu_init(struct hl_device *hdev) for (i = 0 ; i < prop->max_asid ; i++) { hop0_addr = prop->mmu_pgt_addr + - (i * prop->mmu_hop_table_size); + (i * prop->dmmu.hop_table_size); rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); if (rc) { @@ -2782,7 +2781,7 @@ disable_queues: return rc; } -static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) +static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) { struct goya_device *goya = hdev->asic_specific; u32 reset_timeout_ms, cpu_timeout_ms, status; @@ -2828,17 +2827,17 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) msleep(reset_timeout_ms); status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); - if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) - dev_err(hdev->dev, - "Timeout while waiting for device to reset 0x%x\n", - status); + if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { + dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); + return -ETIMEDOUT; + } if (!hard_reset && goya) { goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME | HW_CAP_GOLDEN | HW_CAP_TPC); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GOYA_ASYNC_EVENT_ID_SOFT_RESET); - return; + return 0; } /* Chicken bit to re-initiate boot sequencer flow */ @@ -2857,17 +2856,12 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) memset(goya->events_stat, 0, sizeof(goya->events_stat)); } + 
return 0; } int goya_suspend(struct hl_device *hdev) { - int rc; - - rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); - if (rc) - dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); - - return rc; + return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); } int goya_resume(struct hl_device *hdev) @@ -2880,8 +2874,8 @@ static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma, { int rc; - vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | - VM_DONTCOPY | VM_NORESERVE; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE); rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, (dma_addr - HOST_PHYS_BASE), size); @@ -3353,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); + rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -5117,7 +5111,7 @@ int goya_cpucp_info_get(struct hl_device *hdev) } if (!strlen(prop->cpucp_info.card_name)) - strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); return 0; @@ -5460,9 +5454,9 @@ static const struct hl_asic_funcs goya_funcs = { .asic_dma_pool_free = goya_dma_pool_free, .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, + .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable, .cs_parser = goya_cs_parser, - .asic_dma_map_sgtable = hl_dma_map_sgtable, + .dma_map_sgtable = hl_asic_dma_map_sgtable, .add_end_of_cb_packets = goya_add_end_of_cb_packets, .update_eq_ci = goya_update_eq_ci, .context_switch = goya_context_switch, diff --git a/drivers/accel/habanalabs/goya/goyaP.h b/drivers/accel/habanalabs/goya/goyaP.h index 5df3d30b91fd..194c2ae157cd 100644 --- a/drivers/accel/habanalabs/goya/goyaP.h +++ b/drivers/accel/habanalabs/goya/goyaP.h @@ -9,8 +9,8 @@ #define GOYAP_H_ #include <uapi/drm/habanalabs_accel.h> +#include <linux/habanalabs/hl_boot_if.h> #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" #include "../include/goya/goya_packets.h" #include "../include/goya/goya.h" #include "../include/goya/goya_async_events.h" diff --git a/drivers/accel/habanalabs/goya/goya_coresight.c b/drivers/accel/habanalabs/goya/goya_coresight.c index e7ac3046cfaa..3827ea4c02f7 100644 --- a/drivers/accel/habanalabs/goya/goya_coresight.c +++ b/drivers/accel/habanalabs/goya/goya_coresight.c @@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev, WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + val = RREG32(base_reg + 0x20); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(base_reg + 0x304); val |= 0x1000; WREG32(base_reg + 0x304, val); @@ -371,13 +376,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr, return false; } - if (hdev->mmu_enable) { - range_start = prop->dmmu.start_addr; - range_end = prop->dmmu.end_addr; - } else { - range_start = prop->dram_user_base_address; - range_end = prop->dram_end_address; - } + range_start = prop->dmmu.start_addr; + range_end = prop->dmmu.end_addr; return hl_mem_area_inside_range(addr, size, range_start, range_end); } @@ -391,6 +391,11 @@ static 
int goya_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); + val = RREG32(mmPSOC_ETR_CTL); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; WREG32(mmPSOC_ETR_FFCR, val); @@ -571,7 +576,6 @@ static int goya_config_spmu(struct hl_device *hdev, struct hl_debug_params *params) { u64 base_reg; - struct hl_debug_params_spmu *input = params->input; u64 *output; u32 output_arr_len; u32 events_num; @@ -587,7 +591,7 @@ static int goya_config_spmu(struct hl_device *hdev, base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE; if (params->enable) { - input = params->input; + struct hl_debug_params_spmu *input = params->input; if (!input) return -EINVAL; diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h deleted file mode 100644 index d713252a4f13..000000000000 --- a/drivers/accel/habanalabs/include/common/cpucp_if.h +++ /dev/null @@ -1,1390 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2020-2022 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef CPUCP_IF_H -#define CPUCP_IF_H - -#include <linux/types.h> -#include <linux/if_ether.h> - -#include "hl_boot_if.h" - -#define NUM_HBM_PSEUDO_CH 2 -#define NUM_HBM_CH_PER_DEV 8 -#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 -#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001 -#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1 -#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002 -#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2 -#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004 -#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3 -#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008 -#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4 -#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010 -#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5 -#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020 -#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 -#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 - -#define PLL_MAP_MAX_BITS 128 -#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) - -/* - * info of the pkt queue pointers in the first async occurrence - */ -struct cpucp_pkt_sync_err { - __le32 pi; - __le32 ci; -}; - -struct hl_eq_hbm_ecc_data { - /* SERR counter */ - __le32 sec_cnt; - /* DERR counter */ - __le32 dec_cnt; - /* Supplemental Information according to the mask bits */ - __le32 hbm_ecc_info; - /* Address in hbm where the ecc happened */ - __le32 first_addr; - /* SERR continuous address counter */ - __le32 sec_cont_cnt; - __le32 pad; -}; - -/* - * EVENT QUEUE - */ - -struct hl_eq_header { - __le32 reserved; - __le32 ctl; -}; - -struct hl_eq_ecc_data { - __le64 ecc_address; - __le64 ecc_syndrom; - __u8 memory_wrapper_idx; - __u8 is_critical; - __u8 pad[6]; -}; - -enum hl_sm_sei_cause { - SM_SEI_SO_OVERFLOW, - SM_SEI_LBW_4B_UNALIGNED, - SM_SEI_AXI_RESPONSE_ERR -}; - -struct hl_eq_sm_sei_data { - __le32 sei_log; - /* enum hl_sm_sei_cause */ - __u8 sei_cause; - __u8 pad[3]; -}; - -enum hl_fw_alive_severity { - FW_ALIVE_SEVERITY_MINOR, - FW_ALIVE_SEVERITY_CRITICAL -}; - -struct hl_eq_fw_alive { - __le64 uptime_seconds; - __le32 process_id; - __le32 thread_id; - /* enum hl_fw_alive_severity */ - __u8 severity; - __u8 pad[7]; -}; - -struct hl_eq_intr_cause { - __le64 intr_cause_data; -}; - -struct hl_eq_pcie_drain_ind_data { - struct hl_eq_intr_cause intr_cause; - __le64 drain_wr_addr_lbw; - __le64 drain_rd_addr_lbw; - __le64 drain_wr_addr_hbw; - __le64 drain_rd_addr_hbw; -}; - -struct 
hl_eq_razwi_lbw_info_regs { - __le32 rr_aw_razwi_reg; - __le32 rr_aw_razwi_id_reg; - __le32 rr_ar_razwi_reg; - __le32 rr_ar_razwi_id_reg; -}; - -struct hl_eq_razwi_hbw_info_regs { - __le32 rr_aw_razwi_hi_reg; - __le32 rr_aw_razwi_lo_reg; - __le32 rr_aw_razwi_id_reg; - __le32 rr_ar_razwi_hi_reg; - __le32 rr_ar_razwi_lo_reg; - __le32 rr_ar_razwi_id_reg; -}; - -/* razwi_happened masks */ -#define RAZWI_HAPPENED_HBW 0x1 -#define RAZWI_HAPPENED_LBW 0x2 -#define RAZWI_HAPPENED_AW 0x4 -#define RAZWI_HAPPENED_AR 0x8 - -struct hl_eq_razwi_info { - __le32 razwi_happened_mask; - union { - struct hl_eq_razwi_lbw_info_regs lbw; - struct hl_eq_razwi_hbw_info_regs hbw; - }; - __le32 pad; -}; - -struct hl_eq_razwi_with_intr_cause { - struct hl_eq_razwi_info razwi_info; - struct hl_eq_intr_cause intr_cause; -}; - -#define HBM_CA_ERR_CMD_LIFO_LEN 8 -#define HBM_RD_ERR_DATA_LIFO_LEN 8 -#define HBM_WR_PAR_CMD_LIFO_LEN 11 - -enum hl_hbm_sei_cause { - /* Command/address parity error event is split into 2 events due to - * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix - * for even HBM CK_t cycles - */ - HBM_SEI_CMD_PARITY_EVEN, - HBM_SEI_CMD_PARITY_ODD, - /* Read errors can be reflected as a combination of SERR/DERR/parity - * errors. Therefore, we define one event for all read error types. - * LKD will perform further proccessing. - */ - HBM_SEI_READ_ERR, - HBM_SEI_WRITE_DATA_PARITY_ERR, - HBM_SEI_CATTRIP, - HBM_SEI_MEM_BIST_FAIL, - HBM_SEI_DFI, - HBM_SEI_INV_TEMP_READ_OUT, - HBM_SEI_BIST_FAIL, -}; - -/* Masks for parsing hl_hbm_sei_headr fields */ -#define HBM_ECC_SERR_CNTR_MASK 0xFF -#define HBM_ECC_DERR_CNTR_MASK 0xFF00 -#define HBM_RD_PARITY_CNTR_MASK 0xFF0000 - -/* HBM index and MC index are known by the event_id */ -struct hl_hbm_sei_header { - union { - /* relevant only in case of HBM read error */ - struct { - __u8 ecc_serr_cnt; - __u8 ecc_derr_cnt; - __u8 read_par_cnt; - __u8 reserved; - }; - /* All other cases */ - __le32 cnt; - }; - __u8 sei_cause; /* enum hl_hbm_sei_cause */ - __u8 mc_channel; /* range: 0-3 */ - __u8 mc_pseudo_channel; /* range: 0-7 */ - __u8 is_critical; -}; - -#define HBM_RD_ADDR_SID_SHIFT 0 -#define HBM_RD_ADDR_SID_MASK 0x1 -#define HBM_RD_ADDR_BG_SHIFT 1 -#define HBM_RD_ADDR_BG_MASK 0x6 -#define HBM_RD_ADDR_BA_SHIFT 3 -#define HBM_RD_ADDR_BA_MASK 0x18 -#define HBM_RD_ADDR_COL_SHIFT 5 -#define HBM_RD_ADDR_COL_MASK 0x7E0 -#define HBM_RD_ADDR_ROW_SHIFT 11 -#define HBM_RD_ADDR_ROW_MASK 0x3FFF800 - -struct hbm_rd_addr { - union { - /* bit fields are only for FW use */ - struct { - u32 dbg_rd_err_addr_sid:1; - u32 dbg_rd_err_addr_bg:2; - u32 dbg_rd_err_addr_ba:2; - u32 dbg_rd_err_addr_col:6; - u32 dbg_rd_err_addr_row:15; - u32 reserved:6; - }; - __le32 rd_addr_val; - }; -}; - -#define HBM_RD_ERR_BEAT_SHIFT 2 -/* dbg_rd_err_misc fields: */ -/* Read parity is calculated per DW on every beat */ -#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0 -#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3 -#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8 -#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300 -/* ECC is calculated per PC on every beat */ -#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16 -#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000 -#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24 -#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000 - -struct hl_eq_hbm_sei_read_err_intr_info { - /* DFI_RD_ERR_REP_ADDR */ - struct hbm_rd_addr dbg_rd_err_addr; - /* DFI_RD_ERR_REP_ERR */ - union { - struct { - /* bit fields are only for FW use */ - u32 dbg_rd_err_par:8; - u32 dbg_rd_err_par_data:8; - u32 dbg_rd_err_serr:4; - u32 
dbg_rd_err_derr:4; - u32 reserved:8; - }; - __le32 dbg_rd_err_misc; - }; - /* DFI_RD_ERR_REP_DM */ - __le32 dbg_rd_err_dm; - /* DFI_RD_ERR_REP_SYNDROME */ - __le32 dbg_rd_err_syndrome; - /* DFI_RD_ERR_REP_DATA */ - __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN]; -}; - -struct hl_eq_hbm_sei_ca_par_intr_info { - /* 14 LSBs */ - __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN]; - /* 18 LSBs */ - __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN]; -}; - -#define WR_PAR_LAST_CMD_COL_SHIFT 0 -#define WR_PAR_LAST_CMD_COL_MASK 0x3F -#define WR_PAR_LAST_CMD_BG_SHIFT 6 -#define WR_PAR_LAST_CMD_BG_MASK 0xC0 -#define WR_PAR_LAST_CMD_BA_SHIFT 8 -#define WR_PAR_LAST_CMD_BA_MASK 0x300 -#define WR_PAR_LAST_CMD_SID_SHIFT 10 -#define WR_PAR_LAST_CMD_SID_MASK 0x400 - -/* Row address isn't latched */ -struct hbm_sei_wr_cmd_address { - /* DFI_DERR_LAST_CMD */ - union { - struct { - /* bit fields are only for FW use */ - u32 col:6; - u32 bg:2; - u32 ba:2; - u32 sid:1; - u32 reserved:21; - }; - __le32 dbg_wr_cmd_addr; - }; -}; - -struct hl_eq_hbm_sei_wr_par_intr_info { - /* entry 0: WR command address from the 1st cycle prior to the error - * entry 1: WR command address from the 2nd cycle prior to the error - * and so on... - */ - struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN]; - /* derr[0:1] - 1st HBM cycle DERR output - * derr[2:3] - 2nd HBM cycle DERR output - */ - __u8 dbg_derr; - /* extend to reach 8B */ - __u8 pad[3]; -}; - -/* - * this struct represents the following sei causes: - * command parity, ECC double error, ECC single error, dfi error, cattrip, - * temperature read-out, read parity error and write parity error. - * some only use the header while some have extra data. - */ -struct hl_eq_hbm_sei_data { - struct hl_hbm_sei_header hdr; - union { - struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info; - struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info; - struct hl_eq_hbm_sei_read_err_intr_info read_err_info; - struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info; - }; -}; - -/* Engine/farm arc interrupt type */ -enum hl_engine_arc_interrupt_type { - /* Qman/farm ARC DCCM QUEUE FULL interrupt type */ - ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1 -}; - -/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */ -struct hl_engine_arc_dccm_queue_full_irq { - /* Queue index value which caused DCCM QUEUE FULL */ - __le32 queue_index; - __le32 pad; -}; - -/* Data structure specifies details of QM/FARM ARC interrupt */ -struct hl_eq_engine_arc_intr_data { - /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */ - __le32 engine_id; - __le32 intr_type; /* enum hl_engine_arc_interrupt_type */ - /* More info related to the interrupt e.g. queue index - * incase of DCCM_QUEUE_FULL interrupt. 
- */ - __le64 payload; - __le64 pad[5]; -}; - -#define ADDR_DEC_ADDRESS_COUNT_MAX 4 - -/* Data structure specifies details of ADDR_DEC interrupt */ -struct hl_eq_addr_dec_intr_data { - struct hl_eq_intr_cause intr_cause; - __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX]; - __u8 addr_cnt; - __u8 pad[7]; -}; - -struct hl_eq_entry { - struct hl_eq_header hdr; - union { - struct hl_eq_ecc_data ecc_data; - struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Gaudi1 HBM */ - struct hl_eq_sm_sei_data sm_sei_data; - struct cpucp_pkt_sync_err pkt_sync_err; - struct hl_eq_fw_alive fw_alive; - struct hl_eq_intr_cause intr_cause; - struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data; - struct hl_eq_razwi_info razwi_info; - struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause; - struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */ - struct hl_eq_engine_arc_intr_data arc_data; - struct hl_eq_addr_dec_intr_data addr_dec; - __le64 data[7]; - }; -}; - -#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) - -#define EQ_CTL_READY_SHIFT 31 -#define EQ_CTL_READY_MASK 0x80000000 - -#define EQ_CTL_EVENT_TYPE_SHIFT 16 -#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 - -#define EQ_CTL_INDEX_SHIFT 0 -#define EQ_CTL_INDEX_MASK 0x0000FFFF - -enum pq_init_status { - PQ_INIT_STATUS_NA = 0, - PQ_INIT_STATUS_READY_FOR_CP, - PQ_INIT_STATUS_READY_FOR_HOST, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, - PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, - PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR -}; - -/* - * CpuCP Primary Queue Packets - * - * During normal operation, the host's kernel driver needs to send various - * messages to CpuCP, usually either to SET some value into a H/W periphery or - * to GET the current value of some H/W periphery. For example, SET the - * frequency of MME/TPC and GET the value of the thermal sensor. - * - * These messages can be initiated either by the User application or by the - * host's driver itself, e.g. power management code. In either case, the - * communication from the host's driver to CpuCP will *always* be in - * synchronous mode, meaning that the host will send a single message and poll - * until the message was acknowledged and the results are ready (if results are - * needed). - * - * This means that only a single message can be sent at a time and the host's - * driver must wait for its result before sending the next message. Having said - * that, because these are control messages which are sent in a relatively low - * frequency, this limitation seems acceptable. It's important to note that - * in case of multiple devices, messages to different devices *can* be sent - * at the same time. - * - * The message, inputs/outputs (if relevant) and fence object will be located - * on the device DDR at an address that will be determined by the host's driver. - * During device initialization phase, the host will pass to CpuCP that address. - * Most of the message types will contain inputs/outputs inside the message - * itself. The common part of each message will contain the opcode of the - * message (its type) and a field representing a fence object. - * - * When the host's driver wishes to send a message to CPU CP, it will write the - * message contents to the device DDR, clear the fence object and then write to - * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. - * - * Upon receiving the interrupt (#121), CpuCP will read the message from the - * DDR. In case the message is a SET operation, CpuCP will first perform the - * operation and then write to the fence object on the device DDR. 
In case the - * message is a GET operation, CpuCP will first fill the results section on the - * device DDR and then write to the fence object. If an error occurred, CpuCP - * will fill the rc field with the right error code. - * - * In the meantime, the host's driver will poll on the fence object. Once the - * host sees that the fence object is signaled, it will read the results from - * the device DDR (if relevant) and resume the code execution in the host's - * driver. - * - * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 - * so the value being put by the host's driver matches the value read by CpuCP - * - * Non-QMAN packets should be limited to values 1 through (2^8 - 1) - * - * Detailed description: - * - * CPUCP_PACKET_DISABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU must NOT issue PCI - * transactions (read/write) towards the Host CPU. This also include - * sending MSI-X interrupts. - * This packet is usually sent before the device is moved to D3Hot state. - * - * CPUCP_PACKET_ENABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU is allowed to issue PCI - * transactions towards the Host CPU, including sending MSI-X interrupts. - * This packet is usually send after the device is moved to D0 state. - * - * CPUCP_PACKET_TEMPERATURE_GET - - * Fetch the current temperature / Max / Max Hyst / Critical / - * Critical Hyst of a specified thermal sensor. The packet's - * arguments specify the desired sensor and the field to get. - * - * CPUCP_PACKET_VOLTAGE_GET - - * Fetch the voltage / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_CURRENT_GET - - * Fetch the current / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_FAN_SPEED_GET - - * Fetch the speed / Max / Min of a specified fan. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_PWM_GET - - * Fetch the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_PWM_SET - - * Set the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor, type and value. - * - * CPUCP_PACKET_FREQUENCY_SET - - * Set the frequency of a specified PLL. The packet's arguments specify - * the PLL and the desired frequency. The actual frequency in the device - * might differ from the requested frequency. - * - * CPUCP_PACKET_FREQUENCY_GET - - * Fetch the frequency of a specified PLL. The packet's arguments specify - * the PLL. - * - * CPUCP_PACKET_LED_SET - - * Set the state of a specified led. The packet's arguments - * specify the led and the desired state. - * - * CPUCP_PACKET_I2C_WR - - * Write 32-bit value to I2C device. The packet's arguments specify the - * I2C bus, address and value. - * - * CPUCP_PACKET_I2C_RD - - * Read 32-bit value from I2C device. The packet's arguments specify the - * I2C bus and address. - * - * CPUCP_PACKET_INFO_GET - - * Fetch information from the device as specified in the packet's - * structure. The host's driver passes the max size it allows the CpuCP to - * write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed - * - * CPUCP_PACKET_UNMASK_RAZWI_IRQ - - * Unmask the given IRQ. The IRQ number is specified in the value field. - * The packet is sent after receiving an interrupt and printing its - * relevant information. 
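The paragraphs above document the synchronous host-to-CpuCP flow: write the message to device DDR, clear the fence, raise the doorbell interrupt, then poll the fence until CpuCP signals completion. A condensed, hypothetical driver-side sketch of that flow; only the cpucp_packet layout, its fence/result fields and CPUCP_PACKET_FENCE_VAL come from this header, while the helper itself, the doorbell write and the unbounded poll are illustrative (a real implementation would bound the poll with a timeout):

	static int cpucp_send_and_poll(struct cpucp_packet __iomem *pkt_ddr,
				       const struct cpucp_packet *msg)
	{
		memcpy_toio(pkt_ddr, msg, sizeof(*msg));	/* 1. message into device DDR */
		writel(0, &pkt_ddr->fence);			/* 2. clear the fence object */
		WREG32(mmPSOC_ARC1_AUX_SW_INTR, 1);		/* 3. doorbell: interrupt 121 (illustrative register name) */

		/* 4. poll until CpuCP writes the fence value */
		while (readl(&pkt_ddr->fence) != CPUCP_PACKET_FENCE_VAL)
			cpu_relax();

		return 0;					/* 5. caller may now read pkt_ddr->result */
	}
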
- * - * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - - * Unmask the given IRQs. The IRQs numbers are specified in an array right - * after the cpucp_packet structure, where its first element is the array - * length. The packet is sent after a soft reset was done in order to - * handle any interrupts that were sent during the reset process. - * - * CPUCP_PACKET_TEST - - * Test packet for CpuCP connectivity. The CPU will put the fence value - * in the result field. - * - * CPUCP_PACKET_FREQUENCY_CURR_GET - - * Fetch the current frequency of a specified PLL. The packet's arguments - * specify the PLL. - * - * CPUCP_PACKET_MAX_POWER_GET - - * Fetch the maximal power of the device. - * - * CPUCP_PACKET_MAX_POWER_SET - - * Set the maximal power of the device. The packet's arguments specify - * the power. - * - * CPUCP_PACKET_EEPROM_DATA_GET - - * Get EEPROM data from the CpuCP kernel. The buffer is specified in the - * addr field. The CPU will put the returned data size in the result - * field. In addition, the host's driver passes the max size it allows the - * CpuCP to write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * CPUCP_PACKET_NIC_INFO_GET - - * Fetch information from the device regarding the NIC. the host's driver - * passes the max size it allows the CpuCP to write to the structure, to - * prevent data corruption in case of mismatched driver/FW versions. - * - * CPUCP_PACKET_TEMPERATURE_SET - - * Set the value of the offset property of a specified thermal sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * CPUCP_PACKET_VOLTAGE_SET - - * Trigger the reset_history property of a specified voltage sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * CPUCP_PACKET_CURRENT_SET - - * Trigger the reset_history property of a specified current sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * CPUCP_PACKET_PCIE_THROUGHPUT_GET - - * Get throughput of PCIe. - * The packet's arguments specify the transaction direction (TX/RX). - * The window measurement is 10[msec], and the return value is in KB/sec. - * - * CPUCP_PACKET_PCIE_REPLAY_CNT_GET - * Replay count measures number of "replay" events, which is basicly - * number of retries done by PCIe. - * - * CPUCP_PACKET_TOTAL_ENERGY_GET - - * Total Energy is measurement of energy from the time FW Linux - * is loaded. It is calculated by multiplying the average power - * by time (passed from armcp start). The units are in MilliJouls. - * - * CPUCP_PACKET_PLL_INFO_GET - - * Fetch frequencies of PLL from the required PLL IP. - * The packet's arguments specify the device PLL type - * Pll type is the PLL from device pll_index enum. - * The result is composed of 4 outputs, each is 16-bit - * frequency in MHz. - * - * CPUCP_PACKET_POWER_GET - - * Fetch the present power consumption of the device (Current * Voltage). - * - * CPUCP_PACKET_NIC_PFC_SET - - * Enable/Disable the NIC PFC feature. The packet's arguments specify the - * NIC port, relevant lanes to configure and one bit indication for - * enable/disable. - * - * CPUCP_PACKET_NIC_FAULT_GET - - * Fetch the current indication for local/remote faults from the NIC MAC. - * The result is 32-bit value of the relevant register. - * - * CPUCP_PACKET_NIC_LPBK_SET - - * Enable/Disable the MAC loopback feature. The packet's arguments specify - * the NIC port, relevant lanes to configure and one bit indication for - * enable/disable. 
- * - * CPUCP_PACKET_NIC_MAC_INIT - - * Configure the NIC MAC channels. The packet's arguments specify the - * NIC port and the speed. - * - * CPUCP_PACKET_MSI_INFO_SET - - * set the index number for each supported msi type going from - * host to device - * - * CPUCP_PACKET_NIC_XPCS91_REGS_GET - - * Fetch the un/correctable counters values from the NIC MAC. - * - * CPUCP_PACKET_NIC_STAT_REGS_GET - - * Fetch various NIC MAC counters from the NIC STAT. - * - * CPUCP_PACKET_NIC_STAT_REGS_CLR - - * Clear the various NIC MAC counters in the NIC STAT. - * - * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - - * Fetch all NIC MAC counters from the NIC STAT. - * - * CPUCP_PACKET_IS_IDLE_CHECK - - * Check if the device is IDLE in regard to the DMA/compute engines - * and QMANs. The f/w will return a bitmask where each bit represents - * a different engine or QMAN according to enum cpucp_idle_mask. - * The bit will be 1 if the engine is NOT idle. - * - * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - - * Fetch all HBM replaced-rows and prending to be replaced rows data. - * - * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - - * Fetch status of HBM rows pending replacement and need a reboot to - * be replaced. - * - * CPUCP_PACKET_POWER_SET - - * Resets power history of device to 0 - * - * CPUCP_PACKET_ENGINE_CORE_ASID_SET - - * Packet to perform engine core ASID configuration - * - * CPUCP_PACKET_SEC_ATTEST_GET - - * Get the attestaion data that is collected during various stages of the - * boot sequence. the attestation data is also hashed with some unique - * number (nonce) provided by the host to prevent replay attacks. - * public key and certificate also provided as part of the FW response. - * - * CPUCP_PACKET_MONITOR_DUMP_GET - - * Get monitors registers dump from the CpuCP kernel. - * The CPU will put the registers dump in the a buffer allocated by the driver - * which address is passed via the CpuCp packet. In addition, the host's driver - * passes the max size it allows the CpuCP to write to the structure, to prevent - * data corruption in case of mismatched driver/FW versions. - * Relevant only to Gaudi. - * - * CPUCP_PACKET_GENERIC_PASSTHROUGH - - * Generic opcode for all firmware info that is only passed to host - * through the LKD, without getting parsed there. - * - * CPUCP_PACKET_ACTIVE_STATUS_SET - - * LKD sends FW indication whether device is free or in use, this indication is reported - * also to the BMC. 
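From the same header: the ctl word carries the opcode, and, per the note above, QMAN opcodes arrive pre-shifted by 8 so the value the driver writes matches what CpuCP reads. A small sketch using the CPUCP_PKT_CTL_OPCODE_* definitions that follow below; the helper itself is hypothetical:

	static __le32 cpucp_pkt_ctl(u32 opcode)
	{
		/* non-QMAN opcodes are 1..(2^8 - 1); QMAN opcodes are already shifted by 8 */
		return cpu_to_le32((opcode << CPUCP_PKT_CTL_OPCODE_SHIFT) &
				   CPUCP_PKT_CTL_OPCODE_MASK);
	}
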
- */ - -enum cpucp_packet_id { - CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ - CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ - CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ - CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ - CPUCP_PACKET_CURRENT_GET, /* sysfs */ - CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ - CPUCP_PACKET_PWM_GET, /* sysfs */ - CPUCP_PACKET_PWM_SET, /* sysfs */ - CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ - CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ - CPUCP_PACKET_LED_SET, /* debugfs */ - CPUCP_PACKET_I2C_WR, /* debugfs */ - CPUCP_PACKET_I2C_RD, /* debugfs */ - CPUCP_PACKET_INFO_GET, /* IOCTL */ - CPUCP_PACKET_FLASH_PROGRAM_REMOVED, - CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ - CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ - CPUCP_PACKET_TEST, /* internal */ - CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ - CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ - CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ - CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ - CPUCP_PACKET_NIC_INFO_GET, /* internal */ - CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ - CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ - CPUCP_PACKET_CURRENT_SET, /* sysfs */ - CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ - CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ - CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ - CPUCP_PACKET_PLL_INFO_GET, /* internal */ - CPUCP_PACKET_NIC_STATUS, /* internal */ - CPUCP_PACKET_POWER_GET, /* internal */ - CPUCP_PACKET_NIC_PFC_SET, /* internal */ - CPUCP_PACKET_NIC_FAULT_GET, /* internal */ - CPUCP_PACKET_NIC_LPBK_SET, /* internal */ - CPUCP_PACKET_NIC_MAC_CFG, /* internal */ - CPUCP_PACKET_MSI_INFO_SET, /* internal */ - CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ - CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ - CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ - CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ - CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ - CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ - CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ - CPUCP_PACKET_POWER_SET, /* internal */ - CPUCP_PACKET_RESERVED, /* not used */ - CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ - CPUCP_PACKET_RESERVED2, /* not used */ - CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ - CPUCP_PACKET_RESERVED3, /* not used */ - CPUCP_PACKET_RESERVED4, /* not used */ - CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ - CPUCP_PACKET_RESERVED5, /* not used */ - CPUCP_PACKET_RESERVED6, /* not used */ - CPUCP_PACKET_RESERVED7, /* not used */ - CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ - CPUCP_PACKET_RESERVED8, /* not used */ - CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ - CPUCP_PACKET_RESERVED9, /* not used */ - CPUCP_PACKET_RESERVED10, /* not used */ - CPUCP_PACKET_RESERVED11, /* not used */ - CPUCP_PACKET_ID_MAX /* must be last */ -}; - -#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 - -#define CPUCP_PKT_CTL_RC_SHIFT 12 -#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 - -#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 -#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 - -#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0 -#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull -#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16 -#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull -#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32 -#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull -#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48 -#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull - -#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0 -#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull -#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16 -#define 
CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull - -#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0 -#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull -#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1 -#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull - -#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0 -#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull -#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 -#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull - -#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 -#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull -#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 -#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull - -/* heartbeat status bits */ -#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 -#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 - -struct cpucp_packet { - union { - __le64 value; /* For SET packets */ - __le64 result; /* For GET packets */ - __le64 addr; /* For PQ */ - }; - - __le32 ctl; - - __le32 fence; /* Signal to host that message is completed */ - - union { - struct {/* For temperature/current/voltage/fan/pwm get/set */ - __le16 sensor_index; - __le16 type; - }; - - struct { /* For I2C read/write */ - __u8 i2c_bus; - __u8 i2c_addr; - __u8 i2c_reg; - /* - * In legacy implemetations, i2c_len was not present, - * was unused and just added as pad. - * So if i2c_len is 0, it is treated as legacy - * and r/w 1 Byte, else if i2c_len is specified, - * its treated as new multibyte r/w support. - */ - __u8 i2c_len; - }; - - struct {/* For PLL info fetch */ - __le16 pll_type; - /* TODO pll_reg is kept temporary before removal */ - __le16 pll_reg; - }; - - /* For any general request */ - __le32 index; - - /* For frequency get/set */ - __le32 pll_index; - - /* For led set */ - __le32 led_index; - - /* For get CpuCP info/EEPROM data/NIC info */ - __le32 data_max_size; - - /* - * For any general status bitmask. Shall be used whenever the - * result cannot be used to hold general purpose data. - */ - __le32 status_mask; - - /* random, used once number, for security packets */ - __le32 nonce; - }; - - union { - /* For NIC requests */ - __le32 port_index; - - /* For Generic packet sub index */ - __le32 pkt_subidx; - }; -}; - -struct cpucp_unmask_irq_arr_packet { - struct cpucp_packet cpucp_pkt; - __le32 length; - __le32 irqs[]; -}; - -struct cpucp_nic_status_packet { - struct cpucp_packet cpucp_pkt; - __le32 length; - __le32 data[]; -}; - -struct cpucp_array_data_packet { - struct cpucp_packet cpucp_pkt; - __le32 length; - __le32 data[]; -}; - -enum cpucp_led_index { - CPUCP_LED0_INDEX = 0, - CPUCP_LED1_INDEX, - CPUCP_LED2_INDEX -}; - -/* - * enum cpucp_packet_rc - Error return code - * @cpucp_packet_success -> in case of success. - * @cpucp_packet_invalid -> this is to support Goya and Gaudi platform. - * @cpucp_packet_fault -> in case of processing error like failing to - * get device binding or semaphore etc. - * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is - * supported Greco onwards. - * @cpucp_packet_invalid_params -> when checking parameter like length of buffer - * or attribute value etc. Supported Greco onwards. - * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. 
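One convention from the packet union above worth making concrete: i2c_len is 0 in legacy implementations (meaning a single-byte read/write) and otherwise gives the multibyte transfer length. A hypothetical helper capturing that rule:

	static u8 cpucp_i2c_len(const struct cpucp_packet *pkt)
	{
		/* 0 is the legacy encoding: exactly one byte is transferred */
		return pkt->i2c_len ? pkt->i2c_len : 1;
	}
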
- */ -enum cpucp_packet_rc { - cpucp_packet_success, - cpucp_packet_invalid, - cpucp_packet_fault, - cpucp_packet_invalid_pkt, - cpucp_packet_invalid_params, - cpucp_packet_rc_max -}; - -/* - * cpucp_temp_type should adhere to hwmon_temp_attributes - * defined in Linux kernel hwmon.h file - */ -enum cpucp_temp_type { - cpucp_temp_input, - cpucp_temp_min = 4, - cpucp_temp_min_hyst, - cpucp_temp_max = 6, - cpucp_temp_max_hyst, - cpucp_temp_crit, - cpucp_temp_crit_hyst, - cpucp_temp_offset = 19, - cpucp_temp_lowest = 21, - cpucp_temp_highest = 22, - cpucp_temp_reset_history = 23, - cpucp_temp_warn = 24, - cpucp_temp_max_crit = 25, - cpucp_temp_max_warn = 26, -}; - -enum cpucp_in_attributes { - cpucp_in_input, - cpucp_in_min, - cpucp_in_max, - cpucp_in_lowest = 6, - cpucp_in_highest = 7, - cpucp_in_reset_history, - cpucp_in_intr_alarm_a, - cpucp_in_intr_alarm_b, -}; - -enum cpucp_curr_attributes { - cpucp_curr_input, - cpucp_curr_min, - cpucp_curr_max, - cpucp_curr_lowest = 6, - cpucp_curr_highest = 7, - cpucp_curr_reset_history -}; - -enum cpucp_fan_attributes { - cpucp_fan_input, - cpucp_fan_min = 2, - cpucp_fan_max -}; - -enum cpucp_pwm_attributes { - cpucp_pwm_input, - cpucp_pwm_enable -}; - -enum cpucp_pcie_throughput_attributes { - cpucp_pcie_throughput_tx, - cpucp_pcie_throughput_rx -}; - -/* TODO temporary kept before removal */ -enum cpucp_pll_reg_attributes { - cpucp_pll_nr_reg, - cpucp_pll_nf_reg, - cpucp_pll_od_reg, - cpucp_pll_div_factor_reg, - cpucp_pll_div_sel_reg -}; - -/* TODO temporary kept before removal */ -enum cpucp_pll_type_attributes { - cpucp_pll_cpu, - cpucp_pll_pci, -}; - -/* - * cpucp_power_type aligns with hwmon_power_attributes - * defined in Linux kernel hwmon.h file - */ -enum cpucp_power_type { - CPUCP_POWER_INPUT = 8, - CPUCP_POWER_INPUT_HIGHEST = 9, - CPUCP_POWER_RESET_INPUT_HISTORY = 11 -}; - -/* - * MSI type enumeration table for all ASICs and future SW versions. - * For future ASIC-LKD compatibility, we can only add new enumerations. - * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). - * Changing the order of entries or removing entries is not allowed. - */ -enum cpucp_msi_type { - CPUCP_EVENT_QUEUE_MSI_TYPE, - CPUCP_NIC_PORT1_MSI_TYPE, - CPUCP_NIC_PORT3_MSI_TYPE, - CPUCP_NIC_PORT5_MSI_TYPE, - CPUCP_NIC_PORT7_MSI_TYPE, - CPUCP_NIC_PORT9_MSI_TYPE, - CPUCP_NUM_OF_MSI_TYPES -}; - -/* - * PLL enumeration table used for all ASICs and future SW versions. - * For future ASIC-LKD compatibility, we can only add new enumerations. - * at the end of the table. - * Changing the order of entries or removing entries is not allowed. 
- */ -enum pll_index { - CPU_PLL = 0, - PCI_PLL = 1, - NIC_PLL = 2, - DMA_PLL = 3, - MESH_PLL = 4, - MME_PLL = 5, - TPC_PLL = 6, - IF_PLL = 7, - SRAM_PLL = 8, - NS_PLL = 9, - HBM_PLL = 10, - MSS_PLL = 11, - DDR_PLL = 12, - VID_PLL = 13, - BANK_PLL = 14, - MMU_PLL = 15, - IC_PLL = 16, - MC_PLL = 17, - EMMC_PLL = 18, - D2D_PLL = 19, - CS_PLL = 20, - C2C_PLL = 21, - NCH_PLL = 22, - C2M_PLL = 23, - PLL_MAX -}; - -enum rl_index { - TPC_RL = 0, - MME_RL, - EDMA_RL, -}; - -enum pvt_index { - PVT_SW, - PVT_SE, - PVT_NW, - PVT_NE -}; - -/* Event Queue Packets */ - -struct eq_generic_event { - __le64 data[7]; -}; - -/* - * CpuCP info - */ - -#define CARD_NAME_MAX_LEN 16 -#define CPUCP_MAX_SENSORS 128 -#define CPUCP_MAX_NICS 128 -#define CPUCP_LANES_PER_NIC 4 -#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024 -#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) -#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) -#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) -#define CPUCP_HBM_ROW_REPLACE_MAX 32 - -struct cpucp_sensor { - __le32 type; - __le32 flags; -}; - -/** - * struct cpucp_card_types - ASIC card type. - * @cpucp_card_type_pci: PCI card. - * @cpucp_card_type_pmc: PCI Mezzanine Card. - */ -enum cpucp_card_types { - cpucp_card_type_pci, - cpucp_card_type_pmc -}; - -#define CPUCP_SEC_CONF_ENABLED_SHIFT 0 -#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001 - -#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1 -#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002 - -#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2 -#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004 - -/** - * struct cpucp_security_info - Security information. - * @config: configuration bit field - * @keys_num: number of stored keys - * @revoked_keys: revoked keys bit field - * @min_svn: minimal security version - */ -struct cpucp_security_info { - __u8 config; - __u8 keys_num; - __u8 revoked_keys; - __u8 min_svn; -}; - -/** - * struct cpucp_info - Info from CpuCP that is necessary to the host's driver - * @sensors: available sensors description. - * @kernel_version: CpuCP linux kernel version. - * @reserved: reserved field. - * @card_type: card configuration type. - * @card_location: in a server, each card has different connections topology - * depending on its location (relevant for PMC card type) - * @cpld_version: CPLD programmed F/W version. - * @infineon_version: Infineon main DC-DC version. - * @fuse_version: silicon production FUSE information. - * @thermal_version: thermald S/W version. - * @cpucp_version: CpuCP S/W version. - * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. - * @dram_size: available DRAM size. - * @card_name: card name that will be displayed in HWMON subsystem on the host - * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance - * (0 = functional, 1 = binned) - * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance - * (0 = functional, 1 = binned), maximum 1 per dcore - * @sram_binning: Categorize SRAM functionality - * (0 = fully functional, 1 = lower-half is not functional, - * 2 = upper-half is not functional) - * @sec_info: security information - * @pll_map: Bit map of supported PLLs for current ASIC version. - * @mme_binning_mask: MME binning mask, - * bits [0:6] <==> dcore0 mme fma - * bits [7:13] <==> dcore1 mme fma - * bits [14:20] <==> dcore0 mme ima - * bits [21:27] <==> dcore1 mme ima - * For each group, if the 6th bit is set then first 5 bits - * represent the col's idx [0-31], otherwise these bits are - * ignored, and col idx 32 is binned. 
The 7th bit is a don't-care. - * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance - * (0 = functional, 1 = binned) - * @memory_repair_flag: eFuse flag indicating memory repair - * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance - * (0 = functional, 1 = binned) - * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance - * (0 = functional, 1 = binned) - * @interposer_version: Interposer version programmed in eFuse - * @substrate_version: Substrate version programmed in eFuse - * @fw_os_version: Firmware OS Version - */ -struct cpucp_info { - struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; - __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; - __le32 card_type; - __le32 card_location; - __le32 cpld_version; - __le32 infineon_version; - __u8 fuse_version[VERSION_MAX_LEN]; - __u8 thermal_version[VERSION_MAX_LEN]; - __u8 cpucp_version[VERSION_MAX_LEN]; - __le32 infineon_second_stage_version; - __le64 dram_size; - char card_name[CARD_NAME_MAX_LEN]; - __le64 tpc_binning_mask; - __le64 decoder_binning_mask; - __u8 sram_binning; - __u8 dram_binning_mask; - __u8 memory_repair_flag; - __u8 edma_binning_mask; - __u8 xbar_binning_mask; - __u8 interposer_version; - __u8 substrate_version; - __u8 reserved2; - struct cpucp_security_info sec_info; - __le32 reserved3; - __u8 pll_map[PLL_MAP_LEN]; - __le64 mme_binning_mask; - __u8 fw_os_version[VERSION_MAX_LEN]; -}; - -struct cpucp_mac_addr { - __u8 mac_addr[ETH_ALEN]; -}; - -enum cpucp_serdes_type { - TYPE_1_SERDES_TYPE, - TYPE_2_SERDES_TYPE, - HLS1_SERDES_TYPE, - HLS1H_SERDES_TYPE, - HLS2_SERDES_TYPE, - HLS2_TYPE_1_SERDES_TYPE, - MAX_NUM_SERDES_TYPE, /* number of types */ - UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ -}; - -struct cpucp_nic_info { - struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; - __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; - __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; - __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; - __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; - __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; - __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; - __le16 serdes_type; /* enum cpucp_serdes_type */ - __le16 tx_swap_map[CPUCP_MAX_NICS]; - __u8 reserved[6]; -}; - -#define PAGE_DISCARD_MAX 64 - -struct page_discard_info { - __u8 num_entries; - __u8 reserved[7]; - __le32 mmu_page_idx[PAGE_DISCARD_MAX]; -}; - -/* - * struct frac_val - fraction value represented by "integer.frac". - * @integer: the integer part of the fraction value; - * @frac: the fractional part of the fraction value. - */ -struct frac_val { - union { - struct { - __le16 integer; - __le16 frac; - }; - __le32 val; - }; -}; - -/* - * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". - * @integer: the integer part of the SER value; - * @exp: the exponent part of the SER value. - */ -struct ser_val { - __le16 integer; - __le16 exp; -}; -
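As a quick aid, this is how the two fixed-point encodings above decode; a hedged sketch, assuming f is a struct frac_val and s a struct ser_val already read from the device:

	/* frac_val encodes "integer.frac"; ser_val encodes integer * 10^-exp */
	pr_info("bandwidth: %u.%u\n",
		le16_to_cpu(f.integer), le16_to_cpu(f.frac));
	pr_info("pre-FEC SER: %ue-%u\n",
		le16_to_cpu(s.integer), le16_to_cpu(s.exp));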
-/* - * struct cpucp_nic_status - describes the status of a NIC port. - * @port: NIC port index. - * @bad_format_cnt: e.g. CRC. - * @responder_out_of_sequence_psn_cnt: e.g. NAK. - * @high_ber_reinit: link reinit due to high BER. - * @correctable_err_cnt: e.g. bit-flip. - * @uncorrectable_err_cnt: e.g. MAC errors. - * @retraining_cnt: re-training counter. - * @up: is port up. - * @pcs_link: has PCS link. - * @phy_ready: is PHY ready. - * @auto_neg: is Autoneg enabled. - * @timeout_retransmission_cnt: timeout retransmission events. - * @high_ber_cnt: high ber events. - * @pre_fec_ser: pre FEC SER value. - * @post_fec_ser: post FEC SER value. - * @bandwidth: measured throughput. - * @lat: measured latency. - */ -struct cpucp_nic_status { - __le32 port; - __le32 bad_format_cnt; - __le32 responder_out_of_sequence_psn_cnt; - __le32 high_ber_reinit; - __le32 correctable_err_cnt; - __le32 uncorrectable_err_cnt; - __le32 retraining_cnt; - __u8 up; - __u8 pcs_link; - __u8 phy_ready; - __u8 auto_neg; - __le32 timeout_retransmission_cnt; - __le32 high_ber_cnt; - struct ser_val pre_fec_ser; - struct ser_val post_fec_ser; - struct frac_val bandwidth; - struct frac_val lat; -}; - -enum cpucp_hbm_row_replace_cause { - REPLACE_CAUSE_DOUBLE_ECC_ERR, - REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, -}; - -struct cpucp_hbm_row_info { - __u8 hbm_idx; - __u8 pc; - __u8 sid; - __u8 bank_idx; - __le16 row_addr; - __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ - __u8 pad; -}; - -struct cpucp_hbm_row_replaced_rows_info { - __le16 num_replaced_rows; - __u8 pad[6]; - struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; -}; - -enum cpu_reset_status { - CPU_RST_STATUS_NA = 0, - CPU_RST_STATUS_SOFT_RST_DONE = 1, -}; - -#define SEC_PCR_DATA_BUF_SZ 256 -#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ -#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ -#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ -#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ - -/* - * struct cpucp_sec_attest_info - attestation report of the boot - * @pcr_data: raw values of the PCR registers - * @pcr_num_reg: number of PCR registers in the pcr_data array - * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) - * @nonce: number only used once. random number provided by host. this is also - * passed to the quote command as qualifying data. - * @pcr_quote_len: length of the attestation quote data (bytes) - * @pcr_quote: attestation report data structure - * @quote_sig_len: length of the attestation report signature (bytes) - * @quote_sig: signature structure of the attestation report - * @pub_data_len: length of the public data (bytes) - * @public_data: public key for the signed attestation - * (outPublic + name + qualifiedName) - * @certificate_len: length of the certificate (bytes) - * @certificate: certificate for the attestation signing key - */ -struct cpucp_sec_attest_info { - __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; - __u8 pcr_num_reg; - __u8 pcr_reg_len; - __le16 pad0; - __le32 nonce; - __le16 pcr_quote_len; - __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; - __u8 quote_sig_len; - __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; - __le16 pub_data_len; - __u8 public_data[SEC_PUB_DATA_BUF_SZ]; - __le16 certificate_len; - __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; -}; - -/* - * struct cpucp_dev_info_signed - device information signed by a secured device - * @info: device information structure as defined above - * @nonce: number only used once. random number provided by host. this number is - * hashed and signed along with the device information. - * @info_sig_len: length of the attestation signature (bytes) - * @info_sig: signature of the info + nonce data. 
- * @pub_data_len: length of the public data (bytes) - * @public_data: public key for the signed info data - * (outPublic + name + qualifiedName) - * @certificate_len: length of the certificate (bytes) - * @certificate: certificate for the signing key - */ -struct cpucp_dev_info_signed { - struct cpucp_info info; /* assumed to be 64bit aligned */ - __le32 nonce; - __le32 pad0; - __u8 info_sig_len; - __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; - __le16 pub_data_len; - __u8 public_data[SEC_PUB_DATA_BUF_SZ]; - __le16 certificate_len; - __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; -}; - -#define DCORE_MON_REGS_SZ 512 -/* - * struct dcore_monitor_regs_data - DCORE monitor regs data. - * The structure follows the sync manager block layout. Relevant only to Gaudi. - * @mon_pay_addrl: array of payload address low bits. - * @mon_pay_addrh: array of payload address high bits. - * @mon_pay_data: array of payload data. - * @mon_arm: array of monitor arm. - * @mon_status: array of monitor status. - */ -struct dcore_monitor_regs_data { - __le32 mon_pay_addrl[DCORE_MON_REGS_SZ]; - __le32 mon_pay_addrh[DCORE_MON_REGS_SZ]; - __le32 mon_pay_data[DCORE_MON_REGS_SZ]; - __le32 mon_arm[DCORE_MON_REGS_SZ]; - __le32 mon_status[DCORE_MON_REGS_SZ]; -}; - -/* contains SM data for each SYNC_MNGR (relevant only to Gaudi) */ -struct cpucp_monitor_dump { - struct dcore_monitor_regs_data sync_mngr_w_s; - struct dcore_monitor_regs_data sync_mngr_e_s; - struct dcore_monitor_regs_data sync_mngr_w_n; - struct dcore_monitor_regs_data sync_mngr_e_n; -}; - -/* - * The type of the generic request (and other input arguments) is fetched from the user by reading - * the "pkt_subidx" field in struct cpucp_packet. - * - * HL_PASSTHROUGH_VERSIONS - Fetch all firmware versions. - */ -enum hl_passthrough_type { - HL_PASSTHROUGH_VERSIONS, -}; - -#endif /* CPUCP_IF_H */ diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h deleted file mode 100644 index 2256add057c5..000000000000 --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h +++ /dev/null @@ -1,835 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2018-2020 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef HL_BOOT_IF_H -#define HL_BOOT_IF_H - -#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ -#define HL_POWER9_HOST_MAGIC 0x1DA30009 - -#define BOOT_FIT_SRAM_OFFSET 0x200000 - -#define VERSION_MAX_LEN 128 - -enum cpu_boot_err { - CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, - CPU_BOOT_ERR_FIT_CORRUPTED = 1, - CPU_BOOT_ERR_TS_INIT_FAIL = 2, - CPU_BOOT_ERR_DRAM_SKIPPED = 3, - CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, - CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, - CPU_BOOT_ERR_NIC_FW_FAIL = 6, - CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, - CPU_BOOT_ERR_SECURITY_FAIL = 8, - CPU_BOOT_ERR_EFUSE_FAIL = 9, - CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, - CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, - CPU_BOOT_ERR_PLL_FAIL = 12, - CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, - CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, - CPU_BOOT_ERR_BINNING_FAIL = 19, - CPU_BOOT_ERR_TPM_FAIL = 20, - CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, - CPU_BOOT_ERR_EEPROM_FAIL = 22, - CPU_BOOT_ERR_ENABLED = 31, - CPU_BOOT_ERR_SCND_EN = 63, - CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ -}; - -/* - * Mask for fatal failures - * This mask contains all possible fatal failures, and dynamic code - * will clear the non-relevant ones. 
- */ -#define CPU_BOOT_ERR_FATAL_MASK \ - ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ - (1 << CPU_BOOT_ERR_PLL_FAIL) | \ - (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ - (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ - (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ - (1 << CPU_BOOT_ERR_EEPROM_FAIL)) -
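A hedged sketch of how a host driver could use this mask; err0 is assumed to be read from the first boot-error register via a register accessor such as RREG32, and CPU_BOOT_ERR0_ENABLED (defined further below) gates whether the register contents are meaningful at all:

	u32 err0 = RREG32(boot_err0_reg);	/* accessor and address assumed */

	if ((err0 & CPU_BOOT_ERR0_ENABLED) && (err0 & CPU_BOOT_ERR_FATAL_MASK))
		dev_err(dev, "fatal boot error 0x%x, device is unusable\n", err0);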
-/* - * CPU error bits in BOOT_ERROR registers - * - * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. - * DRAM is not reliable to use. - * - * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the - * image provided by the host has failed. - * - * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. - * Boot continues as usual, but keep in - * mind this is a warning. - * - * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. - * Skipping DRAM initialization has been - * requested (e.g. strap, command, etc.) - * and FW skipped the DRAM initialization. - * Host can initialize the DRAM. - * - * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. - * Meaning the BMC data might not be - * available until reset. - * - * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. - * BMC has not provided the NIC data yet. - * Once provided this bit will be cleared. - * - * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. - * The NIC FW loading and initialization - * failed. This means NICs are not usable. - * - * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been - * started, but is not ready yet - chip - * cannot be accessed. - * - * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed. - * The tasks are security init (root of - * trust), boot authentication (chain of - * trust), data packets authentication. - * - * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed. - * The PCI device ID might be wrong. - * - * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed. - * It means that the ppboot checksum - * verification for the preboot primary - * image failed to match the expected - * checksum. Trying to program the image again - * might solve this. - * - * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed. - * It means that the ppboot checksum - * verification for the preboot secondary - * image failed to match the expected - * checksum. Trying to program the image again - * might solve this. - * - * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one - * of the PLLs remains in REF_CLK - * - * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support - * should be contacted. - * - * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during - * the execution of ppboot or preboot. - * For example: stack overflow. - * - * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning - * malfunctioning components might still be - * in use. - * - * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. - * - * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature - * sensor. - * - * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults - * are used. - * - * CPU_BOOT_ERR0_ENABLED Error registers enabled. - * This is a main indication that the - * running FW populates the error - * registers. Meaning the error bits are - * not garbage, but actual error statuses. - */ -#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) -#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) -#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) -#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) -#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) -#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) -#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) -#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) -#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) -#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) -#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) -#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) -#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) -#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) -#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) -#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) -#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) -#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) -#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) -#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) -#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) - -enum cpu_boot_dev_sts { - CPU_BOOT_DEV_STS_SECURITY_EN = 0, - CPU_BOOT_DEV_STS_DEBUG_EN = 1, - CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, - CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, - CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, - CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, - CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, - CPU_BOOT_DEV_STS_RL_EN = 7, - CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, - CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, - CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, - CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, - CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, - CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, - CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, - CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, - CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, - CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, - CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, - CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, - CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, - CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, - CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, - CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, - CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, - CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, - CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, - CPU_BOOT_DEV_STS_ENABLED = 31, - CPU_BOOT_DEV_STS_SCND_EN = 63, - CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ -}; - -/* - * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers - * - * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled. - * This is an indication for security - * enabled in FW, which means that - * all conditions for security are met: - * device is indicated as security enabled, - * registers are protected, and device - * uses keys for image verification. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled. - * Enabled when JTAG or DEBUG is enabled - * in FW. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled. - * Watchdog is enabled in FW. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled. - * DRAM initialization has been done in FW. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled. 
- * If set, it means that during boot, - * FW waited for BMC data. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized. - * FW initialized E2E credits. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized. - * FW initialized HBM credits. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized. - * FW initialized rate limiter. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled. - * FW initialized SRAM scrambler. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled. - * FW initialized DRAM scrambler. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled. - * FW has the hard reset procedure - * implemented. This means that FW will - * perform hard reset procedure on - * receiving the halt-machine event. - * Initialized in: preboot, u-boot, linux - * - * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available - * for use. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. - * FW initialized Clock Gating. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled. - * FW handles HBM ECC indications. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd - * is set to the PI counter. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication - * protocol is enabled. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. - * If this bit is set, the iATU has been - * configured and is ready for use. - * Initialized in: ppboot - * - * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and - * any access to them is done via the FW. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. - * FW sends to host a bitmap of supported - * PLLs. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from - * privileged entity. FW sets this status - * bit for host. If this bit is set then - * GIC cannot be accessed from host. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running - * index for each new event sent to host. - * This is used as a method in host to - * identify that the waiting event in - * queue is actually a new event which - * was not served before. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to - * prevent IRQs overriding each other. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN - * NIC STAT and XPCS91 access is restricted - * and is done via FW only. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN - * NIC STAT get all is supported. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN - * F/W checks if the device is idle by reading a defined set - * of registers. It returns a bitmask of all the engines, - * where a bit is set if the engine is not idle. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_MAP_HWMON_EN - * If set, f/w supports proprietary - * HWMON enum mapping to cpucp enums. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. 
- * This is a main indication that the - * running FW populates the device status - * register. Meaning the device status - * bits are not garbage, but actual - * statuses. - * Initialized in: preboot - * - */ -#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) -#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) -#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) -#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) -#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) -#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) -#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) -#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) -#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) -#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) -#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) -#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) -#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) -#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) -#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) -#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) -#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) -#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) -#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) -#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN) -#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) -#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) -#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) -#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) -#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) -#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) -#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) -#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) -#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) - -enum cpu_boot_status { - CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ - CPU_BOOT_STATUS_IN_WFE = 1, - CPU_BOOT_STATUS_DRAM_RDY = 2, - CPU_BOOT_STATUS_SRAM_AVAIL = 3, - CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ - CPU_BOOT_STATUS_IN_PREBOOT = 5, - CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ - CPU_BOOT_STATUS_IN_UBOOT = 7, - CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ - CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ - /* U-Boot console prompt activated, commands are not processed */ - CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, - /* Finished NICs init, reported after DRAM and NICs */ - CPU_BOOT_STATUS_NIC_FW_RDY = 11, - CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ - CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ - CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ - /* Last boot loader progress status, ready to receive commands */ - CPU_BOOT_STATUS_READY_TO_BOOT = 15, - /* Internal Boot finished, ready for boot-fit */ - CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, - 
/* Internal Security has been initialized, device can be accessed */ - CPU_BOOT_STATUS_SECURITY_READY = 17, -}; - -enum kmd_msg { - KMD_MSG_NA = 0, - KMD_MSG_GOTO_WFE, - KMD_MSG_FIT_RDY, - KMD_MSG_SKIP_BMC, - RESERVED, - KMD_MSG_RST_DEV, - KMD_MSG_LAST -}; - -enum cpu_msg_status { - CPU_MSG_CLR = 0, - CPU_MSG_OK, - CPU_MSG_ERR, -}; - -/* communication registers mapping - consider ABI when changing */ -struct cpu_dyn_regs { - __le32 cpu_pq_base_addr_low; - __le32 cpu_pq_base_addr_high; - __le32 cpu_pq_length; - __le32 cpu_pq_init_status; - __le32 cpu_eq_base_addr_low; - __le32 cpu_eq_base_addr_high; - __le32 cpu_eq_length; - __le32 cpu_eq_ci; - __le32 cpu_cq_base_addr_low; - __le32 cpu_cq_base_addr_high; - __le32 cpu_cq_length; - __le32 cpu_pf_pq_pi; - __le32 cpu_boot_dev_sts0; - __le32 cpu_boot_dev_sts1; - __le32 cpu_boot_err0; - __le32 cpu_boot_err1; - __le32 cpu_boot_status; - __le32 fw_upd_sts; - __le32 fw_upd_cmd; - __le32 fw_upd_pending_sts; - __le32 fuse_ver_offset; - __le32 preboot_ver_offset; - __le32 uboot_ver_offset; - __le32 hw_state; - __le32 kmd_msg_to_cpu; - __le32 cpu_cmd_status_to_host; - __le32 gic_host_pi_upd_irq; - __le32 gic_tpc_qm_irq_ctrl; - __le32 gic_mme_qm_irq_ctrl; - __le32 gic_dma_qm_irq_ctrl; - __le32 gic_nic_qm_irq_ctrl; - __le32 gic_dma_core_irq_ctrl; - __le32 gic_host_halt_irq; - __le32 gic_host_ints_irq; - __le32 gic_host_soft_rst_irq; - __le32 gic_rot_qm_irq_ctrl; - __le32 cpu_rst_status; - __le32 eng_arc_irq_ctrl; - __le32 reserved1[20]; /* reserve for future use */ -}; - -/* TODO: remove the desc magic after the code is updated to use message */ -/* HCDM - Habana Communications Descriptor Magic */ -#define HL_COMMS_DESC_MAGIC 0x4843444D -#define HL_COMMS_DESC_VER 3 - -/* HCMv - Habana Communications Message + header version */ -#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00 -#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00 -#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF - -#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \ - ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) -#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC -#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1) -#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2) -#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3) - -#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3 - -#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \ - (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \ - HL_COMMS_MSG_MAGIC_VALUE) - -#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \ - (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \ - ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) - -#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \ - (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \ - HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver))) -
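For illustration, a typical validation with these macros; a hedged sketch, assuming hdr points at a received struct comms_msg_header:

	/* Accept any message whose magic matches and whose version is >= V1. */
	if (!HL_COMMS_MSG_MAGIC_VALIDATE(le32_to_cpu(hdr->magic),
					 HL_COMMS_MSG_MAGIC_V1))
		return -EINVAL;	/* not a comms message, or too old */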
-enum comms_msg_type { - HL_COMMS_DESC_TYPE = 0, - HL_COMMS_RESET_CAUSE_TYPE = 1, - HL_COMMS_FW_CFG_SKIP_TYPE = 2, - HL_COMMS_BINNING_CONF_TYPE = 3, -}; - -/* - * Binning information shared between LKD and FW - * @tpc_mask_l - TPC binning information lower 64 bit - * @dec_mask - Decoder binning information - * @dram_mask - DRAM binning information - * @edma_mask - EDMA binning information - * @mme_mask_l - MME binning information lower 32 - * @mme_mask_h - MME binning information upper 32 - * @rot_mask - Rotator binning information - * @xbar_mask - xBAR binning information - * @reserved - reserved field for future binning info w/o ABI change - * @tpc_mask_h - TPC binning information upper 64 bit - * @nic_mask - NIC binning information - */ -struct lkd_fw_binning_info { - __le64 tpc_mask_l; - __le32 dec_mask; - __le32 dram_mask; - __le32 edma_mask; - __le32 mme_mask_l; - __le32 mme_mask_h; - __le32 rot_mask; - __le32 xbar_mask; - __le32 reserved0; - __le64 tpc_mask_h; - __le64 nic_mask; - __le32 reserved1[8]; -}; - -/* TODO: remove this struct after the code is updated to use message */ -/* this is the comms descriptor header - meta data */ -struct comms_desc_header { - __le32 magic; /* magic for validation */ - __le32 crc32; /* CRC32 of the descriptor w/o header */ - __le16 size; /* size of the descriptor w/o header */ - __u8 version; /* descriptor version */ - __u8 reserved[5]; /* pad to 64 bit */ -}; - -/* this is the comms message header - meta data */ -struct comms_msg_header { - __le32 magic; /* magic for validation */ - __le32 crc32; /* CRC32 of the message w/o header */ - __le16 size; /* size of the message w/o header */ - __u8 version; /* message payload version */ - __u8 type; /* message type */ - __u8 reserved[4]; /* pad to 64 bit */ -}; - -enum lkd_fw_ascii_msg_lvls { - LKD_FW_ASCII_MSG_ERR = 0, - LKD_FW_ASCII_MSG_WRN = 1, - LKD_FW_ASCII_MSG_INF = 2, - LKD_FW_ASCII_MSG_DBG = 3, -}; - -#define LKD_FW_ASCII_MSG_MAX_LEN 128 -#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ - -struct lkd_fw_ascii_msg { - __u8 valid; - __u8 msg_lvl; - __u8 reserved[6]; - char msg[LKD_FW_ASCII_MSG_MAX_LEN]; -}; - -/* this is the main FW descriptor - consider ABI when changing */ -struct lkd_fw_comms_desc { - struct comms_desc_header header; - struct cpu_dyn_regs cpu_dyn_regs; - char fuse_ver[VERSION_MAX_LEN]; - char cur_fw_ver[VERSION_MAX_LEN]; - /* can be used for 1 more version w/o ABI change */ - char reserved0[VERSION_MAX_LEN]; - __le64 img_addr; /* address for next FW component load */ - struct lkd_fw_binning_info binning_info; - struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; -}; - -enum comms_reset_cause { - HL_RESET_CAUSE_UNKNOWN = 0, - HL_RESET_CAUSE_HEARTBEAT = 1, - HL_RESET_CAUSE_TDR = 2, -}; - -/* TODO: remove define after struct name is aligned on all projects */ -#define lkd_msg_comms lkd_fw_comms_msg - -/* this is the comms message descriptor */ -struct lkd_fw_comms_msg { - struct comms_msg_header header; - /* union for future expansions of new messages */ - union { - struct { - struct cpu_dyn_regs cpu_dyn_regs; - char fuse_ver[VERSION_MAX_LEN]; - char cur_fw_ver[VERSION_MAX_LEN]; - /* can be used for 1 more version w/o ABI change */ - char reserved0[VERSION_MAX_LEN]; - /* address for next FW component load */ - __le64 img_addr; - struct lkd_fw_binning_info binning_info; - struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; - }; - struct { - __u8 reset_cause; - }; - struct { - __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ - }; - struct lkd_fw_binning_info binning_conf; - }; -}; -
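A small sketch of consuming the union above by message type; msg is assumed to be an already-validated struct lkd_fw_comms_msg pointer, and the handler is a hypothetical helper:

	switch (msg->header.type) {
	case HL_COMMS_DESC_TYPE:
		/* full descriptor payload: cpu_dyn_regs, versions, img_addr... */
		break;
	case HL_COMMS_RESET_CAUSE_TYPE:
		record_reset_cause(msg->reset_cause);	/* hypothetical helper */
		break;
	}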
-/* - * LKD commands: - * - * COMMS_NOOP Used to clear the command register and no actual - * command is sent. - * - * COMMS_CLR_STS Clear status command - FW should clear the - * status register. Used for synchronization - * between the commands as part of the race free - * protocol. - * - * COMMS_RST_STATE Reset the current communication state which is - * kept by FW for proper responses. - * Should be used in the beginning of the - * communication cycle to clean any leftovers from - * previous communication attempts. - * - * COMMS_PREP_DESC Prepare descriptor for setting up the - * communication and other dynamic data: - * struct lkd_fw_comms_desc. - * This command has a parameter stating the next FW - * component size, so the FW can actually prepare a - * space for it and in the status response provide - * the descriptor offset. The offset of the next FW - * data component is a part of the descriptor - * structure. - * - * COMMS_DATA_RDY The FW data has been uploaded and is ready for - * validation. - * - * COMMS_EXEC Execute the next FW component. - * - * COMMS_RST_DEV Reset the device. - * - * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure - * devices. - * - * COMMS_SKIP_BMC Perform actions required for BMC-less servers. - * Do not wait for BMC response. - * - * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory - * space is allocated in an ELBI access only - * address range. - * - */ -enum comms_cmd { - COMMS_NOOP = 0, - COMMS_CLR_STS = 1, - COMMS_RST_STATE = 2, - COMMS_PREP_DESC = 3, - COMMS_DATA_RDY = 4, - COMMS_EXEC = 5, - COMMS_RST_DEV = 6, - COMMS_GOTO_WFE = 7, - COMMS_SKIP_BMC = 8, - COMMS_PREP_DESC_ELBI = 10, - COMMS_INVLD_LAST -}; - -#define COMMS_COMMAND_SIZE_SHIFT 0 -#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF -#define COMMS_COMMAND_CMD_SHIFT 27 -#define COMMS_COMMAND_CMD_MASK 0xF8000000 - -/* - * LKD command to FW register structure - * @size - FW component size - * @cmd - command from enum comms_cmd - */ -struct comms_command { - union { /* bit fields are only for FW use */ - struct { - u32 size :25; /* 32MB max. */ - u32 reserved :2; - enum comms_cmd cmd :5; /* 32 commands */ - }; - __le32 val; - }; -}; -
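Since the bit fields are reserved for FW use, the host side composes the register value with the shift/mask macros above. A hedged sketch (next_fw_size is assumed to be the size of the component about to be loaded; the symmetric decode for the status register uses the COMMS_STATUS_* macros defined just below):

	u32 val;

	val = ((next_fw_size << COMMS_COMMAND_SIZE_SHIFT) & COMMS_COMMAND_SIZE_MASK) |
	      ((COMMS_PREP_DESC << COMMS_COMMAND_CMD_SHIFT) & COMMS_COMMAND_CMD_MASK);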
-/* - * FW status - * - * COMMS_STS_NOOP Used to clear the status register and no actual - * status is provided. - * - * COMMS_STS_ACK Command has been received and recognized. - * - * COMMS_STS_OK Command execution has finished successfully. - * - * COMMS_STS_ERR Command execution was unsuccessful and resulted - * in error. - * - * COMMS_STS_VALID_ERR FW validation has failed. - * - * COMMS_STS_TIMEOUT_ERR Command execution has timed out. - */ -enum comms_sts { - COMMS_STS_NOOP = 0, - COMMS_STS_ACK = 1, - COMMS_STS_OK = 2, - COMMS_STS_ERR = 3, - COMMS_STS_VALID_ERR = 4, - COMMS_STS_TIMEOUT_ERR = 5, - COMMS_STS_INVLD_LAST -}; - -/* RAM types for FW components loading - defines the base address */ -enum comms_ram_types { - COMMS_SRAM = 0, - COMMS_DRAM = 1, -}; - -#define COMMS_STATUS_OFFSET_SHIFT 0 -#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF -#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 -#define COMMS_STATUS_RAM_TYPE_SHIFT 26 -#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 -#define COMMS_STATUS_STATUS_SHIFT 28 -#define COMMS_STATUS_STATUS_MASK 0xF0000000 - -/* - * FW status to LKD register structure - * @offset - an offset from the base of the ram_type shifted right by - * 2 bits (always aligned to 32 bits). - * Allows a maximum addressable offset of 256MB from RAM base. - * Example: for real offset in RAM of 0x800000 (8MB), the value - * in offset field is (0x800000 >> 2) = 0x200000. - * @ram_type - the RAM type that should be used for offset from - * enum comms_ram_types - * @status - status from enum comms_sts - */ -struct comms_status { - union { /* bit fields are only for FW use */ - struct { - u32 offset :26; - enum comms_ram_types ram_type :2; - enum comms_sts status :4; /* 16 statuses */ - }; - __le32 val; - }; -}; - -/** - * HL_MODULES_MAX_NUM is determined by the size of modules_mask in struct - * hl_component_versions - */ -enum hl_modules { - HL_MODULES_BOOT_INFO = 0, - HL_MODULES_EEPROM, - HL_MODULES_FDT, - HL_MODULES_I2C, - HL_MODULES_LZ4, - HL_MODULES_MBEDTLS, - HL_MODULES_MAX_NUM = 16 -}; - -/** - * HL_COMPONENTS_MAX_NUM is determined by the size of components_mask in - * struct cpucp_versions - */ -enum hl_components { - HL_COMPONENTS_PID = 0, - HL_COMPONENTS_MGMT, - HL_COMPONENTS_PREBOOT, - HL_COMPONENTS_PPBOOT, - HL_COMPONENTS_ARMCP, - HL_COMPONENTS_CPLD, - HL_COMPONENTS_UBOOT, - HL_COMPONENTS_MAX_NUM = 16 -}; - -/** - * struct hl_component_versions - versions associated with hl component. - * @struct_size: size of all the struct (including dynamic size of modules). - * @modules_offset: offset of the modules field in this struct. - * @component: version of the component itself. - * @fw_os: Firmware OS Version. - * @modules_mask: i'th bit (from LSB) is a flag - on if module i in enum - * hl_modules is used. - * @modules_counter: number of set bits in modules_mask. - * @reserved: reserved for future use. - * @modules: versions of the component's modules. Elaborated explanation in - * struct cpucp_versions. - */ -struct hl_component_versions { - __le16 struct_size; - __le16 modules_offset; - __u8 component[VERSION_MAX_LEN]; - __u8 fw_os[VERSION_MAX_LEN]; - __le16 modules_mask; - __u8 modules_counter; - __u8 reserved[1]; - __u8 modules[][VERSION_MAX_LEN]; -}; - -/** - * struct hl_fw_versions - all versions (fuse, cpucp's components with their - * modules) - * @struct_size: size of all the struct (including dynamic size of components). - * @components_offset: offset of the components field in this struct. - * @fuse: silicon production FUSE information. - * @components_mask: i'th bit (from LSB) is a flag - on if component i in enum - * hl_components is used. - * @components_counter: number of set bits in components_mask. - * @reserved: reserved for future use. - * @components: versions of hl components. Index i corresponds to the i'th bit - * that is *on* in components_mask. For example, if - * components_mask=0b101, then *components represents arcpid and - * *(hl_component_versions*)((char*)components + 1') represents - * preboot, where 1' = components[0].struct_size. - */ -struct hl_fw_versions { - __le16 struct_size; - __le16 components_offset; - __u8 fuse[VERSION_MAX_LEN]; - __le16 components_mask; - __u8 components_counter; - __u8 reserved[1]; - struct hl_component_versions components[]; -}; -
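The pointer arithmetic in the kernel-doc above is easier to see in code. A hedged sketch, assuming fw_vers points at a fully received struct hl_fw_versions: each variable-sized entry is advanced by its own struct_size.

	struct hl_component_versions *comp = fw_vers->components;
	int i;

	for (i = 0; i < fw_vers->components_counter; i++) {
		/* use comp->component, comp->fw_os, comp->modules, ... */
		comp = (struct hl_component_versions *)
				((char *)comp + le16_to_cpu(comp->struct_size));
	}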
-/* Max size of struct hl_component_versions */ -#define HL_COMPONENT_VERSIONS_MAX_SIZE \ - (sizeof(struct hl_component_versions) + HL_MODULES_MAX_NUM * \ - VERSION_MAX_LEN) - -/* Max size of struct hl_fw_versions */ -#define HL_FW_VERSIONS_MAX_SIZE (sizeof(struct hl_fw_versions) + \ - HL_COMPONENTS_MAX_NUM * HL_COMPONENT_VERSIONS_MAX_SIZE) - -#endif /* HL_BOOT_IF_H */ diff --git a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h index 2dba02757d37..a2547f306750 100644 --- a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h +++ b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h @@ -44,38 +44,6 @@ struct eq_nic_sei_event { __u8 pad[6]; }; -/* - * struct gaudi_nic_status - describes the status of a NIC port. - * @port: NIC port index. - * @bad_format_cnt: e.g. CRC. - * @responder_out_of_sequence_psn_cnt: e.g. NAK. - * @high_ber_reinit: link reinit due to high BER. - * @correctable_err_cnt: e.g. bit-flip. - * @uncorrectable_err_cnt: e.g. MAC errors. - * @retraining_cnt: re-training counter. - * @up: is port up. - * @pcs_link: has PCS link. - * @phy_ready: is PHY ready. - * @auto_neg: is Autoneg enabled. - * @timeout_retransmission_cnt: timeout retransmission events - * @high_ber_cnt: high ber events - */ -struct gaudi_nic_status { - __u32 port; - __u32 bad_format_cnt; - __u32 responder_out_of_sequence_psn_cnt; - __u32 high_ber_reinit; - __u32 correctable_err_cnt; - __u32 uncorrectable_err_cnt; - __u32 retraining_cnt; - __u8 up; - __u8 pcs_link; - __u8 phy_ready; - __u8 auto_neg; - __u32 timeout_retransmission_cnt; - __u32 high_ber_cnt; -}; - struct gaudi_cold_rst_data { union { struct { diff --git a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h index 0bf3092bfeea..d21fcd3880b4 100644 --- a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h +++ b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2020-2022 HabanaLabs, Ltd. + * Copyright 2020-2023 HabanaLabs, Ltd. * All Rights Reserved. 
* */ @@ -164,6 +164,8 @@ #define mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR 0x4800040 +#define mmDCORE0_TPC0_EML_CFG_DBG_CNT 0x40000 + #define SM_OBJS_PROT_BITS_OFFS 0x14000 #define DCORE_OFFSET (mmDCORE1_TPC0_QM_BASE - mmDCORE0_TPC0_QM_BASE) @@ -185,7 +187,10 @@ #define TPC_CFG_STALL_ON_ERR_OFFSET (mmDCORE0_TPC0_CFG_STALL_ON_ERR - mmDCORE0_TPC0_CFG_BASE) #define TPC_CFG_TPC_INTR_MASK_OFFSET (mmDCORE0_TPC0_CFG_TPC_INTR_MASK - mmDCORE0_TPC0_CFG_BASE) #define TPC_CFG_MSS_CONFIG_OFFSET (mmDCORE0_TPC0_CFG_MSS_CONFIG - mmDCORE0_TPC0_CFG_BASE) +#define TPC_EML_CFG_DBG_CNT_OFFSET (mmDCORE0_TPC0_EML_CFG_DBG_CNT - mmDCORE0_TPC0_EML_CFG_BASE) +#define EDMA_CORE_CFG_STALL_OFFSET (mmDCORE0_EDMA0_CORE_CFG_1 - mmDCORE0_EDMA0_CORE_BASE) +#define MME_CTRL_LO_QM_STALL_OFFSET (mmDCORE0_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_BASE) #define MME_ACC_INTR_MASK_OFFSET (mmDCORE0_MME_ACC_INTR_MASK - mmDCORE0_MME_ACC_BASE) #define MME_ACC_WR_AXI_AGG_COUT0_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT0 - mmDCORE0_MME_ACC_BASE) #define MME_ACC_WR_AXI_AGG_COUT1_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT1 - mmDCORE0_MME_ACC_BASE) @@ -237,6 +242,18 @@ #define QM_FENCE2_OFFSET (mmPDMA0_QM_CP_FENCE2_RDATA_0 - mmPDMA0_QM_BASE) #define QM_SEI_STATUS_OFFSET (mmPDMA0_QM_SEI_STATUS - mmPDMA0_QM_BASE) +#define QM_CQ_TSIZE_STS_4_OFFSET (mmPDMA0_QM_CQ_TSIZE_STS_4 - mmPDMA0_QM_BASE) +#define QM_CQ_PTR_LO_STS_4_OFFSET (mmPDMA0_QM_CQ_PTR_LO_STS_4 - mmPDMA0_QM_BASE) +#define QM_CQ_PTR_HI_STS_4_OFFSET (mmPDMA0_QM_CQ_PTR_HI_STS_4 - mmPDMA0_QM_BASE) + +#define QM_ARC_CQ_TSIZE_STS_OFFSET (mmPDMA0_QM_ARC_CQ_TSIZE_STS - mmPDMA0_QM_BASE) +#define QM_ARC_CQ_PTR_LO_STS_OFFSET (mmPDMA0_QM_ARC_CQ_PTR_LO_STS - mmPDMA0_QM_BASE) +#define QM_ARC_CQ_PTR_HI_STS_OFFSET (mmPDMA0_QM_ARC_CQ_PTR_HI_STS - mmPDMA0_QM_BASE) + +#define QM_CP_STS_4_OFFSET (mmPDMA0_QM_CP_STS_4 - mmPDMA0_QM_BASE) +#define QM_CP_CURRENT_INST_LO_4_OFFSET (mmPDMA0_QM_CP_CURRENT_INST_LO_4 - mmPDMA0_QM_BASE) +#define QM_CP_CURRENT_INST_HI_4_OFFSET (mmPDMA0_QM_CP_CURRENT_INST_HI_4 - mmPDMA0_QM_BASE) + #define SFT_OFFSET (mmSFT1_HBW_RTR_IF0_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) #define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) @@ -538,6 +555,8 @@ #define HBM_MC_SPI_IEEE1500_COMP_MASK BIT(3) #define HBM_MC_SPI_IEEE1500_PAUSED_MASK BIT(4) +#define ARC_FARM_OFFSET (mmARC_FARM_ARC1_AUX_BASE - mmARC_FARM_ARC0_AUX_BASE) + #include "nic0_qpc0_regs.h" #include "nic0_qm0_regs.h" #include "nic0_qm_arc_aux0_regs.h" diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h index 5b4f9e108798..753d46a2836b 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h @@ -63,7 +63,9 @@ #define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START 0xFFF0F80000000000ull #define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END 0xFFF0FFFFFFFFFFFFull -#define GAUDI2_MSIX_ENTRIES 512 +#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 127 + +#define GAUDI2_MSIX_ENTRIES 128 #define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h index 50852cc80373..a426410139af 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2018-2021 HabanaLabs, Ltd. + * Copyright 2018-2022 HabanaLabs, Ltd. 
* All Rights Reserved. * */ @@ -958,7 +958,14 @@ enum gaudi2_async_event_id { GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318, GAUDI2_EVENT_ARC_DCCM_FULL = 1319, GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320, - GAUDI2_EVENT_DEV_RESET_REQ = 1321, + GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321, + GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322, + GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323, + GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324, + GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325, + GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326, + GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327, + GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328, GAUDI2_EVENT_SIZE, }; diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h index 82be01bea98e..82d639990cca 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h @@ -10,6 +10,12 @@ ** DO NOT EDIT BELOW ** ************************************/ +enum event_reset_type { + EVENT_RESET_TYPE_NONE, + EVENT_RESET_TYPE_COMPUTE, + EVENT_RESET_TYPE_HARD, +}; + #ifndef __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ #define __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ @@ -23,2650 +29,2664 @@ struct gaudi2_async_events_ids_map { }; static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { - { .fc_id = 0, .cpu_id = 0, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1, .cpu_id = 1, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 2, .cpu_id = 2, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 3, .cpu_id = 3, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 4, .cpu_id = 4, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 5, .cpu_id = 5, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 6, .cpu_id = 6, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 7, .cpu_id = 7, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 8, .cpu_id = 8, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 9, .cpu_id = 9, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 10, .cpu_id = 10, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 11, .cpu_id = 11, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 12, .cpu_id = 12, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 13, .cpu_id = 13, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 14, .cpu_id = 14, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 15, .cpu_id = 15, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 16, .cpu_id = 16, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 17, .cpu_id = 17, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 18, .cpu_id = 18, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 19, .cpu_id = 19, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 20, .cpu_id = 20, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 21, .cpu_id = 21, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 22, .cpu_id = 22, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 23, .cpu_id = 23, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 24, .cpu_id = 24, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 25, .cpu_id = 25, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 26, .cpu_id = 26, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 27, .cpu_id = 27, .valid 
= 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 28, .cpu_id = 28, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 29, .cpu_id = 29, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 30, .cpu_id = 30, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 31, .cpu_id = 31, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 32, .cpu_id = 32, .valid = 1, - .msg = 0, .reset = 0, .name = "PCIE_CORE_SERR" }, - { .fc_id = 33, .cpu_id = 33, .valid = 1, - .msg = 0, .reset = 1, .name = "PCIE_CORE_DERR" }, - { .fc_id = 34, .cpu_id = 34, .valid = 1, - .msg = 0, .reset = 0, .name = "PCIE_IF_SERR" }, - { .fc_id = 35, .cpu_id = 35, .valid = 1, - .msg = 0, .reset = 1, .name = "PCIE_IF_DERR" }, - { .fc_id = 36, .cpu_id = 36, .valid = 1, - .msg = 0, .reset = 0, .name = "PCIE_PHY_SERR" }, - { .fc_id = 37, .cpu_id = 37, .valid = 1, - .msg = 0, .reset = 1, .name = "PCIE_PHY_DERR" }, - { .fc_id = 38, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC0_ECC_SERR" }, - { .fc_id = 39, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC1_ECC_SERR" }, - { .fc_id = 40, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC2_ECC_SERR" }, - { .fc_id = 41, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC3_ECC_SERR" }, - { .fc_id = 42, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC4_ECC_SERR" }, - { .fc_id = 43, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC5_ECC_SERR" }, - { .fc_id = 44, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC6_ECC_SERR" }, - { .fc_id = 45, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC7_ECC_SERR" }, - { .fc_id = 46, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC8_ECC_SERR" }, - { .fc_id = 47, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC9_ECC_SERR" }, - { .fc_id = 48, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC10_ECC_SERR" }, - { .fc_id = 49, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC11_ECC_SERR" }, - { .fc_id = 50, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC12_ECC_SERR" }, - { .fc_id = 51, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC13_ECC_SERR" }, - { .fc_id = 52, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC14_ECC_SERR" }, - { .fc_id = 53, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC15_ECC_SERR" }, - { .fc_id = 54, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC16_ECC_SERR" }, - { .fc_id = 55, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC17_ECC_SERR" }, - { .fc_id = 56, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC18_ECC_SERR" }, - { .fc_id = 57, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC19_ECC_SERR" }, - { .fc_id = 58, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC20_ECC_SERR" }, - { .fc_id = 59, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC21_ECC_SERR" }, - { .fc_id = 60, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC22_ECC_SERR" }, - { .fc_id = 61, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC23_ECC_SERR" }, - { .fc_id = 62, .cpu_id = 38, .valid = 1, - .msg = 0, .reset = 0, .name = "TPC24_ECC_SERR" }, - { .fc_id = 63, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC0_ECC_DERR" }, - { .fc_id = 64, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC1_ECC_DERR" }, - { .fc_id = 65, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, 
.name = "TPC2_ECC_DERR" }, - { .fc_id = 66, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC3_ECC_DERR" }, - { .fc_id = 67, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC4_ECC_DERR" }, - { .fc_id = 68, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC5_ECC_DERR" }, - { .fc_id = 69, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC6_ECC_DERR" }, - { .fc_id = 70, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC7_ECC_DERR" }, - { .fc_id = 71, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC8_ECC_DERR" }, - { .fc_id = 72, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC9_ECC_DERR" }, - { .fc_id = 73, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC10_ECC_DERR" }, - { .fc_id = 74, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC11_ECC_DERR" }, - { .fc_id = 75, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC12_ECC_DERR" }, - { .fc_id = 76, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC13_ECC_DERR" }, - { .fc_id = 77, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC14_ECC_DERR" }, - { .fc_id = 78, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC15_ECC_DERR" }, - { .fc_id = 79, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC16_ECC_DERR" }, - { .fc_id = 80, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC17_ECC_DERR" }, - { .fc_id = 81, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC18_ECC_DERR" }, - { .fc_id = 82, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC19_ECC_DERR" }, - { .fc_id = 83, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC20_ECC_DERR" }, - { .fc_id = 84, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC21_ECC_DERR" }, - { .fc_id = 85, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC22_ECC_DERR" }, - { .fc_id = 86, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC23_ECC_DERR" }, - { .fc_id = 87, .cpu_id = 39, .valid = 1, - .msg = 0, .reset = 1, .name = "TPC24_ECC_DERR" }, - { .fc_id = 88, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_SBTE0_ECC_SERR" }, - { .fc_id = 89, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_SBTE1_ECC_SERR" }, - { .fc_id = 90, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_SBTE2_ECC_SERR" }, - { .fc_id = 91, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_SBTE3_ECC_SERR" }, - { .fc_id = 92, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_SBTE4_ECC_SERR" }, - { .fc_id = 93, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_CTRL_ECC_SERR" }, - { .fc_id = 94, .cpu_id = 40, .valid = 1, - .msg = 0, .reset = 0, .name = "MME0_WAP_ECC_SERR" }, - { .fc_id = 95, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_SBTE0_ECC_SERR" }, - { .fc_id = 96, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_SBTE1_ECC_SERR" }, - { .fc_id = 97, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_SBTE2_ECC_SERR" }, - { .fc_id = 98, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_SBTE3_ECC_SERR" }, - { .fc_id = 99, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_SBTE4_ECC_SERR" }, - { .fc_id = 100, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_CTRL_ECC_SERR" }, - { .fc_id = 101, .cpu_id = 41, .valid = 1, - .msg = 0, .reset = 0, .name = "MME1_WAP_ECC_SERR" }, - { .fc_id = 102, .cpu_id = 42, .valid = 1, - 
.msg = 0, .reset = 0, .name = "MME2_SBTE0_ECC_SERR" }, - { .fc_id = 103, .cpu_id = 42, .valid = 1, - .msg = 0, .reset = 0, .name = "MME2_SBTE1_ECC_SERR" }, - { .fc_id = 104, .cpu_id = 42, .valid = 1, - .msg = 0, .reset = 0, .name = "MME2_SBTE2_ECC_SERR" }, - { .fc_id = 105, .cpu_id = 42, .valid = 1, - .msg = 0, .reset = 0, .name = "MME2_SBTE3_ECC_SERR" }, - { .fc_id = 106, .cpu_id = 42, .valid = 1, - .msg = 0, .reset = 0, .name = "MME2_SBTE4_ECC_SERR" }, - { .fc_id = 107, .cpu_id = 42, .valid = 1, - .msg = 0, .reset = 0, .name = "MME2_CTRL_ECC_SERR" }, - { .fc_id = 108, .cpu_id = 42, .valid = 1, - .msg = 0, .reset = 0, .name = "MME2_WAP_ECC_SERR" }, - { .fc_id = 109, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_SBTE0_ECC_SERR" }, - { .fc_id = 110, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_SBTE1_ECC_SERR" }, - { .fc_id = 111, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_SBTE2_ECC_SERR" }, - { .fc_id = 112, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_SBTE3_ECC_SERR" }, - { .fc_id = 113, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_SBTE4_ECC_SERR" }, - { .fc_id = 114, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_CTRL_ECC_SERR" }, - { .fc_id = 115, .cpu_id = 43, .valid = 1, - .msg = 0, .reset = 0, .name = "MME3_WAP_ECC_SERR" }, - { .fc_id = 116, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_SBTE0_ECC_DERR" }, - { .fc_id = 117, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_SBTE1_ECC_DERR" }, - { .fc_id = 118, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_SBTE2_ECC_DERR" }, - { .fc_id = 119, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_SBTE3_ECC_DERR" }, - { .fc_id = 120, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_SBTE4_ECC_DERR" }, - { .fc_id = 121, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_CTRL_ECC_DERR" }, - { .fc_id = 122, .cpu_id = 44, .valid = 1, - .msg = 0, .reset = 1, .name = "MME0_WAP_ECC_DERR" }, - { .fc_id = 123, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_SBTE0_ECC_DERR" }, - { .fc_id = 124, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_SBTE1_ECC_DERR" }, - { .fc_id = 125, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_SBTE2_ECC_DERR" }, - { .fc_id = 126, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_SBTE3_ECC_DERR" }, - { .fc_id = 127, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_SBTE4_ECC_DERR" }, - { .fc_id = 128, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_CTRL_ECC_DERR" }, - { .fc_id = 129, .cpu_id = 45, .valid = 1, - .msg = 0, .reset = 1, .name = "MME1_WAP_ECC_DERR" }, - { .fc_id = 130, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_SBTE0_ECC_DERR" }, - { .fc_id = 131, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_SBTE1_ECC_DERR" }, - { .fc_id = 132, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_SBTE2_ECC_DERR" }, - { .fc_id = 133, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_SBTE3_ECC_DERR" }, - { .fc_id = 134, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_SBTE4_ECC_DERR" }, - { .fc_id = 135, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_CTRL_ECC_DERR" }, - { .fc_id = 136, .cpu_id = 46, .valid = 1, - .msg = 0, .reset = 1, .name = "MME2_WAP_ECC_DERR" }, - { .fc_id = 137, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = 
"MME3_SBTE0_ECC_DERR" }, - { .fc_id = 138, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = "MME3_SBTE1_ECC_DERR" }, - { .fc_id = 139, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = "MME3_SBTE2_ECC_DERR" }, - { .fc_id = 140, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = "MME3_SBTE3_ECC_DERR" }, - { .fc_id = 141, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = "MME3_SBTE4_ECC_DERR" }, - { .fc_id = 142, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = "MME3_CTRL_ECC_DERR" }, - { .fc_id = 143, .cpu_id = 47, .valid = 1, - .msg = 0, .reset = 1, .name = "MME3_WAP_ECC_DERR" }, - { .fc_id = 144, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA2_ECC_SERR" }, - { .fc_id = 145, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA3_ECC_SERR" }, - { .fc_id = 146, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA0_ECC_SERR" }, - { .fc_id = 147, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA1_ECC_SERR" }, - { .fc_id = 148, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA6_ECC_SERR" }, - { .fc_id = 149, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA7_ECC_SERR" }, - { .fc_id = 150, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA4_ECC_SERR" }, - { .fc_id = 151, .cpu_id = 48, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA5_ECC_SERR" }, - { .fc_id = 152, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA2_ECC_DERR" }, - { .fc_id = 153, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA3_ECC_DERR" }, - { .fc_id = 154, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA0_ECC_DERR" }, - { .fc_id = 155, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA1_ECC_DERR" }, - { .fc_id = 156, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA6_ECC_DERR" }, - { .fc_id = 157, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA7_ECC_DERR" }, - { .fc_id = 158, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA4_ECC_DERR" }, - { .fc_id = 159, .cpu_id = 49, .valid = 1, - .msg = 0, .reset = 1, .name = "HDMA5_ECC_DERR" }, - { .fc_id = 160, .cpu_id = 50, .valid = 1, - .msg = 0, .reset = 0, .name = "KDMA0_ECC_SERR" }, - { .fc_id = 161, .cpu_id = 51, .valid = 1, - .msg = 0, .reset = 0, .name = "PDMA0_ECC_SERR" }, - { .fc_id = 162, .cpu_id = 51, .valid = 1, - .msg = 0, .reset = 0, .name = "PDMA1_ECC_SERR" }, - { .fc_id = 163, .cpu_id = 52, .valid = 1, - .msg = 0, .reset = 1, .name = "KDMA0_ECC_DERR" }, - { .fc_id = 164, .cpu_id = 53, .valid = 1, - .msg = 0, .reset = 1, .name = "PDMA0_ECC_DERR" }, - { .fc_id = 165, .cpu_id = 53, .valid = 1, - .msg = 0, .reset = 1, .name = "PDMA1_ECC_DERR" }, - { .fc_id = 166, .cpu_id = 54, .valid = 1, - .msg = 0, .reset = 0, .name = "CPU_IF_ECC_SERR" }, - { .fc_id = 167, .cpu_id = 55, .valid = 1, - .msg = 0, .reset = 1, .name = "CPU_IF_ECC_DERR" }, - { .fc_id = 168, .cpu_id = 56, .valid = 1, - .msg = 0, .reset = 0, .name = "PSOC_MEM_SERR" }, - { .fc_id = 169, .cpu_id = 57, .valid = 1, - .msg = 0, .reset = 1, .name = "PSOC_MEM_DERR" }, - { .fc_id = 170, .cpu_id = 58, .valid = 1, - .msg = 0, .reset = 0, .name = "SRAM0_ECC_SERR" }, - { .fc_id = 171, .cpu_id = 58, .valid = 1, - .msg = 0, .reset = 0, .name = "SRAM1_ECC_SERR" }, - { .fc_id = 172, .cpu_id = 58, .valid = 1, - .msg = 0, .reset = 0, .name = "SRAM2_ECC_SERR" }, - { .fc_id = 173, .cpu_id = 58, .valid = 1, - .msg = 0, .reset = 0, .name = "SRAM3_ECC_SERR" }, - { .fc_id = 174, .cpu_id = 58, .valid = 1, - 
-		.msg = 0, .reset = 0, .name = "SRAM4_ECC_SERR" },
-	{ .fc_id = 175, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM5_ECC_SERR" },
-	{ .fc_id = 176, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM6_ECC_SERR" },
-	{ .fc_id = 177, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM7_ECC_SERR" },
-	{ .fc_id = 178, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM8_ECC_SERR" },
-	{ .fc_id = 179, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM9_ECC_SERR" },
-	{ .fc_id = 180, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM10_ECC_SERR" },
-	{ .fc_id = 181, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM11_ECC_SERR" },
-	{ .fc_id = 182, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM12_ECC_SERR" },
-	{ .fc_id = 183, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM13_ECC_SERR" },
-	{ .fc_id = 184, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM14_ECC_SERR" },
-	{ .fc_id = 185, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM15_ECC_SERR" },
-	{ .fc_id = 186, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM16_ECC_SERR" },
-	{ .fc_id = 187, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM17_ECC_SERR" },
-	{ .fc_id = 188, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM18_ECC_SERR" },
-	{ .fc_id = 189, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM19_ECC_SERR" },
-	{ .fc_id = 190, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM20_ECC_SERR" },
-	{ .fc_id = 191, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM21_ECC_SERR" },
-	{ .fc_id = 192, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM22_ECC_SERR" },
-	{ .fc_id = 193, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM23_ECC_SERR" },
-	{ .fc_id = 194, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM24_ECC_SERR" },
-	{ .fc_id = 195, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM25_ECC_SERR" },
-	{ .fc_id = 196, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM26_ECC_SERR" },
-	{ .fc_id = 197, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM27_ECC_SERR" },
-	{ .fc_id = 198, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM28_ECC_SERR" },
-	{ .fc_id = 199, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM29_ECC_SERR" },
-	{ .fc_id = 200, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM30_ECC_SERR" },
-	{ .fc_id = 201, .cpu_id = 58, .valid = 1, .msg = 0, .reset = 0, .name = "SRAM31_ECC_SERR" },
-	{ .fc_id = 202, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM0_ECC_DERR" },
-	{ .fc_id = 203, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM1_ECC_DERR" },
-	{ .fc_id = 204, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM2_ECC_DERR" },
-	{ .fc_id = 205, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM3_ECC_DERR" },
-	{ .fc_id = 206, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM4_ECC_DERR" },
-	{ .fc_id = 207, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM5_ECC_DERR" },
-	{ .fc_id = 208, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM6_ECC_DERR" },
-	{ .fc_id = 209, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM7_ECC_DERR" },
-	{ .fc_id = 210, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM8_ECC_DERR" },
-	{ .fc_id = 211, .cpu_id
		= 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM9_ECC_DERR" },
-	{ .fc_id = 212, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM10_ECC_DERR" },
-	{ .fc_id = 213, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM11_ECC_DERR" },
-	{ .fc_id = 214, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM12_ECC_DERR" },
-	{ .fc_id = 215, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM13_ECC_DERR" },
-	{ .fc_id = 216, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM14_ECC_DERR" },
-	{ .fc_id = 217, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM15_ECC_DERR" },
-	{ .fc_id = 218, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM16_ECC_DERR" },
-	{ .fc_id = 219, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM17_ECC_DERR" },
-	{ .fc_id = 220, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM18_ECC_DERR" },
-	{ .fc_id = 221, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM19_ECC_DERR" },
-	{ .fc_id = 222, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM20_ECC_DERR" },
-	{ .fc_id = 223, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM21_ECC_DERR" },
-	{ .fc_id = 224, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM22_ECC_DERR" },
-	{ .fc_id = 225, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM23_ECC_DERR" },
-	{ .fc_id = 226, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM24_ECC_DERR" },
-	{ .fc_id = 227, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM25_ECC_DERR" },
-	{ .fc_id = 228, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM26_ECC_DERR" },
-	{ .fc_id = 229, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM27_ECC_DERR" },
-	{ .fc_id = 230, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM28_ECC_DERR" },
-	{ .fc_id = 231, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM29_ECC_DERR" },
-	{ .fc_id = 232, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM30_ECC_DERR" },
-	{ .fc_id = 233, .cpu_id = 59, .valid = 1, .msg = 0, .reset = 1, .name = "SRAM31_ECC_DERR" },
-	{ .fc_id = 234, .cpu_id = 60, .valid = 1, .msg = 0, .reset = 1, .name = "GIC500" },
-	{ .fc_id = 235, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_0_MC0_ECC_SERR" },
-	{ .fc_id = 236, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_1_MC0_ECC_SERR" },
-	{ .fc_id = 237, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_2_MC0_ECC_SERR" },
-	{ .fc_id = 238, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_3_MC0_ECC_SERR" },
-	{ .fc_id = 239, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_4_MC0_ECC_SERR" },
-	{ .fc_id = 240, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_5_MC0_ECC_SERR" },
-	{ .fc_id = 241, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_0_MC1_ECC_SERR" },
-	{ .fc_id = 242, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_1_MC1_ECC_SERR" },
-	{ .fc_id = 243, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_2_MC1_ECC_SERR" },
-	{ .fc_id = 244, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_3_MC1_ECC_SERR" },
-	{ .fc_id = 245, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_4_MC1_ECC_SERR" },
-	{ .fc_id = 246, .cpu_id = 61, .valid = 1, .msg = 0, .reset = 0, .name = "HBM_5_MC1_ECC_SERR" },
-	{ .fc_id = 247, .cpu_id = 62, .valid = 1, .msg = 0, .reset
		= 1, .name = "HBM_0_MC0_ECC_DERR" },
-	{ .fc_id = 248, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_1_MC0_ECC_DERR" },
-	{ .fc_id = 249, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_2_MC0_ECC_DERR" },
-	{ .fc_id = 250, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_3_MC0_ECC_DERR" },
-	{ .fc_id = 251, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_4_MC0_ECC_DERR" },
-	{ .fc_id = 252, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_5_MC0_ECC_DERR" },
-	{ .fc_id = 253, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_0_MC1_ECC_DERR" },
-	{ .fc_id = 254, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_1_MC1_ECC_DERR" },
-	{ .fc_id = 255, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_2_MC1_ECC_DERR" },
-	{ .fc_id = 256, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_3_MC1_ECC_DERR" },
-	{ .fc_id = 257, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_4_MC1_ECC_DERR" },
-	{ .fc_id = 258, .cpu_id = 62, .valid = 1, .msg = 0, .reset = 1, .name = "HBM_5_MC1_ECC_DERR" },
-	{ .fc_id = 259, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_0_ECC_SERR" },
-	{ .fc_id = 260, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_1_ECC_SERR" },
-	{ .fc_id = 261, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_2_ECC_SERR" },
-	{ .fc_id = 262, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_3_ECC_SERR" },
-	{ .fc_id = 263, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_8_ECC_SERR" },
-	{ .fc_id = 264, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_9_ECC_SERR" },
-	{ .fc_id = 265, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_10_ECC_SERR" },
-	{ .fc_id = 266, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_11_ECC_SERR" },
-	{ .fc_id = 267, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_7_ECC_SERR" },
-	{ .fc_id = 268, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_6_ECC_SERR" },
-	{ .fc_id = 269, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_5_ECC_SERR" },
-	{ .fc_id = 270, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_4_ECC_SERR" },
-	{ .fc_id = 271, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_15_ECC_SERR" },
-	{ .fc_id = 272, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_14_ECC_SERR" },
-	{ .fc_id = 273, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_13_ECC_SERR" },
-	{ .fc_id = 274, .cpu_id = 63, .valid = 1, .msg = 0, .reset = 0, .name = "HMMU_12_ECC_SERR" },
-	{ .fc_id = 275, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_0_ECC_DERR" },
-	{ .fc_id = 276, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_1_ECC_DERR" },
-	{ .fc_id = 277, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_2_ECC_DERR" },
-	{ .fc_id = 278, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_3_ECC_DERR" },
-	{ .fc_id = 279, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_8_ECC_DERR" },
-	{ .fc_id = 280, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_9_ECC_DERR" },
-	{ .fc_id = 281, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_10_ECC_DERR" },
-	{ .fc_id = 282, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_11_ECC_DERR" },
-	{ .fc_id = 283, .cpu_id = 64, .valid = 1, .msg = 0, .reset = 1, .name =
"HMMU_7_ECC_DERR" }, - { .fc_id = 284, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_6_ECC_DERR" }, - { .fc_id = 285, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_5_ECC_DERR" }, - { .fc_id = 286, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_4_ECC_DERR" }, - { .fc_id = 287, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_15_ECC_DERR" }, - { .fc_id = 288, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_14_ECC_DERR" }, - { .fc_id = 289, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_13_ECC_DERR" }, - { .fc_id = 290, .cpu_id = 64, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU_12_ECC_DERR" }, - { .fc_id = 291, .cpu_id = 65, .valid = 1, - .msg = 0, .reset = 0, .name = "PMMU_ECC_SERR" }, - { .fc_id = 292, .cpu_id = 66, .valid = 1, - .msg = 0, .reset = 1, .name = "PMMU_ECC_DERR" }, - { .fc_id = 293, .cpu_id = 67, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 294, .cpu_id = 68, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 295, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC0_VCD_ECC_SERR" }, - { .fc_id = 296, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC1_VCD_ECC_SERR" }, - { .fc_id = 297, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC2_VCD_ECC_SERR" }, - { .fc_id = 298, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC3_VCD_ECC_SERR" }, - { .fc_id = 299, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC4_VCD_ECC_SERR" }, - { .fc_id = 300, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC5_VCD_ECC_SERR" }, - { .fc_id = 301, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC6_VCD_ECC_SERR" }, - { .fc_id = 302, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC7_VCD_ECC_SERR" }, - { .fc_id = 303, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC8_VCD_ECC_SERR" }, - { .fc_id = 304, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC9_VCD_ECC_SERR" }, - { .fc_id = 305, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC0_L2C_ECC_SERR" }, - { .fc_id = 306, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC1_L2C_ECC_SERR" }, - { .fc_id = 307, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC2_L2C_ECC_SERR" }, - { .fc_id = 308, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC3_L2C_ECC_SERR" }, - { .fc_id = 309, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC4_L2C_ECC_SERR" }, - { .fc_id = 310, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC5_L2C_ECC_SERR" }, - { .fc_id = 311, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC6_L2C_ECC_SERR" }, - { .fc_id = 312, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC7_L2C_ECC_SERR" }, - { .fc_id = 313, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC8_L2C_ECC_SERR" }, - { .fc_id = 314, .cpu_id = 69, .valid = 1, - .msg = 0, .reset = 0, .name = "DEC9_L2C_ECC_SERR" }, - { .fc_id = 315, .cpu_id = 70, .valid = 1, - .msg = 0, .reset = 1, .name = "DEC0_VCD_ECC_DERR" }, - { .fc_id = 316, .cpu_id = 70, .valid = 1, - .msg = 0, .reset = 1, .name = "DEC1_VCD_ECC_DERR" }, - { .fc_id = 317, .cpu_id = 70, .valid = 1, - .msg = 0, .reset = 1, .name = "DEC2_VCD_ECC_DERR" }, - { .fc_id = 318, .cpu_id = 70, .valid = 1, - .msg = 0, .reset = 1, .name = "DEC3_VCD_ECC_DERR" }, - { .fc_id = 319, .cpu_id = 70, .valid = 1, - .msg = 0, .reset = 1, .name = "DEC4_VCD_ECC_DERR" }, - { .fc_id = 320, 
		.cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC5_VCD_ECC_DERR" },
-	{ .fc_id = 321, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC6_VCD_ECC_DERR" },
-	{ .fc_id = 322, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC7_VCD_ECC_DERR" },
-	{ .fc_id = 323, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC8_VCD_ECC_DERR" },
-	{ .fc_id = 324, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC9_VCD_ECC_DERR" },
-	{ .fc_id = 325, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC0_L2C_ECC_DERR" },
-	{ .fc_id = 326, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC1_L2C_ECC_DERR" },
-	{ .fc_id = 327, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC2_L2C_ECC_DERR" },
-	{ .fc_id = 328, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC3_L2C_ECC_DERR" },
-	{ .fc_id = 329, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC4_L2C_ECC_DERR" },
-	{ .fc_id = 330, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC5_L2C_ECC_DERR" },
-	{ .fc_id = 331, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC6_L2C_ECC_DERR" },
-	{ .fc_id = 332, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC7_L2C_ECC_DERR" },
-	{ .fc_id = 333, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC8_L2C_ECC_DERR" },
-	{ .fc_id = 334, .cpu_id = 70, .valid = 1, .msg = 0, .reset = 1, .name = "DEC9_L2C_ECC_DERR" },
-	{ .fc_id = 335, .cpu_id = 71, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 336, .cpu_id = 72, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 337, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF0_ECC_SERR" },
-	{ .fc_id = 338, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF1_ECC_SERR" },
-	{ .fc_id = 339, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF2_ECC_SERR" },
-	{ .fc_id = 340, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF3_ECC_SERR" },
-	{ .fc_id = 341, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF8_ECC_SERR" },
-	{ .fc_id = 342, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF9_ECC_SERR" },
-	{ .fc_id = 343, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF10_ECC_SERR" },
-	{ .fc_id = 344, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF11_ECC_SERR" },
-	{ .fc_id = 345, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF7_ECC_SERR" },
-	{ .fc_id = 346, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF6_ECC_SERR" },
-	{ .fc_id = 347, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF5_ECC_SERR" },
-	{ .fc_id = 348, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF4_ECC_SERR" },
-	{ .fc_id = 349, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF15_ECC_SERR" },
-	{ .fc_id = 350, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF14_ECC_SERR" },
-	{ .fc_id = 351, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF13_ECC_SERR" },
-	{ .fc_id = 352, .cpu_id = 73, .valid = 1, .msg = 0, .reset = 0, .name = "HIF12_ECC_SERR" },
-	{ .fc_id = 353, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF0_ECC_DERR" },
-	{ .fc_id = 354, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF1_ECC_DERR" },
-	{ .fc_id = 355, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF2_ECC_DERR" },
-	{ .fc_id = 356, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF3_ECC_DERR" },
-	{ .fc_id = 357,
		.cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF8_ECC_DERR" },
-	{ .fc_id = 358, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF9_ECC_DERR" },
-	{ .fc_id = 359, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF10_ECC_DERR" },
-	{ .fc_id = 360, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF11_ECC_DERR" },
-	{ .fc_id = 361, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF7_ECC_DERR" },
-	{ .fc_id = 362, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF6_ECC_DERR" },
-	{ .fc_id = 363, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF5_ECC_DERR" },
-	{ .fc_id = 364, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF4_ECC_DERR" },
-	{ .fc_id = 365, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF15_ECC_DERR" },
-	{ .fc_id = 366, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF14_ECC_DERR" },
-	{ .fc_id = 367, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF13_ECC_DERR" },
-	{ .fc_id = 368, .cpu_id = 74, .valid = 1, .msg = 0, .reset = 1, .name = "HIF12_ECC_DERR" },
-	{ .fc_id = 369, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC0_ECC_SERR" },
-	{ .fc_id = 370, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC1_ECC_SERR" },
-	{ .fc_id = 371, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC2_ECC_SERR" },
-	{ .fc_id = 372, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC3_ECC_SERR" },
-	{ .fc_id = 373, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC4_ECC_SERR" },
-	{ .fc_id = 374, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC5_ECC_SERR" },
-	{ .fc_id = 375, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC6_ECC_SERR" },
-	{ .fc_id = 376, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC7_ECC_SERR" },
-	{ .fc_id = 377, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC8_ECC_SERR" },
-	{ .fc_id = 378, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC9_ECC_SERR" },
-	{ .fc_id = 379, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC10_ECC_SERR" },
-	{ .fc_id = 380, .cpu_id = 75, .valid = 1, .msg = 0, .reset = 0, .name = "NIC11_ECC_SERR" },
-	{ .fc_id = 381, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC0_ECC_DERR" },
-	{ .fc_id = 382, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC1_ECC_DERR" },
-	{ .fc_id = 383, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC2_ECC_DERR" },
-	{ .fc_id = 384, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC3_ECC_DERR" },
-	{ .fc_id = 385, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC4_ECC_DERR" },
-	{ .fc_id = 386, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC5_ECC_DERR" },
-	{ .fc_id = 387, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC6_ECC_DERR" },
-	{ .fc_id = 388, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC7_ECC_DERR" },
-	{ .fc_id = 389, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC8_ECC_DERR" },
-	{ .fc_id = 390, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC9_ECC_DERR" },
-	{ .fc_id = 391, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC10_ECC_DERR" },
-	{ .fc_id = 392, .cpu_id = 76, .valid = 1, .msg = 0, .reset = 1, .name = "NIC11_ECC_DERR" },
-	{ .fc_id = 393, .cpu_id = 77, .valid = 1, .msg = 0, .reset = 1, .name = "SM0_ECC_DERR" },
-	{ .fc_id = 394, .cpu_id = 77, .valid = 1,
-		.msg = 0, .reset = 1, .name = "SM1_ECC_DERR" },
-	{ .fc_id = 395, .cpu_id = 77, .valid = 1, .msg = 0, .reset = 1, .name = "SM2_ECC_DERR" },
-	{ .fc_id = 396, .cpu_id = 77, .valid = 1, .msg = 0, .reset = 1, .name = "SM3_ECC_DERR" },
-	{ .fc_id = 397, .cpu_id = 78, .valid = 1, .msg = 0, .reset = 0, .name = "SM0_ECC_SERR" },
-	{ .fc_id = 398, .cpu_id = 78, .valid = 1, .msg = 0, .reset = 0, .name = "SM1_ECC_SERR" },
-	{ .fc_id = 399, .cpu_id = 78, .valid = 1, .msg = 0, .reset = 0, .name = "SM2_ECC_SERR" },
-	{ .fc_id = 400, .cpu_id = 78, .valid = 1, .msg = 0, .reset = 0, .name = "SM3_ECC_SERR" },
-	{ .fc_id = 401, .cpu_id = 79, .valid = 1, .msg = 0, .reset = 0, .name = "XBAR0_ECC_SERR" },
-	{ .fc_id = 402, .cpu_id = 79, .valid = 1, .msg = 0, .reset = 0, .name = "XBAR1_ECC_SERR" },
-	{ .fc_id = 403, .cpu_id = 79, .valid = 1, .msg = 0, .reset = 0, .name = "XBAR2_ECC_SERR" },
-	{ .fc_id = 404, .cpu_id = 79, .valid = 1, .msg = 0, .reset = 0, .name = "XBAR3_ECC_SERR" },
-	{ .fc_id = 405, .cpu_id = 80, .valid = 1, .msg = 0, .reset = 1, .name = "XBAR0_ECC_DERR" },
-	{ .fc_id = 406, .cpu_id = 80, .valid = 1, .msg = 0, .reset = 1, .name = "XBAR1_ECC_DERR" },
-	{ .fc_id = 407, .cpu_id = 80, .valid = 1, .msg = 0, .reset = 1, .name = "XBAR2_ECC_DERR" },
-	{ .fc_id = 408, .cpu_id = 80, .valid = 1, .msg = 0, .reset = 1, .name = "XBAR3_ECC_DERR" },
-	{ .fc_id = 409, .cpu_id = 81, .valid = 1, .msg = 0, .reset = 0, .name = "ARC0_ECC_SERR" },
-	{ .fc_id = 410, .cpu_id = 82, .valid = 1, .msg = 0, .reset = 1, .name = "ARC0_ECC_DERR" },
-	{ .fc_id = 411, .cpu_id = 83, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 412, .cpu_id = 84, .valid = 1, .msg = 0, .reset = 1, .name = "PCIE_ADDR_DEC_ERR" },
-	{ .fc_id = 413, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC0_AXI_ERR_RSP" },
-	{ .fc_id = 414, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC1_AXI_ERR_RSP" },
-	{ .fc_id = 415, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC2_AXI_ERR_RSP" },
-	{ .fc_id = 416, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC3_AXI_ERR_RSP" },
-	{ .fc_id = 417, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC4_AXI_ERR_RSP" },
-	{ .fc_id = 418, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC5_AXI_ERR_RSP" },
-	{ .fc_id = 419, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC6_AXI_ERR_RSP" },
-	{ .fc_id = 420, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC7_AXI_ERR_RSP" },
-	{ .fc_id = 421, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC8_AXI_ERR_RSP" },
-	{ .fc_id = 422, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC9_AXI_ERR_RSP" },
-	{ .fc_id = 423, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC10_AXI_ERR_RSP" },
-	{ .fc_id = 424, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC11_AXI_ERR_RSP" },
-	{ .fc_id = 425, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC12_AXI_ERR_RSP" },
-	{ .fc_id = 426, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC13_AXI_ERR_RSP" },
-	{ .fc_id = 427, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC14_AXI_ERR_RSP" },
-	{ .fc_id = 428, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC15_AXI_ERR_RSP" },
-	{ .fc_id = 429, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC16_AXI_ERR_RSP" },
-	{ .fc_id = 430, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC17_AXI_ERR_RSP" },
-	{ .fc_id = 431, .cpu_id = 85,
		.valid = 1, .msg = 0, .reset = 1, .name = "TPC18_AXI_ERR_RSP" },
-	{ .fc_id = 432, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC19_AXI_ERR_RSP" },
-	{ .fc_id = 433, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC20_AXI_ERR_RSP" },
-	{ .fc_id = 434, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC21_AXI_ERR_RSP" },
-	{ .fc_id = 435, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC22_AXI_ERR_RSP" },
-	{ .fc_id = 436, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC23_AXI_ERR_RSP" },
-	{ .fc_id = 437, .cpu_id = 85, .valid = 1, .msg = 0, .reset = 1, .name = "TPC24_AXI_ERR_RSP" },
-	{ .fc_id = 438, .cpu_id = 86, .valid = 1, .msg = 0, .reset = 1, .name = "AXI_ECC" },
-	{ .fc_id = 439, .cpu_id = 87, .valid = 1, .msg = 0, .reset = 1, .name = "L2_RAM_ECC" },
-	{ .fc_id = 440, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_SBTE0_AXI_ERR_RSP" },
-	{ .fc_id = 441, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_SBTE1_AXI_ERR_RSP" },
-	{ .fc_id = 442, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_SBTE2_AXI_ERR_RSP" },
-	{ .fc_id = 443, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_SBTE3_AXI_ERR_RSP" },
-	{ .fc_id = 444, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_SBTE4_AXI_ERR_RSP" },
-	{ .fc_id = 445, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_CTRL_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 446, .cpu_id = 88, .valid = 1, .msg = 0, .reset = 1, .name = "MME0_QMAN_SW_ERROR" },
-	{ .fc_id = 447, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_SBTE0_AXI_ERR_RSP" },
-	{ .fc_id = 448, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_SBTE1_AXI_ERR_RSP" },
-	{ .fc_id = 449, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_SBTE2_AXI_ERR_RSP" },
-	{ .fc_id = 450, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_SBTE3_AXI_ERR_RSP" },
-	{ .fc_id = 451, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_SBTE4_AXI_ERR_RSP" },
-	{ .fc_id = 452, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_CTRL_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 453, .cpu_id = 89, .valid = 1, .msg = 0, .reset = 1, .name = "MME1_QMAN_SW_ERROR" },
-	{ .fc_id = 454, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_SBTE0_AXI_ERR_RSP" },
-	{ .fc_id = 455, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_SBTE1_AXI_ERR_RSP" },
-	{ .fc_id = 456, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_SBTE2_AXI_ERR_RSP" },
-	{ .fc_id = 457, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_SBTE3_AXI_ERR_RSP" },
-	{ .fc_id = 458, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_SBTE4_AXI_ERR_RSP" },
-	{ .fc_id = 459, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_CTRL_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 460, .cpu_id = 90, .valid = 1, .msg = 0, .reset = 1, .name = "MME2_QMAN_SW_ERROR" },
-	{ .fc_id = 461, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_SBTE0_AXI_ERR_RSP" },
-	{ .fc_id = 462, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_SBTE1_AXI_ERR_RSP" },
-	{ .fc_id = 463, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_SBTE2_AXI_ERR_RSP" },
-	{ .fc_id = 464, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_SBTE3_AXI_ERR_RSP" },
-	{ .fc_id = 465, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_SBTE4_AXI_ERR_RSP" },
-	{ .fc_id = 466, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_CTRL_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 467, .cpu_id = 91, .valid = 1, .msg = 0, .reset = 1, .name = "MME3_QMAN_SW_ERROR" },
-	{ .fc_id = 468, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "PSOC_MME_PLL_LOCK_ERR" },
-	{ .fc_id = 469, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "PSOC_CPU_PLL_LOCK_ERR" },
-	{ .fc_id = 470, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_TPC_PLL_LOCK_ERR" },
-	{ .fc_id = 471, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_NIC_PLL_LOCK_ERR" },
-	{ .fc_id = 472, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_XBAR_MMU_PLL_LOCK_ERR" },
-	{ .fc_id = 473, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_XBAR_DMA_PLL_LOCK_ERR" },
-	{ .fc_id = 474, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_XBAR_IF_PLL_LOCK_ERR" },
-	{ .fc_id = 475, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_XBAR_BANK_PLL_LOCK_ERR" },
-	{ .fc_id = 476, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_XBAR_MMU_PLL_LOCK_ERR" },
-	{ .fc_id = 477, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_XBAR_DMA_PLL_LOCK_ERR" },
-	{ .fc_id = 478, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_XBAR_IF_PLL_LOCK_ERR" },
-	{ .fc_id = 479, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_XBAR_MESH_PLL_LOCK_ERR" },
-	{ .fc_id = 480, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_TPC_PLL_LOCK_ERR" },
-	{ .fc_id = 481, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_NIC_PLL_LOCK_ERR" },
-	{ .fc_id = 482, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "PMMU_MME_PLL_LOCK_ERR" },
-	{ .fc_id = 483, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_TPC_PLL_LOCK_ERR" },
-	{ .fc_id = 484, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_PCI_PLL_LOCK_ERR" },
-	{ .fc_id = 485, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_XBAR_MMU_PLL_LOCK_ERR" },
-	{ .fc_id = 486, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_XBAR_DMA_PLL_LOCK_ERR" },
-	{ .fc_id = 487, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_XBAR_IF_PLL_LOCK_ERR" },
-	{ .fc_id = 488, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_XBAR_MESH_PLL_LOCK_ERR" },
-	{ .fc_id = 489, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_XBAR_MMU_PLL_LOCK_ERR" },
-	{ .fc_id = 490, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_XBAR_DMA_PLL_LOCK_ERR" },
-	{ .fc_id = 491, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_XBAR_IF_PLL_LOCK_ERR" },
-	{ .fc_id = 492, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_XBAR_BANK_PLL_LOCK_ERR" },
-	{ .fc_id = 493, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_TPC_PLL_LOCK_ERR" },
-	{ .fc_id = 494, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "PSOC_VID_PLL_LOCK_ERR" },
-	{ .fc_id = 495, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "PMMU_VID_PLL_LOCK_ERR" },
-	{ .fc_id = 496, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE3_HBM_PLL_LOCK_ERR" },
-	{ .fc_id = 497, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_XBAR_HBM_PLL_LOCK_ERR" },
-	{ .fc_id = 498, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE1_HBM_PLL_LOCK_ERR" },
-	{ .fc_id =
		499, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE0_HBM_PLL_LOCK_ERR" },
-	{ .fc_id = 500, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_XBAR_HBM_PLL_LOCK_ERR" },
-	{ .fc_id = 501, .cpu_id = 92, .valid = 1, .msg = 0, .reset = 1, .name = "DCORE2_HBM_PLL_LOCK_ERR" },
-	{ .fc_id = 502, .cpu_id = 93, .valid = 1, .msg = 0, .reset = 1, .name = "CPU_AXI_ERR_RSP" },
-	{ .fc_id = 503, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_0_AXI_ERR_RSP" },
-	{ .fc_id = 504, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_1_AXI_ERR_RSP" },
-	{ .fc_id = 505, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_2_AXI_ERR_RSP" },
-	{ .fc_id = 506, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_3_AXI_ERR_RSP" },
-	{ .fc_id = 507, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_8_AXI_ERR_RSP" },
-	{ .fc_id = 508, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_9_AXI_ERR_RSP" },
-	{ .fc_id = 509, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_10_AXI_ERR_RSP" },
-	{ .fc_id = 510, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_11_AXI_ERR_RSP" },
-	{ .fc_id = 511, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_7_AXI_ERR_RSP" },
-	{ .fc_id = 512, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_6_AXI_ERR_RSP" },
-	{ .fc_id = 513, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_5_AXI_ERR_RSP" },
-	{ .fc_id = 514, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_4_AXI_ERR_RSP" },
-	{ .fc_id = 515, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_15_AXI_ERR_RSP" },
-	{ .fc_id = 516, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_14_AXI_ERR_RSP" },
-	{ .fc_id = 517, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_13_AXI_ERR_RSP" },
-	{ .fc_id = 518, .cpu_id = 94, .valid = 1, .msg = 0, .reset = 1, .name = "HMMU_12_AXI_ERR_RSP" },
-	{ .fc_id = 519, .cpu_id = 95, .valid = 1, .msg = 0, .reset = 1, .name = "PMMU_FATAL" },
-	{ .fc_id = 520, .cpu_id = 96, .valid = 1, .msg = 0, .reset = 1, .name = "PMMU_AXI_ERR_RSP" },
-	{ .fc_id = 521, .cpu_id = 97, .valid = 1, .msg = 0, .reset = 0, .name = "VM0_ALARM_A" },
-	{ .fc_id = 522, .cpu_id = 98, .valid = 1, .msg = 0, .reset = 0, .name = "VM0_ALARM_B" },
-	{ .fc_id = 523, .cpu_id = 99, .valid = 1, .msg = 0, .reset = 0, .name = "VM1_ALARM_A" },
-	{ .fc_id = 524, .cpu_id = 100, .valid = 1, .msg = 0, .reset = 0, .name = "VM1_ALARM_B" },
-	{ .fc_id = 525, .cpu_id = 101, .valid = 1, .msg = 0, .reset = 0, .name = "VM2_ALARM_A" },
-	{ .fc_id = 526, .cpu_id = 102, .valid = 1, .msg = 0, .reset = 0, .name = "VM2_ALARM_B" },
-	{ .fc_id = 527, .cpu_id = 103, .valid = 1, .msg = 0, .reset = 0, .name = "VM3_ALARM_A" },
-	{ .fc_id = 528, .cpu_id = 104, .valid = 1, .msg = 0, .reset = 0, .name = "VM3_ALARM_B" },
-	{ .fc_id = 529, .cpu_id = 105, .valid = 1, .msg = 0, .reset = 1, .name = "PSOC_AXI_ERR_RSP" },
-	{ .fc_id = 530, .cpu_id = 106, .valid = 1, .msg = 0, .reset = 0, .name = "PSOC_PRSTN_FALL" },
-	{ .fc_id = 531, .cpu_id = 107, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 532, .cpu_id = 107, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 533, .cpu_id = 107, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 534, .cpu_id = 107, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 535, .cpu_id = 107, .valid = 0, .msg = 0, .reset = 0, .name =
"" }, - { .fc_id = 536, .cpu_id = 107, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 537, .cpu_id = 107, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 538, .cpu_id = 107, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 539, .cpu_id = 108, .valid = 1, - .msg = 0, .reset = 1, .name = "KDMA_CH0_AXI_ERR_RSP" }, - { .fc_id = 540, .cpu_id = 109, .valid = 1, - .msg = 0, .reset = 1, .name = "PDMA_CH0_AXI_ERR_RSP" }, - { .fc_id = 541, .cpu_id = 109, .valid = 1, - .msg = 0, .reset = 1, .name = "PDMA_CH1_AXI_ERR_RSP" }, - { .fc_id = 542, .cpu_id = 110, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM_CATTRIP_0" }, - { .fc_id = 543, .cpu_id = 111, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM_CATTRIP_1" }, - { .fc_id = 544, .cpu_id = 112, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM_CATTRIP_2" }, - { .fc_id = 545, .cpu_id = 113, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM_CATTRIP_3" }, - { .fc_id = 546, .cpu_id = 114, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM_CATTRIP_4" }, - { .fc_id = 547, .cpu_id = 115, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM_CATTRIP_5" }, - { .fc_id = 548, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM0_MC0_SEI_SEVERE" }, - { .fc_id = 549, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM0_MC0_SEI_NON_SEVERE" }, - { .fc_id = 550, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM0_MC1_SEI_SEVERE" }, - { .fc_id = 551, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM0_MC1_SEI_NON_SEVERE" }, - { .fc_id = 552, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM1_MC0_SEI_SEVERE" }, - { .fc_id = 553, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM1_MC0_SEI_NON_SEVERE" }, - { .fc_id = 554, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM1_MC1_SEI_SEVERE" }, - { .fc_id = 555, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM1_MC1_SEI_NON_SEVERE" }, - { .fc_id = 556, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM2_MC0_SEI_SEVERE" }, - { .fc_id = 557, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM2_MC0_SEI_NON_SEVERE" }, - { .fc_id = 558, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM2_MC1_SEI_SEVERE" }, - { .fc_id = 559, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM2_MC1_SEI_NON_SEVERE" }, - { .fc_id = 560, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM3_MC0_SEI_SEVERE" }, - { .fc_id = 561, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM3_MC0_SEI_NON_SEVERE" }, - { .fc_id = 562, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM3_MC1_SEI_SEVERE" }, - { .fc_id = 563, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM3_MC1_SEI_NON_SEVERE" }, - { .fc_id = 564, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM4_MC0_SEI_SEVERE" }, - { .fc_id = 565, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM4_MC0_SEI_NON_SEVERE" }, - { .fc_id = 566, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM4_MC1_SEI_SEVERE" }, - { .fc_id = 567, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM4_MC1_SEI_NON_SEVERE" }, - { .fc_id = 568, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM5_MC0_SEI_SEVERE" }, - { .fc_id = 569, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM5_MC0_SEI_NON_SEVERE" }, - { .fc_id = 570, .cpu_id = 116, .valid = 1, - .msg = 0, .reset = 1, .name = "HBM5_MC1_SEI_SEVERE" }, - { .fc_id = 571, 
		.cpu_id = 116, .valid = 1, .msg = 0, .reset = 0, .name = "HBM5_MC1_SEI_NON_SEVERE" },
-	{ .fc_id = 572, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC0_AXI_ERR_RSPONSE" },
-	{ .fc_id = 573, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC1_AXI_ERR_RSPONSE" },
-	{ .fc_id = 574, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC2_AXI_ERR_RSPONSE" },
-	{ .fc_id = 575, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC3_AXI_ERR_RSPONSE" },
-	{ .fc_id = 576, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC4_AXI_ERR_RSPONSE" },
-	{ .fc_id = 577, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC5_AXI_ERR_RSPONSE" },
-	{ .fc_id = 578, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC6_AXI_ERR_RSPONSE" },
-	{ .fc_id = 579, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC7_AXI_ERR_RSPONSE" },
-	{ .fc_id = 580, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC8_AXI_ERR_RSPONSE" },
-	{ .fc_id = 581, .cpu_id = 117, .valid = 1, .msg = 0, .reset = 1, .name = "DEC9_AXI_ERR_RSPONSE" },
-	{ .fc_id = 582, .cpu_id = 118, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 583, .cpu_id = 119, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 584, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF0_FATAL" },
-	{ .fc_id = 585, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF1_FATAL" },
-	{ .fc_id = 586, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF2_FATAL" },
-	{ .fc_id = 587, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF3_FATAL" },
-	{ .fc_id = 588, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF8_FATAL" },
-	{ .fc_id = 589, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF9_FATAL" },
-	{ .fc_id = 590, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF10_FATAL" },
-	{ .fc_id = 591, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF11_FATAL" },
-	{ .fc_id = 592, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF7_FATAL" },
-	{ .fc_id = 593, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF6_FATAL" },
-	{ .fc_id = 594, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF5_FATAL" },
-	{ .fc_id = 595, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF4_FATAL" },
-	{ .fc_id = 596, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF15_FATAL" },
-	{ .fc_id = 597, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF14_FATAL" },
-	{ .fc_id = 598, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF13_FATAL" },
-	{ .fc_id = 599, .cpu_id = 120, .valid = 1, .msg = 0, .reset = 1, .name = "HIF12_FATAL" },
-	{ .fc_id = 600, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC0_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 601, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC1_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 602, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC2_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 603, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC3_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 604, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC4_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 605, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC5_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 606, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC6_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 607, .cpu_id = 121,
		.valid = 1, .msg = 0, .reset = 1, .name = "NIC7_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 608, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC8_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 609, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC9_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 610, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC10_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 611, .cpu_id = 121, .valid = 1, .msg = 0, .reset = 1, .name = "NIC11_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 612, .cpu_id = 122, .valid = 1, .msg = 0, .reset = 1, .name = "SM0_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 613, .cpu_id = 122, .valid = 1, .msg = 0, .reset = 1, .name = "SM1_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 614, .cpu_id = 122, .valid = 1, .msg = 0, .reset = 1, .name = "SM2_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 615, .cpu_id = 122, .valid = 1, .msg = 0, .reset = 1, .name = "SM3_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 616, .cpu_id = 123, .valid = 1, .msg = 0, .reset = 1, .name = "ARC_AXI_ERROR_RESPONSE" },
-	{ .fc_id = 617, .cpu_id = 124, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 618, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 619, .cpu_id = 125, .valid = 1, .msg = 0, .reset = 0, .name = "PCIE_FLR_REQUESTED" },
-	{ .fc_id = 620, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 621, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 622, .cpu_id = 125, .valid = 1, .msg = 0, .reset = 1, .name = "PCIE_APB_TIMEOUT" },
-	{ .fc_id = 623, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 624, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 625, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 626, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 627, .cpu_id = 125, .valid = 1, .msg = 0, .reset = 0, .name = "PCIE_FATAL_ERR" },
-	{ .fc_id = 628, .cpu_id = 125, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 629, .cpu_id = 126, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 630, .cpu_id = 127, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = 0, .name = "PCIE_P2P_MSIX" },
-	{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = 0, .name = "PCIE_DRAIN_COMPLETE" },
-	{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = 0, .name = "TPC0_BMON_SPMU" },
-	{ .fc_id = 634, .cpu_id = 131, .valid = 1, .msg = 0, .reset = 0, .name = "TPC0_KERNEL_ERR" },
-	{ .fc_id = 635, .cpu_id = 132, .valid = 1, .msg = 0, .reset = 0, .name = "TPC1_BMON_SPMU" },
-	{ .fc_id = 636, .cpu_id = 133, .valid = 1, .msg = 0, .reset = 0, .name = "TPC1_KERNEL_ERR" },
-	{ .fc_id = 637, .cpu_id = 134, .valid = 1, .msg = 0, .reset = 0, .name = "TPC2_BMON_SPMU" },
-	{ .fc_id = 638, .cpu_id = 135, .valid = 1, .msg = 0, .reset = 0, .name = "TPC2_KERNEL_ERR" },
-	{ .fc_id = 639, .cpu_id = 136, .valid = 1, .msg = 0, .reset = 0, .name = "TPC3_BMON_SPMU" },
-	{ .fc_id = 640, .cpu_id = 137, .valid = 1, .msg = 0, .reset = 0, .name = "TPC3_KERNEL_ERR" },
-	{ .fc_id = 641, .cpu_id = 138, .valid = 1, .msg = 0, .reset = 0, .name = "TPC4_BMON_SPMU" },
-	{ .fc_id = 642, .cpu_id = 139, .valid = 1, .msg = 0, .reset = 0, .name = "TPC4_KERNEL_ERR" },
-	{ .fc_id = 643, .cpu_id = 140, .valid = 1, .msg = 0, .reset = 0, .name = "TPC5_BMON_SPMU" },
-	{ .fc_id = 644, .cpu_id = 141, .valid = 1,
-		.msg = 0, .reset = 0, .name = "TPC5_KERNEL_ERR" },
-	{ .fc_id = 645, .cpu_id = 150, .valid = 1, .msg = 0, .reset = 0, .name = "TPC6_BMON_SPMU" },
-	{ .fc_id = 646, .cpu_id = 151, .valid = 1, .msg = 0, .reset = 0, .name = "TPC6_KERNEL_ERR" },
-	{ .fc_id = 647, .cpu_id = 152, .valid = 1, .msg = 0, .reset = 0, .name = "TPC7_BMON_SPMU" },
-	{ .fc_id = 648, .cpu_id = 153, .valid = 1, .msg = 0, .reset = 0, .name = "TPC7_KERNEL_ERR" },
-	{ .fc_id = 649, .cpu_id = 146, .valid = 1, .msg = 0, .reset = 0, .name = "TPC8_BMON_SPMU" },
-	{ .fc_id = 650, .cpu_id = 147, .valid = 1, .msg = 0, .reset = 0, .name = "TPC8_KERNEL_ERR" },
-	{ .fc_id = 651, .cpu_id = 148, .valid = 1, .msg = 0, .reset = 0, .name = "TPC9_BMON_SPMU" },
-	{ .fc_id = 652, .cpu_id = 149, .valid = 1, .msg = 0, .reset = 0, .name = "TPC9_KERNEL_ERR" },
-	{ .fc_id = 653, .cpu_id = 142, .valid = 1, .msg = 0, .reset = 0, .name = "TPC10_BMON_SPMU" },
-	{ .fc_id = 654, .cpu_id = 143, .valid = 1, .msg = 0, .reset = 0, .name = "TPC10_KERNEL_ERR" },
-	{ .fc_id = 655, .cpu_id = 144, .valid = 1, .msg = 0, .reset = 0, .name = "TPC11_BMON_SPMU" },
-	{ .fc_id = 656, .cpu_id = 145, .valid = 1, .msg = 0, .reset = 0, .name = "TPC11_KERNEL_ERR" },
-	{ .fc_id = 657, .cpu_id = 162, .valid = 1, .msg = 0, .reset = 0, .name = "TPC12_BMON_SPMU" },
-	{ .fc_id = 658, .cpu_id = 163, .valid = 1, .msg = 0, .reset = 0, .name = "TPC12_KERNEL_ERR" },
-	{ .fc_id = 659, .cpu_id = 164, .valid = 1, .msg = 0, .reset = 0, .name = "TPC13_BMON_SPMU" },
-	{ .fc_id = 660, .cpu_id = 165, .valid = 1, .msg = 0, .reset = 0, .name = "TPC13_KERNEL_ERR" },
-	{ .fc_id = 661, .cpu_id = 158, .valid = 1, .msg = 0, .reset = 0, .name = "TPC14_BMON_SPMU" },
-	{ .fc_id = 662, .cpu_id = 159, .valid = 1, .msg = 0, .reset = 0, .name = "TPC14_KERNEL_ERR" },
-	{ .fc_id = 663, .cpu_id = 160, .valid = 1, .msg = 0, .reset = 0, .name = "TPC15_BMON_SPMU" },
-	{ .fc_id = 664, .cpu_id = 161, .valid = 1, .msg = 0, .reset = 0, .name = "TPC15_KERNEL_ERR" },
-	{ .fc_id = 665, .cpu_id = 154, .valid = 1, .msg = 0, .reset = 0, .name = "TPC16_BMON_SPMU" },
-	{ .fc_id = 666, .cpu_id = 155, .valid = 1, .msg = 0, .reset = 0, .name = "TPC16_KERNEL_ERR" },
-	{ .fc_id = 667, .cpu_id = 156, .valid = 1, .msg = 0, .reset = 0, .name = "TPC17_BMON_SPMU" },
-	{ .fc_id = 668, .cpu_id = 157, .valid = 1, .msg = 0, .reset = 0, .name = "TPC17_KERNEL_ERR" },
-	{ .fc_id = 669, .cpu_id = 166, .valid = 1, .msg = 0, .reset = 0, .name = "TPC18_BMON_SPMU" },
-	{ .fc_id = 670, .cpu_id = 167, .valid = 1, .msg = 0, .reset = 0, .name = "TPC18_KERNEL_ERR" },
-	{ .fc_id = 671, .cpu_id = 168, .valid = 1, .msg = 0, .reset = 0, .name = "TPC19_BMON_SPMU" },
-	{ .fc_id = 672, .cpu_id = 169, .valid = 1, .msg = 0, .reset = 0, .name = "TPC19_KERNEL_ERR" },
-	{ .fc_id = 673, .cpu_id = 170, .valid = 1, .msg = 0, .reset = 0, .name = "TPC20_BMON_SPMU" },
-	{ .fc_id = 674, .cpu_id = 171, .valid = 1, .msg = 0, .reset = 0, .name = "TPC20_KERNEL_ERR" },
-	{ .fc_id = 675, .cpu_id = 172, .valid = 1, .msg = 0, .reset = 0, .name = "TPC21_BMON_SPMU" },
-	{ .fc_id = 676, .cpu_id = 173, .valid = 1, .msg = 0, .reset = 0, .name = "TPC21_KERNEL_ERR" },
-	{ .fc_id = 677, .cpu_id = 174, .valid = 1, .msg = 0, .reset = 0, .name = "TPC22_BMON_SPMU" },
-	{ .fc_id = 678, .cpu_id = 175, .valid = 1, .msg = 0, .reset = 0, .name = "TPC22_KERNEL_ERR" },
-	{ .fc_id = 679, .cpu_id = 176, .valid = 1, .msg = 0, .reset = 0, .name = "TPC23_BMON_SPMU" },
-	{ .fc_id = 680, .cpu_id = 177, .valid = 1, .msg = 0, .reset
		= 0, .name = "TPC23_KERNEL_ERR" },
-	{ .fc_id = 681, .cpu_id = 178, .valid = 1, .msg = 0, .reset = 0, .name = "TPC24_BMON_SPMU" },
-	{ .fc_id = 682, .cpu_id = 179, .valid = 1, .msg = 0, .reset = 0, .name = "TPC24_KERNEL_ERR" },
-	{ .fc_id = 683, .cpu_id = 180, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 684, .cpu_id = 180, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 685, .cpu_id = 180, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 686, .cpu_id = 180, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 687, .cpu_id = 180, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 688, .cpu_id = 180, .valid = 1, .msg = 0, .reset = 0, .name = "MME0_CTRL_BMON_SPMU" },
-	{ .fc_id = 689, .cpu_id = 180, .valid = 1, .msg = 0, .reset = 0, .name = "MME0_SBTE_BMON_SPMU" },
-	{ .fc_id = 690, .cpu_id = 180, .valid = 1, .msg = 0, .reset = 0, .name = "MME0_WAP_BMON_SPMU" },
-	{ .fc_id = 691, .cpu_id = 180, .valid = 1, .msg = 0, .reset = 0, .name = "MME0_WAP_SOURCE_RESULT_INVALID" },
-	{ .fc_id = 692, .cpu_id = 181, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 693, .cpu_id = 181, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 694, .cpu_id = 181, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 695, .cpu_id = 181, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 696, .cpu_id = 181, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 697, .cpu_id = 181, .valid = 1, .msg = 0, .reset = 0, .name = "MME1_CTRL_BMON_SPMU" },
-	{ .fc_id = 698, .cpu_id = 181, .valid = 1, .msg = 0, .reset = 0, .name = "MME1_SBTE_BMON_SPMU" },
-	{ .fc_id = 699, .cpu_id = 181, .valid = 1, .msg = 0, .reset = 0, .name = "MME1_WAP_BMON_SPMU" },
-	{ .fc_id = 700, .cpu_id = 181, .valid = 1, .msg = 0, .reset = 0, .name = "MME1_WAP_SOURCE_RESULT_INVALID" },
-	{ .fc_id = 701, .cpu_id = 182, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 702, .cpu_id = 182, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 703, .cpu_id = 182, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 704, .cpu_id = 182, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 705, .cpu_id = 182, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 706, .cpu_id = 182, .valid = 1, .msg = 0, .reset = 0, .name = "MME2_CTRL_BMON_SPMU" },
-	{ .fc_id = 707, .cpu_id = 182, .valid = 1, .msg = 0, .reset = 0, .name = "MME2_SBTE_BMON_SPMU" },
-	{ .fc_id = 708, .cpu_id = 182, .valid = 1, .msg = 0, .reset = 0, .name = "MME2_WAP_BMON_SPMU" },
-	{ .fc_id = 709, .cpu_id = 182, .valid = 1, .msg = 0, .reset = 0, .name = "MME2_WAP_SOURCE_RESULT_INVALID" },
-	{ .fc_id = 710, .cpu_id = 183, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 711, .cpu_id = 183, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 712, .cpu_id = 183, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 713, .cpu_id = 183, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 714, .cpu_id = 183, .valid = 0, .msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 715, .cpu_id = 183, .valid = 1, .msg = 0, .reset = 0, .name = "MME3_CTRL_BMON_SPMU" },
-	{ .fc_id = 716, .cpu_id = 183, .valid = 1, .msg = 0, .reset = 0, .name = "MME3_SBTE_BMON_SPMU" },
-	{ .fc_id = 717, .cpu_id = 183, .valid = 1, .msg = 0, .reset = 0, .name = "MME3_WAP_BMON_SPMU" },
-	{ .fc_id = 718, .cpu_id = 183, .valid = 1, .msg = 0, .reset = 0, .name =
"MME3_WAP_SOURCE_RESULT_INVALID" }, - { .fc_id = 719, .cpu_id = 184, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 720, .cpu_id = 184, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU0_PAGE_FAULT_OR_WR_PERM" }, - { .fc_id = 721, .cpu_id = 184, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU0_SECURITY_ERROR" }, - { .fc_id = 722, .cpu_id = 185, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 723, .cpu_id = 185, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU1_PAGE_FAULT_WR_PERM" }, - { .fc_id = 724, .cpu_id = 185, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU1_SECURITY_ERROR" }, - { .fc_id = 725, .cpu_id = 186, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 726, .cpu_id = 186, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU2_PAGE_FAULT_WR_PERM" }, - { .fc_id = 727, .cpu_id = 186, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU2_SECURITY_ERROR" }, - { .fc_id = 728, .cpu_id = 187, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 729, .cpu_id = 187, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU3_PAGE_FAULT_WR_PERM" }, - { .fc_id = 730, .cpu_id = 187, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU3_SECURITY_ERROR" }, - { .fc_id = 731, .cpu_id = 188, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 732, .cpu_id = 188, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU8_PAGE_FAULT_WR_PERM" }, - { .fc_id = 733, .cpu_id = 188, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU8_SECURITY_ERROR" }, - { .fc_id = 734, .cpu_id = 189, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 735, .cpu_id = 189, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU9_PAGE_FAULT_WR_PERM" }, - { .fc_id = 736, .cpu_id = 189, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU9_SECURITY_ERROR" }, - { .fc_id = 737, .cpu_id = 190, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 738, .cpu_id = 190, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU10_PAGE_FAULT_WR_PERM" }, - { .fc_id = 739, .cpu_id = 190, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU10_SECURITY_ERROR" }, - { .fc_id = 740, .cpu_id = 191, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 741, .cpu_id = 191, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU11_PAGE_FAULT_WR_PERM" }, - { .fc_id = 742, .cpu_id = 191, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU11_SECURITY_ERROR" }, - { .fc_id = 743, .cpu_id = 192, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 744, .cpu_id = 192, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU7_PAGE_FAULT_WR_PERM" }, - { .fc_id = 745, .cpu_id = 192, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU7_SECURITY_ERROR" }, - { .fc_id = 746, .cpu_id = 193, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 747, .cpu_id = 193, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU6_PAGE_FAULT_WR_PERM" }, - { .fc_id = 748, .cpu_id = 193, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU6_SECURITY_ERROR" }, - { .fc_id = 749, .cpu_id = 194, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 750, .cpu_id = 194, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU5_PAGE_FAULT_WR_PERM" }, - { .fc_id = 751, .cpu_id = 194, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU5_SECURITY_ERROR" }, - { .fc_id = 752, .cpu_id = 195, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 753, .cpu_id = 195, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU4_PAGE_FAULT_WR_PERM" }, - { .fc_id = 754, .cpu_id = 195, .valid = 1, - .msg = 0, .reset = 1, .name = 
"HMMU4_SECURITY_ERROR" }, - { .fc_id = 755, .cpu_id = 196, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 756, .cpu_id = 196, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU15_PAGE_FAULT_WR_PERM" }, - { .fc_id = 757, .cpu_id = 196, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU15_SECURITY_ERROR" }, - { .fc_id = 758, .cpu_id = 197, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 759, .cpu_id = 197, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU14_PAGE_FAULT_WR_PERM" }, - { .fc_id = 760, .cpu_id = 197, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU14_SECURITY_ERROR" }, - { .fc_id = 761, .cpu_id = 198, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 762, .cpu_id = 198, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU13_PAGE_FAULT_WR_PERM" }, - { .fc_id = 763, .cpu_id = 198, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU13_SECURITY_ERROR" }, - { .fc_id = 764, .cpu_id = 199, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 765, .cpu_id = 199, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU12_PAGE_FAULT_WR_PERM" }, - { .fc_id = 766, .cpu_id = 199, .valid = 1, - .msg = 0, .reset = 1, .name = "HMMU12_SECURITY_ERROR" }, - { .fc_id = 767, .cpu_id = 200, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 768, .cpu_id = 201, .valid = 1, - .msg = 0, .reset = 1, .name = "PMMU0_PAGE_FAULT_WR_PERM" }, - { .fc_id = 769, .cpu_id = 202, .valid = 1, - .msg = 0, .reset = 1, .name = "PMMU0_SECURITY_ERROR" }, - { .fc_id = 770, .cpu_id = 203, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA2_BM_SPMU" }, - { .fc_id = 771, .cpu_id = 204, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 772, .cpu_id = 205, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA3_BM_SPMU" }, - { .fc_id = 773, .cpu_id = 206, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 774, .cpu_id = 207, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA0_BM_SPMU" }, - { .fc_id = 775, .cpu_id = 208, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 776, .cpu_id = 209, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA1_BM_SPMU" }, - { .fc_id = 777, .cpu_id = 210, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 778, .cpu_id = 211, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA6_BM_SPMU" }, - { .fc_id = 779, .cpu_id = 212, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 780, .cpu_id = 213, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA7_BM_SPMU" }, - { .fc_id = 781, .cpu_id = 214, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 782, .cpu_id = 215, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA4_BM_SPMU" }, - { .fc_id = 783, .cpu_id = 216, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 784, .cpu_id = 217, .valid = 1, - .msg = 0, .reset = 0, .name = "HDMA5_BM_SPMU" }, - { .fc_id = 785, .cpu_id = 218, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 786, .cpu_id = 219, .valid = 1, - .msg = 0, .reset = 0, .name = "KDMA_BM_SPMU" }, - { .fc_id = 787, .cpu_id = 220, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 788, .cpu_id = 221, .valid = 1, - .msg = 0, .reset = 0, .name = "PDMA0_BM_SPMU" }, - { .fc_id = 789, .cpu_id = 222, .valid = 1, - .msg = 0, .reset = 0, .name = "PDMA1_BM_SPMU" }, - { .fc_id = 790, .cpu_id = 223, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM0_MC0_SPI" }, - { .fc_id = 791, .cpu_id = 224, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM0_MC1_SPI" }, - { .fc_id = 792, .cpu_id = 225, .valid = 1, - .msg = 0, .reset 
= 0, .name = "HBM1_MC0_SPI" }, - { .fc_id = 793, .cpu_id = 226, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM1_MC1_SPI" }, - { .fc_id = 794, .cpu_id = 227, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM2_MC0_SPI" }, - { .fc_id = 795, .cpu_id = 228, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM2_MC1_SPI" }, - { .fc_id = 796, .cpu_id = 229, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM3_MC0_SPI" }, - { .fc_id = 797, .cpu_id = 230, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM3_MC1_SPI" }, - { .fc_id = 798, .cpu_id = 231, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM4_MC0_SPI" }, - { .fc_id = 799, .cpu_id = 232, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM4_MC1_SPI" }, - { .fc_id = 800, .cpu_id = 233, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM5_MC0_SPI" }, - { .fc_id = 801, .cpu_id = 234, .valid = 1, - .msg = 0, .reset = 0, .name = "HBM5_MC1_SPI" }, - { .fc_id = 802, .cpu_id = 235, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 803, .cpu_id = 236, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 804, .cpu_id = 237, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 805, .cpu_id = 238, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 806, .cpu_id = 239, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 807, .cpu_id = 240, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 808, .cpu_id = 241, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 809, .cpu_id = 242, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 810, .cpu_id = 243, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 811, .cpu_id = 244, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 812, .cpu_id = 245, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 813, .cpu_id = 246, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 814, .cpu_id = 247, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 815, .cpu_id = 248, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 816, .cpu_id = 249, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 817, .cpu_id = 250, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 818, .cpu_id = 251, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 819, .cpu_id = 252, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 820, .cpu_id = 253, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 821, .cpu_id = 254, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 822, .cpu_id = 255, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 823, .cpu_id = 256, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 824, .cpu_id = 257, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 825, .cpu_id = 258, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 826, .cpu_id = 259, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 827, .cpu_id = 260, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 828, .cpu_id = 261, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 829, .cpu_id = 262, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 830, .cpu_id = 263, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 831, .cpu_id = 264, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 832, .cpu_id = 265, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 833, .cpu_id = 266, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 
-	{ .fc_id = 835, .cpu_id = 268, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 836, .cpu_id = 269, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 837, .cpu_id = 270, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 838, .cpu_id = 271, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 839, .cpu_id = 272, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 840, .cpu_id = 273, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 841, .cpu_id = 274, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 842, .cpu_id = 275, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 843, .cpu_id = 276, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 844, .cpu_id = 277, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 845, .cpu_id = 278, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 846, .cpu_id = 279, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 847, .cpu_id = 280, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 848, .cpu_id = 281, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 849, .cpu_id = 282, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 850, .cpu_id = 283, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 851, .cpu_id = 284, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 852, .cpu_id = 285, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 853, .cpu_id = 286, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 854, .cpu_id = 287, .valid = 0,
-		.msg = 0, .reset = 1, .name = "" },
-	{ .fc_id = 855, .cpu_id = 288, .valid = 0,
-		.msg = 0, .reset = 1, .name = "" },
-	{ .fc_id = 856, .cpu_id = 289, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 857, .cpu_id = 290, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 858, .cpu_id = 291, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 859, .cpu_id = 292, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 860, .cpu_id = 293, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 861, .cpu_id = 294, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 862, .cpu_id = 295, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 863, .cpu_id = 296, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 864, .cpu_id = 297, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 865, .cpu_id = 298, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 866, .cpu_id = 299, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 867, .cpu_id = 300, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 868, .cpu_id = 301, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 869, .cpu_id = 302, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 870, .cpu_id = 303, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 871, .cpu_id = 304, .valid = 1,
-		.msg = 0, .reset = 1, .name = "RPM_ERROR_OR_DRAIN" },
-	{ .fc_id = 872, .cpu_id = 305, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 873, .cpu_id = 306, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 874, .cpu_id = 307, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 875, .cpu_id = 308, .valid = 1,
-		.msg = 0, .reset = 0, .name = "RAZWI_OR_PID_MIN_MAX_INTERRUPT" },
-	{ .fc_id = 876, .cpu_id = 309, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 877, .cpu_id = 310, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 878, .cpu_id = 311, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 879, .cpu_id = 312, .valid = 0,
-		.msg = 0, .reset = 1, .name = "" },
-	{ .fc_id = 880, .cpu_id = 313, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 881, .cpu_id = 314, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 882, .cpu_id = 315, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 883, .cpu_id = 316, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 884, .cpu_id = 317, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 885, .cpu_id = 318, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 886, .cpu_id = 319, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 887, .cpu_id = 320, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 888, .cpu_id = 321, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 889, .cpu_id = 322, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 890, .cpu_id = 323, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 891, .cpu_id = 324, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 892, .cpu_id = 325, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 893, .cpu_id = 326, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 894, .cpu_id = 327, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 895, .cpu_id = 328, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 896, .cpu_id = 329, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC0_SPI" },
-	{ .fc_id = 897, .cpu_id = 329, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC0_BMON_SPMU" },
-	{ .fc_id = 898, .cpu_id = 330, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC1_SPI" },
-	{ .fc_id = 899, .cpu_id = 330, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC1_BMON_SPMU" },
-	{ .fc_id = 900, .cpu_id = 331, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC2_SPI" },
-	{ .fc_id = 901, .cpu_id = 331, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC2_BMON_SPMU" },
-	{ .fc_id = 902, .cpu_id = 332, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC3_SPI" },
-	{ .fc_id = 903, .cpu_id = 332, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC3_BMON_SPMU" },
-	{ .fc_id = 904, .cpu_id = 333, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC4_SPI" },
-	{ .fc_id = 905, .cpu_id = 333, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC4_BMON_SPMU" },
-	{ .fc_id = 906, .cpu_id = 334, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC5_SPI" },
-	{ .fc_id = 907, .cpu_id = 334, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC5_BMON_SPMU" },
-	{ .fc_id = 908, .cpu_id = 335, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC6_SPI" },
-	{ .fc_id = 909, .cpu_id = 335, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC6_BMON_SPMU" },
-	{ .fc_id = 910, .cpu_id = 336, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC7_SPI" },
-	{ .fc_id = 911, .cpu_id = 336, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC7_BMON_SPMU" },
-	{ .fc_id = 912, .cpu_id = 337, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC8_SPI" },
-	{ .fc_id = 913, .cpu_id = 337, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC8_BMON_SPMU" },
-	{ .fc_id = 914, .cpu_id = 338, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC9_SPI" },
-	{ .fc_id = 915, .cpu_id = 338, .valid = 1,
-		.msg = 0, .reset = 0, .name = "DEC9_BMON_SPMU" },
-	{ .fc_id = 916, .cpu_id = 339, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
.msg = 0, .reset = 0, .name = "" }, - { .fc_id = 917, .cpu_id = 340, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 918, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 919, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 920, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 921, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 922, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 923, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 924, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 925, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 926, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 927, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 928, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 929, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 930, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 931, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 932, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 933, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 934, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 935, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 936, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 937, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 938, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 939, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 940, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 941, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 942, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 943, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 944, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 945, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 946, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 947, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 948, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 949, .cpu_id = 341, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 950, .cpu_id = 342, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 951, .cpu_id = 343, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC0_BMON_SPMU" }, - { .fc_id = 952, .cpu_id = 343, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC0_SW_ERROR" }, - { .fc_id = 953, .cpu_id = 343, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 954, .cpu_id = 343, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 955, .cpu_id = 344, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC1_BMON_SPMU" }, - { .fc_id = 956, .cpu_id = 344, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC1_SW_ERROR" }, - { .fc_id = 957, .cpu_id = 344, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 958, .cpu_id = 344, .valid = 0, - .msg = 0, 
.reset = 0, .name = "" }, - { .fc_id = 959, .cpu_id = 345, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC2_BMON_SPMU" }, - { .fc_id = 960, .cpu_id = 345, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC2_SW_ERROR" }, - { .fc_id = 961, .cpu_id = 345, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 962, .cpu_id = 345, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 963, .cpu_id = 346, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC3_BMON_SPMU" }, - { .fc_id = 964, .cpu_id = 346, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC3_SW_ERROR" }, - { .fc_id = 965, .cpu_id = 346, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 966, .cpu_id = 346, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 967, .cpu_id = 347, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC4_BMON_SPMU" }, - { .fc_id = 968, .cpu_id = 347, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC4_SW_ERROR" }, - { .fc_id = 969, .cpu_id = 347, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 970, .cpu_id = 347, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 971, .cpu_id = 348, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC5_BMON_SPMU" }, - { .fc_id = 972, .cpu_id = 348, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC5_SW_ERROR" }, - { .fc_id = 973, .cpu_id = 348, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 974, .cpu_id = 348, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 975, .cpu_id = 349, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC6_BMON_SPMU" }, - { .fc_id = 976, .cpu_id = 349, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC6_SW_ERROR" }, - { .fc_id = 977, .cpu_id = 349, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 978, .cpu_id = 349, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 979, .cpu_id = 350, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC7_BMON_SPMU" }, - { .fc_id = 980, .cpu_id = 350, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC7_SW_ERROR" }, - { .fc_id = 981, .cpu_id = 350, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 982, .cpu_id = 350, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 983, .cpu_id = 351, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC8_BMON_SPMU" }, - { .fc_id = 984, .cpu_id = 351, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC8_SW_ERROR" }, - { .fc_id = 985, .cpu_id = 351, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 986, .cpu_id = 351, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 987, .cpu_id = 352, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC9_BMON_SPMU" }, - { .fc_id = 988, .cpu_id = 352, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC9_SW_ERROR" }, - { .fc_id = 989, .cpu_id = 352, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 990, .cpu_id = 352, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 991, .cpu_id = 353, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC10_BMON_SPMU" }, - { .fc_id = 992, .cpu_id = 353, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC10_SW_ERROR" }, - { .fc_id = 993, .cpu_id = 353, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 994, .cpu_id = 353, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 995, .cpu_id = 354, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC11_BMON_SPMU" }, - { .fc_id = 996, .cpu_id = 354, .valid = 1, - .msg = 0, .reset = 0, .name = "NIC11_SW_ERROR" }, - { .fc_id = 997, .cpu_id = 354, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 998, 
-	{ .fc_id = 999, .cpu_id = 355, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1000, .cpu_id = 356, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1001, .cpu_id = 357, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1002, .cpu_id = 358, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1003, .cpu_id = 359, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1004, .cpu_id = 360, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1005, .cpu_id = 361, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1006, .cpu_id = 362, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1007, .cpu_id = 363, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1008, .cpu_id = 368, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1009, .cpu_id = 369, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1010, .cpu_id = 366, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1011, .cpu_id = 367, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1012, .cpu_id = 364, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1013, .cpu_id = 365, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1014, .cpu_id = 374, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1015, .cpu_id = 375, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1016, .cpu_id = 372, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1017, .cpu_id = 373, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1018, .cpu_id = 370, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1019, .cpu_id = 371, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1020, .cpu_id = 376, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1021, .cpu_id = 377, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1022, .cpu_id = 378, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1023, .cpu_id = 379, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1024, .cpu_id = 380, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1025, .cpu_id = 381, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1026, .cpu_id = 382, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1027, .cpu_id = 383, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1028, .cpu_id = 384, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1029, .cpu_id = 385, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1030, .cpu_id = 386, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1031, .cpu_id = 387, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1032, .cpu_id = 388, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1033, .cpu_id = 389, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1034, .cpu_id = 390, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1035, .cpu_id = 391, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1036, .cpu_id = 392, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1037, .cpu_id = 393, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1038, .cpu_id = 394, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1039, .cpu_id = 395, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
-	{ .fc_id = 1040, .cpu_id = 396, .valid = 0,
-		.msg = 0, .reset = 0, .name = "" },
- .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1041, .cpu_id = 397, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1042, .cpu_id = 398, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1043, .cpu_id = 399, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1044, .cpu_id = 400, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1045, .cpu_id = 401, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1046, .cpu_id = 402, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1047, .cpu_id = 403, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1048, .cpu_id = 404, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1049, .cpu_id = 405, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1050, .cpu_id = 406, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1051, .cpu_id = 407, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1052, .cpu_id = 408, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1053, .cpu_id = 409, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1054, .cpu_id = 410, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1055, .cpu_id = 411, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1056, .cpu_id = 412, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1057, .cpu_id = 413, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1058, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1059, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1060, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1061, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1062, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1063, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1064, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1065, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1066, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1067, .cpu_id = 414, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1068, .cpu_id = 415, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1069, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1070, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1071, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1072, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1073, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1074, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1075, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1076, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1077, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1078, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1079, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1080, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1081, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1082, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, 
.name = "" }, - { .fc_id = 1083, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1084, .cpu_id = 416, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1085, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1086, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1087, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1088, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1089, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1090, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1091, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1092, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1093, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1094, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1095, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1096, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1097, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1098, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1099, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1100, .cpu_id = 417, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1101, .cpu_id = 418, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1102, .cpu_id = 419, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1103, .cpu_id = 420, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1104, .cpu_id = 421, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1105, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1106, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1107, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1108, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1109, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1110, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1111, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1112, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1113, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1114, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1115, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1116, .cpu_id = 422, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1117, .cpu_id = 423, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1118, .cpu_id = 424, .valid = 1, - .msg = 0, .reset = 0, .name = "ROTATOR0_SERR" }, - { .fc_id = 1119, .cpu_id = 425, .valid = 1, - .msg = 0, .reset = 0, .name = "ROTATOR1_SERR" }, - { .fc_id = 1120, .cpu_id = 426, .valid = 1, - .msg = 0, .reset = 1, .name = "ROTATOR0_DERR" }, - { .fc_id = 1121, .cpu_id = 427, .valid = 1, - .msg = 0, .reset = 1, .name = "ROTATOR1_DERR" }, - { .fc_id = 1122, .cpu_id = 428, .valid = 1, - .msg = 0, .reset = 1, .name = "ROTATOR0_AXI_ERROR_RESPONSE" }, - { .fc_id = 1123, .cpu_id = 429, .valid = 1, - .msg = 0, .reset = 1, .name = 
"ROTATOR1_AXI_ERROR_RESPONSE" }, - { .fc_id = 1124, .cpu_id = 430, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1125, .cpu_id = 431, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1126, .cpu_id = 432, .valid = 1, - .msg = 0, .reset = 0, .name = "ROTATOR0_BMON_SPMU" }, - { .fc_id = 1127, .cpu_id = 433, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1128, .cpu_id = 434, .valid = 1, - .msg = 0, .reset = 0, .name = "ROTATOR1_BMON_SPMU" }, - { .fc_id = 1129, .cpu_id = 435, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1130, .cpu_id = 436, .valid = 1, - .msg = 0, .reset = 0, .name = "SM0_BMON_SPMU" }, - { .fc_id = 1131, .cpu_id = 437, .valid = 1, - .msg = 0, .reset = 0, .name = "SM1_BMON_SPMU" }, - { .fc_id = 1132, .cpu_id = 438, .valid = 1, - .msg = 0, .reset = 0, .name = "SM2_BMON_SPMU" }, - { .fc_id = 1133, .cpu_id = 439, .valid = 1, - .msg = 0, .reset = 0, .name = "SM3_BMON_SPMU" }, - { .fc_id = 1134, .cpu_id = 440, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1135, .cpu_id = 441, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1136, .cpu_id = 442, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1137, .cpu_id = 443, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1138, .cpu_id = 444, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1139, .cpu_id = 445, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1140, .cpu_id = 446, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1141, .cpu_id = 447, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1142, .cpu_id = 448, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1143, .cpu_id = 449, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1144, .cpu_id = 450, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1145, .cpu_id = 451, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1146, .cpu_id = 452, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1147, .cpu_id = 453, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1148, .cpu_id = 454, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1149, .cpu_id = 455, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1150, .cpu_id = 456, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1151, .cpu_id = 457, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1152, .cpu_id = 458, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1153, .cpu_id = 459, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1154, .cpu_id = 460, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1155, .cpu_id = 461, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1156, .cpu_id = 462, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1157, .cpu_id = 463, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1158, .cpu_id = 464, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1159, .cpu_id = 465, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1160, .cpu_id = 466, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1161, .cpu_id = 467, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1162, .cpu_id = 468, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1163, .cpu_id = 469, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1164, .cpu_id = 470, .valid = 0, - .msg = 0, .reset = 0, 
.name = "" }, - { .fc_id = 1165, .cpu_id = 471, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1166, .cpu_id = 472, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1167, .cpu_id = 473, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1168, .cpu_id = 474, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1169, .cpu_id = 475, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1170, .cpu_id = 476, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1171, .cpu_id = 477, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1172, .cpu_id = 478, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1173, .cpu_id = 479, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1174, .cpu_id = 480, .valid = 1, - .msg = 1, .reset = 0, .name = "PSOC_DMA_QM" }, - { .fc_id = 1175, .cpu_id = 481, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1176, .cpu_id = 482, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1177, .cpu_id = 483, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1178, .cpu_id = 484, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1179, .cpu_id = 485, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1180, .cpu_id = 486, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1181, .cpu_id = 487, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1182, .cpu_id = 488, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1183, .cpu_id = 489, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1184, .cpu_id = 490, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1185, .cpu_id = 491, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1186, .cpu_id = 492, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1187, .cpu_id = 493, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1188, .cpu_id = 494, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1189, .cpu_id = 495, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1190, .cpu_id = 496, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1191, .cpu_id = 497, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1192, .cpu_id = 498, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1193, .cpu_id = 499, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1194, .cpu_id = 500, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1195, .cpu_id = 501, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1196, .cpu_id = 502, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1197, .cpu_id = 503, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1198, .cpu_id = 504, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1199, .cpu_id = 505, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1200, .cpu_id = 506, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1201, .cpu_id = 507, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1202, .cpu_id = 508, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1203, .cpu_id = 509, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1204, .cpu_id = 510, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1205, .cpu_id = 511, .valid = 0, - .msg = 0, .reset = 0, .name = "" }, - { .fc_id = 1206, .cpu_id = 512, .valid = 1, - .msg = 1, .reset = 0, .name = 
"TPC0_QM" }, - { .fc_id = 1207, .cpu_id = 513, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC1_QM" }, - { .fc_id = 1208, .cpu_id = 514, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC2_QM" }, - { .fc_id = 1209, .cpu_id = 515, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC3_QM" }, - { .fc_id = 1210, .cpu_id = 516, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC4_QM" }, - { .fc_id = 1211, .cpu_id = 517, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC5_QM" }, - { .fc_id = 1212, .cpu_id = 518, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC6_QM" }, - { .fc_id = 1213, .cpu_id = 519, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC7_QM" }, - { .fc_id = 1214, .cpu_id = 520, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC8_QM" }, - { .fc_id = 1215, .cpu_id = 521, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC9_QM" }, - { .fc_id = 1216, .cpu_id = 522, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC10_QM" }, - { .fc_id = 1217, .cpu_id = 523, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC11_QM" }, - { .fc_id = 1218, .cpu_id = 524, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC12_QM" }, - { .fc_id = 1219, .cpu_id = 525, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC13_QM" }, - { .fc_id = 1220, .cpu_id = 526, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC14_QM" }, - { .fc_id = 1221, .cpu_id = 527, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC15_QM" }, - { .fc_id = 1222, .cpu_id = 528, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC16_QM" }, - { .fc_id = 1223, .cpu_id = 529, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC17_QM" }, - { .fc_id = 1224, .cpu_id = 530, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC18_QM" }, - { .fc_id = 1225, .cpu_id = 531, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC19_QM" }, - { .fc_id = 1226, .cpu_id = 532, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC20_QM" }, - { .fc_id = 1227, .cpu_id = 533, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC21_QM" }, - { .fc_id = 1228, .cpu_id = 534, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC22_QM" }, - { .fc_id = 1229, .cpu_id = 535, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC23_QM" }, - { .fc_id = 1230, .cpu_id = 536, .valid = 1, - .msg = 1, .reset = 0, .name = "TPC24_QM" }, - { .fc_id = 1231, .cpu_id = 537, .valid = 0, - .msg = 1, .reset = 0, .name = "" }, - { .fc_id = 1232, .cpu_id = 538, .valid = 1, - .msg = 1, .reset = 0, .name = "MME0_QM" }, - { .fc_id = 1233, .cpu_id = 539, .valid = 1, - .msg = 1, .reset = 0, .name = "MME1_QM" }, - { .fc_id = 1234, .cpu_id = 540, .valid = 1, - .msg = 1, .reset = 0, .name = "MME2_QM" }, - { .fc_id = 1235, .cpu_id = 541, .valid = 1, - .msg = 1, .reset = 0, .name = "MME3_QM" }, - { .fc_id = 1236, .cpu_id = 542, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA2_QM" }, - { .fc_id = 1237, .cpu_id = 543, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA3_QM" }, - { .fc_id = 1238, .cpu_id = 544, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA0_QM" }, - { .fc_id = 1239, .cpu_id = 545, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA1_QM" }, - { .fc_id = 1240, .cpu_id = 546, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA6_QM" }, - { .fc_id = 1241, .cpu_id = 547, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA7_QM" }, - { .fc_id = 1242, .cpu_id = 548, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA4_QM" }, - { .fc_id = 1243, .cpu_id = 549, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA5_QM" }, - { .fc_id = 1244, .cpu_id = 550, .valid = 1, - .msg = 1, .reset = 0, .name = "PDMA0_QM" }, - { .fc_id = 1245, .cpu_id = 551, .valid = 1, - .msg = 1, .reset = 
0, .name = "PDMA1_QM" }, - { .fc_id = 1246, .cpu_id = 552, .valid = 1, - .msg = 1, .reset = 0, .name = "PI_UPDATE" }, - { .fc_id = 1247, .cpu_id = 553, .valid = 1, - .msg = 1, .reset = 0, .name = "HALT_MACHINE" }, - { .fc_id = 1248, .cpu_id = 554, .valid = 1, - .msg = 1, .reset = 0, .name = "INTS_REGISTER" }, - { .fc_id = 1249, .cpu_id = 555, .valid = 1, - .msg = 1, .reset = 0, .name = "ROT0_QM" }, - { .fc_id = 1250, .cpu_id = 556, .valid = 1, - .msg = 1, .reset = 0, .name = "ROT1_QM" }, - { .fc_id = 1251, .cpu_id = 557, .valid = 1, - .msg = 1, .reset = 0, .name = "SOFT_RESET" }, - { .fc_id = 1252, .cpu_id = 558, .valid = 1, - .msg = 1, .reset = 0, .name = "CPLD_SHUTDOWN_CAUSE" }, - { .fc_id = 1253, .cpu_id = 559, .valid = 1, - .msg = 1, .reset = 0, .name = "FIX_POWER_ENV_S" }, - { .fc_id = 1254, .cpu_id = 560, .valid = 1, - .msg = 1, .reset = 0, .name = "FIX_POWER_ENV_E" }, - { .fc_id = 1255, .cpu_id = 561, .valid = 1, - .msg = 1, .reset = 0, .name = "FIX_THERMAL_ENV_S" }, - { .fc_id = 1256, .cpu_id = 562, .valid = 1, - .msg = 1, .reset = 0, .name = "FIX_THERMAL_ENV_E" }, - { .fc_id = 1257, .cpu_id = 563, .valid = 1, - .msg = 1, .reset = 0, .name = "CPLD_SHUTDOWN_EVENT" }, - { .fc_id = 1258, .cpu_id = 564, .valid = 1, - .msg = 1, .reset = 0, .name = "PKT_QUEUE_OUT_SYNC" }, - { .fc_id = 1259, .cpu_id = 565, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA2_CORE" }, - { .fc_id = 1260, .cpu_id = 566, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA3_CORE" }, - { .fc_id = 1261, .cpu_id = 567, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA0_CORE" }, - { .fc_id = 1262, .cpu_id = 568, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA1_CORE" }, - { .fc_id = 1263, .cpu_id = 569, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA6_CORE" }, - { .fc_id = 1264, .cpu_id = 570, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA7_CORE" }, - { .fc_id = 1265, .cpu_id = 571, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA4_CORE" }, - { .fc_id = 1266, .cpu_id = 572, .valid = 1, - .msg = 1, .reset = 0, .name = "HDMA5_CORE" }, - { .fc_id = 1267, .cpu_id = 573, .valid = 1, - .msg = 1, .reset = 0, .name = "PDMA0_CORE" }, - { .fc_id = 1268, .cpu_id = 574, .valid = 1, - .msg = 1, .reset = 0, .name = "PDMA1_CORE" }, - { .fc_id = 1269, .cpu_id = 575, .valid = 1, - .msg = 1, .reset = 0, .name = "KDMA0_CORE" }, - { .fc_id = 1270, .cpu_id = 576, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC0_QM0" }, - { .fc_id = 1271, .cpu_id = 577, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC0_QM1" }, - { .fc_id = 1272, .cpu_id = 578, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC1_QM0" }, - { .fc_id = 1273, .cpu_id = 579, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC1_QM1" }, - { .fc_id = 1274, .cpu_id = 580, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC2_QM0" }, - { .fc_id = 1275, .cpu_id = 581, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC2_QM1" }, - { .fc_id = 1276, .cpu_id = 582, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC3_QM0" }, - { .fc_id = 1277, .cpu_id = 583, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC3_QM1" }, - { .fc_id = 1278, .cpu_id = 584, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC4_QM0" }, - { .fc_id = 1279, .cpu_id = 585, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC4_QM1" }, - { .fc_id = 1280, .cpu_id = 586, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC5_QM0" }, - { .fc_id = 1281, .cpu_id = 587, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC5_QM1" }, - { .fc_id = 1282, .cpu_id = 588, .valid = 1, - .msg = 1, .reset = 0, .name = "NIC6_QM0" }, - { .fc_id = 1283, .cpu_id = 
-	{ .fc_id = 1284, .cpu_id = 590, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC7_QM0" },
-	{ .fc_id = 1285, .cpu_id = 591, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC7_QM1" },
-	{ .fc_id = 1286, .cpu_id = 592, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC8_QM0" },
-	{ .fc_id = 1287, .cpu_id = 593, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC8_QM1" },
-	{ .fc_id = 1288, .cpu_id = 594, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC9_QM0" },
-	{ .fc_id = 1289, .cpu_id = 595, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC9_QM1" },
-	{ .fc_id = 1290, .cpu_id = 596, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC10_QM0" },
-	{ .fc_id = 1291, .cpu_id = 597, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC10_QM1" },
-	{ .fc_id = 1292, .cpu_id = 598, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC11_QM0" },
-	{ .fc_id = 1293, .cpu_id = 599, .valid = 1,
-		.msg = 1, .reset = 0, .name = "NIC11_QM1" },
-	{ .fc_id = 1294, .cpu_id = 600, .valid = 1,
-		.msg = 1, .reset = 0, .name = "CPU_PKT_SANITY_FAILED" },
-	{ .fc_id = 1295, .cpu_id = 601, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC0_ENG0" },
-	{ .fc_id = 1296, .cpu_id = 602, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC0_ENG1" },
-	{ .fc_id = 1297, .cpu_id = 603, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC1_ENG0" },
-	{ .fc_id = 1298, .cpu_id = 604, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC1_ENG1" },
-	{ .fc_id = 1299, .cpu_id = 605, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC2_ENG0" },
-	{ .fc_id = 1300, .cpu_id = 606, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC2_ENG1" },
-	{ .fc_id = 1301, .cpu_id = 607, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC3_ENG0" },
-	{ .fc_id = 1302, .cpu_id = 608, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC3_ENG1" },
-	{ .fc_id = 1303, .cpu_id = 609, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC4_ENG0" },
-	{ .fc_id = 1304, .cpu_id = 610, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC4_ENG1" },
-	{ .fc_id = 1305, .cpu_id = 611, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC5_ENG0" },
-	{ .fc_id = 1306, .cpu_id = 612, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC5_ENG1" },
-	{ .fc_id = 1307, .cpu_id = 613, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC6_ENG0" },
-	{ .fc_id = 1308, .cpu_id = 614, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC6_ENG1" },
-	{ .fc_id = 1309, .cpu_id = 615, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC7_ENG0" },
-	{ .fc_id = 1310, .cpu_id = 616, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC7_ENG1" },
-	{ .fc_id = 1311, .cpu_id = 617, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC8_ENG0" },
-	{ .fc_id = 1312, .cpu_id = 618, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC8_ENG1" },
-	{ .fc_id = 1313, .cpu_id = 619, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC9_ENG0" },
-	{ .fc_id = 1314, .cpu_id = 620, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC9_ENG1" },
-	{ .fc_id = 1315, .cpu_id = 621, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC10_ENG0" },
-	{ .fc_id = 1316, .cpu_id = 622, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC10_ENG1" },
-	{ .fc_id = 1317, .cpu_id = 623, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC11_ENG0" },
-	{ .fc_id = 1318, .cpu_id = 624, .valid = 1,
-		.msg = 1, .reset = 0, .name = "STATUS_NIC11_ENG1" },
-	{ .fc_id = 1319, .cpu_id = 625, .valid = 1,
-		.msg = 1, .reset = 0, .name = "ARC_DCCM_FULL" },
.reset = 0, .name = "ARC_DCCM_FULL" }, - { .fc_id = 1320, .cpu_id = 626, .valid = 1, - .msg = 1, .reset = 1, .name = "FP32_NOT_SUPPORTED" }, - { .fc_id = 1321, .cpu_id = 627, .valid = 1, - .msg = 1, .reset = 1, .name = "DEV_RESET_REQ" }, + { .fc_id = 0, .cpu_id = 0, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1, .cpu_id = 1, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 2, .cpu_id = 2, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 3, .cpu_id = 3, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 4, .cpu_id = 4, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 5, .cpu_id = 5, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 6, .cpu_id = 6, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 7, .cpu_id = 7, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 8, .cpu_id = 8, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 9, .cpu_id = 9, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 10, .cpu_id = 10, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 11, .cpu_id = 11, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 12, .cpu_id = 12, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 13, .cpu_id = 13, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 14, .cpu_id = 14, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 15, .cpu_id = 15, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 16, .cpu_id = 16, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 17, .cpu_id = 17, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 18, .cpu_id = 18, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 19, .cpu_id = 19, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 20, .cpu_id = 20, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 21, .cpu_id = 21, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 22, .cpu_id = 22, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 23, .cpu_id = 23, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 24, .cpu_id = 24, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 25, .cpu_id = 25, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 26, .cpu_id = 26, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 27, .cpu_id = 27, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 28, .cpu_id = 28, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 29, .cpu_id = 29, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 30, .cpu_id = 30, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 31, .cpu_id = 31, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 32, .cpu_id = 32, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = 
"PCIE_CORE_SERR" }, + { .fc_id = 33, .cpu_id = 33, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PCIE_CORE_DERR" }, + { .fc_id = 34, .cpu_id = 34, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PCIE_IF_SERR" }, + { .fc_id = 35, .cpu_id = 35, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PCIE_IF_DERR" }, + { .fc_id = 36, .cpu_id = 36, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PCIE_PHY_SERR" }, + { .fc_id = 37, .cpu_id = 37, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PCIE_PHY_DERR" }, + { .fc_id = 38, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC0_ECC_SERR" }, + { .fc_id = 39, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC1_ECC_SERR" }, + { .fc_id = 40, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC2_ECC_SERR" }, + { .fc_id = 41, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC3_ECC_SERR" }, + { .fc_id = 42, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC4_ECC_SERR" }, + { .fc_id = 43, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC5_ECC_SERR" }, + { .fc_id = 44, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC6_ECC_SERR" }, + { .fc_id = 45, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC7_ECC_SERR" }, + { .fc_id = 46, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC8_ECC_SERR" }, + { .fc_id = 47, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC9_ECC_SERR" }, + { .fc_id = 48, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC10_ECC_SERR" }, + { .fc_id = 49, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC11_ECC_SERR" }, + { .fc_id = 50, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC12_ECC_SERR" }, + { .fc_id = 51, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC13_ECC_SERR" }, + { .fc_id = 52, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC14_ECC_SERR" }, + { .fc_id = 53, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC15_ECC_SERR" }, + { .fc_id = 54, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC16_ECC_SERR" }, + { .fc_id = 55, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC17_ECC_SERR" }, + { .fc_id = 56, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC18_ECC_SERR" }, + { .fc_id = 57, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC19_ECC_SERR" }, + { .fc_id = 58, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC20_ECC_SERR" }, + { .fc_id = 59, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC21_ECC_SERR" }, + { .fc_id = 60, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC22_ECC_SERR" }, + { .fc_id = 61, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC23_ECC_SERR" }, + { .fc_id = 62, .cpu_id = 38, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC24_ECC_SERR" }, + { .fc_id = 63, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = 
"TPC0_ECC_DERR" }, + { .fc_id = 64, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC1_ECC_DERR" }, + { .fc_id = 65, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC2_ECC_DERR" }, + { .fc_id = 66, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC3_ECC_DERR" }, + { .fc_id = 67, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC4_ECC_DERR" }, + { .fc_id = 68, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC5_ECC_DERR" }, + { .fc_id = 69, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC6_ECC_DERR" }, + { .fc_id = 70, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC7_ECC_DERR" }, + { .fc_id = 71, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC8_ECC_DERR" }, + { .fc_id = 72, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC9_ECC_DERR" }, + { .fc_id = 73, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC10_ECC_DERR" }, + { .fc_id = 74, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC11_ECC_DERR" }, + { .fc_id = 75, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC12_ECC_DERR" }, + { .fc_id = 76, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC13_ECC_DERR" }, + { .fc_id = 77, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC14_ECC_DERR" }, + { .fc_id = 78, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC15_ECC_DERR" }, + { .fc_id = 79, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC16_ECC_DERR" }, + { .fc_id = 80, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC17_ECC_DERR" }, + { .fc_id = 81, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC18_ECC_DERR" }, + { .fc_id = 82, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC19_ECC_DERR" }, + { .fc_id = 83, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC20_ECC_DERR" }, + { .fc_id = 84, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC21_ECC_DERR" }, + { .fc_id = 85, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC22_ECC_DERR" }, + { .fc_id = 86, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC23_ECC_DERR" }, + { .fc_id = 87, .cpu_id = 39, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "TPC24_ECC_DERR" }, + { .fc_id = 88, .cpu_id = 40, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_SBTE0_ECC_SERR" }, + { .fc_id = 89, .cpu_id = 40, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_SBTE1_ECC_SERR" }, + { .fc_id = 90, .cpu_id = 40, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_SBTE2_ECC_SERR" }, + { .fc_id = 91, .cpu_id = 40, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_SBTE3_ECC_SERR" }, + { .fc_id = 92, .cpu_id = 40, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_SBTE4_ECC_SERR" }, + { .fc_id = 93, .cpu_id = 40, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_CTRL_ECC_SERR" }, + { .fc_id = 94, .cpu_id = 40, .valid = 1, .msg = 0, .reset = 
+	{ .fc_id = 95, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_SBTE0_ECC_SERR" },
+	{ .fc_id = 96, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_SBTE1_ECC_SERR" },
+	{ .fc_id = 97, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_SBTE2_ECC_SERR" },
+	{ .fc_id = 98, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_SBTE3_ECC_SERR" },
+	{ .fc_id = 99, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_SBTE4_ECC_SERR" },
+	{ .fc_id = 100, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_CTRL_ECC_SERR" },
+	{ .fc_id = 101, .cpu_id = 41, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME1_WAP_ECC_SERR" },
+	{ .fc_id = 102, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_SBTE0_ECC_SERR" },
+	{ .fc_id = 103, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_SBTE1_ECC_SERR" },
+	{ .fc_id = 104, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_SBTE2_ECC_SERR" },
+	{ .fc_id = 105, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_SBTE3_ECC_SERR" },
+	{ .fc_id = 106, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_SBTE4_ECC_SERR" },
+	{ .fc_id = 107, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_CTRL_ECC_SERR" },
+	{ .fc_id = 108, .cpu_id = 42, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME2_WAP_ECC_SERR" },
+	{ .fc_id = 109, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_SBTE0_ECC_SERR" },
+	{ .fc_id = 110, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_SBTE1_ECC_SERR" },
+	{ .fc_id = 111, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_SBTE2_ECC_SERR" },
+	{ .fc_id = 112, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_SBTE3_ECC_SERR" },
+	{ .fc_id = 113, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_SBTE4_ECC_SERR" },
+	{ .fc_id = 114, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_CTRL_ECC_SERR" },
+	{ .fc_id = 115, .cpu_id = 43, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "MME3_WAP_ECC_SERR" },
+	{ .fc_id = 116, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_SBTE0_ECC_DERR" },
+	{ .fc_id = 117, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_SBTE1_ECC_DERR" },
+	{ .fc_id = 118, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_SBTE2_ECC_DERR" },
+	{ .fc_id = 119, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_SBTE3_ECC_DERR" },
+	{ .fc_id = 120, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_SBTE4_ECC_DERR" },
+	{ .fc_id = 121, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_CTRL_ECC_DERR" },
+	{ .fc_id = 122, .cpu_id = 44, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME0_WAP_ECC_DERR" },
+	{ .fc_id = 123, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_SBTE0_ECC_DERR" },
+	{ .fc_id = 124, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_SBTE1_ECC_DERR" },
+	{ .fc_id = 125, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_SBTE2_ECC_DERR" },
+	{ .fc_id = 126, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_SBTE3_ECC_DERR" },
+	{ .fc_id = 127, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_SBTE4_ECC_DERR" },
+	{ .fc_id = 128, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_CTRL_ECC_DERR" },
+	{ .fc_id = 129, .cpu_id = 45, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME1_WAP_ECC_DERR" },
+	{ .fc_id = 130, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_SBTE0_ECC_DERR" },
+	{ .fc_id = 131, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_SBTE1_ECC_DERR" },
+	{ .fc_id = 132, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_SBTE2_ECC_DERR" },
+	{ .fc_id = 133, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_SBTE3_ECC_DERR" },
+	{ .fc_id = 134, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_SBTE4_ECC_DERR" },
+	{ .fc_id = 135, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_CTRL_ECC_DERR" },
+	{ .fc_id = 136, .cpu_id = 46, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME2_WAP_ECC_DERR" },
+	{ .fc_id = 137, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_SBTE0_ECC_DERR" },
+	{ .fc_id = 138, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_SBTE1_ECC_DERR" },
+	{ .fc_id = 139, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_SBTE2_ECC_DERR" },
+	{ .fc_id = 140, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_SBTE3_ECC_DERR" },
+	{ .fc_id = 141, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_SBTE4_ECC_DERR" },
+	{ .fc_id = 142, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_CTRL_ECC_DERR" },
+	{ .fc_id = 143, .cpu_id = 47, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "MME3_WAP_ECC_DERR" },
+	{ .fc_id = 144, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA2_ECC_SERR" },
+	{ .fc_id = 145, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA3_ECC_SERR" },
+	{ .fc_id = 146, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA0_ECC_SERR" },
+	{ .fc_id = 147, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA1_ECC_SERR" },
+	{ .fc_id = 148, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA6_ECC_SERR" },
+	{ .fc_id = 149, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA7_ECC_SERR" },
+	{ .fc_id = 150, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA4_ECC_SERR" },
+	{ .fc_id = 151, .cpu_id = 48, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "EDMA5_ECC_SERR" },
+	{ .fc_id = 152, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "EDMA2_ECC_DERR" },
+	{ .fc_id = 153, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "EDMA3_ECC_DERR" },
"EDMA3_ECC_DERR" }, + { .fc_id = 154, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "EDMA0_ECC_DERR" }, + { .fc_id = 155, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "EDMA1_ECC_DERR" }, + { .fc_id = 156, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "EDMA6_ECC_DERR" }, + { .fc_id = 157, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "EDMA7_ECC_DERR" }, + { .fc_id = 158, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "EDMA4_ECC_DERR" }, + { .fc_id = 159, .cpu_id = 49, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "EDMA5_ECC_DERR" }, + { .fc_id = 160, .cpu_id = 50, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "KDMA0_ECC_SERR" }, + { .fc_id = 161, .cpu_id = 51, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PDMA0_ECC_SERR" }, + { .fc_id = 162, .cpu_id = 51, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PDMA1_ECC_SERR" }, + { .fc_id = 163, .cpu_id = 52, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "KDMA0_ECC_DERR" }, + { .fc_id = 164, .cpu_id = 53, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PDMA0_ECC_DERR" }, + { .fc_id = 165, .cpu_id = 53, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PDMA1_ECC_DERR" }, + { .fc_id = 166, .cpu_id = 54, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "CPU_IF_ECC_SERR" }, + { .fc_id = 167, .cpu_id = 55, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "CPU_IF_ECC_DERR" }, + { .fc_id = 168, .cpu_id = 56, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PSOC_MEM_SERR" }, + { .fc_id = 169, .cpu_id = 57, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PSOC_MEM_DERR" }, + { .fc_id = 170, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM0_ECC_SERR" }, + { .fc_id = 171, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM1_ECC_SERR" }, + { .fc_id = 172, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM2_ECC_SERR" }, + { .fc_id = 173, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM3_ECC_SERR" }, + { .fc_id = 174, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM4_ECC_SERR" }, + { .fc_id = 175, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM5_ECC_SERR" }, + { .fc_id = 176, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM6_ECC_SERR" }, + { .fc_id = 177, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM7_ECC_SERR" }, + { .fc_id = 178, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM8_ECC_SERR" }, + { .fc_id = 179, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM9_ECC_SERR" }, + { .fc_id = 180, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM10_ECC_SERR" }, + { .fc_id = 181, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM11_ECC_SERR" }, + { .fc_id = 182, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM12_ECC_SERR" }, + { .fc_id = 183, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "SRAM13_ECC_SERR" }, + { .fc_id = 184, .cpu_id = 58, .valid = 1, .msg = 
+	{ .fc_id = 185, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM15_ECC_SERR" },
+	{ .fc_id = 186, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM16_ECC_SERR" },
+	{ .fc_id = 187, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM17_ECC_SERR" },
+	{ .fc_id = 188, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM18_ECC_SERR" },
+	{ .fc_id = 189, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM19_ECC_SERR" },
+	{ .fc_id = 190, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM20_ECC_SERR" },
+	{ .fc_id = 191, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM21_ECC_SERR" },
+	{ .fc_id = 192, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM22_ECC_SERR" },
+	{ .fc_id = 193, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM23_ECC_SERR" },
+	{ .fc_id = 194, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM24_ECC_SERR" },
+	{ .fc_id = 195, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM25_ECC_SERR" },
+	{ .fc_id = 196, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM26_ECC_SERR" },
+	{ .fc_id = 197, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM27_ECC_SERR" },
+	{ .fc_id = 198, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM28_ECC_SERR" },
+	{ .fc_id = 199, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM29_ECC_SERR" },
+	{ .fc_id = 200, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM30_ECC_SERR" },
+	{ .fc_id = 201, .cpu_id = 58, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SRAM31_ECC_SERR" },
+	{ .fc_id = 202, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM0_ECC_DERR" },
+	{ .fc_id = 203, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM1_ECC_DERR" },
+	{ .fc_id = 204, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM2_ECC_DERR" },
+	{ .fc_id = 205, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM3_ECC_DERR" },
+	{ .fc_id = 206, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM4_ECC_DERR" },
+	{ .fc_id = 207, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM5_ECC_DERR" },
+	{ .fc_id = 208, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM6_ECC_DERR" },
+	{ .fc_id = 209, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM7_ECC_DERR" },
+	{ .fc_id = 210, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM8_ECC_DERR" },
+	{ .fc_id = 211, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM9_ECC_DERR" },
+	{ .fc_id = 212, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM10_ECC_DERR" },
+	{ .fc_id = 213, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM11_ECC_DERR" },
+	{ .fc_id = 214, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SRAM12_ECC_DERR" },
"SRAM12_ECC_DERR" }, + { .fc_id = 215, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM13_ECC_DERR" }, + { .fc_id = 216, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM14_ECC_DERR" }, + { .fc_id = 217, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM15_ECC_DERR" }, + { .fc_id = 218, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM16_ECC_DERR" }, + { .fc_id = 219, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM17_ECC_DERR" }, + { .fc_id = 220, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM18_ECC_DERR" }, + { .fc_id = 221, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM19_ECC_DERR" }, + { .fc_id = 222, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM20_ECC_DERR" }, + { .fc_id = 223, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM21_ECC_DERR" }, + { .fc_id = 224, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM22_ECC_DERR" }, + { .fc_id = 225, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM23_ECC_DERR" }, + { .fc_id = 226, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM24_ECC_DERR" }, + { .fc_id = 227, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM25_ECC_DERR" }, + { .fc_id = 228, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM26_ECC_DERR" }, + { .fc_id = 229, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM27_ECC_DERR" }, + { .fc_id = 230, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM28_ECC_DERR" }, + { .fc_id = 231, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM29_ECC_DERR" }, + { .fc_id = 232, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM30_ECC_DERR" }, + { .fc_id = 233, .cpu_id = 59, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "SRAM31_ECC_DERR" }, + { .fc_id = 234, .cpu_id = 60, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "GIC500" }, + { .fc_id = 235, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_0_MC0_ECC_SERR" }, + { .fc_id = 236, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_1_MC0_ECC_SERR" }, + { .fc_id = 237, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_2_MC0_ECC_SERR" }, + { .fc_id = 238, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_3_MC0_ECC_SERR" }, + { .fc_id = 239, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_4_MC0_ECC_SERR" }, + { .fc_id = 240, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_5_MC0_ECC_SERR" }, + { .fc_id = 241, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_0_MC1_ECC_SERR" }, + { .fc_id = 242, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_1_MC1_ECC_SERR" }, + { .fc_id = 243, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_2_MC1_ECC_SERR" }, + { .fc_id = 244, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM_3_MC1_ECC_SERR" }, + 
+	{ .fc_id = 245, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HBM_4_MC1_ECC_SERR" },
+	{ .fc_id = 246, .cpu_id = 61, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HBM_5_MC1_ECC_SERR" },
+	{ .fc_id = 247, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_0_MC0_ECC_DERR" },
+	{ .fc_id = 248, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_1_MC0_ECC_DERR" },
+	{ .fc_id = 249, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_2_MC0_ECC_DERR" },
+	{ .fc_id = 250, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_3_MC0_ECC_DERR" },
+	{ .fc_id = 251, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_4_MC0_ECC_DERR" },
+	{ .fc_id = 252, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_5_MC0_ECC_DERR" },
+	{ .fc_id = 253, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_0_MC1_ECC_DERR" },
+	{ .fc_id = 254, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_1_MC1_ECC_DERR" },
+	{ .fc_id = 255, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_2_MC1_ECC_DERR" },
+	{ .fc_id = 256, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_3_MC1_ECC_DERR" },
+	{ .fc_id = 257, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_4_MC1_ECC_DERR" },
+	{ .fc_id = 258, .cpu_id = 62, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HBM_5_MC1_ECC_DERR" },
+	{ .fc_id = 259, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_0_ECC_SERR" },
+	{ .fc_id = 260, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_1_ECC_SERR" },
+	{ .fc_id = 261, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_2_ECC_SERR" },
+	{ .fc_id = 262, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_3_ECC_SERR" },
+	{ .fc_id = 263, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_8_ECC_SERR" },
+	{ .fc_id = 264, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_9_ECC_SERR" },
+	{ .fc_id = 265, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_10_ECC_SERR" },
+	{ .fc_id = 266, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_11_ECC_SERR" },
+	{ .fc_id = 267, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_7_ECC_SERR" },
+	{ .fc_id = 268, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_6_ECC_SERR" },
+	{ .fc_id = 269, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_5_ECC_SERR" },
+	{ .fc_id = 270, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_4_ECC_SERR" },
+	{ .fc_id = 271, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_15_ECC_SERR" },
+	{ .fc_id = 272, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_14_ECC_SERR" },
+	{ .fc_id = 273, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_13_ECC_SERR" },
+	{ .fc_id = 274, .cpu_id = 63, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HMMU_12_ECC_SERR" },
+	{ .fc_id = 275, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_0_ECC_DERR" },
+	{ .fc_id = 276, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_1_ECC_DERR" },
+	{ .fc_id = 277, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_2_ECC_DERR" },
+	{ .fc_id = 278, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_3_ECC_DERR" },
+	{ .fc_id = 279, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_8_ECC_DERR" },
+	{ .fc_id = 280, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_9_ECC_DERR" },
+	{ .fc_id = 281, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_10_ECC_DERR" },
+	{ .fc_id = 282, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_11_ECC_DERR" },
+	{ .fc_id = 283, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_7_ECC_DERR" },
+	{ .fc_id = 284, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_6_ECC_DERR" },
+	{ .fc_id = 285, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_5_ECC_DERR" },
+	{ .fc_id = 286, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_4_ECC_DERR" },
+	{ .fc_id = 287, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_15_ECC_DERR" },
+	{ .fc_id = 288, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_14_ECC_DERR" },
+	{ .fc_id = 289, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_13_ECC_DERR" },
+	{ .fc_id = 290, .cpu_id = 64, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_12_ECC_DERR" },
+	{ .fc_id = 291, .cpu_id = 65, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "PMMU_ECC_SERR" },
+	{ .fc_id = 292, .cpu_id = 66, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PMMU_ECC_DERR" },
+	{ .fc_id = 293, .cpu_id = 67, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" },
+	{ .fc_id = 294, .cpu_id = 68, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" },
+	{ .fc_id = 295, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC0_VCD_ECC_SERR" },
+	{ .fc_id = 296, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC1_VCD_ECC_SERR" },
+	{ .fc_id = 297, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC2_VCD_ECC_SERR" },
+	{ .fc_id = 298, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC3_VCD_ECC_SERR" },
+	{ .fc_id = 299, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC4_VCD_ECC_SERR" },
+	{ .fc_id = 300, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC5_VCD_ECC_SERR" },
+	{ .fc_id = 301, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC6_VCD_ECC_SERR" },
+	{ .fc_id = 302, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC7_VCD_ECC_SERR" },
+	{ .fc_id = 303, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC8_VCD_ECC_SERR" },
+	{ .fc_id = 304, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC9_VCD_ECC_SERR" },
+	{ .fc_id = 305, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC0_L2C_ECC_SERR" },
+	{ .fc_id = 306, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC1_L2C_ECC_SERR" },
+	{ .fc_id = 307, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC2_L2C_ECC_SERR" },
+	{ .fc_id = 308, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC3_L2C_ECC_SERR" },
+	{ .fc_id = 309, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC4_L2C_ECC_SERR" },
+	{ .fc_id = 310, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC5_L2C_ECC_SERR" },
+	{ .fc_id = 311, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC6_L2C_ECC_SERR" },
+	{ .fc_id = 312, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC7_L2C_ECC_SERR" },
+	{ .fc_id = 313, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC8_L2C_ECC_SERR" },
+	{ .fc_id = 314, .cpu_id = 69, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "DEC9_L2C_ECC_SERR" },
+	{ .fc_id = 315, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC0_VCD_ECC_DERR" },
+	{ .fc_id = 316, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC1_VCD_ECC_DERR" },
+	{ .fc_id = 317, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC2_VCD_ECC_DERR" },
+	{ .fc_id = 318, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC3_VCD_ECC_DERR" },
+	{ .fc_id = 319, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC4_VCD_ECC_DERR" },
+	{ .fc_id = 320, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC5_VCD_ECC_DERR" },
+	{ .fc_id = 321, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC6_VCD_ECC_DERR" },
+	{ .fc_id = 322, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC7_VCD_ECC_DERR" },
+	{ .fc_id = 323, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC8_VCD_ECC_DERR" },
+	{ .fc_id = 324, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC9_VCD_ECC_DERR" },
+	{ .fc_id = 325, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC0_L2C_ECC_DERR" },
+	{ .fc_id = 326, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC1_L2C_ECC_DERR" },
+	{ .fc_id = 327, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC2_L2C_ECC_DERR" },
+	{ .fc_id = 328, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC3_L2C_ECC_DERR" },
+	{ .fc_id = 329, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC4_L2C_ECC_DERR" },
+	{ .fc_id = 330, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC5_L2C_ECC_DERR" },
+	{ .fc_id = 331, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC6_L2C_ECC_DERR" },
+	{ .fc_id = 332, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC7_L2C_ECC_DERR" },
+	{ .fc_id = 333, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC8_L2C_ECC_DERR" },
+	{ .fc_id = 334, .cpu_id = 70, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DEC9_L2C_ECC_DERR" },
+	{ .fc_id = 335, .cpu_id = 71, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" },
+	{ .fc_id = 336, .cpu_id = 72, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" },
+	{ .fc_id = 337, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF0_ECC_SERR" },
+	{ .fc_id = 338, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF1_ECC_SERR" },
+	{ .fc_id = 339, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF2_ECC_SERR" },
+	{ .fc_id = 340, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF3_ECC_SERR" },
+	{ .fc_id = 341, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF8_ECC_SERR" },
+	{ .fc_id = 342, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF9_ECC_SERR" },
+	{ .fc_id = 343, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF10_ECC_SERR" },
+	{ .fc_id = 344, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF11_ECC_SERR" },
+	{ .fc_id = 345, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF7_ECC_SERR" },
+	{ .fc_id = 346, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF6_ECC_SERR" },
+	{ .fc_id = 347, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF5_ECC_SERR" },
+	{ .fc_id = 348, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF4_ECC_SERR" },
+	{ .fc_id = 349, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF15_ECC_SERR" },
+	{ .fc_id = 350, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF14_ECC_SERR" },
+	{ .fc_id = 351, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF13_ECC_SERR" },
+	{ .fc_id = 352, .cpu_id = 73, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "HIF12_ECC_SERR" },
+	{ .fc_id = 353, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF0_ECC_DERR" },
+	{ .fc_id = 354, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF1_ECC_DERR" },
+	{ .fc_id = 355, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF2_ECC_DERR" },
+	{ .fc_id = 356, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF3_ECC_DERR" },
+	{ .fc_id = 357, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF8_ECC_DERR" },
+	{ .fc_id = 358, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF9_ECC_DERR" },
+	{ .fc_id = 359, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF10_ECC_DERR" },
+	{ .fc_id = 360, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF11_ECC_DERR" },
+	{ .fc_id = 361, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF7_ECC_DERR" },
+	{ .fc_id = 362, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF6_ECC_DERR" },
+	{ .fc_id = 363, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF5_ECC_DERR" },
+	{ .fc_id = 364, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF4_ECC_DERR" },
+	{ .fc_id = 365, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF15_ECC_DERR" },
+	{ .fc_id = 366, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF14_ECC_DERR" },
+	{ .fc_id = 367, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF13_ECC_DERR" },
+	{ .fc_id = 368, .cpu_id = 74, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HIF12_ECC_DERR" },
+	{ .fc_id = 369, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC0_ECC_SERR" },
+	{ .fc_id = 370, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC1_ECC_SERR" },
+	{ .fc_id = 371, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC2_ECC_SERR" },
+	{ .fc_id = 372, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC3_ECC_SERR" },
+	{ .fc_id = 373, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC4_ECC_SERR" },
+	{ .fc_id = 374, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC5_ECC_SERR" },
+	{ .fc_id = 375, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC6_ECC_SERR" },
+	{ .fc_id = 376, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC7_ECC_SERR" },
+	{ .fc_id = 377, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC8_ECC_SERR" },
+	{ .fc_id = 378, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC9_ECC_SERR" },
+	{ .fc_id = 379, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC10_ECC_SERR" },
+	{ .fc_id = 380, .cpu_id = 75, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "NIC11_ECC_SERR" },
+	{ .fc_id = 381, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC0_ECC_DERR" },
+	{ .fc_id = 382, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC1_ECC_DERR" },
+	{ .fc_id = 383, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC2_ECC_DERR" },
+	{ .fc_id = 384, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC3_ECC_DERR" },
+	{ .fc_id = 385, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC4_ECC_DERR" },
+	{ .fc_id = 386, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC5_ECC_DERR" },
+	{ .fc_id = 387, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC6_ECC_DERR" },
+	{ .fc_id = 388, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC7_ECC_DERR" },
+	{ .fc_id = 389, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC8_ECC_DERR" },
+	{ .fc_id = 390, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC9_ECC_DERR" },
+	{ .fc_id = 391, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC10_ECC_DERR" },
+	{ .fc_id = 392, .cpu_id = 76, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "NIC11_ECC_DERR" },
+	{ .fc_id = 393, .cpu_id = 77, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SM0_ECC_DERR" },
+	{ .fc_id = 394, .cpu_id = 77, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SM1_ECC_DERR" },
+	{ .fc_id = 395, .cpu_id = 77, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SM2_ECC_DERR" },
+	{ .fc_id = 396, .cpu_id = 77, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "SM3_ECC_DERR" },
+	{ .fc_id = 397, .cpu_id = 78, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SM0_ECC_SERR" },
+	{ .fc_id = 398, .cpu_id = 78, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SM1_ECC_SERR" },
+	{ .fc_id = 399, .cpu_id = 78, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SM2_ECC_SERR" },
+	{ .fc_id = 400, .cpu_id = 78, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "SM3_ECC_SERR" },
+	{ .fc_id = 401, .cpu_id = 79, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "XBAR0_ECC_SERR" },
+	{ .fc_id = 402, .cpu_id = 79, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "XBAR1_ECC_SERR" },
+	{ .fc_id = 403, .cpu_id = 79, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "XBAR2_ECC_SERR" },
+	{ .fc_id = 404, .cpu_id = 79, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "XBAR3_ECC_SERR" },
+	{ .fc_id = 405, .cpu_id = 80, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "XBAR0_ECC_DERR" },
+	{ .fc_id = 406, .cpu_id = 80, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "XBAR1_ECC_DERR" },
+	{ .fc_id = 407, .cpu_id = 80, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "XBAR2_ECC_DERR" },
+	{ .fc_id = 408, .cpu_id = 80, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "XBAR3_ECC_DERR" },
+	{ .fc_id = 409, .cpu_id = 81, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "ARC0_ECC_SERR" },
+	{ .fc_id = 410, .cpu_id = 82, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "ARC0_ECC_DERR" },
+	{ .fc_id = 411, .cpu_id = 83, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "" },
+	{ .fc_id = 412, .cpu_id = 84, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PCIE_ADDR_DEC_ERR" },
+	{ .fc_id = 413, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE0_TPC0_AXI_ERR_RSP" },
+	{ .fc_id = 414, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE0_TPC1_AXI_ERR_RSP" },
+	{ .fc_id = 415, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE0_TPC2_AXI_ERR_RSP" },
+	{ .fc_id = 416, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE0_TPC3_AXI_ERR_RSP" },
+	{ .fc_id = 417, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE0_TPC4_AXI_ERR_RSP" },
+	{ .fc_id = 418, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE0_TPC5_AXI_ERR_RSP" },
+	{ .fc_id = 419, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE1_TPC0_AXI_ERR_RSP" },
+	{ .fc_id = 420, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE1_TPC1_AXI_ERR_RSP" },
+	{ .fc_id = 421, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE1_TPC2_AXI_ERR_RSP" },
+	{ .fc_id = 422, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE1_TPC3_AXI_ERR_RSP" },
+	{ .fc_id = 423, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE1_TPC4_AXI_ERR_RSP" },
+	{ .fc_id = 424, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE1_TPC5_AXI_ERR_RSP" },
+	{ .fc_id = 425, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE2_TPC0_AXI_ERR_RSP" },
+	{ .fc_id = 426, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE2_TPC1_AXI_ERR_RSP" },
+	{ .fc_id = 427, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE2_TPC2_AXI_ERR_RSP" },
+	{ .fc_id = 428, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE2_TPC3_AXI_ERR_RSP" },
+	{ .fc_id = 429, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE2_TPC4_AXI_ERR_RSP" },
+	{ .fc_id = 430, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE2_TPC5_AXI_ERR_RSP" },
+	{ .fc_id = 431, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE3_TPC0_AXI_ERR_RSP" },
+	{ .fc_id = 432, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE3_TPC1_AXI_ERR_RSP" },
+	{ .fc_id = 433, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE3_TPC2_AXI_ERR_RSP" },
+	{ .fc_id = 434, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE3_TPC3_AXI_ERR_RSP" },
+	{ .fc_id = 435, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE3_TPC4_AXI_ERR_RSP" },
+	{ .fc_id = 436, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE3_TPC5_AXI_ERR_RSP" },
+	{ .fc_id = 437, .cpu_id = 85, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "DCORE4_TPC0_AXI_ERR_RSP" },
+	{ .fc_id = 438, .cpu_id = 86, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "AXI_ECC" },
+	{ .fc_id = 439, .cpu_id = 87, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "L2_RAM_ECC" },
+	{ .fc_id = 440, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_SBTE0_AXI_ERR_RSP" },
+	{ .fc_id = 441, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_SBTE1_AXI_ERR_RSP" },
+	{ .fc_id = 442, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_SBTE2_AXI_ERR_RSP" },
+	{ .fc_id = 443, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_SBTE3_AXI_ERR_RSP" },
+	{ .fc_id = 444, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_SBTE4_AXI_ERR_RSP" },
+	{ .fc_id = 445, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_CTRL_AXI_ERROR_RESPONSE" },
+	{ .fc_id = 446, .cpu_id = 88, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME0_QMAN_SW_ERROR" },
+	{ .fc_id = 447, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_SBTE0_AXI_ERR_RSP" },
+	{ .fc_id = 448, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_SBTE1_AXI_ERR_RSP" },
+	{ .fc_id = 449, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_SBTE2_AXI_ERR_RSP" },
+	{ .fc_id = 450, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_SBTE3_AXI_ERR_RSP" },
+	{ .fc_id = 451, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_SBTE4_AXI_ERR_RSP" },
+	{ .fc_id = 452, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_CTRL_AXI_ERROR_RESPONSE" },
+	{ .fc_id = 453, .cpu_id = 89, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME1_QMAN_SW_ERROR" },
+	{ .fc_id = 454, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_SBTE0_AXI_ERR_RSP" },
+	{ .fc_id = 455, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_SBTE1_AXI_ERR_RSP" },
+	{ .fc_id = 456, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_SBTE2_AXI_ERR_RSP" },
+	{ .fc_id = 457, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_SBTE3_AXI_ERR_RSP" },
+	{ .fc_id = 458, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_SBTE4_AXI_ERR_RSP" },
+	{ .fc_id = 459, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_CTRL_AXI_ERROR_RESPONSE" },
+	{ .fc_id = 460, .cpu_id = 90, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME2_QMAN_SW_ERROR" },
+	{ .fc_id = 461, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_SBTE0_AXI_ERR_RSP" },
+	{ .fc_id = 462, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_SBTE1_AXI_ERR_RSP" },
+	{ .fc_id = 463, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_SBTE2_AXI_ERR_RSP" },
+	{ .fc_id = 464, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_SBTE3_AXI_ERR_RSP" },
+	{ .fc_id = 465, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_SBTE4_AXI_ERR_RSP" },
+	{ .fc_id = 466, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_CTRL_AXI_ERROR_RESPONSE" },
+	{ .fc_id = 467, .cpu_id = 91, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, .name = "MME3_QMAN_SW_ERROR" },
+	{ .fc_id = 468, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PSOC_MME_PLL_LOCK_ERR" },
+	{ .fc_id = 469, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PSOC_CPU_PLL_LOCK_ERR" },
+	{ .fc_id = 470, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_TPC_PLL_LOCK_ERR" },
+	{ .fc_id = 471, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_NIC_PLL_LOCK_ERR" },
+	{ .fc_id = 472, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_MMU_PLL_LOCK_ERR" },
+	{ .fc_id = 473, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_DMA_PLL_LOCK_ERR" },
+	{ .fc_id = 474, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_IF_PLL_LOCK_ERR" },
+	{ .fc_id = 475, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_XBAR_BANK_PLL_LOCK_ERR" },
+	{ .fc_id = 476, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_MMU_PLL_LOCK_ERR" },
+	{ .fc_id = 477, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_DMA_PLL_LOCK_ERR" },
+	{ .fc_id = 478, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_IF_PLL_LOCK_ERR" },
+	{ .fc_id = 479, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_MESH_PLL_LOCK_ERR" },
+	{ .fc_id = 480, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_TPC_PLL_LOCK_ERR" },
+	{ .fc_id = 481, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_NIC_PLL_LOCK_ERR" },
+	{ .fc_id = 482, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PMMU_MME_PLL_LOCK_ERR" },
+	{ .fc_id = 483, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_TPC_PLL_LOCK_ERR" },
+	{ .fc_id = 484, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_PCI_PLL_LOCK_ERR" },
+	{ .fc_id = 485, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_MMU_PLL_LOCK_ERR" },
+	{ .fc_id = 486, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_DMA_PLL_LOCK_ERR" },
+	{ .fc_id = 487, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_IF_PLL_LOCK_ERR" },
+	{ .fc_id = 488, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_XBAR_MESH_PLL_LOCK_ERR" },
+	{ .fc_id = 489, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_MMU_PLL_LOCK_ERR" },
+	{ .fc_id = 490, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_DMA_PLL_LOCK_ERR" },
+	{ .fc_id = 491, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_IF_PLL_LOCK_ERR" },
+	{ .fc_id = 492, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_BANK_PLL_LOCK_ERR" },
+	{ .fc_id = 493, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_TPC_PLL_LOCK_ERR" },
+	{ .fc_id = 494, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PSOC_VID_PLL_LOCK_ERR" },
+	{ .fc_id = 495, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PMMU_VID_PLL_LOCK_ERR" },
+	{ .fc_id = 496, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE3_HBM_PLL_LOCK_ERR" },
+	{ .fc_id = 497, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_XBAR_HBM_PLL_LOCK_ERR" },
+	{ .fc_id = 498, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE1_HBM_PLL_LOCK_ERR" },
+	{ .fc_id = 499, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE0_HBM_PLL_LOCK_ERR" },
+	{ .fc_id = 500, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_XBAR_HBM_PLL_LOCK_ERR" },
+	{ .fc_id = 501, .cpu_id = 92, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "DCORE2_HBM_PLL_LOCK_ERR" },
+	{ .fc_id = 502, .cpu_id = 93, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "CPU_AXI_ERR_RSP" },
+	{ .fc_id = 503, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_0_AXI_ERR_RSP" },
+	{ .fc_id = 504, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_1_AXI_ERR_RSP" },
+	{ .fc_id = 505, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_2_AXI_ERR_RSP" },
+	{ .fc_id = 506, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_3_AXI_ERR_RSP" },
+	{ .fc_id = 507, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_8_AXI_ERR_RSP" },
+	{ .fc_id = 508, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_9_AXI_ERR_RSP" },
+	{ .fc_id = 509, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_10_AXI_ERR_RSP" },
+	{ .fc_id = 510, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "HMMU_11_AXI_ERR_RSP" },
"HMMU_11_AXI_ERR_RSP" }, + { .fc_id = 511, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_7_AXI_ERR_RSP" }, + { .fc_id = 512, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_6_AXI_ERR_RSP" }, + { .fc_id = 513, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_5_AXI_ERR_RSP" }, + { .fc_id = 514, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_4_AXI_ERR_RSP" }, + { .fc_id = 515, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_15_AXI_ERR_RSP" }, + { .fc_id = 516, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_14_AXI_ERR_RSP" }, + { .fc_id = 517, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_13_AXI_ERR_RSP" }, + { .fc_id = 518, .cpu_id = 94, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HMMU_12_AXI_ERR_RSP" }, + { .fc_id = 519, .cpu_id = 95, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PMMU_FATAL" }, + { .fc_id = 520, .cpu_id = 96, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PMMU_AXI_ERR_RSP" }, + { .fc_id = 521, .cpu_id = 97, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM0_ALARM_A" }, + { .fc_id = 522, .cpu_id = 98, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM0_ALARM_B" }, + { .fc_id = 523, .cpu_id = 99, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM1_ALARM_A" }, + { .fc_id = 524, .cpu_id = 100, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM1_ALARM_B" }, + { .fc_id = 525, .cpu_id = 101, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM2_ALARM_A" }, + { .fc_id = 526, .cpu_id = 102, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM2_ALARM_B" }, + { .fc_id = 527, .cpu_id = 103, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM3_ALARM_A" }, + { .fc_id = 528, .cpu_id = 104, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "VM3_ALARM_B" }, + { .fc_id = 529, .cpu_id = 105, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PSOC_AXI_ERR_RSP" }, + { .fc_id = 530, .cpu_id = 106, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PSOC_PRSTN_FALL" }, + { .fc_id = 531, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 532, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 533, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 534, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 535, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 536, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 537, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 538, .cpu_id = 107, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 539, .cpu_id = 108, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "KDMA_CH0_AXI_ERR_RSP" }, + { .fc_id = 540, .cpu_id = 109, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "PDMA_CH0_AXI_ERR_RSP" }, + { .fc_id = 541, .cpu_id = 109, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = 
"PDMA_CH1_AXI_ERR_RSP" }, + { .fc_id = 542, .cpu_id = 110, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM_CATTRIP_0" }, + { .fc_id = 543, .cpu_id = 111, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM_CATTRIP_1" }, + { .fc_id = 544, .cpu_id = 112, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM_CATTRIP_2" }, + { .fc_id = 545, .cpu_id = 113, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM_CATTRIP_3" }, + { .fc_id = 546, .cpu_id = 114, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM_CATTRIP_4" }, + { .fc_id = 547, .cpu_id = 115, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM_CATTRIP_5" }, + { .fc_id = 548, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM0_MC0_SEI_SEVERE" }, + { .fc_id = 549, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM0_MC0_SEI_NON_SEVERE" }, + { .fc_id = 550, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM0_MC1_SEI_SEVERE" }, + { .fc_id = 551, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM0_MC1_SEI_NON_SEVERE" }, + { .fc_id = 552, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM1_MC0_SEI_SEVERE" }, + { .fc_id = 553, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM1_MC0_SEI_NON_SEVERE" }, + { .fc_id = 554, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM1_MC1_SEI_SEVERE" }, + { .fc_id = 555, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM1_MC1_SEI_NON_SEVERE" }, + { .fc_id = 556, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM2_MC0_SEI_SEVERE" }, + { .fc_id = 557, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM2_MC0_SEI_NON_SEVERE" }, + { .fc_id = 558, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM2_MC1_SEI_SEVERE" }, + { .fc_id = 559, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM2_MC1_SEI_NON_SEVERE" }, + { .fc_id = 560, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM3_MC0_SEI_SEVERE" }, + { .fc_id = 561, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM3_MC0_SEI_NON_SEVERE" }, + { .fc_id = 562, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM3_MC1_SEI_SEVERE" }, + { .fc_id = 563, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM3_MC1_SEI_NON_SEVERE" }, + { .fc_id = 564, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM4_MC0_SEI_SEVERE" }, + { .fc_id = 565, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM4_MC0_SEI_NON_SEVERE" }, + { .fc_id = 566, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM4_MC1_SEI_SEVERE" }, + { .fc_id = 567, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM4_MC1_SEI_NON_SEVERE" }, + { .fc_id = 568, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HBM5_MC0_SEI_SEVERE" }, + { .fc_id = 569, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM5_MC0_SEI_NON_SEVERE" }, + { .fc_id = 570, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = 
"HBM5_MC1_SEI_SEVERE" }, + { .fc_id = 571, .cpu_id = 116, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "HBM5_MC1_SEI_NON_SEVERE" }, + { .fc_id = 572, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC0_AXI_ERR_RSPONSE" }, + { .fc_id = 573, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC1_AXI_ERR_RSPONSE" }, + { .fc_id = 574, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC2_AXI_ERR_RSPONSE" }, + { .fc_id = 575, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC3_AXI_ERR_RSPONSE" }, + { .fc_id = 576, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC4_AXI_ERR_RSPONSE" }, + { .fc_id = 577, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC5_AXI_ERR_RSPONSE" }, + { .fc_id = 578, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC6_AXI_ERR_RSPONSE" }, + { .fc_id = 579, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC7_AXI_ERR_RSPONSE" }, + { .fc_id = 580, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC8_AXI_ERR_RSPONSE" }, + { .fc_id = 581, .cpu_id = 117, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DEC9_AXI_ERR_RSPONSE" }, + { .fc_id = 582, .cpu_id = 118, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 583, .cpu_id = 119, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 584, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF0_FATAL" }, + { .fc_id = 585, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF1_FATAL" }, + { .fc_id = 586, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF2_FATAL" }, + { .fc_id = 587, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF3_FATAL" }, + { .fc_id = 588, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF8_FATAL" }, + { .fc_id = 589, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF9_FATAL" }, + { .fc_id = 590, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF10_FATAL" }, + { .fc_id = 591, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF11_FATAL" }, + { .fc_id = 592, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF7_FATAL" }, + { .fc_id = 593, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF6_FATAL" }, + { .fc_id = 594, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF5_FATAL" }, + { .fc_id = 595, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF4_FATAL" }, + { .fc_id = 596, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF15_FATAL" }, + { .fc_id = 597, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF14_FATAL" }, + { .fc_id = 598, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF13_FATAL" }, + { .fc_id = 599, .cpu_id = 120, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "HIF12_FATAL" }, + { .fc_id = 600, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC0_AXI_ERROR_RESPONSE" 
}, + { .fc_id = 601, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC1_AXI_ERROR_RESPONSE" }, + { .fc_id = 602, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC2_AXI_ERROR_RESPONSE" }, + { .fc_id = 603, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC3_AXI_ERROR_RESPONSE" }, + { .fc_id = 604, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC4_AXI_ERROR_RESPONSE" }, + { .fc_id = 605, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC5_AXI_ERROR_RESPONSE" }, + { .fc_id = 606, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC6_AXI_ERROR_RESPONSE" }, + { .fc_id = 607, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC7_AXI_ERROR_RESPONSE" }, + { .fc_id = 608, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC8_AXI_ERROR_RESPONSE" }, + { .fc_id = 609, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC9_AXI_ERROR_RESPONSE" }, + { .fc_id = 610, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC10_AXI_ERROR_RESPONSE" }, + { .fc_id = 611, .cpu_id = 121, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC11_AXI_ERROR_RESPONSE" }, + { .fc_id = 612, .cpu_id = 122, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "SM0_AXI_ERROR_RESPONSE" }, + { .fc_id = 613, .cpu_id = 122, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "SM1_AXI_ERROR_RESPONSE" }, + { .fc_id = 614, .cpu_id = 122, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "SM2_AXI_ERROR_RESPONSE" }, + { .fc_id = 615, .cpu_id = 122, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "SM3_AXI_ERROR_RESPONSE" }, + { .fc_id = 616, .cpu_id = 123, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "ARC_AXI_ERROR_RESPONSE" }, + { .fc_id = 617, .cpu_id = 124, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 618, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 619, .cpu_id = 125, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PCIE_FLR_REQUESTED" }, + { .fc_id = 620, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 621, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 622, .cpu_id = 125, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PCIE_APB_TIMEOUT" }, + { .fc_id = 623, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 624, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 625, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 626, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 627, .cpu_id = 125, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PCIE_FATAL_ERR" }, + { .fc_id = 628, .cpu_id = 125, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 629, .cpu_id = 126, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 630, .cpu_id = 127, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 631, .cpu_id = 128, 
.valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "PCIE_P2P_MSIX" }, + { .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, + .name = "PCIE_DRAIN_COMPLETE" }, + { .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC0_BMON_SPMU" }, + { .fc_id = 634, .cpu_id = 131, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC0_KERNEL_ERR" }, + { .fc_id = 635, .cpu_id = 132, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC1_BMON_SPMU" }, + { .fc_id = 636, .cpu_id = 133, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC1_KERNEL_ERR" }, + { .fc_id = 637, .cpu_id = 134, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC2_BMON_SPMU" }, + { .fc_id = 638, .cpu_id = 135, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC2_KERNEL_ERR" }, + { .fc_id = 639, .cpu_id = 136, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC3_BMON_SPMU" }, + { .fc_id = 640, .cpu_id = 137, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC3_KERNEL_ERR" }, + { .fc_id = 641, .cpu_id = 138, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC4_BMON_SPMU" }, + { .fc_id = 642, .cpu_id = 139, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC4_KERNEL_ERR" }, + { .fc_id = 643, .cpu_id = 140, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC5_BMON_SPMU" }, + { .fc_id = 644, .cpu_id = 141, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC5_KERNEL_ERR" }, + { .fc_id = 645, .cpu_id = 150, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC6_BMON_SPMU" }, + { .fc_id = 646, .cpu_id = 151, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC0_KERNEL_ERR" }, + { .fc_id = 647, .cpu_id = 152, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC7_BMON_SPMU" }, + { .fc_id = 648, .cpu_id = 153, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC1_KERNEL_ERR" }, + { .fc_id = 649, .cpu_id = 146, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC8_BMON_SPMU" }, + { .fc_id = 650, .cpu_id = 147, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC2_KERNEL_ERR" }, + { .fc_id = 651, .cpu_id = 148, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC9_BMON_SPMU" }, + { .fc_id = 652, .cpu_id = 149, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC3_KERNEL_ERR" }, + { .fc_id = 653, .cpu_id = 142, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC10_BMON_SPMU" }, + { .fc_id = 654, .cpu_id = 143, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC4_KERNEL_ERR" }, + { .fc_id = 655, .cpu_id = 144, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC11_BMON_SPMU" }, + { .fc_id = 656, .cpu_id = 145, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC5_KERNEL_ERR" }, + { .fc_id = 657, .cpu_id = 162, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC12_BMON_SPMU" }, + { .fc_id = 658, .cpu_id = 163, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC0_KERNEL_ERR" }, + { .fc_id = 659, .cpu_id = 164, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC13_BMON_SPMU" }, + { .fc_id = 660, .cpu_id = 165, .valid = 
1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC1_KERNEL_ERR" }, + { .fc_id = 661, .cpu_id = 158, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC14_BMON_SPMU" }, + { .fc_id = 662, .cpu_id = 159, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC2_KERNEL_ERR" }, + { .fc_id = 663, .cpu_id = 160, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC15_BMON_SPMU" }, + { .fc_id = 664, .cpu_id = 161, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC3_KERNEL_ERR" }, + { .fc_id = 665, .cpu_id = 154, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC16_BMON_SPMU" }, + { .fc_id = 666, .cpu_id = 155, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC4_KERNEL_ERR" }, + { .fc_id = 667, .cpu_id = 156, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC17_BMON_SPMU" }, + { .fc_id = 668, .cpu_id = 157, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC5_KERNEL_ERR" }, + { .fc_id = 669, .cpu_id = 166, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC18_BMON_SPMU" }, + { .fc_id = 670, .cpu_id = 167, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC0_KERNEL_ERR" }, + { .fc_id = 671, .cpu_id = 168, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC19_BMON_SPMU" }, + { .fc_id = 672, .cpu_id = 169, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC1_KERNEL_ERR" }, + { .fc_id = 673, .cpu_id = 170, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC20_BMON_SPMU" }, + { .fc_id = 674, .cpu_id = 171, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC2_KERNEL_ERR" }, + { .fc_id = 675, .cpu_id = 172, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC21_BMON_SPMU" }, + { .fc_id = 676, .cpu_id = 173, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC3_KERNEL_ERR" }, + { .fc_id = 677, .cpu_id = 174, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC22_BMON_SPMU" }, + { .fc_id = 678, .cpu_id = 175, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC4_KERNEL_ERR" }, + { .fc_id = 679, .cpu_id = 176, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC23_BMON_SPMU" }, + { .fc_id = 680, .cpu_id = 177, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC5_KERNEL_ERR" }, + { .fc_id = 681, .cpu_id = 178, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "TPC24_BMON_SPMU" }, + { .fc_id = 682, .cpu_id = 179, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE4_TPC0_KERNEL_ERR" }, + { .fc_id = 683, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 684, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 685, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 686, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 687, .cpu_id = 180, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 688, .cpu_id = 180, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_CTRL_BMON_SPMU" }, + { .fc_id = 689, .cpu_id = 180, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "MME0_SBTE_BMON_SPMU" }, + { 
+	{ .fc_id = 690, .cpu_id = 180, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME0_WAP_BMON_SPMU" },
+	{ .fc_id = 691, .cpu_id = 180, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "MME0_WAP_SOURCE_RESULT_INVALID" },
+	{ .fc_id = 692, .cpu_id = 181, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 693, .cpu_id = 181, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 694, .cpu_id = 181, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 695, .cpu_id = 181, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 696, .cpu_id = 181, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 697, .cpu_id = 181, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME1_CTRL_BMON_SPMU" },
+	{ .fc_id = 698, .cpu_id = 181, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME1_SBTE_BMON_SPMU" },
+	{ .fc_id = 699, .cpu_id = 181, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME1_WAP_BMON_SPMU" },
+	{ .fc_id = 700, .cpu_id = 181, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "MME1_WAP_SOURCE_RESULT_INVALID" },
+	{ .fc_id = 701, .cpu_id = 182, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 702, .cpu_id = 182, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 703, .cpu_id = 182, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 704, .cpu_id = 182, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 705, .cpu_id = 182, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 706, .cpu_id = 182, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME2_CTRL_BMON_SPMU" },
+	{ .fc_id = 707, .cpu_id = 182, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME2_SBTE_BMON_SPMU" },
+	{ .fc_id = 708, .cpu_id = 182, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME2_WAP_BMON_SPMU" },
+	{ .fc_id = 709, .cpu_id = 182, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "MME2_WAP_SOURCE_RESULT_INVALID" },
+	{ .fc_id = 710, .cpu_id = 183, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 711, .cpu_id = 183, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 712, .cpu_id = 183, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 713, .cpu_id = 183, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 714, .cpu_id = 183, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 715, .cpu_id = 183, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME3_CTRL_BMON_SPMU" },
+	{ .fc_id = 716, .cpu_id = 183, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME3_SBTE_BMON_SPMU" },
+	{ .fc_id = 717, .cpu_id = 183, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "MME3_WAP_BMON_SPMU" },
+	{ .fc_id = 718, .cpu_id = 183, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "MME3_WAP_SOURCE_RESULT_INVALID" },
+	{ .fc_id = 719, .cpu_id = 184, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 720, .cpu_id = 184, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU0_PAGE_FAULT_OR_WR_PERM" },
+	{ .fc_id = 721, .cpu_id = 184, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU0_SECURITY_ERROR" },
+	{ .fc_id = 722, .cpu_id = 185, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 723, .cpu_id = 185, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU1_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 724, .cpu_id = 185, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU1_SECURITY_ERROR" },
+	{ .fc_id = 725, .cpu_id = 186, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 726, .cpu_id = 186, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU2_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 727, .cpu_id = 186, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU2_SECURITY_ERROR" },
+	{ .fc_id = 728, .cpu_id = 187, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 729, .cpu_id = 187, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU3_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 730, .cpu_id = 187, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU3_SECURITY_ERROR" },
+	{ .fc_id = 731, .cpu_id = 188, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 732, .cpu_id = 188, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU8_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 733, .cpu_id = 188, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU8_SECURITY_ERROR" },
+	{ .fc_id = 734, .cpu_id = 189, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 735, .cpu_id = 189, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU9_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 736, .cpu_id = 189, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU9_SECURITY_ERROR" },
+	{ .fc_id = 737, .cpu_id = 190, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 738, .cpu_id = 190, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU10_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 739, .cpu_id = 190, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU10_SECURITY_ERROR" },
+	{ .fc_id = 740, .cpu_id = 191, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 741, .cpu_id = 191, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU11_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 742, .cpu_id = 191, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU11_SECURITY_ERROR" },
+	{ .fc_id = 743, .cpu_id = 192, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 744, .cpu_id = 192, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU7_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 745, .cpu_id = 192, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU7_SECURITY_ERROR" },
+	{ .fc_id = 746, .cpu_id = 193, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 747, .cpu_id = 193, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU6_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 748, .cpu_id = 193, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU6_SECURITY_ERROR" },
+	{ .fc_id = 749, .cpu_id = 194, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 750, .cpu_id = 194, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU5_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 751, .cpu_id = 194, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU5_SECURITY_ERROR" },
+	{ .fc_id = 752, .cpu_id = 195, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 753, .cpu_id = 195, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU4_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 754, .cpu_id = 195, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU4_SECURITY_ERROR" },
+	{ .fc_id = 755, .cpu_id = 196, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 756, .cpu_id = 196, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU15_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 757, .cpu_id = 196, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU15_SECURITY_ERROR" },
+	{ .fc_id = 758, .cpu_id = 197, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 759, .cpu_id = 197, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU14_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 760, .cpu_id = 197, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU14_SECURITY_ERROR" },
+	{ .fc_id = 761, .cpu_id = 198, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 762, .cpu_id = 198, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU13_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 763, .cpu_id = 198, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU13_SECURITY_ERROR" },
+	{ .fc_id = 764, .cpu_id = 199, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 765, .cpu_id = 199, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HMMU12_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 766, .cpu_id = 199, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "HMMU12_SECURITY_ERROR" },
+	{ .fc_id = 767, .cpu_id = 200, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 768, .cpu_id = 201, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "PMMU0_PAGE_FAULT_WR_PERM" },
+	{ .fc_id = 769, .cpu_id = 202, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "PMMU0_SECURITY_ERROR" },
+	{ .fc_id = 770, .cpu_id = 203, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA2_BM_SPMU" },
+	{ .fc_id = 771, .cpu_id = 204, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 772, .cpu_id = 205, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA3_BM_SPMU" },
+	{ .fc_id = 773, .cpu_id = 206, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 774, .cpu_id = 207, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA0_BM_SPMU" },
+	{ .fc_id = 775, .cpu_id = 208, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 776, .cpu_id = 209, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA1_BM_SPMU" },
+	{ .fc_id = 777, .cpu_id = 210, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 778, .cpu_id = 211, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA6_BM_SPMU" },
+	{ .fc_id = 779, .cpu_id = 212, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 780, .cpu_id = 213, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA7_BM_SPMU" },
+	{ .fc_id = 781, .cpu_id = 214, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 782, .cpu_id = 215, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA4_BM_SPMU" },
+	{ .fc_id = 783, .cpu_id = 216, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 784, .cpu_id = 217, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "EDMA5_BM_SPMU" },
+	{ .fc_id = 785, .cpu_id = 218, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 786, .cpu_id = 219, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "KDMA_BM_SPMU" },
+	{ .fc_id = 787, .cpu_id = 220, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 788, .cpu_id = 221, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "PDMA0_BM_SPMU" },
+	{ .fc_id = 789, .cpu_id = 222, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "PDMA1_BM_SPMU" },
+	{ .fc_id = 790, .cpu_id = 223, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM0_MC0_SPI" },
+	{ .fc_id = 791, .cpu_id = 224, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM0_MC1_SPI" },
+	{ .fc_id = 792, .cpu_id = 225, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM1_MC0_SPI" },
+	{ .fc_id = 793, .cpu_id = 226, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM1_MC1_SPI" },
+	{ .fc_id = 794, .cpu_id = 227, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM2_MC0_SPI" },
+	{ .fc_id = 795, .cpu_id = 228, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM2_MC1_SPI" },
+	{ .fc_id = 796, .cpu_id = 229, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM3_MC0_SPI" },
+	{ .fc_id = 797, .cpu_id = 230, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM3_MC1_SPI" },
+	{ .fc_id = 798, .cpu_id = 231, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM4_MC0_SPI" },
+	{ .fc_id = 799, .cpu_id = 232, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM4_MC1_SPI" },
+	{ .fc_id = 800, .cpu_id = 233, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM5_MC0_SPI" },
+	{ .fc_id = 801, .cpu_id = 234, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "HBM5_MC1_SPI" },
+	{ .fc_id = 802, .cpu_id = 235, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 803, .cpu_id = 236, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 804, .cpu_id = 237, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 805, .cpu_id = 238, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 806, .cpu_id = 239, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 807, .cpu_id = 240, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 808, .cpu_id = 241, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 809, .cpu_id = 242, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 810, .cpu_id = 243, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 811, .cpu_id = 244, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 812, .cpu_id = 245, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 813, .cpu_id = 246, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 814, .cpu_id = 247, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 815, .cpu_id = 248, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 816, .cpu_id = 249, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 817, .cpu_id = 250, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 818, .cpu_id = 251, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 819, .cpu_id = 252, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 820, .cpu_id = 253, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 821, .cpu_id = 254, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 822, .cpu_id = 255, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 823, .cpu_id = 256, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 824, .cpu_id = 257, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 825, .cpu_id = 258, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 826, .cpu_id = 259, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 827, .cpu_id = 260, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 828, .cpu_id = 261, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 829, .cpu_id = 262, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 830, .cpu_id = 263, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 831, .cpu_id = 264, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 832, .cpu_id = 265, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 833, .cpu_id = 266, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 834, .cpu_id = 267, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 835, .cpu_id = 268, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 836, .cpu_id = 269, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 837, .cpu_id = 270, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 838, .cpu_id = 271, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 839, .cpu_id = 272, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 840, .cpu_id = 273, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 841, .cpu_id = 274, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 842, .cpu_id = 275, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 843, .cpu_id = 276, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 844, .cpu_id = 277, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 845, .cpu_id = 278, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 846, .cpu_id = 279, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 847, .cpu_id = 280, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 848, .cpu_id = 281, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 849, .cpu_id = 282, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 850, .cpu_id = 283, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 851, .cpu_id = 284, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 852, .cpu_id = 285, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 853, .cpu_id = 286, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 854, .cpu_id = 287, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "" },
+	{ .fc_id = 855, .cpu_id = 288, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "" },
+	{ .fc_id = 856, .cpu_id = 289, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 857, .cpu_id = 290, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 858, .cpu_id = 291, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 859, .cpu_id = 292, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 860, .cpu_id = 293, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 861, .cpu_id = 294, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 862, .cpu_id = 295, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 863, .cpu_id = 296, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 864, .cpu_id = 297, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 865, .cpu_id = 298, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 866, .cpu_id = 299, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 867, .cpu_id = 300, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 868, .cpu_id = 301, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 869, .cpu_id = 302, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 870, .cpu_id = 303, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 871, .cpu_id = 304, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "RPM_ERROR_OR_DRAIN" },
+	{ .fc_id = 872, .cpu_id = 305, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 873, .cpu_id = 306, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 874, .cpu_id = 307, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 875, .cpu_id = 308, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "RAZWI_OR_PID_MIN_MAX_INTERRUPT" },
+	{ .fc_id = 876, .cpu_id = 309, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 877, .cpu_id = 310, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 878, .cpu_id = 311, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 879, .cpu_id = 312, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "" },
+	{ .fc_id = 880, .cpu_id = 313, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 881, .cpu_id = 314, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 882, .cpu_id = 315, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 883, .cpu_id = 316, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 884, .cpu_id = 317, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 885, .cpu_id = 318, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 886, .cpu_id = 319, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 887, .cpu_id = 320, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 888, .cpu_id = 321, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 889, .cpu_id = 322, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 890, .cpu_id = 323, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 891, .cpu_id = 324, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 892, .cpu_id = 325, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 893, .cpu_id = 326, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 894, .cpu_id = 327, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 895, .cpu_id = 328, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 896, .cpu_id = 329, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC0_SPI" },
+	{ .fc_id = 897, .cpu_id = 329, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC0_BMON_SPMU" },
+	{ .fc_id = 898, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC1_SPI" },
+	{ .fc_id = 899, .cpu_id = 330, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC1_BMON_SPMU" },
+	{ .fc_id = 900, .cpu_id = 331, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC2_SPI" },
+	{ .fc_id = 901, .cpu_id = 331, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC2_BMON_SPMU" },
+	{ .fc_id = 902, .cpu_id = 332, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC3_SPI" },
+	{ .fc_id = 903, .cpu_id = 332, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC3_BMON_SPMU" },
+	{ .fc_id = 904, .cpu_id = 333, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC4_SPI" },
+	{ .fc_id = 905, .cpu_id = 333, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC4_BMON_SPMU" },
+	{ .fc_id = 906, .cpu_id = 334, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC5_SPI" },
+	{ .fc_id = 907, .cpu_id = 334, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC5_BMON_SPMU" },
+	{ .fc_id = 908, .cpu_id = 335, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC6_SPI" },
+	{ .fc_id = 909, .cpu_id = 335, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC6_BMON_SPMU" },
+	{ .fc_id = 910, .cpu_id = 336, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC7_SPI" },
+	{ .fc_id = 911, .cpu_id = 336, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC7_BMON_SPMU" },
+	{ .fc_id = 912, .cpu_id = 337, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC8_SPI" },
+	{ .fc_id = 913, .cpu_id = 337, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC8_BMON_SPMU" },
+	{ .fc_id = 914, .cpu_id = 338, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "DEC9_SPI" },
+	{ .fc_id = 915, .cpu_id = 338, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "DEC9_BMON_SPMU" },
+	{ .fc_id = 916, .cpu_id = 339, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 917, .cpu_id = 340, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 918, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 919, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 920, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 921, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 922, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 923, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 924, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 925, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 926, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 927, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 928, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 929, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 930, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 931, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 932, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 933, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 934, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 935, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 936, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 937, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 938, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 939, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 940, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 941, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 942, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 943, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 944, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 945, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 946, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 947, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 948, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 949, .cpu_id = 341, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 950, .cpu_id = 342, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 951, .cpu_id = 343, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC0_BMON_SPMU" },
+	{ .fc_id = 952, .cpu_id = 343, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC0_SW_ERROR" },
+	{ .fc_id = 953, .cpu_id = 343, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 954, .cpu_id = 343, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 955, .cpu_id = 344, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC1_BMON_SPMU" },
+	{ .fc_id = 956, .cpu_id = 344, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC1_SW_ERROR" },
+	{ .fc_id = 957, .cpu_id = 344, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 958, .cpu_id = 344, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 959, .cpu_id = 345, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC2_BMON_SPMU" },
+	{ .fc_id = 960, .cpu_id = 345, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC2_SW_ERROR" },
+	{ .fc_id = 961, .cpu_id = 345, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 962, .cpu_id = 345, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 963, .cpu_id = 346, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC3_BMON_SPMU" },
+	{ .fc_id = 964, .cpu_id = 346, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC3_SW_ERROR" },
+	{ .fc_id = 965, .cpu_id = 346, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 966, .cpu_id = 346, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 967, .cpu_id = 347, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC4_BMON_SPMU" },
+	{ .fc_id = 968, .cpu_id = 347, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC4_SW_ERROR" },
+	{ .fc_id = 969, .cpu_id = 347, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 970, .cpu_id = 347, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 971, .cpu_id = 348, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC5_BMON_SPMU" },
+	{ .fc_id = 972, .cpu_id = 348, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC5_SW_ERROR" },
+	{ .fc_id = 973, .cpu_id = 348, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 974, .cpu_id = 348, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 975, .cpu_id = 349, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC6_BMON_SPMU" },
+	{ .fc_id = 976, .cpu_id = 349, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC6_SW_ERROR" },
+	{ .fc_id = 977, .cpu_id = 349, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 978, .cpu_id = 349, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 979, .cpu_id = 350, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC7_BMON_SPMU" },
+	{ .fc_id = 980, .cpu_id = 350, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC7_SW_ERROR" },
+	{ .fc_id = 981, .cpu_id = 350, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 982, .cpu_id = 350, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 983, .cpu_id = 351, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC8_BMON_SPMU" },
+	{ .fc_id = 984, .cpu_id = 351, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC8_SW_ERROR" },
+	{ .fc_id = 985, .cpu_id = 351, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 986, .cpu_id = 351, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 987, .cpu_id = 352, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC9_BMON_SPMU" },
+	{ .fc_id = 988, .cpu_id = 352, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC9_SW_ERROR" },
+	{ .fc_id = 989, .cpu_id = 352, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 990, .cpu_id = 352, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 991, .cpu_id = 353, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC10_BMON_SPMU" },
+	{ .fc_id = 992, .cpu_id = 353, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC10_SW_ERROR" },
+	{ .fc_id = 993, .cpu_id = 353, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 994, .cpu_id = 353, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 995, .cpu_id = 354, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC11_BMON_SPMU" },
+	{ .fc_id = 996, .cpu_id = 354, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "NIC11_SW_ERROR" },
+	{ .fc_id = 997, .cpu_id = 354, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 998, .cpu_id = 354, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 999, .cpu_id = 355, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1000, .cpu_id = 356, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1001, .cpu_id = 357, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1002, .cpu_id = 358, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1003, .cpu_id = 359, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1004, .cpu_id = 360, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1005, .cpu_id = 361, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1006, .cpu_id = 362, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1007, .cpu_id = 363, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1008, .cpu_id = 368, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1009, .cpu_id = 369, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1010, .cpu_id = 366, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1011, .cpu_id = 367, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1012, .cpu_id = 364, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1013, .cpu_id = 365, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1014, .cpu_id = 374, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1015, .cpu_id = 375, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1016, .cpu_id = 372, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1017, .cpu_id = 373, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1018, .cpu_id = 370, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1019, .cpu_id = 371, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1020, .cpu_id = 376, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1021, .cpu_id = 377, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1022, .cpu_id = 378, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1023, .cpu_id = 379, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1024, .cpu_id = 380, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1025, .cpu_id = 381, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1026, .cpu_id = 382, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1027, .cpu_id = 383, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1028, .cpu_id = 384, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1029, .cpu_id = 385, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1030, .cpu_id = 386, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1031, .cpu_id = 387, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1032, .cpu_id = 388, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1033, .cpu_id = 389, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1034, .cpu_id = 390, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1035, .cpu_id = 391, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1036, .cpu_id = 392, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1037, .cpu_id = 393, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1038, .cpu_id = 394, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1039, .cpu_id = 395, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1040, .cpu_id = 396, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1041, .cpu_id = 397, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1042, .cpu_id = 398, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1043, .cpu_id = 399, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1044, .cpu_id = 400, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1045, .cpu_id = 401, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1046, .cpu_id = 402, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1047, .cpu_id = 403, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1048, .cpu_id = 404, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1049, .cpu_id = 405, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1050, .cpu_id = 406, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1051, .cpu_id = 407, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1052, .cpu_id = 408, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1053, .cpu_id = 409, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1054, .cpu_id = 410, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1055, .cpu_id = 411, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1056, .cpu_id = 412, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1057, .cpu_id = 413, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1058, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1059, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1060, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1061, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1062, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1063, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1064, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1065, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1066, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1067, .cpu_id = 414, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1068, .cpu_id = 415, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1069, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1070, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1071, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1072, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1073, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1074, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1075, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1076, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1077, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1078, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1079, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1080, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1081, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1082, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1083, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1084, .cpu_id = 416, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1085, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1086, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1087, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1088, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1089, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1090, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1091, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1092, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1093, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1094, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1095, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1096, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1097, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1098, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1099, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1100, .cpu_id = 417, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1101, .cpu_id = 418, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1102, .cpu_id = 419, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1103, .cpu_id = 420, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1104, .cpu_id = 421, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1105, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1106, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1107, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1108, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1109, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1110, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1111, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1112, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1113, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1114, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1115, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1116, .cpu_id = 422, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1117, .cpu_id = 423, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1118, .cpu_id = 424, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "ROTATOR0_SERR" },
+	{ .fc_id = 1119, .cpu_id = 425, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "ROTATOR1_SERR" },
+	{ .fc_id = 1120, .cpu_id = 426, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "ROTATOR0_DERR" },
+	{ .fc_id = 1121, .cpu_id = 427, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
+		.name = "ROTATOR1_DERR" },
+	{ .fc_id = 1122, .cpu_id = 428, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "ROTATOR0_AXI_ERROR_RESPONSE" },
+	{ .fc_id = 1123, .cpu_id = 429, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_COMPUTE,
+		.name = "ROTATOR1_AXI_ERROR_RESPONSE" },
+	{ .fc_id = 1124, .cpu_id = 430, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1125, .cpu_id = 431, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1126, .cpu_id = 432, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "ROTATOR0_BMON_SPMU" },
+	{ .fc_id = 1127, .cpu_id = 433, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1128, .cpu_id = 434, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "ROTATOR1_BMON_SPMU" },
+	{ .fc_id = 1129, .cpu_id = 435, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1130, .cpu_id = 436, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "SM0_BMON_SPMU" },
+	{ .fc_id = 1131, .cpu_id = 437, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "SM1_BMON_SPMU" },
+	{ .fc_id = 1132, .cpu_id = 438, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "SM2_BMON_SPMU" },
+	{ .fc_id = 1133, .cpu_id = 439, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "SM3_BMON_SPMU" },
+	{ .fc_id = 1134, .cpu_id = 440, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1135, .cpu_id = 441, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1136, .cpu_id = 442, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1137, .cpu_id = 443, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1138, .cpu_id = 444, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1139, .cpu_id = 445, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1140, .cpu_id = 446, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1141, .cpu_id = 447, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1142, .cpu_id = 448, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1143, .cpu_id = 449, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1144, .cpu_id = 450, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1145, .cpu_id = 451, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1146, .cpu_id = 452, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1147, .cpu_id = 453, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1148, .cpu_id = 454, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1149, .cpu_id = 455, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1150, .cpu_id = 456, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1151, .cpu_id = 457, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1152, .cpu_id = 458, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1153, .cpu_id = 459, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1154, .cpu_id = 460, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1155, .cpu_id = 461, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1156, .cpu_id = 462, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1157, .cpu_id = 463, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1158, .cpu_id = 464, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1159, .cpu_id = 465, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1160, .cpu_id = 466, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1161, .cpu_id = 467, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1162, .cpu_id = 468, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1163, .cpu_id = 469, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1164, .cpu_id = 470, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1165, .cpu_id = 471, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1166, .cpu_id = 472, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1167, .cpu_id = 473, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1168, .cpu_id = 474, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1169, .cpu_id = 475, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1170, .cpu_id = 476, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1171, .cpu_id = 477, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1172, .cpu_id = 478, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1173, .cpu_id = 479, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1174, .cpu_id = 480, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "PSOC_DMA_QM" },
+	{ .fc_id = 1175, .cpu_id = 481, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1176, .cpu_id = 482, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1177, .cpu_id = 483, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1178, .cpu_id = 484, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1179, .cpu_id = 485, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1180, .cpu_id = 486, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1181, .cpu_id = 487, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1182, .cpu_id = 488, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name = "" },
+	{ .fc_id = 1183, .cpu_id = 489, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+		.name =
"" }, + { .fc_id = 1184, .cpu_id = 490, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1185, .cpu_id = 491, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1186, .cpu_id = 492, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1187, .cpu_id = 493, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1188, .cpu_id = 494, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1189, .cpu_id = 495, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1190, .cpu_id = 496, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1191, .cpu_id = 497, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1192, .cpu_id = 498, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1193, .cpu_id = 499, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1194, .cpu_id = 500, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1195, .cpu_id = 501, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1196, .cpu_id = 502, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1197, .cpu_id = 503, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1198, .cpu_id = 504, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1199, .cpu_id = 505, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1200, .cpu_id = 506, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1201, .cpu_id = 507, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1202, .cpu_id = 508, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1203, .cpu_id = 509, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1204, .cpu_id = 510, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1205, .cpu_id = 511, .valid = 0, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1206, .cpu_id = 512, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC0_QM" }, + { .fc_id = 1207, .cpu_id = 513, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC1_QM" }, + { .fc_id = 1208, .cpu_id = 514, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC2_QM" }, + { .fc_id = 1209, .cpu_id = 515, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC3_QM" }, + { .fc_id = 1210, .cpu_id = 516, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC4_QM" }, + { .fc_id = 1211, .cpu_id = 517, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE0_TPC5_QM" }, + { .fc_id = 1212, .cpu_id = 518, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC0_QM" }, + { .fc_id = 1213, .cpu_id = 519, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC1_QM" }, + { .fc_id = 1214, .cpu_id = 520, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC2_QM" }, + { .fc_id = 1215, .cpu_id = 521, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC3_QM" }, + { .fc_id = 1216, .cpu_id = 522, .valid = 1, .msg 
= 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC4_QM" }, + { .fc_id = 1217, .cpu_id = 523, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE1_TPC5_QM" }, + { .fc_id = 1218, .cpu_id = 524, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC0_QM" }, + { .fc_id = 1219, .cpu_id = 525, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC1_QM" }, + { .fc_id = 1220, .cpu_id = 526, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC2_QM" }, + { .fc_id = 1221, .cpu_id = 527, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC3_QM" }, + { .fc_id = 1222, .cpu_id = 528, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC4_QM" }, + { .fc_id = 1223, .cpu_id = 529, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE2_TPC5_QM" }, + { .fc_id = 1224, .cpu_id = 530, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC0_QM" }, + { .fc_id = 1225, .cpu_id = 531, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC1_QM" }, + { .fc_id = 1226, .cpu_id = 532, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC2_QM" }, + { .fc_id = 1227, .cpu_id = 533, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC3_QM" }, + { .fc_id = 1228, .cpu_id = 534, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC4_QM" }, + { .fc_id = 1229, .cpu_id = 535, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE3_TPC5_QM" }, + { .fc_id = 1230, .cpu_id = 536, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "DCORE4_TPC0_QM" }, + { .fc_id = 1231, .cpu_id = 537, .valid = 0, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "" }, + { .fc_id = 1232, .cpu_id = 538, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "MME0_QM" }, + { .fc_id = 1233, .cpu_id = 539, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "MME1_QM" }, + { .fc_id = 1234, .cpu_id = 540, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "MME2_QM" }, + { .fc_id = 1235, .cpu_id = 541, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "MME3_QM" }, + { .fc_id = 1236, .cpu_id = 542, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA2_QM" }, + { .fc_id = 1237, .cpu_id = 543, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA3_QM" }, + { .fc_id = 1238, .cpu_id = 544, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA0_QM" }, + { .fc_id = 1239, .cpu_id = 545, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA1_QM" }, + { .fc_id = 1240, .cpu_id = 546, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA6_QM" }, + { .fc_id = 1241, .cpu_id = 547, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA7_QM" }, + { .fc_id = 1242, .cpu_id = 548, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA4_QM" }, + { .fc_id = 1243, .cpu_id = 549, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA5_QM" }, + { .fc_id = 1244, .cpu_id = 550, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "PDMA0_QM" }, + { .fc_id = 1245, .cpu_id = 551, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "PDMA1_QM" }, + { .fc_id = 1246, .cpu_id = 552, .valid = 1, .msg = 1, .reset = 
EVENT_RESET_TYPE_NONE, + .name = "PI_UPDATE" }, + { .fc_id = 1247, .cpu_id = 553, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "HALT_MACHINE" }, + { .fc_id = 1248, .cpu_id = 554, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "INTS_REGISTER" }, + { .fc_id = 1249, .cpu_id = 555, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "ROT0_QM" }, + { .fc_id = 1250, .cpu_id = 556, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "ROT1_QM" }, + { .fc_id = 1251, .cpu_id = 557, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "SOFT_RESET" }, + { .fc_id = 1252, .cpu_id = 558, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "CPLD_SHUTDOWN_CAUSE" }, + { .fc_id = 1253, .cpu_id = 559, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "FIX_POWER_ENV_S" }, + { .fc_id = 1254, .cpu_id = 560, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "FIX_POWER_ENV_E" }, + { .fc_id = 1255, .cpu_id = 561, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "FIX_THERMAL_ENV_S" }, + { .fc_id = 1256, .cpu_id = 562, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "FIX_THERMAL_ENV_E" }, + { .fc_id = 1257, .cpu_id = 563, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "CPLD_SHUTDOWN_EVENT" }, + { .fc_id = 1258, .cpu_id = 564, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "PKT_QUEUE_OUT_SYNC" }, + { .fc_id = 1259, .cpu_id = 565, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA2_CORE" }, + { .fc_id = 1260, .cpu_id = 566, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA3_CORE" }, + { .fc_id = 1261, .cpu_id = 567, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA0_CORE" }, + { .fc_id = 1262, .cpu_id = 568, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA1_CORE" }, + { .fc_id = 1263, .cpu_id = 569, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA6_CORE" }, + { .fc_id = 1264, .cpu_id = 570, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA7_CORE" }, + { .fc_id = 1265, .cpu_id = 571, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA4_CORE" }, + { .fc_id = 1266, .cpu_id = 572, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "EDMA5_CORE" }, + { .fc_id = 1267, .cpu_id = 573, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "PDMA0_CORE" }, + { .fc_id = 1268, .cpu_id = 574, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "PDMA1_CORE" }, + { .fc_id = 1269, .cpu_id = 575, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "KDMA0_CORE" }, + { .fc_id = 1270, .cpu_id = 576, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC0_QM0" }, + { .fc_id = 1271, .cpu_id = 577, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC0_QM1" }, + { .fc_id = 1272, .cpu_id = 578, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC1_QM0" }, + { .fc_id = 1273, .cpu_id = 579, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC1_QM1" }, + { .fc_id = 1274, .cpu_id = 580, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC2_QM0" }, + { .fc_id = 1275, .cpu_id = 581, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC2_QM1" }, + { .fc_id = 1276, .cpu_id = 582, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC3_QM0" }, + { .fc_id = 
1277, .cpu_id = 583, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC3_QM1" }, + { .fc_id = 1278, .cpu_id = 584, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC4_QM0" }, + { .fc_id = 1279, .cpu_id = 585, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC4_QM1" }, + { .fc_id = 1280, .cpu_id = 586, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC5_QM0" }, + { .fc_id = 1281, .cpu_id = 587, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC5_QM1" }, + { .fc_id = 1282, .cpu_id = 588, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC6_QM0" }, + { .fc_id = 1283, .cpu_id = 589, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC6_QM1" }, + { .fc_id = 1284, .cpu_id = 590, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC7_QM0" }, + { .fc_id = 1285, .cpu_id = 591, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC7_QM1" }, + { .fc_id = 1286, .cpu_id = 592, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC8_QM0" }, + { .fc_id = 1287, .cpu_id = 593, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC8_QM1" }, + { .fc_id = 1288, .cpu_id = 594, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC9_QM0" }, + { .fc_id = 1289, .cpu_id = 595, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC9_QM1" }, + { .fc_id = 1290, .cpu_id = 596, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC10_QM0" }, + { .fc_id = 1291, .cpu_id = 597, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC10_QM1" }, + { .fc_id = 1292, .cpu_id = 598, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC11_QM0" }, + { .fc_id = 1293, .cpu_id = 599, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "NIC11_QM1" }, + { .fc_id = 1294, .cpu_id = 600, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "CPU_PKT_SANITY_FAILED" }, + { .fc_id = 1295, .cpu_id = 601, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC0_ENG0" }, + { .fc_id = 1296, .cpu_id = 602, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC0_ENG1" }, + { .fc_id = 1297, .cpu_id = 603, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC1_ENG0" }, + { .fc_id = 1298, .cpu_id = 604, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC1_ENG1" }, + { .fc_id = 1299, .cpu_id = 605, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC2_ENG0" }, + { .fc_id = 1300, .cpu_id = 606, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC2_ENG1" }, + { .fc_id = 1301, .cpu_id = 607, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC3_ENG0" }, + { .fc_id = 1302, .cpu_id = 608, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC3_ENG1" }, + { .fc_id = 1303, .cpu_id = 609, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC4_ENG0" }, + { .fc_id = 1304, .cpu_id = 610, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC4_ENG1" }, + { .fc_id = 1305, .cpu_id = 611, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC5_ENG0" }, + { .fc_id = 1306, .cpu_id = 612, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC5_ENG1" }, + { .fc_id = 1307, .cpu_id = 613, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + 
.name = "STATUS_NIC6_ENG0" }, + { .fc_id = 1308, .cpu_id = 614, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC6_ENG1" }, + { .fc_id = 1309, .cpu_id = 615, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC7_ENG0" }, + { .fc_id = 1310, .cpu_id = 616, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC7_ENG1" }, + { .fc_id = 1311, .cpu_id = 617, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC8_ENG0" }, + { .fc_id = 1312, .cpu_id = 618, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC8_ENG1" }, + { .fc_id = 1313, .cpu_id = 619, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC9_ENG0" }, + { .fc_id = 1314, .cpu_id = 620, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC9_ENG1" }, + { .fc_id = 1315, .cpu_id = 621, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC10_ENG0" }, + { .fc_id = 1316, .cpu_id = 622, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC10_ENG1" }, + { .fc_id = 1317, .cpu_id = 623, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC11_ENG0" }, + { .fc_id = 1318, .cpu_id = 624, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "STATUS_NIC11_ENG1" }, + { .fc_id = 1319, .cpu_id = 625, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_COMPUTE, + .name = "ARC_DCCM_FULL" }, + { .fc_id = 1320, .cpu_id = 626, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "FP32_NOT_SUPPORTED" }, + { .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, + .name = "DEV_RESET_REQ" }, + { .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PWR_BRK_ENTRY" }, + { .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PWR_BRK_EXT" }, + { .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PWR_RD_MODE0" }, + { .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PWR_RD_MODE1" }, + { .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PWR_RD_MODE2" }, + { .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "PWR_RD_MODE3" }, + { .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "EQ_HEARTBEAT" }, }; #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */ diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h index 82f3ca2a3966..6ea936c9594e 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h @@ -45,6 +45,13 @@ #define GAUDI2_ARM_RX_MB_OFFSET (GAUDI2_ARM_RX_MB_ADDR - \ GAUDI2_SP_SRAM_BASE_ADDR) +#define POWER_MODE_LEVELS { \ + 150000, /* 00 */ \ + 250000, /* 01 */ \ + 400000, /* 10 */ \ + /* 11: Normal mode */ \ +} + enum gaudi2_fw_status { GAUDI2_PID_STATUS_UP = 0x1, /* PID on ARC0 is up */ GAUDI2_ARM_STATUS_UP = 0x2, /* ARM Linux Boot complete */ @@ -52,23 +59,6 @@ enum gaudi2_fw_status { GAUDI2_STATUS_LAST = 0xFF }; -struct gaudi2_cold_rst_data { - union { - struct { - u32 recovery_flag: 1; - u32 validation_flag: 1; - u32 efuse_read_flag: 1; - u32 spsram_init_done : 1; - u32 fake_security_enable : 1; - u32 fake_sig_validation_en : 1; - u32 bist_skip_enable : 1; - 
u32 bist_need_iatu_config : 1; - u32 reserved : 24; - }; - __le32 data; - }; -}; - enum gaudi2_rst_src { HL_COLD_RST = 1, HL_MANUAL_RST = 2, diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h index f3eaeb6d9b7e..1e9c056e437d 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_reg_map.h @@ -58,4 +58,12 @@ #define mmWD_GPIO_DATAOUT_REG mmPSOC_GPIO3_DATAOUT #define mmSTM_PROFILER_SPE_REG mmPSOC_STM_STMSPER +/* Registers below are used to pass the boot_if data between ARM and ARC1 */ +#define mmARM_MSG_BOOT_ERR_SET mmCPU_IF_SPECIAL_GLBL_SPARE_0 +#define mmARM_MSG_BOOT_ERR_CLR mmCPU_IF_SPECIAL_GLBL_SPARE_1 +#define mmARM_MSG_BOOT_DEV_STS_SET mmCPU_IF_SPECIAL_GLBL_SPARE_2 +#define mmARM_MSG_BOOT_DEV_STS_CLR mmCPU_IF_SPECIAL_GLBL_SPARE_3 +#define mmMGMT_MSG_BOOT_ERR mmCPU_MSTR_IF_SPECIAL_GLBL_SPARE_0 +#define mmMGMT_MSG_BOOT_DEV_STS mmCPU_MSTR_IF_SPECIAL_GLBL_SPARE_1 + #endif /* GAUDI2_REG_MAP_H_ */ diff --git a/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h index d408feecd483..b4a5e95be354 100644 --- a/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -26,6 +26,8 @@ #define LAST_MASK 0x0000000000800ull #define FLAGS_MASK 0x0000000000FFFull +#define MMU_ARCH_3_HOPS 3 +#define MMU_ARCH_4_HOPS 4 #define MMU_ARCH_5_HOPS 5 #define MMU_ARCH_6_HOPS 6 diff --git a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h index f5d497dc9bdc..a75faa00197f 100644 --- a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h +++ b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h @@ -25,6 +25,8 @@ enum hl_revision_id { REV_ID_INVALID = 0x00, REV_ID_A = 0x01, REV_ID_B = 0x02, + REV_ID_C = 0x03, + REV_ID_D = 0x04 }; #endif /* INCLUDE_PCI_GENERAL_H_ */ diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig index 9bdf168bf1d0..9e055b5ce03d 100644 --- a/drivers/accel/ivpu/Kconfig +++ b/drivers/accel/ivpu/Kconfig @@ -1,15 +1,27 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ACCEL_IVPU - tristate "Intel VPU for Meteor Lake and newer" + tristate "Intel NPU (Neural Processing Unit)" depends on DRM_ACCEL depends on X86_64 && !UML depends on PCI && PCI_MSI select FW_LOADER - select SHMEM + select DRM_GEM_SHMEM_HELPER + select GENERIC_ALLOCATOR + select WANT_DEV_COREDUMP help - Choose this option if you have a system that has an 14th generation Intel CPU - or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated - inference accelerator for Computer Vision and Deep Learning applications. + Choose this option if you have a system with a 14th generation + Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU) + is a CPU-integrated inference accelerator for Computer Vision + and Deep Learning applications. If "M" is selected, the module will be called intel_vpu. + +config DRM_ACCEL_IVPU_DEBUG + bool "Intel NPU debug mode" + depends on DRM_ACCEL_IVPU + help + Choose this option to enable additional + debug features for the Intel NPU driver: + - Always print debug messages regardless of dyndbg config, + - Enable unsafe module params.
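The POWER_MODE_LEVELS table added to gaudi2_fw_if.h above pairs with the PWR_RD_MODE0..PWR_RD_MODE3 events in the extended event map: the two mode bits select one of three fixed power levels, and 0b11 is reserved for normal (unthrottled) operation. Below is a minimal standalone sketch of that decode; the helper name power_level_for_mode(), the zero-means-no-fixed-level convention, and the assumption that the values are milliwatts are illustrative only, not part of the patch:

#include <stdio.h>

/* Table copied from the gaudi2_fw_if.h hunk above. */
#define POWER_MODE_LEVELS { \
	150000, /* 00 */ \
	250000, /* 01 */ \
	400000, /* 10 */ \
	/* 11: Normal mode */ \
}

static const unsigned int power_mode_levels[] = POWER_MODE_LEVELS;

/* Hypothetical helper: returns 0 for the mode that has no fixed level. */
static unsigned int power_level_for_mode(unsigned int mode)
{
	if (mode >= sizeof(power_mode_levels) / sizeof(power_mode_levels[0]))
		return 0; /* 0b11: normal mode, no fixed level */
	return power_mode_levels[mode];
}

int main(void)
{
	unsigned int mode;

	for (mode = 0; mode < 4; mode++) {
		unsigned int level = power_level_for_mode(mode);

		if (level)
			printf("PWR_RD_MODE%u -> %u (assumed mW)\n", mode, level);
		else
			printf("PWR_RD_MODE%u -> normal mode, no fixed level\n", mode);
	}
	return 0;
}

Only the three-levels-plus-normal-mode shape is given by the patch; whether the firmware reports these values in milliwatts is not stated there.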
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 80f1fb3548ae..dbf76b8a5b4c 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -1,16 +1,30 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation intel_vpu-y := \ ivpu_drv.o \ ivpu_fw.o \ + ivpu_fw_log.o \ ivpu_gem.o \ - ivpu_hw_mtl.o \ + ivpu_gem_userptr.o \ + ivpu_hw.o \ + ivpu_hw_btrs.o \ + ivpu_hw_ip.o \ ivpu_ipc.o \ ivpu_job.o \ ivpu_jsm_msg.o \ ivpu_mmu.o \ ivpu_mmu_context.o \ - ivpu_pm.o + ivpu_ms.o \ + ivpu_pm.o \ + ivpu_sysfs.o \ + ivpu_trace_points.o -obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
\ No newline at end of file +intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o +intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o + +obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o + +subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG + +CFLAGS_ivpu_trace_points.o = -I$(src) diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO deleted file mode 100644 index 9077217ae10f..000000000000 --- a/drivers/accel/ivpu/TODO +++ /dev/null @@ -1,11 +0,0 @@ -- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler() -- Implement support for BLOB IDs -- Add debugfs support to improve debugging and testing -- Add tracing events for performance debugging -- Implement HW based scheduling support -- Use syncobjs for submit/sync -- Refactor IPC protocol to improve message latency -- Implement BO cache and MADVISE IOCTL -- Add support for user allocated buffers using prime import and dma-buf heaps -- Refactor struct ivpu_bo to use struct drm_gem_shmem_object -- Add driver/device documentation diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c new file mode 100644 index 000000000000..16ad0c30818c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_coredump.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <linux/devcoredump.h> +#include <linux/firmware.h> + +#include "ivpu_coredump.h" +#include "ivpu_fw.h" +#include "ivpu_gem.h" +#include "vpu_boot_api.h" + +#define CRASH_DUMP_HEADER "Intel NPU crash dump" +#define CRASH_DUMP_HEADERS_SIZE SZ_4K + +void ivpu_dev_coredump(struct ivpu_device *vdev) +{ + struct drm_print_iterator pi = {}; + struct drm_printer p; + size_t coredump_size; + char *coredump; + + coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE + + ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb); + coredump = vmalloc(coredump_size); + if (!coredump) + return; + + pi.data = coredump; + pi.remain = coredump_size; + p = drm_coredump_printer(&pi); + + drm_printf(&p, "%s\n", CRASH_DUMP_HEADER); + drm_printf(&p, "FW version: %s\n", vdev->fw->version); + ivpu_fw_log_print(vdev, false, &p); + + dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL); +} diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h new file mode 100644 index 000000000000..8efb09d02441 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_coredump.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_COREDUMP_H__ +#define __IVPU_COREDUMP_H__ + +#include <drm/drm_print.h> + +#include "ivpu_drv.h" +#include "ivpu_fw_log.h" + +#ifdef CONFIG_DEV_COREDUMP +void ivpu_dev_coredump(struct ivpu_device *vdev); +#else +static inline void ivpu_dev_coredump(struct ivpu_device *vdev) +{ + struct drm_printer p = drm_info_printer(vdev->drm.dev); + + ivpu_fw_log_print(vdev, false, &p); +} +#endif + +#endif /* __IVPU_COREDUMP_H__ */ diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c new file mode 100644 index 000000000000..3bd85ee6c26b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -0,0 +1,512 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <linux/debugfs.h> +#include <linux/fault-inject.h> + +#include <drm/drm_debugfs.h> +#include <drm/drm_file.h> +#include <drm/drm_print.h> + +#include <uapi/drm/ivpu_accel.h> + +#include "ivpu_debugfs.h" +#include "ivpu_drv.h" 
+#include "ivpu_fw.h" +#include "ivpu_fw_log.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" + +static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s) +{ + struct drm_debugfs_entry *entry = s->private; + + return to_ivpu_device(entry->dev); +} + +static int bo_list_show(struct seq_file *s, void *v) +{ + struct drm_printer p = drm_seq_file_printer(s); + struct ivpu_device *vdev = seq_to_ivpu(s); + + ivpu_bo_list(&vdev->drm, &p); + + return 0; +} + +static int fw_name_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%s\n", vdev->fw->name); + return 0; +} + +static int fw_version_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%s\n", vdev->fw->version); + return 0; +} + +static int fw_trace_capability_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + u64 trace_hw_component_mask; + u32 trace_destination_mask; + int ret; + + ret = ivpu_jsm_trace_get_capability(vdev, &trace_destination_mask, + &trace_hw_component_mask); + if (!ret) { + seq_printf(s, + "trace_destination_mask: %#18x\n" + "trace_hw_component_mask: %#18llx\n", + trace_destination_mask, trace_hw_component_mask); + } + return 0; +} + +static int fw_trace_config_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + /** + * WA: VPU_JSM_MSG_TRACE_GET_CONFIG command is not working yet, + * so we use values from vdev->fw instead of calling ivpu_jsm_trace_get_config() + */ + u32 trace_level = vdev->fw->trace_level; + u32 trace_destination_mask = vdev->fw->trace_destination_mask; + u64 trace_hw_component_mask = vdev->fw->trace_hw_component_mask; + + seq_printf(s, + "trace_level: %#18x\n" + "trace_destination_mask: %#18x\n" + "trace_hw_component_mask: %#18llx\n", + trace_level, trace_destination_mask, trace_hw_component_mask); + + return 0; +} + +static int last_bootmode_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? 
"warmboot" : "coldboot"); + + return 0; +} + +static int reset_counter_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_counter)); + return 0; +} + +static int reset_pending_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending)); + return 0; +} + +static int firewall_irq_counter_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter)); + return 0; +} + +static const struct drm_debugfs_info vdev_debugfs_list[] = { + {"bo_list", bo_list_show, 0}, + {"fw_name", fw_name_show, 0}, + {"fw_version", fw_version_show, 0}, + {"fw_trace_capability", fw_trace_capability_show, 0}, + {"fw_trace_config", fw_trace_config_show, 0}, + {"last_bootmode", last_bootmode_show, 0}, + {"reset_counter", reset_counter_show, 0}, + {"reset_pending", reset_pending_show, 0}, + {"firewall_irq_counter", firewall_irq_counter_show, 0}, +}; + +static int dvfs_mode_get(void *data, u64 *dvfs_mode) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + *dvfs_mode = vdev->fw->dvfs_mode; + return 0; +} + +static int dvfs_mode_set(void *data, u64 dvfs_mode) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + vdev->fw->dvfs_mode = (u32)dvfs_mode; + return pci_try_reset_function(to_pci_dev(vdev->drm.dev)); +} + +DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n"); + +static ssize_t +fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + char buffer[VPU_DYNDBG_CMD_MAX_LEN] = {}; + int ret; + + if (size >= VPU_DYNDBG_CMD_MAX_LEN) + return -EINVAL; + + ret = strncpy_from_user(buffer, user_buf, size); + if (ret < 0) + return ret; + + ivpu_jsm_dyndbg_control(vdev, buffer, size); + return size; +} + +static const struct file_operations fw_dyndbg_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = fw_dyndbg_fops_write, +}; + +static int fw_log_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = s->private; + struct drm_printer p = drm_seq_file_printer(s); + + ivpu_fw_log_print(vdev, true, &p); + return 0; +} + +static int fw_log_fops_open(struct inode *inode, struct file *file) +{ + return single_open(file, fw_log_show, inode->i_private); +} + +static ssize_t +fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct seq_file *s = file->private_data; + struct ivpu_device *vdev = s->private; + + if (!size) + return -EINVAL; + + ivpu_fw_log_mark_read(vdev); + return size; +} + +static const struct file_operations fw_log_fops = { + .owner = THIS_MODULE, + .open = fw_log_fops_open, + .write = fw_log_fops_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t +fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf, + size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + bool enable; + int ret; + + ret = kstrtobool_from_user(user_buf, size, &enable); + if (ret < 0) + return ret; + + ivpu_hw_profiling_freq_drive(vdev, enable); + + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + return ret; + + return size; +} + +static const struct file_operations fw_profiling_freq_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = 
fw_profiling_freq_fops_write, +}; + +static ssize_t +fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf, + size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + struct ivpu_fw_info *fw = vdev->fw; + u32 trace_destination_mask; + int ret; + + ret = kstrtou32_from_user(user_buf, size, 0, &trace_destination_mask); + if (ret < 0) + return ret; + + fw->trace_destination_mask = trace_destination_mask; + + ivpu_jsm_trace_set_config(vdev, fw->trace_level, trace_destination_mask, + fw->trace_hw_component_mask); + + return size; +} + +static const struct file_operations fw_trace_destination_mask_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = fw_trace_destination_mask_fops_write, +}; + +static ssize_t +fw_trace_hw_comp_mask_fops_write(struct file *file, const char __user *user_buf, + size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + struct ivpu_fw_info *fw = vdev->fw; + u64 trace_hw_component_mask; + int ret; + + ret = kstrtou64_from_user(user_buf, size, 0, &trace_hw_component_mask); + if (ret < 0) + return ret; + + fw->trace_hw_component_mask = trace_hw_component_mask; + + ivpu_jsm_trace_set_config(vdev, fw->trace_level, fw->trace_destination_mask, + trace_hw_component_mask); + + return size; +} + +static const struct file_operations fw_trace_hw_comp_mask_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = fw_trace_hw_comp_mask_fops_write, +}; + +static ssize_t +fw_trace_level_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + struct ivpu_fw_info *fw = vdev->fw; + u32 trace_level; + int ret; + + ret = kstrtou32_from_user(user_buf, size, 0, &trace_level); + if (ret < 0) + return ret; + + fw->trace_level = trace_level; + + ivpu_jsm_trace_set_config(vdev, trace_level, fw->trace_destination_mask, + fw->trace_hw_component_mask); + + return size; +} + +static const struct file_operations fw_trace_level_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = fw_trace_level_fops_write, +}; + +static ssize_t +ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + int ret; + + if (!size) + return -EINVAL; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + ivpu_pm_trigger_recovery(vdev, "debugfs"); + flush_work(&vdev->pm->recovery_work); + ivpu_rpm_put(vdev); + return size; +} + +static const struct file_operations ivpu_force_recovery_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = ivpu_force_recovery_fn, +}; + +static int ivpu_reset_engine_fn(void *data, u64 val) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + return ivpu_jsm_reset_engine(vdev, (u32)val); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, ivpu_reset_engine_fn, "0x%02llx\n"); + +static int ivpu_resume_engine_fn(void *data, u64 val) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; + + return ivpu_jsm_hws_resume_engine(vdev, (u32)val); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n"); + +static int dct_active_get(void *data, u64 *active_percent) +{ + struct ivpu_device *vdev = data; + + *active_percent = vdev->pm->dct_active_percent; + + return 0; +} + +static int dct_active_set(void *data, u64 active_percent) +{ + struct ivpu_device *vdev = data; + int ret; + + if (active_percent > 100) + return -EINVAL; + + ret = 
ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + if (active_percent) + ret = ivpu_pm_dct_enable(vdev, active_percent); + else + ret = ivpu_pm_dct_disable(vdev); + + ivpu_rpm_put(vdev); + + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n"); + +static void print_priority_band(struct seq_file *s, struct ivpu_hw_info *hw, + int band, const char *name) +{ + seq_printf(s, "%-9s: grace_period %9u process_grace_period %9u process_quantum %9u\n", + name, + hw->hws.grace_period[band], + hw->hws.process_grace_period[band], + hw->hws.process_quantum[band]); +} + +static int priority_bands_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = s->private; + struct ivpu_hw_info *hw = vdev->hw; + + print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE, "Idle"); + print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL, "Normal"); + print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS, "Focus"); + print_priority_band(s, hw, VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME, "Realtime"); + + return 0; +} + +static int priority_bands_fops_open(struct inode *inode, struct file *file) +{ + return single_open(file, priority_bands_show, inode->i_private); +} + +static ssize_t +priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct seq_file *s = file->private_data; + struct ivpu_device *vdev = s->private; + char buf[64]; + u32 grace_period; + u32 process_grace_period; + u32 process_quantum; + u32 band; + int ret; + + if (size >= sizeof(buf)) + return -EINVAL; + + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, pos, user_buf, size); + if (ret < 0) + return ret; + + buf[ret] = '\0'; + ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, + &process_quantum); + if (ret != 4) + return -EINVAL; + + if (band >= VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT) + return -EINVAL; + + vdev->hw->hws.grace_period[band] = grace_period; + vdev->hw->hws.process_grace_period[band] = process_grace_period; + vdev->hw->hws.process_quantum[band] = process_quantum; + + return size; +} + +static const struct file_operations ivpu_hws_priority_bands_fops = { + .owner = THIS_MODULE, + .open = priority_bands_fops_open, + .write = priority_bands_fops_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void ivpu_debugfs_init(struct ivpu_device *vdev) +{ + struct dentry *debugfs_root = vdev->drm.debugfs_root; + + drm_debugfs_add_files(&vdev->drm, vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list)); + + debugfs_create_file("force_recovery", 0200, debugfs_root, vdev, + &ivpu_force_recovery_fops); + + debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev, + &dvfs_mode_fops); + + debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev, + &fw_dyndbg_fops); + debugfs_create_file("fw_log", 0644, debugfs_root, vdev, + &fw_log_fops); + debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev, + &fw_trace_destination_mask_fops); + debugfs_create_file("fw_trace_hw_comp_mask", 0200, debugfs_root, vdev, + &fw_trace_hw_comp_mask_fops); + debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev, + &fw_trace_level_fops); + debugfs_create_file("hws_priority_bands", 0200, debugfs_root, vdev, + &ivpu_hws_priority_bands_fops); + + debugfs_create_file("reset_engine", 0200, debugfs_root, vdev, + &ivpu_reset_engine_fops); + debugfs_create_file("resume_engine", 0200, debugfs_root, vdev, + &ivpu_resume_engine_fops); + + if 
(ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX) { + debugfs_create_file("fw_profiling_freq_drive", 0200, + debugfs_root, vdev, &fw_profiling_freq_fops); + debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops); + } + +#ifdef CONFIG_FAULT_INJECTION + fault_create_debugfs_attr("fail_hw", debugfs_root, &ivpu_hw_failure); +#endif +} diff --git a/drivers/accel/ivpu/ivpu_debugfs.h b/drivers/accel/ivpu/ivpu_debugfs.h new file mode 100644 index 000000000000..49ae9ea78287 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_debugfs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_DEBUGFS_H__ +#define __IVPU_DEBUGFS_H__ + +struct ivpu_device; + +#if defined(CONFIG_DEBUG_FS) +void ivpu_debugfs_init(struct ivpu_device *vdev); +#else +static inline void ivpu_debugfs_init(struct ivpu_device *vdev) { } +#endif + +#endif /* __IVPU_DEBUGFS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index a29e8ee0dce6..3d6fccdefdd6 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,22 +1,26 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <linux/firmware.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/pm_runtime.h> +#include <linux/workqueue.h> +#include <generated/utsrelease.h> #include <drm/drm_accel.h> -#include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_ioctl.h> #include <drm/drm_prime.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" +#include "ivpu_debugfs.h" #include "ivpu_drv.h" #include "ivpu_fw.h" +#include "ivpu_fw_log.h" #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" @@ -24,32 +28,44 @@ #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" +#include "ivpu_sysfs.h" +#include "vpu_boot_api.h" #ifndef DRIVER_VERSION_STR -#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ - __stringify(DRM_IVPU_DRIVER_MINOR) "." +#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE #endif -static const struct drm_driver driver; - -static struct lock_class_key submitted_jobs_xa_lock_class_key; - int ivpu_dbg_mask; module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); int ivpu_test_mode; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); -MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw"); +MODULE_PARM_DESC(test_mode, "Test mode mask. 
See IVPU_TEST_MODE_* macros."); +#endif u8 ivpu_pll_min_ratio; module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); -MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency"); +MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set NPU frequency"); u8 ivpu_pll_max_ratio = U8_MAX; module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); -MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); +MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency"); + +int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO; +module_param_named(sched_mode, ivpu_sched_mode, int, 0444); +MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler (supported on 37XX - 50XX), 1 - Use HW scheduler"); + +bool ivpu_disable_mmu_cont_pages; +module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444); +MODULE_PARM_DESC(disable_mmu_cont_pages, "Disable MMU contiguous pages optimization"); + +bool ivpu_force_snoop; +module_param_named(force_snoop, ivpu_force_snoop, bool, 0444); +MODULE_PARM_DESC(force_snoop, "Force snooping for NPU host memory access"); struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) { @@ -63,22 +79,19 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) return file_priv; } -struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id) +static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv) { - struct ivpu_file_priv *file_priv; - - xa_lock_irq(&vdev->context_xa); - file_priv = xa_load(&vdev->context_xa, id); - /* file_priv may still be in context_xa during file_priv_release() */ - if (file_priv && !kref_get_unless_zero(&file_priv->ref)) - file_priv = NULL; - xa_unlock_irq(&vdev->context_xa); - - if (file_priv) - ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n", - file_priv->ctx.id, kref_read(&file_priv->ref)); - - return file_priv; + mutex_lock(&file_priv->lock); + if (file_priv->bound) { + ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id); + + ivpu_cmdq_release_all_locked(file_priv); + ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx); + ivpu_mmu_context_fini(vdev, &file_priv->ctx); + file_priv->bound = false; + drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id)); + } + mutex_unlock(&file_priv->lock); } static void file_priv_release(struct kref *ref) @@ -86,12 +99,18 @@ static void file_priv_release(struct kref *ref) struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); struct ivpu_device *vdev = file_priv->vdev; - ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + ivpu_dbg(vdev, FILE, "file_priv release: ctx %u bound %d\n", + file_priv->ctx.id, (bool)file_priv->bound); + + pm_runtime_get_sync(vdev->drm.dev); + mutex_lock(&vdev->context_list_lock); + file_priv_unbind(vdev, file_priv); + drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa)); + xa_destroy(&file_priv->cmdq_xa); + mutex_unlock(&vdev->context_list_lock); + pm_runtime_put_autosuspend(vdev->drm.dev); - ivpu_cmdq_release_all(file_priv); - ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); - drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); + mutex_destroy(&file_priv->ms_lock); mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -101,8 +120,6 @@ void 
ivpu_file_priv_put(struct ivpu_file_priv **link) struct ivpu_file_priv *file_priv = *link; struct ivpu_device *vdev = file_priv->vdev; - drm_WARN_ON(&vdev->drm, !file_priv); - ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", file_priv->ctx.id, kref_read(&file_priv->ref)); @@ -110,6 +127,22 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) kref_put(&file_priv->ref, file_priv_release); } +bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability) +{ + switch (capability) { + case DRM_IVPU_CAP_METRIC_STREAMER: + return true; + case DRM_IVPU_CAP_DMA_MEMORY_RANGE: + return true; + case DRM_IVPU_CAP_BO_CREATE_FROM_USERPTR: + return true; + case DRM_IVPU_CAP_MANAGE_CMDQ: + return vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW; + default: + return false; + } +} + static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -117,6 +150,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); struct drm_ivpu_param *args = data; int ret = 0; + int idx; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; switch (args->param) { case DRM_IVPU_PARAM_DEVICE_ID: @@ -129,16 +166,13 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - args->value = ivpu_hw_reg_pll_freq_get(vdev); + args->value = ivpu_hw_dpu_max_freq_get(vdev); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); break; case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: - args->value = vdev->hw->ranges.user_low.start; - break; - case DRM_IVPU_PARAM_CONTEXT_PRIORITY: - args->value = file_priv->priority; + args->value = vdev->hw->ranges.user.start; break; case DRM_IVPU_PARAM_CONTEXT_ID: args->value = file_priv->ctx.id; @@ -165,27 +199,27 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f case DRM_IVPU_PARAM_SKU: args->value = vdev->hw->sku; break; + case DRM_IVPU_PARAM_CAPABILITIES: + args->value = ivpu_is_capable(vdev, args->index); + break; + case DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE: + args->value = ivpu_fw_preempt_buf_size(vdev); + break; default: ret = -EINVAL; break; } + drm_dev_exit(idx); return ret; } static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_param *args = data; int ret = 0; switch (args->param) { - case DRM_IVPU_PARAM_CONTEXT_PRIORITY: - if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME) - file_priv->priority = args->value; - else - ret = -EINVAL; - break; default: ret = -EINVAL; } @@ -198,50 +232,60 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) struct ivpu_device *vdev = to_ivpu_device(dev); struct ivpu_file_priv *file_priv; u32 ctx_id; - void *old; - int ret; + int idx, ret; - ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL); - if (ret) { - ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); - return ret; - } + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) { ret = -ENOMEM; - goto err_xa_erase; + goto err_dev_exit; } + INIT_LIST_HEAD(&file_priv->ms_instance_list); + file_priv->vdev = vdev; - file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; + file_priv->bound = true; kref_init(&file_priv->ref); 
mutex_init(&file_priv->lock); + mutex_init(&file_priv->ms_lock); - ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); - if (ret) - goto err_mutex_destroy; + mutex_lock(&vdev->context_list_lock); - old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); - if (xa_is_err(old)) { - ret = xa_err(old); - ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret); - goto err_ctx_fini; + ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, file_priv, + vdev->context_xa_limit, GFP_KERNEL); + if (ret) { + ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); + goto err_unlock; } + ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id); + + file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK; + + xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1); + file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID; + file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID; + + mutex_unlock(&vdev->context_list_lock); + drm_dev_exit(idx); + + file->driver_priv = file_priv; + ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n", ctx_id, current->comm, task_pid_nr(current)); - file->driver_priv = file_priv; return 0; -err_ctx_fini: - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); -err_mutex_destroy: +err_unlock: + mutex_unlock(&vdev->context_list_lock); + mutex_destroy(&file_priv->ms_lock); mutex_destroy(&file_priv->lock); kfree(file_priv); -err_xa_erase: - xa_erase_irq(&vdev->context_xa, ctx_id); +err_dev_exit: + drm_dev_exit(idx); return ret; } @@ -253,6 +297,7 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n", file_priv->ctx.id, current->comm, task_pid_nr(current)); + ivpu_ms_cleanup(file_priv); ivpu_file_priv_put(&file_priv); } @@ -263,6 +308,14 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_START, ivpu_ms_start_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_CMDQ_CREATE, ivpu_cmdq_create_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_CMDQ_DESTROY, ivpu_cmdq_destroy_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_CMDQ_SUBMIT, ivpu_cmdq_submit_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE_FROM_USERPTR, ivpu_bo_create_from_userptr_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) @@ -272,16 +325,14 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) unsigned long timeout; int ret; - if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST) + if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST) return 0; - ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG); + ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL); timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot); while (1) { - ret = ivpu_ipc_irq_handler(vdev); - if (ret) - break; + ivpu_ipc_irq_handler(vdev); ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0); if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout)) break; @@ -292,15 +343,28 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) ivpu_ipc_consumer_del(vdev, &cons); if (!ret && ipc_hdr.data_addr != 
IVPU_IPC_BOOT_MSG_DATA_ADDR) { - ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n", + ivpu_err(vdev, "Invalid NPU ready message: 0x%x\n", ipc_hdr.data_addr); return -EIO; } if (!ret) - ivpu_info(vdev, "VPU ready message received successfully\n"); - else - ivpu_hw_diagnose_failure(vdev); + ivpu_dbg(vdev, PM, "NPU ready message received successfully\n"); + + return ret; +} + +static int ivpu_hw_sched_init(struct ivpu_device *vdev) +{ + int ret = 0; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_setup_priority_bands(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret); + return ret; + } + } return ret; } @@ -316,8 +380,10 @@ int ivpu_boot(struct ivpu_device *vdev) { int ret; - /* Update boot params located at first 4KB of FW memory */ - ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr); + drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter)); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + + ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem_bp)); ret = ivpu_hw_boot_fw(vdev); if (ret) { @@ -328,35 +394,70 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); - return ret; + goto err_diagnose_failure; } ivpu_hw_irq_clear(vdev); enable_irq(vdev->irq); ivpu_hw_irq_enable(vdev); ivpu_ipc_enable(vdev); + + if (ivpu_fw_is_cold_boot(vdev)) { + ret = ivpu_pm_dct_init(vdev); + if (ret) + goto err_disable_ipc; + + ret = ivpu_hw_sched_init(vdev); + if (ret) + goto err_disable_ipc; + } + return 0; + +err_disable_ipc: + ivpu_ipc_disable(vdev); + ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); +err_diagnose_failure: + ivpu_hw_diagnose_failure(vdev); + ivpu_mmu_evtq_dump(vdev); + ivpu_dev_coredump(vdev); + return ret; } -int ivpu_shutdown(struct ivpu_device *vdev) +void ivpu_prepare_for_reset(struct ivpu_device *vdev) { - int ret; - ivpu_hw_irq_disable(vdev); disable_irq(vdev->irq); + flush_work(&vdev->irq_ipc_work); + flush_work(&vdev->irq_dct_work); + flush_work(&vdev->context_abort_work); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); +} + +int ivpu_shutdown(struct ivpu_device *vdev) +{ + int ret; + + /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ + pci_save_state(to_pci_dev(vdev->drm.dev)); ret = ivpu_hw_power_down(vdev); if (ret) ivpu_warn(vdev, "Failed to power down HW: %d\n", ret); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + return ret; } static const struct file_operations ivpu_fops = { .owner = THIS_MODULE, DRM_ACCEL_FOPS, +#ifdef CONFIG_PROC_FS + .show_fdinfo = drm_show_fdinfo, +#endif }; static const struct drm_driver driver = { @@ -364,20 +465,21 @@ static const struct drm_driver driver = { .open = ivpu_open, .postclose = ivpu_postclose, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + + .gem_create_object = ivpu_gem_create_object, .gem_prime_import = ivpu_gem_prime_import, - .gem_prime_mmap = drm_gem_prime_mmap, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), .fops = &ivpu_fops, +#ifdef CONFIG_PROC_FS + .show_fdinfo = drm_show_memory_stats, +#endif .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, - .major = DRM_IVPU_DRIVER_MAJOR, - .minor = DRM_IVPU_DRIVER_MINOR, + + .major = 1, }; static int ivpu_irq_init(struct ivpu_device *vdev) @@ -391,9 +493,15 @@ static int ivpu_irq_init(struct ivpu_device *vdev) return ret; } + INIT_WORK(&vdev->irq_ipc_work, 
ivpu_ipc_irq_work_fn); + INIT_WORK(&vdev->irq_dct_work, ivpu_pm_irq_dct_work_fn); + INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_work_fn); + + ivpu_irq_handlers_init(vdev); + vdev->irq = pci_irq_vector(pdev, 0); - ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, + ret = devm_request_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev); if (ret) ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); @@ -422,15 +530,19 @@ static int ivpu_pci_init(struct ivpu_device *vdev) return PTR_ERR(vdev->regb); } - ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38)); + ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(vdev->hw->dma_bits)); if (ret) { ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret); return ret; } + dma_set_max_seg_size(vdev->drm.dev, UINT_MAX); /* Clear any pending errors */ pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); + /* NPU does not require 10m D3hot delay */ + pdev->d3hot_delay = 0; + ret = pcim_enable_device(pdev); if (ret) { ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret); @@ -466,118 +578,132 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (!vdev->pm) return -ENOMEM; - vdev->hw->ops = &ivpu_hw_mtl_ops; + if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX) + vdev->hw->dma_bits = 48; + else + vdev->hw->dma_bits = 38; + vdev->platform = IVPU_PLATFORM_INVALID; - vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; - vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; + vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; + vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); - xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + atomic_set(&vdev->job_timeout_counter, 0); + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); - lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); + xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); + INIT_LIST_HEAD(&vdev->bo_list); + + vdev->db_limit.min = IVPU_MIN_DB; + vdev->db_limit.max = IVPU_MAX_DB; + + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); + if (ret) + goto err_xa_destroy; + + ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock); + if (ret) + goto err_xa_destroy; + + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); + if (ret) + goto err_xa_destroy; ret = ivpu_pci_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret); + if (ret) goto err_xa_destroy; - } ret = ivpu_irq_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret); + if (ret) goto err_xa_destroy; - } /* Init basic HW info based on buttress registers which are accessible before power up */ - ret = ivpu_hw_info_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret); + ret = ivpu_hw_init(vdev); + if (ret) goto err_xa_destroy; - } /* Power up early so the rest of init code can access VPU registers */ ret = ivpu_hw_power_up(vdev); - if (ret) { - ivpu_err(vdev, "Failed to power up HW: %d\n", ret); - goto err_xa_destroy; - } + if (ret) + goto err_shutdown; - ret = ivpu_mmu_global_context_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret); - goto err_power_down; - } + ivpu_mmu_global_context_init(vdev); ret = ivpu_mmu_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret); + if (ret) goto err_mmu_gctx_fini; - } - 
ret = ivpu_fw_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret); + ret = ivpu_mmu_reserved_context_init(vdev); + if (ret) goto err_mmu_gctx_fini; - } + + ret = ivpu_fw_init(vdev); + if (ret) + goto err_mmu_rctx_fini; ret = ivpu_ipc_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret); + if (ret) goto err_fw_fini; - } - ret = ivpu_pm_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize PM: %d\n", ret); - goto err_ipc_fini; - } - - ret = ivpu_job_done_thread_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); - goto err_ipc_fini; - } - - ret = ivpu_fw_load(vdev); - if (ret) { - ivpu_err(vdev, "Failed to load firmware: %d\n", ret); - goto err_job_done_thread_fini; - } + ivpu_pm_init(vdev); ret = ivpu_boot(vdev); - if (ret) { - ivpu_err(vdev, "Failed to boot: %d\n", ret); - goto err_job_done_thread_fini; - } + if (ret) + goto err_ipc_fini; + ivpu_job_done_consumer_init(vdev); ivpu_pm_enable(vdev); return 0; -err_job_done_thread_fini: - ivpu_job_done_thread_fini(vdev); err_ipc_fini: ivpu_ipc_fini(vdev); err_fw_fini: ivpu_fw_fini(vdev); +err_mmu_rctx_fini: + ivpu_mmu_reserved_context_fini(vdev); err_mmu_gctx_fini: ivpu_mmu_global_context_fini(vdev); -err_power_down: - ivpu_hw_power_down(vdev); +err_shutdown: + ivpu_shutdown(vdev); err_xa_destroy: + xa_destroy(&vdev->db_xa); xa_destroy(&vdev->submitted_jobs_xa); xa_destroy(&vdev->context_xa); return ret; } +static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + file_priv_unbind(vdev, file_priv); + + mutex_unlock(&vdev->context_list_lock); +} + static void ivpu_dev_fini(struct ivpu_device *vdev) { + ivpu_jobs_abort_all(vdev); + ivpu_pm_disable_recovery(vdev); ivpu_pm_disable(vdev); + ivpu_prepare_for_reset(vdev); ivpu_shutdown(vdev); - ivpu_job_done_thread_fini(vdev); + + ivpu_ms_cleanup_all(vdev); + ivpu_job_done_consumer_fini(vdev); + ivpu_bo_unbind_all_user_contexts(vdev); + ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); + ivpu_mmu_reserved_context_fini(vdev); ivpu_mmu_global_context_fini(vdev); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->db_xa)); + xa_destroy(&vdev->db_xa); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); xa_destroy(&vdev->submitted_jobs_xa); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); @@ -586,6 +712,11 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) static struct pci_device_id ivpu_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_WCL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_NVL) }, { } }; MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); @@ -602,10 +733,11 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, vdev); ret = ivpu_dev_init(vdev); - if (ret) { - dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret); + if (ret) return ret; - } + + ivpu_debugfs_init(vdev); + ivpu_sysfs_init(vdev); ret = drm_dev_register(&vdev->drm, 0); if (ret) { @@ -620,7 +752,7 @@ static void ivpu_remove(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - drm_dev_unregister(&vdev->drm); + 
drm_dev_unplug(&vdev->drm); ivpu_dev_fini(vdev); } diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index f47b4965db2e..5b34b6f50e69 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_DRV_H__ #define __IVPU_DRV_H__ #include <drm/drm_device.h> +#include <drm/drm_drv.h> #include <drm/drm_managed.h> #include <drm/drm_mm.h> #include <drm/drm_print.h> @@ -16,22 +17,54 @@ #include <uapi/drm/ivpu_accel.h> #include "ivpu_mmu_context.h" +#include "ivpu_ipc.h" #define DRIVER_NAME "intel_vpu" -#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" -#define DRIVER_DATE "20230117" +#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)" -#define PCI_DEVICE_ID_MTL 0x7d1d +#define PCI_DEVICE_ID_MTL 0x7d1d +#define PCI_DEVICE_ID_ARL 0xad1d +#define PCI_DEVICE_ID_LNL 0x643e +#define PCI_DEVICE_ID_PTL_P 0xb03e +#define PCI_DEVICE_ID_WCL 0xfd3e +#define PCI_DEVICE_ID_NVL 0xd71d -#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 -#define IVPU_CONTEXT_LIMIT 64 -#define IVPU_NUM_ENGINES 2 +#define IVPU_HW_IP_37XX 37 +#define IVPU_HW_IP_40XX 40 +#define IVPU_HW_IP_50XX 50 +#define IVPU_HW_IP_60XX 60 + +#define IVPU_HW_IP_REV_LNL_B0 4 + +#define IVPU_HW_BTRS_MTL 1 +#define IVPU_HW_BTRS_LNL 2 + +#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 +/* SSID 1 is used by the VPU to represent reserved context */ +#define IVPU_RESERVED_CONTEXT_MMU_SSID 1 +#define IVPU_USER_CONTEXT_MIN_SSID 2 +#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63) + +#define IVPU_MIN_DB 1 +#define IVPU_MAX_DB 255 + +#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0) +#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8) + +#define IVPU_NUM_PRIORITIES 4 +#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES) + +#define IVPU_CMDQ_MIN_ID 1 +#define IVPU_CMDQ_MAX_ID 255 #define IVPU_PLATFORM_SILICON 0 #define IVPU_PLATFORM_SIMICS 2 #define IVPU_PLATFORM_FPGA 3 +#define IVPU_PLATFORM_HSLE 4 #define IVPU_PLATFORM_INVALID 8 +#define IVPU_SCHED_MODE_AUTO -1 + #define IVPU_DBG_REG BIT(0) #define IVPU_DBG_IRQ BIT(1) #define IVPU_DBG_MMU BIT(2) @@ -45,6 +78,8 @@ #define IVPU_DBG_JSM BIT(10) #define IVPU_DBG_KREF BIT(11) #define IVPU_DBG_RPM BIT(12) +#define IVPU_DBG_MMU_MAP BIT(13) +#define IVPU_DBG_IOCTL BIT(14) #define ivpu_err(vdev, fmt, ...) 
\ drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) @@ -67,9 +102,19 @@ #define IVPU_WA(wa_name) (vdev->wa.wa_name) +#define IVPU_PRINT_WA(wa_name) do { \ + if (IVPU_WA(wa_name)) \ + ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n"); \ +} while (0) + struct ivpu_wa_table { bool punit_disabled; bool clear_runtime_mem; + bool interrupt_clear_with_0; + bool disable_clock_relinquish; + bool disable_d0i3_msg; + bool wp0_during_power_up; + bool disable_d0i2; }; struct ivpu_hw_info; @@ -93,19 +138,40 @@ struct ivpu_device { struct ivpu_pm_info *pm; struct ivpu_mmu_context gctx; + struct ivpu_mmu_context rctx; + struct mutex context_list_lock; /* Protects user context addition/removal */ struct xarray context_xa; struct xa_limit context_xa_limit; + struct xarray db_xa; + struct xa_limit db_limit; + u32 db_next; + + struct work_struct irq_ipc_work; + struct work_struct irq_dct_work; + struct work_struct context_abort_work; + + struct mutex bo_list_lock; /* Protects bo_list */ + struct list_head bo_list; + + struct mutex submitted_jobs_lock; /* Protects submitted_jobs */ struct xarray submitted_jobs_xa; - struct task_struct *job_done_thread; + struct ivpu_ipc_consumer job_done_consumer; + atomic_t job_timeout_counter; atomic64_t unique_id_counter; + ktime_t busy_start_ts; + ktime_t busy_time; + struct { int boot; int jsm; int tdr; - int reschedule_suspend; + int inference; + int autosuspend; + int d0i3_entry_msg; + int state_dump_msg; } timeout; }; @@ -117,32 +183,48 @@ struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; struct mutex lock; /* Protects cmdq */ - struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; + struct xarray cmdq_xa; struct ivpu_mmu_context ctx; - u32 priority; + struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */ + struct list_head ms_instance_list; + struct ivpu_bo *ms_info_bo; + struct xa_limit job_limit; + u32 job_id_next; + struct xa_limit cmdq_limit; + u32 cmdq_id_next; bool has_mmu_faults; + bool bound; + bool aborted; }; extern int ivpu_dbg_mask; extern u8 ivpu_pll_min_ratio; extern u8 ivpu_pll_max_ratio; - -#define IVPU_TEST_MODE_DISABLED 0 -#define IVPU_TEST_MODE_FW_TEST 1 -#define IVPU_TEST_MODE_NULL_HW 2 +extern int ivpu_sched_mode; +extern bool ivpu_disable_mmu_cont_pages; +extern bool ivpu_force_snoop; + +#define IVPU_TEST_MODE_FW_TEST BIT(0) +#define IVPU_TEST_MODE_NULL_HW BIT(1) +#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2) +#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4) +#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5) +#define IVPU_TEST_MODE_MIP_DISABLE BIT(6) +#define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8) +#define IVPU_TEST_MODE_TURBO_ENABLE BIT(9) +#define IVPU_TEST_MODE_TURBO_DISABLE BIT(10) +#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11) +#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(12) +#define IVPU_TEST_MODE_D0I2_DISABLE BIT(13) extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); -struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id); void ivpu_file_priv_put(struct ivpu_file_priv **link); int ivpu_boot(struct ivpu_device *vdev); int ivpu_shutdown(struct ivpu_device *vdev); - -static inline bool ivpu_is_mtl(struct ivpu_device *vdev) -{ - return to_pci_dev(vdev->drm.dev)->device == PCI_DEVICE_ID_MTL; -} +void ivpu_prepare_for_reset(struct ivpu_device *vdev); +bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability); static inline u8 ivpu_revision(struct ivpu_device *vdev) { @@ -154,6 +236,44 @@ static inline u16 
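/*
 * A small sketch of how a BIT()-flag module parameter such as ivpu_test_mode
 * above is consumed (the surrounding check is illustrative, not taken from
 * the patch):
 *
 *	if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)
 *		return 0;	// pretend the hardware completed the request
 *
 * Several independent switches ride in one integer, so for example
 * IVPU_TEST_MODE_FW_TEST | IVPU_TEST_MODE_NULL_SUBMISSION (0x5) enables two
 * of them at once.
 */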
ivpu_device_id(struct ivpu_device *vdev)
 {
 	return to_pci_dev(vdev->drm.dev)->device;
 }
 
+static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
+{
+	switch (ivpu_device_id(vdev)) {
+	case PCI_DEVICE_ID_MTL:
+	case PCI_DEVICE_ID_ARL:
+		return IVPU_HW_IP_37XX;
+	case PCI_DEVICE_ID_LNL:
+		return IVPU_HW_IP_40XX;
+	case PCI_DEVICE_ID_PTL_P:
+	case PCI_DEVICE_ID_WCL:
+		return IVPU_HW_IP_50XX;
+	case PCI_DEVICE_ID_NVL:
+		return IVPU_HW_IP_60XX;
+	default:
+		dump_stack();
+		ivpu_err(vdev, "Unknown NPU IP generation\n");
+		return 0;
+	}
+}
+
+static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
+{
+	switch (ivpu_device_id(vdev)) {
+	case PCI_DEVICE_ID_MTL:
+	case PCI_DEVICE_ID_ARL:
+		return IVPU_HW_BTRS_MTL;
+	case PCI_DEVICE_ID_LNL:
+	case PCI_DEVICE_ID_PTL_P:
+	case PCI_DEVICE_ID_WCL:
+	case PCI_DEVICE_ID_NVL:
+		return IVPU_HW_BTRS_LNL;
+	default:
+		dump_stack();
+		ivpu_err(vdev, "Unknown buttress generation\n");
+		return 0;
+	}
+}
+
 static inline struct ivpu_device *to_ivpu_device(struct drm_device *dev)
 {
 	return container_of(dev, struct ivpu_device, drm);
@@ -184,7 +304,13 @@ static inline bool ivpu_is_simics(struct ivpu_device *vdev)
 
 static inline bool ivpu_is_fpga(struct ivpu_device *vdev)
 {
-	return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA;
+	return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA ||
+	       ivpu_get_platform(vdev) == IVPU_PLATFORM_HSLE;
+}
+
+static inline bool ivpu_is_force_snoop_enabled(struct ivpu_device *vdev)
+{
+	return ivpu_force_snoop;
 }
 
 #endif /* __IVPU_DRV_H__ */
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index b463c24adb70..48386d2cddbb 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
 */
 
 #include <linux/firmware.h>
@@ -11,78 +11,204 @@
 #include "vpu_boot_api.h"
 #include "ivpu_drv.h"
 #include "ivpu_fw.h"
+#include "ivpu_fw_log.h"
 #include "ivpu_gem.h"
 #include "ivpu_hw.h"
 #include "ivpu_ipc.h"
 #include "ivpu_pm.h"
 
-#define FW_GLOBAL_MEM_START	(2ull * SZ_1G)
-#define FW_GLOBAL_MEM_END	(3ull * SZ_1G)
-#define FW_SHARED_MEM_SIZE	SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */
-#define FW_SHARED_MEM_ALIGNMENT	SZ_128K /* VPU MTRR limitation */
-#define FW_RUNTIME_MAX_SIZE	SZ_512M
 #define FW_SHAVE_NN_MAX_SIZE	SZ_2M
-#define FW_RUNTIME_MIN_ADDR	(FW_GLOBAL_MEM_START)
-#define FW_RUNTIME_MAX_ADDR	(FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
-#define FW_VERSION_HEADER_SIZE	SZ_4K
 #define FW_FILE_IMAGE_OFFSET	(VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
+#define FW_PREEMPT_BUF_MIN_SIZE	SZ_4K
+#define FW_PREEMPT_BUF_MAX_SIZE	SZ_32M
 
 #define WATCHDOG_MSS_REDIRECT	32
 #define WATCHDOG_NCE_REDIRECT	33
 
 #define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
 
-#define IVPU_FW_CHECK_API(vdev, fw_hdr, name) ivpu_fw_check_api(vdev, fw_hdr, #name, \
-							 VPU_##name##_API_VER_INDEX, \
-							 VPU_##name##_API_VER_MAJOR, \
-							 VPU_##name##_API_VER_MINOR)
+/* Check if FW API is compatible with the driver */
+#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \
+	ivpu_fw_check_api(vdev, fw_hdr, #name, \
+			  VPU_##name##_API_VER_INDEX, \
+			  VPU_##name##_API_VER_MAJOR, \
+			  VPU_##name##_API_VER_MINOR, min_major)
+
+/* Check if API version is lower than the given version */
+#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
+	ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
+
+#define IVPU_FOCUS_PRESENT_TIMER_MS 1000
 
 static char 
*ivpu_firmware; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); -MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/.."); +MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/.."); +#endif + +static struct { + int gen; + const char *name; +} fw_names[] = { + { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v1.bin" }, + { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" }, + { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v1.bin" }, + { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" }, + { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v1.bin" }, + { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" }, + { IVPU_HW_IP_60XX, "intel/vpu/vpu_60xx_v1.bin" }, +}; + +/* Production fw_names from the table above */ +MODULE_FIRMWARE("intel/vpu/vpu_37xx_v1.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_40xx_v1.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_50xx_v1.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_60xx_v1.bin"); static int ivpu_fw_request(struct ivpu_device *vdev) { - static const char * const fw_names[] = { - "mtl_vpu.bin", - "intel/vpu/mtl_vpu_v0.0.bin" - }; int ret = -ENOENT; int i; - if (ivpu_firmware) - return request_firmware(&vdev->fw->file, ivpu_firmware, vdev->drm.dev); + if (ivpu_firmware) { + ret = request_firmware(&vdev->fw->file, ivpu_firmware, vdev->drm.dev); + if (!ret) + vdev->fw->name = ivpu_firmware; + return ret; + } for (i = 0; i < ARRAY_SIZE(fw_names); i++) { - ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i], vdev->drm.dev); - if (!ret) + if (fw_names[i].gen != ivpu_hw_ip_gen(vdev)) + continue; + + ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i].name, vdev->drm.dev); + if (!ret) { + vdev->fw->name = fw_names[i].name; return 0; + } } ivpu_err(vdev, "Failed to request firmware: %d\n", ret); return ret; } -static void +static int ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr, - const char *str, int index, u16 expected_major, u16 expected_minor) + const char *str, int index, u16 expected_major, u16 expected_minor, + u16 min_major) { u16 major = (u16)(fw_hdr->api_version[index] >> 16); u16 minor = (u16)(fw_hdr->api_version[index]); + if (major < min_major) { + ivpu_err(vdev, "Incompatible FW %s API version: %d.%d, required %d.0 or later\n", + str, major, minor, min_major); + return -EINVAL; + } if (major != expected_major) { - ivpu_warn(vdev, "Incompatible FW %s API version: %d.%d (expected %d.%d)\n", + ivpu_warn(vdev, "Major FW %s API version different: %d.%d (expected %d.%d)\n", str, major, minor, expected_major, expected_minor); } ivpu_dbg(vdev, FW_BOOT, "FW %s API version: %d.%d (expected %d.%d)\n", str, major, minor, expected_major, expected_minor); + + return 0; +} + +static bool +ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr, + const char *str, int index, u16 major, u16 minor) +{ + u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16); + u16 fw_minor = (u16)(fw_hdr->api_version[index]); + + if (fw_major < major || (fw_major == major && fw_minor < minor)) + return true; + + return false; +} + +bool ivpu_is_within_range(u64 addr, size_t size, struct ivpu_addr_range *range) +{ + u64 addr_end; + + if (!range || check_add_overflow(addr, size, &addr_end)) + return false; + + if (addr < range->start || addr_end > range->end) + return false; + + return true; +} + +static u32 +ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr) +{ + if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX && + ivpu_sched_mode 
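/*
 * The version words checked above pack the major number in the high 16 bits
 * and the minor in the low 16; a condensed sketch of the "is the FW API
 * older than X.Y" test from ivpu_fw_check_api_ver_lt(), where want_major and
 * want_minor stand in for the macro arguments:
 *
 *	u32 ver = fw_hdr->api_version[index];
 *	u16 major = (u16)(ver >> 16);
 *	u16 minor = (u16)ver;
 *	bool older = major < want_major ||
 *		     (major == want_major && minor < want_minor);
 */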
== VPU_SCHEDULING_MODE_OS) { + ivpu_warn(vdev, "OS sched mode is not supported, using HW mode\n"); + return VPU_SCHEDULING_MODE_HW; + } + + if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO) + return ivpu_sched_mode; + + if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, JSM, 3, 24)) + return VPU_SCHEDULING_MODE_OS; + + return VPU_SCHEDULING_MODE_HW; +} + +static void +ivpu_preemption_config_parse(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr) +{ + struct ivpu_fw_info *fw = vdev->fw; + u32 primary_preempt_buf_size, secondary_preempt_buf_size; + + if (fw_hdr->preemption_buffer_1_max_size) + primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; + else + primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; + + if (fw_hdr->preemption_buffer_2_max_size) + secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; + else + secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer size, primary: %u, secondary: %u\n", + primary_preempt_buf_size, secondary_preempt_buf_size); + + if (primary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE || + secondary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE) { + ivpu_warn(vdev, "Preemption buffers size too small\n"); + return; + } + + if (primary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE || + secondary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE) { + ivpu_warn(vdev, "Preemption buffers size too big\n"); + return; + } + + if (fw->sched_mode != VPU_SCHEDULING_MODE_HW) + return; + + if (ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE) + return; + + vdev->fw->primary_preempt_buf_size = ALIGN(primary_preempt_buf_size, PAGE_SIZE); + vdev->fw->secondary_preempt_buf_size = ALIGN(secondary_preempt_buf_size, PAGE_SIZE); } static int ivpu_fw_parse(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data; - u64 runtime_addr, image_load_addr, runtime_size, image_size; + struct ivpu_addr_range fw_image_range; + u64 boot_params_addr, boot_params_size; + u64 fw_version_addr, fw_version_size; + u64 runtime_addr, runtime_size; + u64 image_load_addr, image_size; if (fw->file->size <= FW_FILE_IMAGE_OFFSET) { ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size); @@ -94,18 +220,37 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) return -EINVAL; } - runtime_addr = fw_hdr->boot_params_load_address; - runtime_size = fw_hdr->runtime_size; - image_load_addr = fw_hdr->image_load_address; - image_size = fw_hdr->image_size; + boot_params_addr = fw_hdr->boot_params_load_address; + boot_params_size = SZ_4K; + + if (!ivpu_is_within_range(boot_params_addr, boot_params_size, &vdev->hw->ranges.runtime)) { + ivpu_err(vdev, "Invalid boot params address: 0x%llx\n", boot_params_addr); + return -EINVAL; + } + + fw_version_addr = fw_hdr->firmware_version_load_address; + fw_version_size = ALIGN(fw_hdr->firmware_version_size, SZ_4K); + + if (fw_version_size != SZ_4K) { + ivpu_err(vdev, "Invalid firmware version size: %u\n", + fw_hdr->firmware_version_size); + return -EINVAL; + } - if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) { - ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr); + if (!ivpu_is_within_range(fw_version_addr, fw_version_size, &vdev->hw->ranges.runtime)) { + ivpu_err(vdev, "Invalid firmware version address: 0x%llx\n", fw_version_addr); return -EINVAL; } - if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) { - ivpu_err(vdev, "Invalid 
firmware runtime size: %llu\n", runtime_size); + runtime_addr = fw_hdr->image_load_address; + runtime_size = fw_hdr->runtime_size - boot_params_size - fw_version_size; + + image_load_addr = fw_hdr->image_load_address; + image_size = fw_hdr->image_size; + + if (!ivpu_is_within_range(runtime_addr, runtime_size, &vdev->hw->ranges.runtime)) { + ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx and size %llu\n", + runtime_addr, runtime_size); return -EINVAL; } @@ -114,24 +259,42 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) return -EINVAL; } - if (image_load_addr < runtime_addr || - image_load_addr + image_size > runtime_addr + runtime_size) { - ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n", + if (!ivpu_is_within_range(image_load_addr, image_size, &vdev->hw->ranges.runtime)) { + ivpu_err(vdev, "Invalid firmware load address: 0x%llx and size %llu\n", image_load_addr, image_size); return -EINVAL; } + if (ivpu_hw_range_init(vdev, &fw_image_range, image_load_addr, image_size)) + return -EINVAL; + + if (!ivpu_is_within_range(fw_hdr->entry_point, SZ_4K, &fw_image_range)) { + ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point); + return -EINVAL; + } + if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) { ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size); return -EINVAL; } - if (fw_hdr->entry_point < image_load_addr || - fw_hdr->entry_point >= image_load_addr + image_size) { - ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point); + ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", + fw_hdr->header_version, fw_hdr->image_format); + + if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE)) + ivpu_warn(vdev, "Missing firmware version\n"); + + ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version); + + if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3)) + return -EINVAL; + if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3)) return -EINVAL; - } + fw->boot_params_addr = boot_params_addr; + fw->boot_params_size = boot_params_size; + fw->fw_version_addr = fw_version_addr; + fw->fw_version_size = fw_version_size; fw->runtime_addr = runtime_addr; fw->runtime_size = runtime_size; fw->image_load_offset = image_load_addr - runtime_addr; @@ -141,16 +304,42 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->cold_boot_entry_point = fw_hdr->entry_point; fw->entry_point = fw->cold_boot_entry_point; - ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", - fw_hdr->header_version, fw_hdr->image_format); - ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n", - fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size); - ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n", - fw->runtime_addr, image_load_addr, fw->entry_point); - ivpu_dbg(vdev, FW_BOOT, "FW version: %s\n", (char *)fw_hdr + VPU_FW_HEADER_SIZE); + fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL); + fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING; + fw->trace_hw_component_mask = -1; + + fw->dvfs_mode = 0; - IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT); - IVPU_FW_CHECK_API(vdev, fw_hdr, JSM); + fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); + ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); + + ivpu_preemption_config_parse(vdev, fw_hdr); + ivpu_dbg(vdev, FW_BOOT, "Mid-inference preemption %s supported\n", + ivpu_fw_preempt_buf_size(vdev) ? 
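/*
 * The ivpu_is_within_range() checks used throughout ivpu_fw_parse() boil
 * down to an overflow-safe interval test; condensed sketch (start/end come
 * from the ivpu_addr_range being validated against):
 *
 *	u64 addr_end;
 *
 *	if (check_add_overflow(addr, size, &addr_end))	// reject wrap-around
 *		return false;
 *	return addr >= range->start && addr_end <= range->end;
 *
 * Without check_add_overflow(), a huge bogus size could wrap past the end
 * of the address space and still pass a naive "addr + size <= end" test.
 */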
"is" : "is not"); + + if (fw_hdr->ro_section_start_address && + !ivpu_is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, + &fw_image_range)) { + ivpu_err(vdev, "Invalid read-only section: start address 0x%llx, size %u\n", + fw_hdr->ro_section_start_address, fw_hdr->ro_section_size); + return -EINVAL; + } + + fw->read_only_addr = fw_hdr->ro_section_start_address; + fw->read_only_size = fw_hdr->ro_section_size; + + ivpu_dbg(vdev, FW_BOOT, "Boot params: address 0x%llx, size %llu\n", + fw->boot_params_addr, fw->boot_params_size); + ivpu_dbg(vdev, FW_BOOT, "FW version: address 0x%llx, size %llu\n", + fw->fw_version_addr, fw->fw_version_size); + ivpu_dbg(vdev, FW_BOOT, "Runtime: address 0x%llx, size %u\n", + fw->runtime_addr, fw->runtime_size); + ivpu_dbg(vdev, FW_BOOT, "Image load offset: 0x%llx, size %u\n", + fw->image_load_offset, fw->image_size); + ivpu_dbg(vdev, FW_BOOT, "Read-only section: address 0x%llx, size %u\n", + fw->read_only_addr, fw->read_only_size); + ivpu_dbg(vdev, FW_BOOT, "FW entry point: 0x%llx\n", fw->entry_point); + ivpu_dbg(vdev, FW_BOOT, "SHAVE NN size: %u\n", fw->shave_nn_size); return 0; } @@ -160,47 +349,103 @@ static void ivpu_fw_release(struct ivpu_device *vdev) release_firmware(vdev->fw->file); } -static int ivpu_fw_update_global_range(struct ivpu_device *vdev) +/* Initialize workarounds that depend on FW version */ +static void +ivpu_fw_init_wa(struct ivpu_device *vdev) { - struct ivpu_fw_info *fw = vdev->fw; - u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT); - u64 size = FW_SHARED_MEM_SIZE; + const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data; - if (start + size > FW_GLOBAL_MEM_END) { - ivpu_err(vdev, "No space for shared region, start %lld, size %lld\n", start, size); - return -EINVAL; - } + if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) || + (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE)) + vdev->wa.disable_d0i3_msg = true; - ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size); - return 0; + /* Force enable the feature for testing purposes */ + if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE) + vdev->wa.disable_d0i3_msg = false; + + IVPU_PRINT_WA(disable_d0i3_msg); } static int ivpu_fw_mem_init(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; + int log_verb_size; int ret; - ret = ivpu_fw_update_global_range(vdev); - if (ret) - return ret; + fw->mem_bp = ivpu_bo_create_runtime(vdev, fw->boot_params_addr, fw->boot_params_size, + DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); + if (!fw->mem_bp) { + ivpu_err(vdev, "Failed to create firmware boot params memory buffer\n"); + return -ENOMEM; + } + + fw->mem_fw_ver = ivpu_bo_create_runtime(vdev, fw->fw_version_addr, fw->fw_version_size, + DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); + if (!fw->mem_fw_ver) { + ivpu_err(vdev, "Failed to create firmware version memory buffer\n"); + ret = -ENOMEM; + goto err_free_bp; + } - fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC); + fw->mem = ivpu_bo_create_runtime(vdev, fw->runtime_addr, fw->runtime_size, + DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!fw->mem) { - ivpu_err(vdev, "Failed to allocate firmware runtime memory\n"); - return -ENOMEM; + ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n"); + ret = -ENOMEM; + goto err_free_fw_ver; + } + + ret = ivpu_mmu_context_set_pages_ro(vdev, &vdev->gctx, fw->read_only_addr, + fw->read_only_size); + if (ret) { + ivpu_err(vdev, "Failed to set firmware image 
read-only\n"); + goto err_free_fw_mem; + } + + fw->mem_log_crit = ivpu_bo_create_global(vdev, IVPU_FW_CRITICAL_BUFFER_SIZE, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + if (!fw->mem_log_crit) { + ivpu_err(vdev, "Failed to create critical log buffer\n"); + ret = -ENOMEM; + goto err_free_fw_mem; + } + + if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO) + log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE; + else + log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE; + + fw->mem_log_verb = ivpu_bo_create_global(vdev, log_verb_size, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + if (!fw->mem_log_verb) { + ivpu_err(vdev, "Failed to create verbose log buffer\n"); + ret = -ENOMEM; + goto err_free_log_crit; } if (fw->shave_nn_size) { - fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start, - fw->shave_nn_size, DRM_IVPU_BO_UNCACHED); + fw->mem_shave_nn = ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.shave, + fw->shave_nn_size, DRM_IVPU_BO_WC); if (!fw->mem_shave_nn) { - ivpu_err(vdev, "Failed to allocate shavenn buffer\n"); - ivpu_bo_free_internal(fw->mem); - return -ENOMEM; + ivpu_err(vdev, "Failed to create shavenn buffer\n"); + ret = -ENOMEM; + goto err_free_log_verb; } } return 0; + +err_free_log_verb: + ivpu_bo_free(fw->mem_log_verb); +err_free_log_crit: + ivpu_bo_free(fw->mem_log_crit); +err_free_fw_mem: + ivpu_bo_free(fw->mem); +err_free_fw_ver: + ivpu_bo_free(fw->mem_fw_ver); +err_free_bp: + ivpu_bo_free(fw->mem_bp); + return ret; } static void ivpu_fw_mem_fini(struct ivpu_device *vdev) @@ -208,12 +453,21 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev) struct ivpu_fw_info *fw = vdev->fw; if (fw->mem_shave_nn) { - ivpu_bo_free_internal(fw->mem_shave_nn); + ivpu_bo_free(fw->mem_shave_nn); fw->mem_shave_nn = NULL; } - ivpu_bo_free_internal(fw->mem); + ivpu_bo_free(fw->mem_log_verb); + ivpu_bo_free(fw->mem_log_crit); + ivpu_bo_free(fw->mem); + ivpu_bo_free(fw->mem_fw_ver); + ivpu_bo_free(fw->mem_bp); + + fw->mem_log_verb = NULL; + fw->mem_log_crit = NULL; fw->mem = NULL; + fw->mem_fw_ver = NULL; + fw->mem_bp = NULL; } int ivpu_fw_init(struct ivpu_device *vdev) @@ -228,10 +482,14 @@ int ivpu_fw_init(struct ivpu_device *vdev) if (ret) goto err_fw_release; + ivpu_fw_init_wa(vdev); + ret = ivpu_fw_mem_init(vdev); if (ret) goto err_fw_release; + ivpu_fw_load(vdev); + return 0; err_fw_release: @@ -245,25 +503,23 @@ void ivpu_fw_fini(struct ivpu_device *vdev) ivpu_fw_release(vdev); } -int ivpu_fw_load(struct ivpu_device *vdev) +void ivpu_fw_load(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; u64 image_end_offset = fw->image_load_offset + fw->image_size; - memset(fw->mem->kvaddr, 0, fw->image_load_offset); - memcpy(fw->mem->kvaddr + fw->image_load_offset, + memset(ivpu_bo_vaddr(fw->mem), 0, fw->image_load_offset); + memcpy(ivpu_bo_vaddr(fw->mem) + fw->image_load_offset, fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size); if (IVPU_WA(clear_runtime_mem)) { - u8 *start = fw->mem->kvaddr + image_end_offset; - u64 size = fw->mem->base.size - image_end_offset; + u8 *start = ivpu_bo_vaddr(fw->mem) + image_end_offset; + u64 size = ivpu_bo_size(fw->mem) - image_end_offset; memset(start, 0, size); } wmb(); /* Flush WC buffers after writing fw->mem */ - - return 0; } static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) @@ -304,11 +560,6 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 
0x%x\n", boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg); - ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n", - boot_params->global_memory_allocator_base); - ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n", - boot_params->global_memory_allocator_size); - ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n", boot_params->shave_nn_fw_base); @@ -316,10 +567,6 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->watchdog_irq_mss); ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n", boot_params->watchdog_irq_nce); - ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n", - boot_params->host_to_vpu_irq); - ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n", - boot_params->job_done_irq); ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n", boot_params->host_version_id); @@ -353,43 +600,79 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->punit_telemetry_sram_size); ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n", boot_params->vpu_telemetry_enable); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_scheduling_mode = 0x%x\n", + boot_params->vpu_scheduling_mode); + ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n", + boot_params->dvfs_mode); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n", + boot_params->d0i3_delayed_entry); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n", + boot_params->d0i3_residency_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n", + boot_params->d0i3_entry_vpu_ts); + ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", + boot_params->system_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", + boot_params->power_profile); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_uses_ecc_mca_signal = 0x%x\n", + boot_params->vpu_uses_ecc_mca_signal); } void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) { struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx; - /* In case of warm boot we only have to reset the entrypoint addr */ + /* In case of warm boot only update variable params */ if (!ivpu_fw_is_cold_boot(vdev)) { + boot_params->d0i3_residency_time_us = + ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts); + boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts; + boot_params->system_time_us = ktime_to_us(ktime_get_real()); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n", + boot_params->d0i3_residency_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n", + boot_params->d0i3_entry_vpu_ts); + ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", + boot_params->system_time_us); + boot_params->save_restore_ret_address = 0; vdev->pm->is_warmboot = true; + wmb(); /* Flush WC buffers after writing save_restore_ret_address */ return; } + memset(boot_params, 0, sizeof(*boot_params)); vdev->pm->is_warmboot = false; boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; - boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev); + + /* + * This param is a debug firmware feature. It switches default clock + * to higher resolution one for fine-grained and more accurate firmware + * task profiling. 
+ */ + boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev); /* * Uncached region of VPU address space, covers IPC buffers, job queues * and log buffers, programmable to L2$ Uncached by VPU MTRR */ - boot_params->shared_region_base = vdev->hw->ranges.global_low.start; - boot_params->shared_region_size = vdev->hw->ranges.global_low.end - - vdev->hw->ranges.global_low.start; + boot_params->shared_region_base = vdev->hw->ranges.global.start; + boot_params->shared_region_size = vdev->hw->ranges.global.end - + vdev->hw->ranges.global.start; boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr; - boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2; + boot_params->ipc_header_area_size = ivpu_bo_size(ipc_mem_rx) / 2; - boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2; - boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2; + boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2; + boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2; - boot_params->global_aliased_pio_base = - vdev->hw->ranges.global_aliased_pio.start; - boot_params->global_aliased_pio_size = - ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio); + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; + boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); + } /* Allow configuration for L2C_PAGE_TABLE with boot param value */ boot_params->autoconfig = 1; @@ -397,7 +680,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params /* Enable L2 cache for first 2GB of high memory */ boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1; boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = - ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start); + ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.shave.start); if (vdev->fw->mem_shave_nn) boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr; @@ -413,10 +696,32 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->pn_freq_pll_ratio = vdev->hw->pll.pn_ratio; boot_params->max_freq_pll_ratio = vdev->hw->pll.max_ratio; - boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev); - boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); - boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); - + boot_params->default_trace_level = vdev->fw->trace_level; + boot_params->tracing_buff_message_format_mask = BIT(VPU_TRACING_FORMAT_STRING); + boot_params->trace_destination_mask = vdev->fw->trace_destination_mask; + boot_params->trace_hw_component_mask = vdev->fw->trace_hw_component_mask; + boot_params->crit_tracing_buff_addr = vdev->fw->mem_log_crit->vpu_addr; + boot_params->crit_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_crit); + boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr; + boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb); + + boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev); + boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev); + boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev); + boot_params->vpu_scheduling_mode = vdev->fw->sched_mode; + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS; + boot_params->dvfs_mode = 
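/*
 * boot_params lives in a write-combined mapping, so each store sequence in
 * this function ends with wmb() before the NPU is told to (re)boot;
 * schematically (field names from the cold-boot path above):
 *
 *	memset(boot_params, 0, sizeof(*boot_params));
 *	boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
 *	...
 *	wmb();	// drain CPU write-combining buffers so the device never
 *		// observes a partially written structure
 */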
vdev->fw->dvfs_mode; + if (!IVPU_WA(disable_d0i3_msg)) + boot_params->d0i3_delayed_entry = 1; + boot_params->d0i3_residency_time_us = 0; + boot_params->d0i3_entry_vpu_ts = 0; + if (IVPU_WA(disable_d0i2)) + boot_params->power_profile |= BIT(1); + boot_params->vpu_uses_ecc_mca_signal = + ivpu_hw_uses_ecc_mca_signal(vdev) ? VPU_BOOT_MCA_ECC_BOTH : 0; + + boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ ivpu_fw_boot_params_print(vdev, boot_params); diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 8d275c802d1c..00945892b55e 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -1,21 +1,34 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_FW_H__ #define __IVPU_FW_H__ +#include "vpu_jsm_api.h" + +#define FW_VERSION_HEADER_SIZE SZ_4K +#define FW_VERSION_STR_SIZE SZ_256 + struct ivpu_device; struct ivpu_bo; struct vpu_boot_params; struct ivpu_fw_info { const struct firmware *file; + const char *name; + char version[FW_VERSION_STR_SIZE]; + struct ivpu_bo *mem_bp; + struct ivpu_bo *mem_fw_ver; struct ivpu_bo *mem; struct ivpu_bo *mem_shave_nn; struct ivpu_bo *mem_log_crit; struct ivpu_bo *mem_log_verb; + u64 boot_params_addr; + u64 boot_params_size; + u64 fw_version_addr; + u64 fw_version_size; u64 runtime_addr; u32 runtime_size; u64 image_load_offset; @@ -23,16 +36,32 @@ struct ivpu_fw_info { u32 shave_nn_size; u64 entry_point; /* Cold or warm boot entry point for next boot */ u64 cold_boot_entry_point; + u32 trace_level; + u32 trace_destination_mask; + u64 trace_hw_component_mask; + u32 dvfs_mode; + u32 primary_preempt_buf_size; + u32 secondary_preempt_buf_size; + u64 read_only_addr; + u32 read_only_size; + u32 sched_mode; + u64 last_heartbeat; }; +bool ivpu_is_within_range(u64 addr, size_t size, struct ivpu_addr_range *range); int ivpu_fw_init(struct ivpu_device *vdev); void ivpu_fw_fini(struct ivpu_device *vdev); -int ivpu_fw_load(struct ivpu_device *vdev); -void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp); +void ivpu_fw_load(struct ivpu_device *vdev); +void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params); static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev) { return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point; } +static inline u32 ivpu_fw_preempt_buf_size(struct ivpu_device *vdev) +{ + return vdev->fw->primary_preempt_buf_size + vdev->fw->secondary_preempt_buf_size; +} + #endif /* __IVPU_FW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c new file mode 100644 index 000000000000..337c906b0210 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_fw_log.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <linux/ctype.h> +#include <linux/highmem.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/moduleparam.h> + +#include "vpu_boot_api.h" +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_fw_log.h" +#include "ivpu_gem.h" + +#define IVPU_FW_LOG_LINE_LENGTH 256 + +unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR; +module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444); +MODULE_PARM_DESC(fw_log_level, + "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG) + " info=" __stringify(IVPU_FW_LOG_INFO) + " warn=" 
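/*
 * The parameter help text built below relies on compile-time string
 * pasting; a stripped-down sketch with a hypothetical macro:
 *
 *	#define LOG_WARN 3
 *	MODULE_PARM_DESC(fw_log_level, "log level: warn=" __stringify(LOG_WARN));
 *	// the description becomes the single literal "log level: warn=3"
 *
 * so the documentation can never drift from the IVPU_FW_LOG_* values.
 */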
__stringify(IVPU_FW_LOG_WARN)
+		 " error=" __stringify(IVPU_FW_LOG_ERROR)
+		 " fatal=" __stringify(IVPU_FW_LOG_FATAL));
+
+static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
+			  struct vpu_tracing_buffer_header **out_log)
+{
+	struct vpu_tracing_buffer_header *log;
+
+	if ((*offset + sizeof(*log)) > ivpu_bo_size(bo))
+		return -EINVAL;
+
+	log = ivpu_bo_vaddr(bo) + *offset;
+
+	if (log->vpu_canary_start != VPU_TRACING_BUFFER_CANARY)
+		return -EINVAL;
+
+	if (log->header_size < sizeof(*log) || log->header_size > 1024) {
+		ivpu_dbg(vdev, FW_BOOT, "Invalid header size 0x%x\n", log->header_size);
+		return -EINVAL;
+	}
+	if ((char *)log + log->size > (char *)ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)) {
+		ivpu_dbg(vdev, FW_BOOT, "Invalid log size 0x%x\n", log->size);
+		return -EINVAL;
+	}
+
+	*out_log = log;
+	*offset += log->size;
+
+	ivpu_dbg(vdev, FW_BOOT,
+		 "FW log name \"%s\", write offset 0x%x size 0x%x, wrap count %d, hdr version %d size %d format %d, alignment %d",
+		 log->name, log->write_index, log->size, log->wrap_count, log->header_version,
+		 log->header_size, log->format, log->alignment);
+
+	return 0;
+}
+
+static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p)
+{
+	char line[IVPU_FW_LOG_LINE_LENGTH];
+	u32 index = 0;
+
+	if (!size || !buffer)
+		return;
+
+	while (size--) {
+		if (*buffer == '\n' || *buffer == 0) {
+			line[index] = 0;
+			if (index != 0)
+				drm_printf(p, "%s\n", line);
+			index = 0;
+			buffer++;
+			continue;
+		}
+		if (index == IVPU_FW_LOG_LINE_LENGTH - 1) {
+			line[index] = 0;
+			index = 0;
+			drm_printf(p, "%s\n", line);
+		}
+		if (*buffer != '\r' && (isprint(*buffer) || iscntrl(*buffer)))
+			line[index++] = *buffer;
+		buffer++;
+	}
+	line[index] = 0;
+	if (index != 0)
+		drm_printf(p, "%s", line);
+}
+
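/*
 * fw_log_print_buffer() below reads a ring where only the firmware advances
 * write_index and wrap_count. A condensed sketch of the same bookkeeping
 * (names shortened):
 *
 *	u32 start = rd, end = wr;
 *
 *	if (wraps == rd_wraps) {		// same lap: [rd, wr) is unread
 *		if (end <= start)
 *			return;			// nothing new
 *	} else if (wraps == rd_wraps + 1) {	// writer is one lap ahead
 *		if (end > start)
 *			start = end;		// unread tail was overwritten
 *	} else {
 *		start = end;			// lost 2+ laps: oldest byte is at wr
 *	}
 *	// then dump [start, end) directly, or [start, size) + [0, end)
 *	// when the valid region wraps past the end of the buffer
 */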
+static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix,
+				bool only_new_msgs, struct drm_printer *p)
+{
+	char *log_data = (void *)log + log->header_size;
+	u32 data_size = log->size - log->header_size;
+	u32 log_start = only_new_msgs ? READ_ONCE(log->read_index) : 0;
+	u32 log_end = READ_ONCE(log->write_index);
+
+	if (log->wrap_count == log->read_wrap_count) {
+		if (log_end <= log_start) {
+			drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
+			return;
+		}
+	} else if (log->wrap_count == log->read_wrap_count + 1) {
+		if (log_end > log_start)
+			log_start = log_end;
+	} else {
+		log_start = log_end;
+	}
+
+	drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name);
+	if (log_end > log_start) {
+		fw_log_print_lines(log_data + log_start, log_end - log_start, p);
+	} else {
+		fw_log_print_lines(log_data + log_start, data_size - log_start, p);
+		fw_log_print_lines(log_data, log_end, p);
+	}
+	drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */
+	drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name);
+}
+
+static void
+fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name,
+		       struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p)
+{
+	struct vpu_tracing_buffer_header *log;
+	u32 next = 0;
+
+	while (fw_log_from_bo(vdev, bo, &next, &log) == 0)
+		fw_log_print_buffer(log, name, only_new_msgs, p);
+}
+
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+{
+	fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p);
+	fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p);
+}
+
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev)
+{
+	struct vpu_tracing_buffer_header *log;
+	u32 next;
+
+	next = 0;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+		log->read_index = READ_ONCE(log->write_index);
+		log->read_wrap_count = READ_ONCE(log->wrap_count);
+	}
+
+	next = 0;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+		log->read_index = READ_ONCE(log->write_index);
+		log->read_wrap_count = READ_ONCE(log->wrap_count);
+	}
+}
+
+void ivpu_fw_log_reset(struct ivpu_device *vdev)
+{
+	struct vpu_tracing_buffer_header *log;
+	u32 next;
+
+	next = 0;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+		log->read_index = 0;
+		log->read_wrap_count = 0;
+	}
+
+	next = 0;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+		log->read_index = 0;
+		log->read_wrap_count = 0;
+	}
+}
diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h
new file mode 100644
index 000000000000..8bb528a73cb7
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_fw_log.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_FW_LOG_H__
+#define __IVPU_FW_LOG_H__
+
+#include <linux/types.h>
+
+#include "ivpu_drv.h"
+
+#define IVPU_FW_LOG_DEFAULT 0
+#define IVPU_FW_LOG_DEBUG   1
+#define IVPU_FW_LOG_INFO    2
+#define IVPU_FW_LOG_WARN    3
+#define IVPU_FW_LOG_ERROR   4
+#define IVPU_FW_LOG_FATAL   5
+
+#define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE	SZ_1M
+#define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE	SZ_8M
+#define IVPU_FW_CRITICAL_BUFFER_SIZE		SZ_512K
+
+extern unsigned int ivpu_fw_log_level;
+
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p);
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev);
+void ivpu_fw_log_reset(struct ivpu_device *vdev);
+
+#endif /* __IVPU_FW_LOG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index d1f923971b4c..ece68f570b7e 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -15,253 +15,88 
@@ #include <drm/drm_utils.h> #include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); static const struct drm_gem_object_funcs ivpu_gem_funcs; -static struct lock_class_key prime_bo_lock_class_key; - -static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo) +static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action) { - /* Pages are managed by the underlying dma-buf */ - return 0; + ivpu_dbg(vdev, BO, + "%6s: bo %8p size %9zu ctx %d vpu_addr %9llx pages %d sgt %d mmu_mapped %d wc %d imported %d\n", + action, bo, ivpu_bo_size(bo), bo->ctx_id, bo->vpu_addr, + (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc, + (bool)drm_gem_is_imported(&bo->base.base)); } -static void prime_free_pages_locked(struct ivpu_bo *bo) +static inline int ivpu_bo_lock(struct ivpu_bo *bo) { - /* Pages are managed by the underlying dma-buf */ + return dma_resv_lock(bo->base.base.resv, NULL); } -static int prime_map_pages_locked(struct ivpu_bo *bo) +static inline void ivpu_bo_unlock(struct ivpu_bo *bo) { - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - struct sg_table *sgt; - - WARN_ON(!bo->base.import_attach); - - sgt = dma_buf_map_attachment(bo->base.import_attach, DMA_BIDIRECTIONAL); - if (IS_ERR(sgt)) { - ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt)); - return PTR_ERR(sgt); - } - - bo->sgt = sgt; - return 0; + dma_resv_unlock(bo->base.base.resv); } -static void prime_unmap_pages_locked(struct ivpu_bo *bo) +static struct sg_table *ivpu_bo_map_attachment(struct ivpu_device *vdev, struct ivpu_bo *bo) { - WARN_ON(!bo->base.import_attach); - - dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL); - bo->sgt = NULL; -} - -static const struct ivpu_bo_ops prime_ops = { - .type = IVPU_BO_TYPE_PRIME, - .name = "prime", - .alloc_pages = prime_alloc_pages_locked, - .free_pages = prime_free_pages_locked, - .map_pages = prime_map_pages_locked, - .unmap_pages = prime_unmap_pages_locked, -}; - -static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo) -{ - int npages = bo->base.size >> PAGE_SHIFT; - struct page **pages; - - pages = drm_gem_get_pages(&bo->base); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - if (bo->flags & DRM_IVPU_BO_WC) - set_pages_array_wc(pages, npages); - else if (bo->flags & DRM_IVPU_BO_UNCACHED) - set_pages_array_uc(pages, npages); - - bo->pages = pages; - return 0; -} - -static void shmem_free_pages_locked(struct ivpu_bo *bo) -{ - if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT); - - drm_gem_put_pages(&bo->base, bo->pages, true, false); - bo->pages = NULL; -} - -static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo) -{ - int npages = bo->base.size >> PAGE_SHIFT; - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); struct sg_table *sgt; - int ret; - - sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages); - if (IS_ERR(sgt)) { - ivpu_err(vdev, "Failed to allocate sgtable\n"); - return PTR_ERR(sgt); - } - ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0); - if (ret) { - ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); - goto err_free_sgt; - } - - bo->sgt = sgt; - return 0; + drm_WARN_ON(&vdev->drm, !bo->base.base.import_attach); -err_free_sgt: - kfree(sgt); - return ret; -} - -static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo) -{ - struct 
ivpu_device *vdev = ivpu_bo_to_vdev(bo); - - dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0); - sg_free_table(bo->sgt); - kfree(bo->sgt); - bo->sgt = NULL; -} - -static const struct ivpu_bo_ops shmem_ops = { - .type = IVPU_BO_TYPE_SHMEM, - .name = "shmem", - .alloc_pages = shmem_alloc_pages_locked, - .free_pages = shmem_free_pages_locked, - .map_pages = ivpu_bo_map_pages_locked, - .unmap_pages = ivpu_bo_unmap_pages_locked, -}; - -static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo) -{ - unsigned int i, npages = bo->base.size >> PAGE_SHIFT; - struct page **pages; - int ret; - - pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - for (i = 0; i < npages; i++) { - pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); - if (!pages[i]) { - ret = -ENOMEM; - goto err_free_pages; - } - cond_resched(); - } - - bo->pages = pages; - return 0; - -err_free_pages: - while (i--) - put_page(pages[i]); - kvfree(pages); - return ret; -} - -static void internal_free_pages_locked(struct ivpu_bo *bo) -{ - unsigned int i, npages = bo->base.size >> PAGE_SHIFT; - - for (i = 0; i < npages; i++) - put_page(bo->pages[i]); - - kvfree(bo->pages); - bo->pages = NULL; -} + ivpu_bo_lock(bo); -static const struct ivpu_bo_ops internal_ops = { - .type = IVPU_BO_TYPE_INTERNAL, - .name = "internal", - .alloc_pages = internal_alloc_pages_locked, - .free_pages = internal_free_pages_locked, - .map_pages = ivpu_bo_map_pages_locked, - .unmap_pages = ivpu_bo_unmap_pages_locked, -}; - -static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo) -{ - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - int ret; - - lockdep_assert_held(&bo->lock); - drm_WARN_ON(&vdev->drm, bo->sgt); - - ret = bo->ops->alloc_pages(bo); - if (ret) { - ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret); - return ret; + sgt = bo->base.sgt; + if (!sgt) { + sgt = dma_buf_map_attachment(bo->base.base.import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + ivpu_err(vdev, "Failed to map BO in IOMMU: %ld\n", PTR_ERR(sgt)); + else + bo->base.sgt = sgt; } - ret = bo->ops->map_pages(bo); - if (ret) { - ivpu_err(vdev, "Failed to map pages for BO: %d", ret); - goto err_free_pages; - } - return ret; + ivpu_bo_unlock(bo); -err_free_pages: - bo->ops->free_pages(bo); - return ret; -} - -static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo) -{ - mutex_lock(&bo->lock); - - WARN_ON(!bo->sgt); - bo->ops->unmap_pages(bo); - WARN_ON(bo->sgt); - bo->ops->free_pages(bo); - WARN_ON(bo->pages); - - mutex_unlock(&bo->lock); + return sgt; } /* - * ivpu_bo_pin() - pin the backing physical pages and map them to VPU. + * ivpu_bo_bind() - pin the backing physical pages and map them to VPU. * * This function pins physical memory pages, then maps the physical pages * to IOMMU address space and finally updates the VPU MMU page tables * to allow the VPU to translate VPU address to IOMMU address. 
*/ -int __must_check ivpu_bo_pin(struct ivpu_bo *bo) +int __must_check ivpu_bo_bind(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct sg_table *sgt; int ret = 0; - mutex_lock(&bo->lock); + ivpu_dbg_bo(vdev, bo, "bind"); - if (!bo->vpu_addr) { - ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n", - bo->ctx->id, bo->handle); - ret = -EINVAL; - goto unlock; + if (bo->base.base.import_attach) + sgt = ivpu_bo_map_attachment(vdev, bo); + else + sgt = drm_gem_shmem_get_pages_sgt(&bo->base); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); + return ret; } - if (!bo->sgt) { - ret = ivpu_bo_alloc_and_map_pages_locked(bo); - if (ret) - goto unlock; - } + ivpu_bo_lock(bo); if (!bo->mmu_mapped) { - ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt, - ivpu_bo_is_snooped(bo)); + drm_WARN_ON(&vdev->drm, !bo->ctx); + ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt, + ivpu_bo_is_snooped(bo), ivpu_bo_is_read_only(bo)); if (ret) { ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret); goto unlock; @@ -270,7 +105,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo) } unlock: - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); return ret; } @@ -280,248 +115,250 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - int ret; + int idx, ret; - if (!range) { - if (bo->flags & DRM_IVPU_BO_HIGH_MEM) - range = &vdev->hw->ranges.user_high; - else - range = &vdev->hw->ranges.user_low; - } + if (!drm_dev_enter(&vdev->drm, &idx)) + return -ENODEV; + + ivpu_bo_lock(bo); - mutex_lock(&ctx->lock); - ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node); + ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node); if (!ret) { bo->ctx = ctx; + bo->ctx_id = ctx->id; bo->vpu_addr = bo->mm_node.start; - list_add_tail(&bo->ctx_node, &ctx->bo_list); + ivpu_dbg_bo(vdev, bo, "vaddr"); } - mutex_unlock(&ctx->lock); + + ivpu_bo_unlock(bo); + + drm_dev_exit(idx); return ret; } -static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo) +static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - struct ivpu_mmu_context *ctx = bo->ctx; - ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", - ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); - - mutex_lock(&bo->lock); + dma_resv_assert_held(bo->base.base.resv); if (bo->mmu_mapped) { - drm_WARN_ON(&vdev->drm, !bo->sgt); - ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt); + drm_WARN_ON(&vdev->drm, !bo->ctx); + drm_WARN_ON(&vdev->drm, !bo->vpu_addr); + drm_WARN_ON(&vdev->drm, !bo->base.sgt); + ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt); bo->mmu_mapped = false; } - mutex_lock(&ctx->lock); - list_del(&bo->ctx_node); - bo->vpu_addr = 0; - bo->ctx = NULL; - ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node); - mutex_unlock(&ctx->lock); + if (bo->ctx) { + ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node); + bo->ctx = NULL; + } - mutex_unlock(&bo->lock); + if (bo->base.sgt) { + if (bo->base.base.import_attach) { + dma_buf_unmap_attachment(bo->base.base.import_attach, + bo->base.sgt, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0); + sg_free_table(bo->base.sgt); + kfree(bo->base.sgt); + } + bo->base.sgt = NULL; + } } -void 
ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx) +void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { - struct ivpu_bo *bo, *tmp; + struct ivpu_bo *bo; - list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node) - ivpu_bo_free_vpu_addr(bo); + if (drm_WARN_ON(&vdev->drm, !ctx)) + return; + + mutex_lock(&vdev->bo_list_lock); + list_for_each_entry(bo, &vdev->bo_list, bo_list_node) { + ivpu_bo_lock(bo); + if (bo->ctx == ctx) { + ivpu_dbg_bo(vdev, bo, "unbind"); + ivpu_bo_unbind_locked(bo); + } + ivpu_bo_unlock(bo); + } + mutex_unlock(&vdev->bo_list_lock); } -static struct ivpu_bo * -ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context, - u64 size, u32 flags, const struct ivpu_bo_ops *ops, - const struct ivpu_addr_range *range, u64 user_ptr) +struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size) { struct ivpu_bo *bo; - int ret = 0; - - if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size))) - return ERR_PTR(-EINVAL); - switch (flags & DRM_IVPU_BO_CACHE_MASK) { - case DRM_IVPU_BO_CACHED: - case DRM_IVPU_BO_UNCACHED: - case DRM_IVPU_BO_WC: - break; - default: + if (size == 0 || !PAGE_ALIGNED(size)) return ERR_PTR(-EINVAL); - } bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) return ERR_PTR(-ENOMEM); - mutex_init(&bo->lock); - bo->base.funcs = &ivpu_gem_funcs; - bo->flags = flags; - bo->ops = ops; - bo->user_ptr = user_ptr; + bo->base.base.funcs = &ivpu_gem_funcs; + bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */ - if (ops->type == IVPU_BO_TYPE_SHMEM) - ret = drm_gem_object_init(&vdev->drm, &bo->base, size); - else - drm_gem_private_object_init(&vdev->drm, &bo->base, size); + INIT_LIST_HEAD(&bo->bo_list_node); - if (ret) { - ivpu_err(vdev, "Failed to initialize drm object\n"); - goto err_free; - } + return &bo->base.base; +} - if (flags & DRM_IVPU_BO_MAPPABLE) { - ret = drm_gem_create_mmap_offset(&bo->base); - if (ret) { - ivpu_err(vdev, "Failed to allocate mmap offset\n"); - goto err_release; - } - } +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct device *attach_dev = dev->dev; + struct dma_buf_attachment *attach; + struct drm_gem_object *obj; + struct ivpu_bo *bo; + int ret; - if (mmu_context) { - ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range); - if (ret) { - ivpu_err(vdev, "Failed to add BO to context: %d\n", ret); - goto err_release; - } + attach = dma_buf_attach(dma_buf, attach_dev); + if (IS_ERR(attach)) + return ERR_CAST(attach); + + get_dma_buf(dma_buf); + + obj = drm_gem_shmem_prime_import_sg_table(dev, attach, NULL); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto fail_detach; } - return bo; + obj->import_attach = attach; + obj->resv = dma_buf->resv; + + bo = to_ivpu_bo(obj); + + mutex_lock(&vdev->bo_list_lock); + list_add_tail(&bo->bo_list_node, &vdev->bo_list); + mutex_unlock(&vdev->bo_list_lock); + + ivpu_dbg(vdev, BO, "import: bo %8p size %9zu\n", bo, ivpu_bo_size(bo)); + + return obj; + +fail_detach: + dma_buf_detach(dma_buf, attach); + dma_buf_put(dma_buf); -err_release: - drm_gem_object_release(&bo->base); -err_free: - kfree(bo); return ERR_PTR(ret); } -static void ivpu_bo_free(struct drm_gem_object *obj) +static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags) { - struct ivpu_bo *bo = to_ivpu_bo(obj); - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - - if (bo->ctx) - ivpu_dbg(vdev, BO, 
"free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", - bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); - else - ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n", - (bool)bo->sgt, bo->mmu_mapped); - - drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + struct drm_gem_shmem_object *shmem; + struct ivpu_bo *bo; - vunmap(bo->kvaddr); + switch (flags & DRM_IVPU_BO_CACHE_MASK) { + case DRM_IVPU_BO_CACHED: + case DRM_IVPU_BO_WC: + break; + default: + return ERR_PTR(-EINVAL); + } - if (bo->ctx) - ivpu_bo_free_vpu_addr(bo); + shmem = drm_gem_shmem_create(&vdev->drm, size); + if (IS_ERR(shmem)) + return ERR_CAST(shmem); - if (bo->sgt) - ivpu_bo_unmap_and_free_pages(bo); + bo = to_ivpu_bo(&shmem->base); + bo->base.map_wc = flags & DRM_IVPU_BO_WC; + bo->flags = flags; - if (bo->base.import_attach) - drm_prime_gem_destroy(&bo->base, bo->sgt); + mutex_lock(&vdev->bo_list_lock); + list_add_tail(&bo->bo_list_node, &vdev->bo_list); + mutex_unlock(&vdev->bo_list_lock); - drm_gem_object_release(&bo->base); + ivpu_dbg(vdev, BO, " alloc: bo %8p size %9llu\n", bo, size); - mutex_destroy(&bo->lock); - kfree(bo); + return bo; } -static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file) { + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct ivpu_bo *bo = to_ivpu_bo(obj); - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - - ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s", - bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name); + struct ivpu_addr_range *range; - if (obj->import_attach) { - /* Drop the reference drm_gem_mmap_obj() acquired.*/ - drm_gem_object_put(obj); - vma->vm_private_data = NULL; - return dma_buf_mmap(obj->dma_buf, vma, 0); + if (bo->ctx) { + ivpu_dbg(vdev, IOCTL, "Can't add BO %pe to ctx %u: already in ctx %u\n", + bo, file_priv->ctx.id, bo->ctx->id); + return -EALREADY; } - vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND; - vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags)); + if (bo->flags & DRM_IVPU_BO_SHAVE_MEM) + range = &vdev->hw->ranges.shave; + else if (bo->flags & DRM_IVPU_BO_DMA_MEM) + range = &vdev->hw->ranges.dma; + else + range = &vdev->hw->ranges.user; - return 0; + return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range); } -static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj) +static void ivpu_gem_bo_free(struct drm_gem_object *obj) { + struct ivpu_device *vdev = to_ivpu_device(obj->dev); struct ivpu_bo *bo = to_ivpu_bo(obj); - loff_t npages = obj->size >> PAGE_SHIFT; - int ret = 0; - mutex_lock(&bo->lock); + ivpu_dbg_bo(vdev, bo, "free"); - if (!bo->sgt) - ret = ivpu_bo_alloc_and_map_pages_locked(bo); + drm_WARN_ON(&vdev->drm, list_empty(&bo->bo_list_node)); - mutex_unlock(&bo->lock); + mutex_lock(&vdev->bo_list_lock); + list_del(&bo->bo_list_node); - if (ret) - return ERR_PTR(ret); + drm_WARN_ON(&vdev->drm, !drm_gem_is_imported(&bo->base.base) && + !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + drm_WARN_ON(&vdev->drm, ivpu_bo_size(bo) == 0); + drm_WARN_ON(&vdev->drm, bo->base.vaddr); - return drm_prime_pages_to_sg(obj->dev, bo->pages, npages); + ivpu_bo_lock(bo); + ivpu_bo_unbind_locked(bo); + ivpu_bo_unlock(bo); + + mutex_unlock(&vdev->bo_list_lock); + + drm_WARN_ON(&vdev->drm, bo->mmu_mapped); + drm_WARN_ON(&vdev->drm, bo->ctx); + + drm_WARN_ON(obj->dev, 
refcount_read(&bo->base.pages_use_count) > 1); + drm_WARN_ON(obj->dev, bo->base.base.vma_node.vm_files.rb_node); + drm_gem_shmem_free(&bo->base); } -static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf) +static enum drm_gem_object_status ivpu_gem_status(struct drm_gem_object *obj) { - struct vm_area_struct *vma = vmf->vma; - struct drm_gem_object *obj = vma->vm_private_data; struct ivpu_bo *bo = to_ivpu_bo(obj); - loff_t npages = obj->size >> PAGE_SHIFT; - pgoff_t page_offset; - struct page *page; - vm_fault_t ret; - int err; - - mutex_lock(&bo->lock); - - if (!bo->sgt) { - err = ivpu_bo_alloc_and_map_pages_locked(bo); - if (err) { - ret = vmf_error(err); - goto unlock; - } - } - - /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; - if (page_offset >= npages) { - ret = VM_FAULT_SIGBUS; - } else { - page = bo->pages[page_offset]; - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); - } + enum drm_gem_object_status status = 0; -unlock: - mutex_unlock(&bo->lock); + if (ivpu_bo_is_resident(bo)) + status |= DRM_GEM_OBJECT_RESIDENT; - return ret; + return status; } -static const struct vm_operations_struct ivpu_vm_ops = { - .fault = ivpu_vm_fault, - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - static const struct drm_gem_object_funcs ivpu_gem_funcs = { - .free = ivpu_bo_free, - .mmap = ivpu_bo_mmap, - .vm_ops = &ivpu_vm_ops, - .get_sg_table = ivpu_bo_get_sg_table, + .free = ivpu_gem_bo_free, + .open = ivpu_gem_bo_open, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = drm_gem_shmem_object_mmap, + .status = ivpu_gem_status, + .vm_ops = &drm_gem_shmem_vm_ops, }; -int -ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; @@ -530,123 +367,123 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct ivpu_bo *bo; int ret; - if (args->flags & ~DRM_IVPU_BO_FLAGS) + if (args->flags & ~DRM_IVPU_BO_FLAGS) { + ivpu_dbg(vdev, IOCTL, "Invalid BO flags 0x%x\n", args->flags); return -EINVAL; + } - if (size == 0) + if (size == 0) { + ivpu_dbg(vdev, IOCTL, "Invalid BO size %llu\n", args->size); return -EINVAL; + } - bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0); + bo = ivpu_bo_alloc(vdev, size, args->flags); if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)", + ivpu_dbg(vdev, IOCTL, "Failed to allocate BO: %pe ctx %u size %llu flags 0x%x\n", bo, file_priv->ctx.id, args->size, args->flags); return PTR_ERR(bo); } - ret = drm_gem_handle_create(file, &bo->base, &bo->handle); - if (!ret) { + drm_WARN_ON(&vdev->drm, bo->base.base.handle_count != 0); + + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) { + ivpu_dbg(vdev, IOCTL, "Failed to create handle for BO: %pe ctx %u size %llu flags 0x%x\n", + bo, file_priv->ctx.id, args->size, args->flags); + } else { args->vpu_addr = bo->vpu_addr; - args->handle = bo->handle; + drm_WARN_ON(&vdev->drm, bo->base.base.handle_count != 1); } - drm_gem_object_put(&bo->base); - - ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size 
%zu flags 0x%x\n", - file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags); + drm_gem_object_put(&bo->base.base); return ret; } struct ivpu_bo * -ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags) +ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + struct ivpu_addr_range *range, u64 size, u32 flags) { - const struct ivpu_addr_range *range; - struct ivpu_addr_range fixed_range; + struct iosys_map map; struct ivpu_bo *bo; - pgprot_t prot; int ret; - drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr)); - drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); + if (drm_WARN_ON(&vdev->drm, !range)) + return NULL; - if (vpu_addr) { - fixed_range.start = vpu_addr; - fixed_range.end = vpu_addr + size; - range = &fixed_range; - } else { - range = &vdev->hw->ranges.global_low; - } + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->start)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); - bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0); + bo = ivpu_bo_alloc(vdev, size, flags); if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", - bo, vpu_addr, size, flags); + ivpu_err(vdev, "Failed to allocate BO: %pe vpu_addr 0x%llx size %llu flags 0x%x\n", + bo, range->start, size, flags); return NULL; } - ret = ivpu_bo_pin(bo); + ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range); + if (ret) { + ivpu_err(vdev, "Failed to allocate NPU address for BO: %pe ctx %u size %llu: %d\n", + bo, ctx->id, size, ret); + goto err_put; + } + + ret = ivpu_bo_bind(bo); if (ret) goto err_put; - if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT); + if (flags & DRM_IVPU_BO_MAPPABLE) { + ivpu_bo_lock(bo); + ret = drm_gem_shmem_vmap_locked(&bo->base, &map); + ivpu_bo_unlock(bo); - prot = ivpu_bo_pgprot(bo, PAGE_KERNEL); - bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot); - if (!bo->kvaddr) { - ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n"); - goto err_put; + if (ret) + goto err_put; } - ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n", - bo->vpu_addr, bo->base.size, flags); - return bo; err_put: - drm_gem_object_put(&bo->base); + drm_gem_object_put(&bo->base.base); return NULL; } -void ivpu_bo_free_internal(struct ivpu_bo *bo) -{ - drm_gem_object_put(&bo->base); -} - -struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf) +struct ivpu_bo *ivpu_bo_create_runtime(struct ivpu_device *vdev, u64 addr, u64 size, u32 flags) { - struct ivpu_device *vdev = to_ivpu_device(dev); - struct dma_buf_attachment *attach; - struct ivpu_bo *bo; + struct ivpu_addr_range range; - attach = dma_buf_attach(buf, dev->dev); - if (IS_ERR(attach)) - return ERR_CAST(attach); + if (!ivpu_is_within_range(addr, size, &vdev->hw->ranges.runtime)) { + ivpu_err(vdev, "Invalid runtime BO address 0x%llx size %llu\n", addr, size); + return NULL; + } - get_dma_buf(buf); + if (ivpu_hw_range_init(vdev, &range, addr, size)) + return NULL; - bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0); - if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size); - goto err_detach; - } + return ivpu_bo_create(vdev, &vdev->gctx, &range, size, flags); +} - lockdep_set_class(&bo->lock, &prime_bo_lock_class_key); +struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags) +{ 
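+	/* +	 * Global BOs belong to the device-wide context (gctx) and are carved +	 * out of the global range set up in memory_ranges_init(); note that +	 * ivpu_bo_create() returns NULL rather than an ERR_PTR on failure. +	 */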
+ return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags); +} - bo->base.import_attach = attach; +void ivpu_bo_free(struct ivpu_bo *bo) +{ + struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); - return &bo->base; + if (bo->flags & DRM_IVPU_BO_MAPPABLE) { + ivpu_bo_lock(bo); + drm_gem_shmem_vunmap_locked(&bo->base, &map); + ivpu_bo_unlock(bo); + } -err_detach: - dma_buf_detach(buf, attach); - dma_buf_put(buf); - return ERR_CAST(bo); + drm_gem_object_put(&bo->base.base); } int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct ivpu_file_priv *file_priv = file->driver_priv; - struct ivpu_device *vdev = to_ivpu_device(dev); struct drm_ivpu_bo_info *args = data; struct drm_gem_object *obj; struct ivpu_bo *bo; @@ -658,22 +495,13 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file bo = to_ivpu_bo(obj); - mutex_lock(&bo->lock); - - if (!bo->ctx) { - ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL); - if (ret) { - ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret); - goto unlock; - } - } - + ivpu_bo_lock(bo); args->flags = bo->flags; args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node); args->vpu_addr = bo->vpu_addr; args->size = obj->size; -unlock: - mutex_unlock(&bo->lock); + ivpu_bo_unlock(bo); + drm_gem_object_put(obj); return ret; } @@ -687,6 +515,9 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */ + timeout += 1; + obj = drm_gem_object_lookup(file, args->handle); if (!obj) return -EINVAL; @@ -706,43 +537,38 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { - unsigned long dma_refcount = 0; + ivpu_bo_lock(bo); + + drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", + bo, bo->ctx_id, bo->vpu_addr, bo->base.base.size, + bo->flags, kref_read(&bo->base.base.refcount)); - if (bo->base.dma_buf && bo->base.dma_buf->file) - dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count); + if (bo->base.pages) + drm_printf(p, " has_pages"); - drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n", - bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, - kref_read(&bo->base.refcount), dma_refcount, bo->ops->name); + if (bo->mmu_mapped) + drm_printf(p, " mmu_mapped"); + + if (drm_gem_is_imported(&bo->base.base)) + drm_printf(p, " imported"); + + drm_printf(p, "\n"); + + ivpu_bo_unlock(bo); } void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) { struct ivpu_device *vdev = to_ivpu_device(dev); - struct ivpu_file_priv *file_priv; - unsigned long ctx_id; struct ivpu_bo *bo; - drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n", - "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type"); + drm_printf(p, "%-9s %-3s %-14s %-10s %-10s %-4s %s\n", + "bo", "ctx", "vpu_addr", "size", "flags", "refs", "attribs"); - mutex_lock(&vdev->gctx.lock); - list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node) + mutex_lock(&vdev->bo_list_lock); + list_for_each_entry(bo, &vdev->bo_list, bo_list_node) ivpu_bo_print_info(bo, p); - mutex_unlock(&vdev->gctx.lock); - - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); - if (!file_priv) - continue; - - mutex_lock(&file_priv->ctx.lock); - list_for_each_entry(bo, 
&file_priv->ctx.bo_list, ctx_node) - ivpu_bo_print_info(bo, p); - mutex_unlock(&file_priv->ctx.lock); - - ivpu_file_priv_put(&file_priv); - } + mutex_unlock(&vdev->bo_list_lock); } void ivpu_bo_list_print(struct drm_device *dev) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 6b0ceda5f253..0c3350f22b55 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -1,79 +1,62 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_GEM_H__ #define __IVPU_GEM_H__ #include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> #include <drm/drm_mm.h> -struct dma_buf; -struct ivpu_bo_ops; struct ivpu_file_priv; struct ivpu_bo { - struct drm_gem_object base; - const struct ivpu_bo_ops *ops; - + struct drm_gem_shmem_object base; struct ivpu_mmu_context *ctx; - struct list_head ctx_node; + struct list_head bo_list_node; struct drm_mm_node mm_node; - struct mutex lock; /* Protects: pages, sgt, mmu_mapped */ - struct sg_table *sgt; - struct page **pages; - bool mmu_mapped; - - void *kvaddr; u64 vpu_addr; - u32 handle; u32 flags; - uintptr_t user_ptr; - u32 job_status; -}; - -enum ivpu_bo_type { - IVPU_BO_TYPE_SHMEM = 1, - IVPU_BO_TYPE_INTERNAL, - IVPU_BO_TYPE_PRIME, -}; - -struct ivpu_bo_ops { - enum ivpu_bo_type type; - const char *name; - int (*alloc_pages)(struct ivpu_bo *bo); - void (*free_pages)(struct ivpu_bo *bo); - int (*map_pages)(struct ivpu_bo *bo); - void (*unmap_pages)(struct ivpu_bo *bo); + u32 job_status; /* Valid only for command buffer */ + u32 ctx_id; + bool mmu_mapped; }; -int ivpu_bo_pin(struct ivpu_bo *bo); -void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx); -void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); -void ivpu_bo_list_print(struct drm_device *dev); +int ivpu_bo_bind(struct ivpu_bo *bo); +void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); -struct ivpu_bo * -ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); -void ivpu_bo_free_internal(struct ivpu_bo *bo); +struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); -void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo); +struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + struct ivpu_addr_range *range, u64 size, u32 flags); +struct ivpu_bo *ivpu_bo_create_runtime(struct ivpu_device *vdev, u64 addr, u64 size, u32 flags); +struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags); +void ivpu_bo_free(struct ivpu_bo *bo); int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); +void ivpu_bo_list_print(struct drm_device *dev); static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj) { - return container_of(obj, struct ivpu_bo, base); + return container_of(obj, struct ivpu_bo, base.base); } -static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset) +static inline void 
*ivpu_bo_vaddr(struct ivpu_bo *bo) { - if (offset > bo->base.size || !bo->pages) - return NULL; + return bo->base.vaddr; +} - return bo->pages[offset / PAGE_SIZE]; +static inline size_t ivpu_bo_size(struct ivpu_bo *bo) +{ + return bo->base.base.size; } static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) @@ -81,25 +64,27 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) return bo->flags & DRM_IVPU_BO_CACHE_MASK; } -static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) +static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) { - return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; + return to_ivpu_device(bo->base.base.dev); } -static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot) +static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) { - if (bo->flags & DRM_IVPU_BO_WC) - return pgprot_writecombine(prot); + if (ivpu_is_force_snoop_enabled(ivpu_bo_to_vdev(bo))) + return true; - if (bo->flags & DRM_IVPU_BO_UNCACHED) - return pgprot_noncached(prot); + return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; +} - return prot; +static inline bool ivpu_bo_is_read_only(struct ivpu_bo *bo) +{ + return bo->flags & DRM_IVPU_BO_READ_ONLY; } -static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) +static inline bool ivpu_bo_is_resident(struct ivpu_bo *bo) { - return to_ivpu_device(bo->base.dev); + return !!bo->base.pages; } static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) @@ -107,21 +92,26 @@ static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) if (vpu_addr < bo->vpu_addr) return NULL; - if (vpu_addr >= (bo->vpu_addr + bo->base.size)) + if (vpu_addr >= (bo->vpu_addr + ivpu_bo_size(bo))) return NULL; - return bo->kvaddr + (vpu_addr - bo->vpu_addr); + return ivpu_bo_vaddr(bo) + (vpu_addr - bo->vpu_addr); } static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr) { - if (cpu_addr < bo->kvaddr) + if (cpu_addr < ivpu_bo_vaddr(bo)) return 0; - if (cpu_addr >= (bo->kvaddr + bo->base.size)) + if (cpu_addr >= (ivpu_bo_vaddr(bo) + ivpu_bo_size(bo))) return 0; - return bo->vpu_addr + (cpu_addr - bo->kvaddr); + return bo->vpu_addr + (cpu_addr - ivpu_bo_vaddr(bo)); +} + +static inline bool ivpu_bo_is_mappable(struct ivpu_bo *bo) +{ + return bo->flags & DRM_IVPU_BO_MAPPABLE; } #endif /* __IVPU_GEM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_gem_userptr.c b/drivers/accel/ivpu/ivpu_gem_userptr.c new file mode 100644 index 000000000000..25ba606164c0 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_gem_userptr.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2025 Intel Corporation + */ + +#include <linux/dma-buf.h> +#include <linux/err.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> +#include <linux/capability.h> + +#include <drm/drm_device.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" + +static struct sg_table * +ivpu_gem_userptr_dmabuf_map(struct dma_buf_attachment *attachment, + enum dma_data_direction direction) +{ + struct sg_table *sgt = attachment->dmabuf->priv; + int ret; + + ret = dma_map_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC); + if (ret) + return ERR_PTR(ret); + + return sgt; +} + +static void ivpu_gem_userptr_dmabuf_unmap(struct dma_buf_attachment *attachment, + struct sg_table *sgt, + enum dma_data_direction direction) +{ + dma_unmap_sgtable(attachment->dev, sgt, direction, 
DMA_ATTR_SKIP_CPU_SYNC); +} + +static void ivpu_gem_userptr_dmabuf_release(struct dma_buf *dma_buf) +{ + struct sg_table *sgt = dma_buf->priv; + struct sg_page_iter page_iter; + struct page *page; + + for_each_sgtable_page(sgt, &page_iter, 0) { + page = sg_page_iter_page(&page_iter); + unpin_user_page(page); + } + + sg_free_table(sgt); + kfree(sgt); +} + +static const struct dma_buf_ops ivpu_gem_userptr_dmabuf_ops = { + .map_dma_buf = ivpu_gem_userptr_dmabuf_map, + .unmap_dma_buf = ivpu_gem_userptr_dmabuf_unmap, + .release = ivpu_gem_userptr_dmabuf_release, +}; + +static struct dma_buf * +ivpu_create_userptr_dmabuf(struct ivpu_device *vdev, void __user *user_ptr, + size_t size, uint32_t flags) +{ + struct dma_buf_export_info exp_info = {}; + struct dma_buf *dma_buf; + struct sg_table *sgt; + struct page **pages; + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned int gup_flags = FOLL_LONGTERM; + int ret, i, pinned; + + /* Add FOLL_WRITE only if the BO is not read-only */ + if (!(flags & DRM_IVPU_BO_READ_ONLY)) + gup_flags |= FOLL_WRITE; + + pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); + + pinned = pin_user_pages_fast((unsigned long)user_ptr, nr_pages, gup_flags, pages); + if (pinned < 0) { + ret = pinned; + ivpu_dbg(vdev, IOCTL, "Failed to pin user pages: %d\n", ret); + goto free_pages_array; + } + + if (pinned != nr_pages) { + ivpu_dbg(vdev, IOCTL, "Pinned %d pages, expected %lu\n", pinned, nr_pages); + ret = -EFAULT; + goto unpin_pages; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto unpin_pages; + } + + ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, 0, size, GFP_KERNEL); + if (ret) { + ivpu_dbg(vdev, IOCTL, "Failed to create sg table: %d\n", ret); + goto free_sgt; + } + + exp_info.exp_name = "ivpu_userptr_dmabuf"; + exp_info.owner = THIS_MODULE; + exp_info.ops = &ivpu_gem_userptr_dmabuf_ops; + exp_info.size = size; + exp_info.flags = O_RDWR | O_CLOEXEC; + exp_info.priv = sgt; + + dma_buf = dma_buf_export(&exp_info); + if (IS_ERR(dma_buf)) { + ret = PTR_ERR(dma_buf); + ivpu_dbg(vdev, IOCTL, "Failed to export userptr dma-buf: %d\n", ret); + goto free_sg_table; + } + + kvfree(pages); + return dma_buf; + +free_sg_table: + sg_free_table(sgt); +free_sgt: + kfree(sgt); +unpin_pages: + for (i = 0; i < pinned; i++) + unpin_user_page(pages[i]); +free_pages_array: + kvfree(pages); + return ERR_PTR(ret); +} + +static struct ivpu_bo * +ivpu_bo_create_from_userptr(struct ivpu_device *vdev, void __user *user_ptr, + size_t size, uint32_t flags) +{ + struct dma_buf *dma_buf; + struct drm_gem_object *obj; + struct ivpu_bo *bo; + + dma_buf = ivpu_create_userptr_dmabuf(vdev, user_ptr, size, flags); + if (IS_ERR(dma_buf)) + return ERR_CAST(dma_buf); + + obj = ivpu_gem_prime_import(&vdev->drm, dma_buf); + if (IS_ERR(obj)) { + dma_buf_put(dma_buf); + return ERR_CAST(obj); + } + + dma_buf_put(dma_buf); + + bo = to_ivpu_bo(obj); + bo->flags = flags; + + return bo; +} + +int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_bo_create_from_userptr *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = to_ivpu_device(dev); + void __user *user_ptr = u64_to_user_ptr(args->user_ptr); + struct ivpu_bo *bo; + int ret; + + if (args->flags & ~(DRM_IVPU_BO_HIGH_MEM | DRM_IVPU_BO_DMA_MEM | DRM_IVPU_BO_READ_ONLY)) { + ivpu_dbg(vdev, IOCTL, "Invalid BO flags: 0x%x\n", args->flags); + return -EINVAL; + } + + if 
(!args->user_ptr || !args->size) { + ivpu_dbg(vdev, IOCTL, "Userptr or size are zero: ptr %llx size %llu\n", + args->user_ptr, args->size); + return -EINVAL; + } + + if (!PAGE_ALIGNED(args->user_ptr) || !PAGE_ALIGNED(args->size)) { + ivpu_dbg(vdev, IOCTL, "Userptr or size not page aligned: ptr %llx size %llu\n", + args->user_ptr, args->size); + return -EINVAL; + } + + if (!access_ok(user_ptr, args->size)) { + ivpu_dbg(vdev, IOCTL, "Userptr is not accessible: ptr %llx size %llu\n", + args->user_ptr, args->size); + return -EFAULT; + } + + bo = ivpu_bo_create_from_userptr(vdev, user_ptr, args->size, args->flags); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) { + ivpu_dbg(vdev, IOCTL, "Failed to create handle for BO: %pe ctx %u size %llu flags 0x%x\n", + bo, file_priv->ctx.id, args->size, args->flags); + } else { + ivpu_dbg(vdev, BO, "Created userptr BO: handle=%u vpu_addr=0x%llx size=%llu flags=0x%x\n", + args->handle, bo->vpu_addr, args->size, bo->flags); + args->vpu_addr = bo->vpu_addr; + } + + drm_gem_object_put(&bo->base.base); + + return ret; +} diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c new file mode 100644 index 000000000000..d69cd0d93569 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - 2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" + +#include <asm/msr-index.h> +#include <asm/msr.h> +#include <linux/dmi.h> +#include <linux/fault-inject.h> +#include <linux/pm_runtime.h> + +#ifdef CONFIG_FAULT_INJECTION +DECLARE_FAULT_ATTR(ivpu_hw_failure); + +static char *ivpu_fail_hw; +module_param_named_unsafe(fail_hw, ivpu_fail_hw, charp, 0444); +MODULE_PARM_DESC(fail_hw, "<interval>,<probability>,<space>,<times>"); +#endif + +#define FW_SHARED_MEM_ALIGNMENT SZ_512K /* VPU MTRR limitation */ + +#define ECC_MCA_SIGNAL_ENABLE_MASK 0xff + +static char *platform_to_str(u32 platform) +{ + switch (platform) { + case IVPU_PLATFORM_SILICON: + return "SILICON"; + case IVPU_PLATFORM_SIMICS: + return "SIMICS"; + case IVPU_PLATFORM_FPGA: + return "FPGA"; + case IVPU_PLATFORM_HSLE: + return "HSLE"; + default: + return "Invalid platform"; + } +} + +static void platform_init(struct ivpu_device *vdev) +{ + int platform = ivpu_hw_btrs_platform_read(vdev); + + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", platform_to_str(platform), platform); + + switch (platform) { + case IVPU_PLATFORM_SILICON: + case IVPU_PLATFORM_SIMICS: + case IVPU_PLATFORM_FPGA: + case IVPU_PLATFORM_HSLE: + vdev->platform = platform; + break; + + default: + ivpu_err(vdev, "Invalid platform type: %d\n", platform); + break; + } +} + +static void wa_init(struct ivpu_device *vdev) +{ + vdev->wa.punit_disabled = false; + vdev->wa.clear_runtime_mem = false; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev); + + if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && + ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) + vdev->wa.disable_clock_relinquish = true; + + if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE) + vdev->wa.disable_clock_relinquish = false; + + if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_DISABLE) + vdev->wa.disable_clock_relinquish = true; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->wa.wp0_during_power_up = true; + + if (ivpu_test_mode & IVPU_TEST_MODE_D0I2_DISABLE) + 
vdev->wa.disable_d0i2 = true; + + IVPU_PRINT_WA(punit_disabled); + IVPU_PRINT_WA(clear_runtime_mem); + IVPU_PRINT_WA(interrupt_clear_with_0); + IVPU_PRINT_WA(disable_clock_relinquish); + IVPU_PRINT_WA(wp0_during_power_up); + IVPU_PRINT_WA(disable_d0i2); +} + +static void timeouts_init(struct ivpu_device *vdev) +{ + if (ivpu_test_mode & IVPU_TEST_MODE_DISABLE_TIMEOUTS) { + vdev->timeout.boot = -1; + vdev->timeout.jsm = -1; + vdev->timeout.tdr = -1; + vdev->timeout.inference = -1; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = -1; + } else if (ivpu_is_fpga(vdev)) { + vdev->timeout.boot = 50; + vdev->timeout.jsm = 15000; + vdev->timeout.tdr = 30000; + vdev->timeout.inference = 900000; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 500; + vdev->timeout.state_dump_msg = 10000; + } else if (ivpu_is_simics(vdev)) { + vdev->timeout.boot = 50; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 10000; + vdev->timeout.inference = 300000; + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 100; + vdev->timeout.state_dump_msg = 10; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + vdev->timeout.inference = 60000; + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->timeout.autosuspend = 10; + else + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 5; + vdev->timeout.state_dump_msg = 100; + } +} + +static void priority_bands_init(struct ivpu_device *vdev) +{ + /* Idle */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 0; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE] = 160000; + /* Normal */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL] = 300000; + /* Focus */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS] = 200000; + /* Realtime */ + vdev->hw->hws.grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 0; + vdev->hw->hws.process_grace_period[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 50000; + vdev->hw->hws.process_quantum[VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME] = 200000; +} + +int ivpu_hw_range_init(struct ivpu_device *vdev, struct ivpu_addr_range *range, u64 start, u64 size) +{ + u64 end; + + if (!range || check_add_overflow(start, size, &end)) { + ivpu_err(vdev, "Invalid range: start 0x%llx size %llu\n", start, size); + return -EINVAL; + } + + range->start = start; + range->end = end; + + return 0; +} + +static void memory_ranges_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + ivpu_hw_range_init(vdev, &vdev->hw->ranges.runtime, 0x84800000, SZ_64M); + ivpu_hw_range_init(vdev, &vdev->hw->ranges.global, 0x90000000, SZ_256M); + ivpu_hw_range_init(vdev, &vdev->hw->ranges.user, 0xa0000000, 511 * SZ_1M); + ivpu_hw_range_init(vdev, &vdev->hw->ranges.shave, 0x180000000, SZ_2G); + ivpu_hw_range_init(vdev, &vdev->hw->ranges.dma, 0x200000000, SZ_128G); + } else { + ivpu_hw_range_init(vdev, &vdev->hw->ranges.runtime, 0x80000000, SZ_64M); + ivpu_hw_range_init(vdev, &vdev->hw->ranges.global, 0x90000000, SZ_256M); + ivpu_hw_range_init(vdev, 
&vdev->hw->ranges.shave, 0x80000000, SZ_2G); + ivpu_hw_range_init(vdev, &vdev->hw->ranges.user, 0x100000000, SZ_256G); + vdev->hw->ranges.dma = vdev->hw->ranges.user; + } + + drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vdev->hw->ranges.global.start, + FW_SHARED_MEM_ALIGNMENT)); +} + +static int wp_enable(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_wp_drive(vdev, true); +} + +static int wp_disable(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_wp_drive(vdev, false); +} + +int ivpu_hw_power_up(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(wp0_during_power_up)) { + /* WP requests may fail when powering down, so issue WP 0 here */ + ret = wp_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable workpoint: %d\n", ret); + } + + ret = ivpu_hw_btrs_d0i3_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); + + ret = wp_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable workpoint: %d\n", ret); + return ret; + } + + if (ivpu_hw_btrs_gen(vdev) >= IVPU_HW_BTRS_LNL) { + if (IVPU_WA(disable_clock_relinquish)) + ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev); + ivpu_hw_btrs_profiling_freq_reg_set_lnl(vdev); + ivpu_hw_btrs_ats_print_lnl(vdev); + } + + ret = ivpu_hw_ip_host_ss_configure(vdev); + if (ret) { + ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); + return ret; + } + + ivpu_hw_ip_idle_gen_disable(vdev); + + ret = ivpu_hw_btrs_wait_for_clock_res_own_ack(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for clock resource own ACK\n"); + return ret; + } + + ret = ivpu_hw_ip_pwr_domain_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); + return ret; + } + + ret = ivpu_hw_ip_host_ss_axi_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); + return ret; + } + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_LNL) + ivpu_hw_btrs_set_port_arbitration_weights_lnl(vdev); + + ret = ivpu_hw_ip_top_noc_enable(vdev); + if (ret) + ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); + + return ret; +} + +static void save_d0i3_entry_timestamp(struct ivpu_device *vdev) +{ + vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); + vdev->hw->d0i3_entry_vpu_ts = ivpu_hw_ip_read_perf_timer_counter(vdev); +} + +int ivpu_hw_reset(struct ivpu_device *vdev) +{ + int ret = 0; + + if (ivpu_hw_btrs_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU IP\n"); + ret = -EIO; + } + + if (wp_disable(vdev)) { + ivpu_err(vdev, "Failed to disable workpoint\n"); + ret = -EIO; + } + + return ret; +} + +int ivpu_hw_power_down(struct ivpu_device *vdev) +{ + int ret = 0; + + save_d0i3_entry_timestamp(vdev); + + if (!ivpu_hw_is_idle(vdev)) + ivpu_warn(vdev, "NPU not idle during power down\n"); + + if (ivpu_hw_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU\n"); + ret = -EIO; + } + + if (ivpu_hw_btrs_d0i3_enable(vdev)) { + ivpu_err(vdev, "Failed to enter D0I3\n"); + ret = -EIO; + } + + return ret; +} + +int ivpu_hw_init(struct ivpu_device *vdev) +{ + ivpu_hw_btrs_info_init(vdev); + ivpu_hw_btrs_freq_ratios_init(vdev); + priority_bands_init(vdev); + memory_ranges_init(vdev); + platform_init(vdev); + wa_init(vdev); + timeouts_init(vdev); + atomic_set(&vdev->hw->firewall_irq_counter, 0); + +#ifdef CONFIG_FAULT_INJECTION + if (ivpu_fail_hw) + setup_fault_attr(&ivpu_hw_failure, ivpu_fail_hw); +#endif + + return 0; +} + +int ivpu_hw_boot_fw(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_ip_snoop_disable(vdev); + ivpu_hw_ip_tbu_mmu_enable(vdev); + ret = ivpu_hw_ip_soc_cpu_boot(vdev); + if (ret) + 
ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret); + + return ret; +} + +void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; + return; + } + + if (enable) + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH; + else + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; +} + +void ivpu_irq_handlers_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_37xx; + else + vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_40xx; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_mtl; + else + vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_lnl; +} + +void ivpu_hw_irq_enable(struct ivpu_device *vdev) +{ + ivpu_hw_ip_irq_enable(vdev); + ivpu_hw_btrs_irq_enable(vdev); +} + +void ivpu_hw_irq_disable(struct ivpu_device *vdev) +{ + ivpu_hw_btrs_irq_disable(vdev); + ivpu_hw_ip_irq_disable(vdev); +} + +irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr) +{ + struct ivpu_device *vdev = ptr; + bool ip_handled, btrs_handled; + + ivpu_hw_btrs_global_int_disable(vdev); + + btrs_handled = ivpu_hw_btrs_irq_handler(vdev, irq); + if (!ivpu_hw_is_idle((vdev)) || !btrs_handled) + ip_handled = ivpu_hw_ip_irq_handler(vdev, irq); + else + ip_handled = false; + + /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ + ivpu_hw_btrs_global_int_enable(vdev); + + if (!ip_handled && !btrs_handled) + return IRQ_NONE; + + pm_runtime_mark_last_busy(vdev->drm.dev); + return IRQ_HANDLED; +} + +bool ivpu_hw_uses_ecc_mca_signal(struct ivpu_device *vdev) +{ + unsigned long long msr_integrity_caps; + int ret; + + if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX) + return false; + + ret = rdmsrq_safe(MSR_INTEGRITY_CAPS, &msr_integrity_caps); + if (ret) { + ivpu_warn(vdev, "Error reading MSR_INTEGRITY_CAPS: %d", ret); + return false; + } + + ivpu_dbg(vdev, MISC, "MSR_INTEGRITY_CAPS: 0x%llx\n", msr_integrity_caps); + + return msr_integrity_caps & ECC_MCA_SIGNAL_ENABLE_MASK; +} diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 50a9304ab09c..b6d0f0d0dccc 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,34 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_H__ #define __IVPU_HW_H__ #include "ivpu_drv.h" - -struct ivpu_hw_ops { - int (*info_init)(struct ivpu_device *vdev); - int (*power_up)(struct ivpu_device *vdev); - int (*boot_fw)(struct ivpu_device *vdev); - int (*power_down)(struct ivpu_device *vdev); - bool (*is_idle)(struct ivpu_device *vdev); - void (*wdt_disable)(struct ivpu_device *vdev); - void (*diagnose_failure)(struct ivpu_device *vdev); - u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); - void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id); - u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev); - u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev); - void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr); - void (*irq_clear)(struct ivpu_device *vdev); - void (*irq_enable)(struct ivpu_device *vdev); - void (*irq_disable)(struct ivpu_device 
*vdev); - irqreturn_t (*irq_handler)(int irq, void *ptr); -}; +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" struct ivpu_addr_range { resource_size_t start; @@ -36,13 +16,16 @@ struct ivpu_addr_range { }; struct ivpu_hw_info { - const struct ivpu_hw_ops *ops; struct { - struct ivpu_addr_range global_low; - struct ivpu_addr_range global_high; - struct ivpu_addr_range user_low; - struct ivpu_addr_range user_high; - struct ivpu_addr_range global_aliased_pio; + bool (*btrs_irq_handler)(struct ivpu_device *vdev, int irq); + bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq); + } irq; + struct { + struct ivpu_addr_range runtime; + struct ivpu_addr_range global; + struct ivpu_addr_range user; + struct ivpu_addr_range shave; + struct ivpu_addr_range dma; } ranges; struct { u8 min_ratio; @@ -54,117 +37,118 @@ struct ivpu_hw_info { u8 pn_ratio; u32 profiling_freq; } pll; + struct { + u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + } hws; u32 tile_fuse; u32 sku; u16 config; -}; - -extern const struct ivpu_hw_ops ivpu_hw_mtl_ops; - -static inline int ivpu_hw_info_init(struct ivpu_device *vdev) -{ - return vdev->hw->ops->info_init(vdev); -}; - -static inline int ivpu_hw_power_up(struct ivpu_device *vdev) -{ - ivpu_dbg(vdev, PM, "HW power up\n"); - - return vdev->hw->ops->power_up(vdev); -}; - -static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev) -{ - return vdev->hw->ops->boot_fw(vdev); -}; - -static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) -{ - return vdev->hw->ops->is_idle(vdev); -}; + int dma_bits; + ktime_t d0i3_entry_host_ts; + u64 d0i3_entry_vpu_ts; + atomic_t firewall_irq_counter; +}; + +int ivpu_hw_init(struct ivpu_device *vdev); +int ivpu_hw_range_init(struct ivpu_device *vdev, struct ivpu_addr_range *range, u64 start, + u64 size); +int ivpu_hw_power_up(struct ivpu_device *vdev); +int ivpu_hw_power_down(struct ivpu_device *vdev); +int ivpu_hw_reset(struct ivpu_device *vdev); +int ivpu_hw_boot_fw(struct ivpu_device *vdev); +void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable); +void ivpu_irq_handlers_init(struct ivpu_device *vdev); +void ivpu_hw_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_irq_disable(struct ivpu_device *vdev); +irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr); +bool ivpu_hw_uses_ecc_mca_signal(struct ivpu_device *vdev); + +static inline u32 ivpu_hw_btrs_irq_handler(struct ivpu_device *vdev, int irq) +{ + return vdev->hw->irq.btrs_irq_handler(vdev, irq); +} -static inline int ivpu_hw_power_down(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ip_irq_handler(struct ivpu_device *vdev, int irq) { - ivpu_dbg(vdev, PM, "HW power down\n"); - - return vdev->hw->ops->power_down(vdev); -}; + return vdev->hw->irq.ip_irq_handler(vdev, irq); +} -static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) +static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) { - vdev->hw->ops->wdt_disable(vdev); -}; + return range->end - range->start; +} -/* Register indirect accesses */ -static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_pll_freq_get(vdev); -}; + return ivpu_hw_btrs_dpu_max_freq_get(vdev); +} -static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) { - return 
vdev->hw->ops->reg_telemetry_offset_get(vdev); -}; + return ivpu_hw_btrs_dpu_freq_get(vdev); +} -static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev) +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_telemetry_size_get(vdev); -}; + ivpu_hw_ip_irq_clear(vdev); +} -static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_telemetry_enable_get(vdev); -}; + return vdev->hw->pll.profiling_freq; +} -static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id) +static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) { - vdev->hw->ops->reg_db_set(vdev, db_id); -}; + ivpu_hw_ip_diagnose_failure(vdev); + ivpu_hw_btrs_diagnose_failure(vdev); +} -static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_telemetry_offset_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_ipc_rx_addr_get(vdev); -}; + return ivpu_hw_btrs_telemetry_offset_get(vdev); +} -static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_telemetry_size_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_ipc_rx_count_get(vdev); -}; + return ivpu_hw_btrs_telemetry_size_get(vdev); +} -static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +static inline u32 ivpu_hw_telemetry_enable_get(struct ivpu_device *vdev) { - vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr); -}; + return ivpu_hw_btrs_telemetry_enable_get(vdev); +} -static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) { - vdev->hw->ops->irq_clear(vdev); -}; + return ivpu_hw_btrs_is_idle(vdev); +} -static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev) +static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev) { - vdev->hw->ops->irq_enable(vdev); -}; + return ivpu_hw_btrs_wait_for_idle(vdev); +} -static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev) +static inline void ivpu_hw_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) { - vdev->hw->ops->irq_disable(vdev); -}; + ivpu_hw_ip_ipc_tx_set(vdev, vpu_addr); +} -static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size) +static inline void ivpu_hw_db_set(struct ivpu_device *vdev, u32 db_id) { - range->start = start; - range->end = start + size; + ivpu_hw_ip_db_set(vdev, db_id); } -static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) +static inline u32 ivpu_hw_ipc_rx_addr_get(struct ivpu_device *vdev) { - return range->end - range->start; + return ivpu_hw_ip_ipc_rx_addr_get(vdev); } -static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ipc_rx_count_get(struct ivpu_device *vdev) { - vdev->hw->ops->diagnose_failure(vdev); + return ivpu_hw_ip_ipc_rx_count_get(vdev); } #endif /* __IVPU_HW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h new file mode 100644 index 000000000000..cf5e2f01049c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_37XX_REG_H__ +#define __IVPU_HW_37XX_REG_H__ + +#include <linux/bits.h> + +#define VPU_37XX_HOST_SS_CPR_CLK_SET 0x00000084u +#define VPU_37XX_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK 
BIT_MASK(1) +#define VPU_37XX_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) +#define VPU_37XX_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK BIT_MASK(11) + +#define VPU_37XX_HOST_SS_CPR_RST_SET 0x00000094u +#define VPU_37XX_HOST_SS_CPR_RST_SET_TOP_NOC_MASK BIT_MASK(1) +#define VPU_37XX_HOST_SS_CPR_RST_SET_DSS_MAS_MASK BIT_MASK(10) +#define VPU_37XX_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11) + +#define VPU_37XX_HOST_SS_CPR_RST_CLR 0x00000098u +#define VPU_37XX_HOST_SS_CPR_RST_CLR_AON_MASK BIT_MASK(0) +#define VPU_37XX_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1) +#define VPU_37XX_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10) +#define VPU_37XX_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11) + +#define VPU_37XX_HOST_SS_HW_VERSION 0x00000108u +#define VPU_37XX_HOST_SS_HW_VERSION_SOC_REVISION_MASK GENMASK(7, 0) +#define VPU_37XX_HOST_SS_HW_VERSION_SOC_NUMBER_MASK GENMASK(15, 8) +#define VPU_37XX_HOST_SS_HW_VERSION_VPU_GENERATION_MASK GENMASK(23, 16) + +#define VPU_37XX_HOST_SS_GEN_CTRL 0x00000118u +#define VPU_37XX_HOST_SS_GEN_CTRL_PS_MASK GENMASK(31, 29) + +#define VPU_37XX_HOST_SS_NOC_QREQN 0x00000154u +#define VPU_37XX_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define VPU_37XX_HOST_SS_NOC_QACCEPTN 0x00000158u +#define VPU_37XX_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define VPU_37XX_HOST_SS_NOC_QDENY 0x0000015cu +#define VPU_37XX_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define VPU_37XX_TOP_NOC_QREQN 0x00000160u +#define VPU_37XX_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_37XX_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define VPU_37XX_TOP_NOC_QACCEPTN 0x00000164u +#define VPU_37XX_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_37XX_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define VPU_37XX_TOP_NOC_QDENY 0x00000168u +#define VPU_37XX_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_37XX_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN 0x00000170u +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0) +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK BIT_MASK(1) +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK BIT_MASK(2) +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK BIT_MASK(3) +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK BIT_MASK(4) +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK BIT_MASK(5) +#define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK BIT_MASK(6) + +#define VPU_37XX_HOST_SS_ICB_STATUS_0 0x00010210u +#define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK BIT_MASK(0) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK BIT_MASK(1) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31) + +#define VPU_37XX_HOST_SS_ICB_STATUS_1 0x00010214u +#define VPU_37XX_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK BIT_MASK(0) +#define VPU_37XX_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK BIT_MASK(1) +#define VPU_37XX_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK 
BIT_MASK(2) + +#define VPU_37XX_HOST_SS_ICB_CLEAR_0 0x00010220u +#define VPU_37XX_HOST_SS_ICB_CLEAR_1 0x00010224u +#define VPU_37XX_HOST_SS_ICB_ENABLE_0 0x00010240u + +#define VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM 0x000200f4u + +#define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT 0x000200fcu +#define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_READ_POINTER_MASK GENMASK(7, 0) +#define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_WRITE_POINTER_MASK GENMASK(15, 8) +#define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16) +#define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK GENMASK(31, 24) + +#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0 0x00030020u +#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK BIT_MASK(3) + +#define VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN 0x00030200u +#define VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN_EN_MASK BIT_MASK(0) + +#define VPU_37XX_HOST_SS_AON_DPU_ACTIVE 0x00030204u +#define VPU_37XX_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) + +#define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO 0x00041040u +#define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_DONE_MASK BIT_MASK(0) +#define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) +#define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3) + +#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u +#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0) +#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16) + +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK BIT_MASK(3) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK BIT_MASK(4) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK BIT_MASK(5) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK GENMASK(10, 6) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK GENMASK(15, 11) + +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV 0x00360004u +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK BIT_MASK(0) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK BIT_MASK(1) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK BIT_MASK(2) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK BIT_MASK(3) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK BIT_MASK(4) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK BIT_MASK(5) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK BIT_MASK(6) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK BIT_MASK(7) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8) +#define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9) + +#define VPU_37XX_CPU_SS_DSU_LEON_RT_BASE 0x04000000u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u +#define 
VPU_37XX_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u + +#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u +#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1) + +#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u +#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1) + +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4) + +#define VPU_37XX_CPU_SS_TIM_WATCHDOG 0x0602009cu +#define VPU_37XX_CPU_SS_TIM_WDOG_EN 0x060200a4u +#define VPU_37XX_CPU_SS_TIM_SAFE 0x060200a8u +#define VPU_37XX_CPU_SS_TIM_IPC_FIFO 0x060200f0u + +#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG 0x06021008u +#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) + +#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT 0x06029000u + +#define VPU_37XX_CPU_SS_DOORBELL_0 0x06300000u +#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) + +#define VPU_37XX_CPU_SS_DOORBELL_1 0x06301000u + +#endif /* __IVPU_HW_37XX_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h new file mode 100644 index 000000000000..fc0ee8d637f9 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_40XX_REG_H__ +#define __IVPU_HW_40XX_REG_H__ + +#include <linux/bits.h> + +#define VPU_40XX_HOST_SS_CPR_CLK_EN 0x00000080u +#define VPU_40XX_HOST_SS_CPR_CLK_EN_TOP_NOC_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_CPR_CLK_EN_DSS_MAS_MASK BIT_MASK(10) +#define VPU_40XX_HOST_SS_CPR_CLK_EN_CSS_MAS_MASK BIT_MASK(11) + +#define VPU_40XX_HOST_SS_CPR_CLK_SET 0x00000084u +#define VPU_40XX_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) +#define VPU_40XX_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK BIT_MASK(11) + +#define VPU_40XX_HOST_SS_CPR_RST_EN 0x00000090u +#define VPU_40XX_HOST_SS_CPR_RST_EN_TOP_NOC_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_CPR_RST_EN_DSS_MAS_MASK BIT_MASK(10) +#define VPU_40XX_HOST_SS_CPR_RST_EN_CSS_MAS_MASK BIT_MASK(11) + +#define VPU_40XX_HOST_SS_CPR_RST_SET 0x00000094u +#define VPU_40XX_HOST_SS_CPR_RST_SET_TOP_NOC_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_CPR_RST_SET_DSS_MAS_MASK BIT_MASK(10) +#define VPU_40XX_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11) + +#define VPU_40XX_HOST_SS_CPR_RST_CLR 0x00000098u +#define VPU_40XX_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10) +#define VPU_40XX_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11) + +#define VPU_40XX_HOST_SS_HW_VERSION 0x00000108u +#define VPU_40XX_HOST_SS_HW_VERSION_SOC_REVISION_MASK GENMASK(7, 0) +#define VPU_40XX_HOST_SS_HW_VERSION_SOC_NUMBER_MASK GENMASK(15, 8) +#define VPU_40XX_HOST_SS_HW_VERSION_VPU_GENERATION_MASK GENMASK(23, 16) + +#define VPU_40XX_HOST_SS_SW_VERSION 0x0000010cu + +#define VPU_40XX_HOST_SS_GEN_CTRL 0x00000118u +#define VPU_40XX_HOST_SS_GEN_CTRL_PS_MASK GENMASK(31, 29) + +#define VPU_40XX_HOST_SS_NOC_QREQN 0x00000154u +#define VPU_40XX_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK BIT_MASK(0) + 
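+/* + * The VPU_40XX_HOST_SS_NOC_QREQN/QACCEPTN/QDENY registers below mirror the Arm Q-Channel + * quiesce handshake: the host requests quiesce via QREQN and the fabric either acknowledges + * on QACCEPTN or refuses on QDENY (checked by the *_noc_q*_check() helpers in ivpu_hw_ip.c). + */ +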
+#define VPU_40XX_HOST_SS_NOC_QACCEPTN 0x00000158u +#define VPU_40XX_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define VPU_40XX_HOST_SS_NOC_QDENY 0x0000015cu +#define VPU_40XX_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define VPU_40XX_TOP_NOC_QREQN 0x00000160u +#define VPU_40XX_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_40XX_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(2) + +#define VPU_40XX_TOP_NOC_QACCEPTN 0x00000164u +#define VPU_40XX_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_40XX_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(2) + +#define VPU_40XX_TOP_NOC_QDENY 0x00000168u +#define VPU_40XX_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_40XX_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(2) + +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN 0x00000170u +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0) +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK BIT_MASK(2) +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK BIT_MASK(3) +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK BIT_MASK(4) +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK BIT_MASK(5) +#define VPU_40XX_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK BIT_MASK(6) + +#define VPU_40XX_HOST_SS_ICB_STATUS_0 0x00010210u +#define VPU_40XX_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK BIT_MASK(0) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30) +#define VPU_40XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31) + +#define VPU_40XX_HOST_SS_ICB_STATUS_1 0x00010214u +#define VPU_40XX_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK BIT_MASK(0) +#define VPU_40XX_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK BIT_MASK(1) +#define VPU_40XX_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK BIT_MASK(2) + +#define VPU_40XX_HOST_SS_ICB_CLEAR_0 0x00010220u +#define VPU_40XX_HOST_SS_ICB_CLEAR_1 0x00010224u +#define VPU_40XX_HOST_SS_ICB_ENABLE_0 0x00010240u +#define VPU_40XX_HOST_SS_ICB_ENABLE_1 0x00010244u + +#define VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM 0x000200f4u + +#define VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT 0x000200fcu +#define VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16) + +#define VPU_40XX_HOST_SS_AON_PWR_ISO_EN0 0x00030020u +#define VPU_40XX_HOST_SS_AON_PWR_ISO_EN0_CSS_CPU_MASK BIT_MASK(3) + +#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u +#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0_CSS_CPU_MASK BIT_MASK(3) + +#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u +#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_CSS_CPU_MASK BIT_MASK(3) + +#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu +#define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0_CSS_CPU_MASK BIT_MASK(3) + +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 
8) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16) + +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) + +#define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u +#define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0) +#define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1) + +#define VPU_40XX_HOST_SS_AON_DPU_ACTIVE 0x00030204u +#define VPU_40XX_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) + +#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE 0x00030210u +#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE_REQ_OVERRIDE_MASK BIT_MASK(0) + +#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO 0x00040040u +#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_DONE_MASK BIT_MASK(0) +#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) +#define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3) + +#define VPU_40XX_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u +#define VPU_40XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0) +#define VPU_40XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16) + +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_SNOOP_OVERRIDE_EN_MASK BIT_MASK(3) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_AW_SNOOP_OVERRIDE_MASK BIT_MASK(4) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_AR_SNOOP_OVERRIDE_MASK BIT_MASK(5) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK GENMASK(10, 6) +#define VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK GENMASK(15, 11) + +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV 0x00360004u +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK BIT_MASK(0) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK BIT_MASK(1) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK BIT_MASK(2) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK BIT_MASK(3) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK BIT_MASK(4) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK BIT_MASK(5) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK BIT_MASK(6) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK BIT_MASK(7) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8) +#define VPU_40XX_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9) + +#define VPU_40XX_CPU_SS_DSU_LEON_RT_BASE 0x04000000u +#define VPU_40XX_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u +#define VPU_40XX_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u +#define VPU_40XX_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u +#define VPU_40XX_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u + +#define VPU_40XX_CPU_SS_TIM_WATCHDOG 0x0102009cu +#define VPU_40XX_CPU_SS_TIM_WDOG_EN 0x010200a4u +#define VPU_40XX_CPU_SS_TIM_SAFE 0x010200a8u + +#define VPU_40XX_CPU_SS_TIM_GEN_CONFIG 0x01021008u +#define VPU_40XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) + +#define VPU_40XX_CPU_SS_CPR_NOC_QREQN 0x01010030u +#define VPU_40XX_CPU_SS_CPR_NOC_QREQN_TOP_MMIO_MASK BIT_MASK(0) + +#define VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN 0x01010034u +#define VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN_TOP_MMIO_MASK BIT_MASK(0) + +#define VPU_40XX_CPU_SS_CPR_NOC_QDENY 0x01010038u +#define 
VPU_40XX_CPU_SS_CPR_NOC_QDENY_TOP_MMIO_MASK BIT_MASK(0) + +#define VPU_40XX_CPU_SS_TIM_IPC_FIFO 0x010200f0u +#define VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT 0x01029008u + +#define VPU_40XX_CPU_SS_DOORBELL_0 0x01300000u +#define VPU_40XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) + +#define VPU_40XX_CPU_SS_DOORBELL_1 0x01301000u + +#endif /* __IVPU_HW_40XX_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c new file mode 100644 index 000000000000..06e65c592618 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -0,0 +1,905 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2025 Intel Corporation + */ + +#include <linux/units.h> + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_btrs_lnl_reg.h" +#include "ivpu_hw_btrs_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_pm.h" + +#define BTRS_MTL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR))) + +#define BTRS_LNL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR))) + +#define BTRS_MTL_ALL_IRQ_MASK (BTRS_MTL_IRQ_MASK | (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, \ + FREQ_CHANGE))) + +#define BTRS_IRQ_DISABLE_MASK ((u32)-1) + +#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) + + +#define PLL_CDYN_DEFAULT 0x80 +#define PLL_EPP_DEFAULT 0x80 +#define PLL_REF_CLK_FREQ 50000000ull +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) + +#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) +#define TIMEOUT_US (150 * USEC_PER_MSEC) + +/* Work point configuration values */ +#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio)) +#define MTL_CONFIG_1_TILE 0x01 +#define MTL_CONFIG_2_TILE 0x02 +#define MTL_PLL_RATIO_5_3 0x01 +#define MTL_PLL_RATIO_4_3 0x02 +#define BTRS_MTL_TILE_FUSE_ENABLE_BOTH 0x0 +#define BTRS_MTL_TILE_SKU_BOTH 0x3630 + +#define BTRS_LNL_TILE_MAX_NUM 6 +#define BTRS_LNL_TILE_MAX_MASK 0x3f + +#define WEIGHTS_DEFAULT 0xf711f711u +#define WEIGHTS_ATS_DEFAULT 0x0000f711u + +#define DCT_REQ 0x2 +#define DCT_ENABLE 0x1 +#define DCT_DISABLE 0x0 + +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); + +int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) +{ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); + if (REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) == BTRS_MTL_ALL_IRQ_MASK) { + /* Writing 1s does not clear the interrupt status register */ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0); + return true; + } + + return false; +} + +static void freq_ratios_init_mtl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMIN_FUSE); + hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, MIN_RATIO, fmin_fuse); + hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMAX_FUSE); + hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMAX_FUSE, MAX_RATIO, fmax_fuse); +} + +static void freq_ratios_init_lnl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMIN_FUSE); + 
hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, MIN_RATIO, fmin_fuse); + hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMAX_FUSE); + hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMAX_FUSE, MAX_RATIO, fmax_fuse); +} + +void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + freq_ratios_init_mtl(vdev); + else + freq_ratios_init_lnl(vdev); + + hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, hw->pll.min_ratio, hw->pll.max_ratio); + hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, hw->pll.max_ratio); + hw->pll.pn_ratio = clamp_t(u8, hw->pll.pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); +} + +static bool tile_disable_check(u32 config) +{ + /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */ + if (config == 0) + return true; + + if (config > BIT(BTRS_LNL_TILE_MAX_NUM - 1)) + return false; + + if ((config & (config - 1)) == 0) + return true; + + return false; +} + +static int read_tile_config_fuse(struct ivpu_device *vdev, u32 *tile_fuse_config) +{ + u32 fuse; + u32 config; + + fuse = REGB_RD32(VPU_HW_BTRS_LNL_TILE_FUSE); + if (!REG_TEST_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, VALID, fuse)) { + ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse); + return -EIO; + } + + config = REG_GET_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, CONFIG, fuse); + if (!tile_disable_check(config)) + ivpu_warn(vdev, "More than 1 tile disabled, tile fuse config mask: 0x%x\n", config); + + ivpu_dbg(vdev, MISC, "Tile disable config mask: 0x%x\n", config); + + *tile_fuse_config = config; + return 0; +} + +static int info_init_mtl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + + hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; + hw->sku = BTRS_MTL_TILE_SKU_BOTH; + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); + + return 0; +} + +static int info_init_lnl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 tile_fuse_config; + int ret; + + ret = read_tile_config_fuse(vdev, &tile_fuse_config); + if (ret) + return ret; + + hw->tile_fuse = tile_fuse_config; + hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; + + return 0; +} + +int ivpu_hw_btrs_info_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return info_init_mtl(vdev); + else + return info_init_lnl(vdev); +} + +static int wp_request_sync(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); +} + +static int wait_for_status_ready(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 
0x1 : 0x0; + + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US); +} + +struct wp_request { + u16 min; + u16 max; + u16 target; + u16 cfg; + u16 epp; + u16 cdyn; +}; + +static void wp_request_mtl(struct ivpu_device *vdev, struct wp_request *wp) +{ + u32 val; + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, EPP, PLL_EPP_DEFAULT, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_CMD); + val = REG_SET_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_CMD, val); +} + +static void wp_request_lnl(struct ivpu_device *vdev, struct wp_request *wp) +{ + u32 val; + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, EPP, wp->epp, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CDYN, wp->cdyn, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_CMD); + val = REG_SET_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_CMD, val); +} + +static void wp_request(struct ivpu_device *vdev, struct wp_request *wp) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + wp_request_mtl(vdev, wp); + else + wp_request_lnl(vdev, wp); +} + +static int wp_request_send(struct ivpu_device *vdev, struct wp_request *wp) +{ + int ret; + + ret = wp_request_sync(vdev); + if (ret) { + ivpu_err(vdev, "Failed to sync before workpoint request: %d\n", ret); + return ret; + } + + wp_request(vdev, wp); + + ret = wp_request_sync(vdev); + if (ret) + ivpu_err(vdev, "Failed to sync after workpoint request: %d\n", ret); + + return ret; +} + +static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp, bool enable) +{ + struct ivpu_hw_info *hw = vdev->hw; + + wp->min = hw->pll.min_ratio; + wp->max = hw->pll.max_ratio; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + wp->target = enable ? hw->pll.pn_ratio : 0; + wp->cfg = enable ? hw->config : 0; + wp->cdyn = 0; + wp->epp = 0; + } else { + wp->target = hw->pll.pn_ratio; + wp->cfg = 0; + wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0; + wp->epp = enable ? 
PLL_EPP_DEFAULT : 0; + } +} + +static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (ivpu_hw_btrs_gen(vdev) != IVPU_HW_BTRS_MTL) + return 0; + + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); +} + +static int wait_for_cdyn_deassert(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_CDYN, CDYN, 0, PLL_TIMEOUT_US); +} + +int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) +{ + struct wp_request wp; + int ret; + + if (IVPU_WA(punit_disabled)) { + ivpu_dbg(vdev, PM, "Skipping workpoint request\n"); + return 0; + } + + prepare_wp_request(vdev, &wp, enable); + + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); + + ret = wp_request_send(vdev, &wp); + if (ret) { + ivpu_err(vdev, "Failed to send workpoint request: %d\n", ret); + return ret; + } + + ret = wait_for_pll_lock(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL lock\n"); + return ret; + } + + ret = wait_for_status_ready(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for NPU ready status\n"); + return ret; + } + + if (!enable) { + ret = wait_for_cdyn_deassert(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for CDYN deassert\n"); + return ret; + } + } + + return 0; +} + +static int d0i3_drive_mtl(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val); + REGB_WR32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + + return ret; +} + +static int d0i3_drive_lnl(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_LNL_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val); + REGB_WR32(VPU_HW_BTRS_LNL_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + return ret; + } + + return 0; +} + +static int d0i3_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return d0i3_drive_mtl(vdev, enable); + else + return d0i3_drive_lnl(vdev, enable); +} + +int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = d0i3_drive(vdev, true); + if (ret) + ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); + + udelay(5); /* VPU requires 5 us to complete the transition */ + + return ret; +} + +int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev) +{ + int ret; 
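+ /* Counterpart of ivpu_hw_btrs_d0i3_enable(); unlike the enable path, no post-transition delay is applied on exit. */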
+ + if (IVPU_WA(punit_disabled)) + return 0; + + ret = d0i3_drive(vdev, false); + if (ret) + ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); + + return ret; +} + +int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); +} + +void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev) +{ + REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT); + REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT); +} + +static int ip_reset_mtl(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_IP_RESET); + val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_HW_BTRS_MTL_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +static int ip_reset_lnl(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Wait for *_TRIGGER timed out\n"); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_LNL_IP_RESET); + val = REG_SET_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_HW_BTRS_LNL_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return ip_reset_mtl(vdev); + else + return ip_reset_lnl(vdev); +} + +void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) + val = REG_CLR_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val); + else + val = REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val); + + REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val); +} + +void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n", + REGB_RD32(VPU_HW_BTRS_LNL_HM_ATS) ? 
"Enable" : "Disable"); +} + +void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + val = REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, DISABLE_CLK_RELINQUISH, val); + REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val); +} + +bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev) +{ + u32 val; + + if (IVPU_WA(punit_disabled)) + return true; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_STATUS); + + return REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, val) && + REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, val); + } else { + val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + return REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, val) && + REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, val); + } +} + +int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); +} + +static u32 pll_config_get_mtl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); +} + +static u32 pll_config_get_lnl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); +} + +static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) +{ + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; +} + +static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) +{ + return PLL_RATIO_TO_FREQ(ratio) / 2; +} + +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(ratio); + else + return pll_ratio_to_dpu_freq_lnl(ratio); +} + +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) +{ + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); +} + +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); + else + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); +} + +/* Handler for IRQs from Buttress core (irqB) */ +bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK; + bool schedule_recovery = false; + + if (!status) + return false; + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_mtl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); + REGB_WR32(VPU_HW_BTRS_MTL_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, status)) { + u32 ufi_log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, ufi_log)); + REGB_WR32(VPU_HW_BTRS_MTL_UFI_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + /* This must be done after interrupts are cleared at the source. */ + if (IVPU_WA(interrupt_clear_with_0)) + /* + * Writing 1 triggers an interrupt, so we can't perform read update write. 
+ * Clear local interrupt status by writing 0 to all bits. + */ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0); + else + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, status); + + if (schedule_recovery) + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); + + return true; +} + +/* Handler for IRQs from Buttress core (irqB) */ +bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK; + bool schedule_recovery = false; + + if (!status) + return false; + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) { + ivpu_dbg(vdev, IRQ, "Survivability IRQ\n"); + queue_work(system_percpu_wq, &vdev->irq_dct_work); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_lnl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1), + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2)); + REGB_WR32(VPU_HW_BTRS_LNL_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, status)) { + ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG)); + REGB_WR32(VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, status)) { + ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG)); + REGB_WR32(VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, status)) { + ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH)); + REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, status)) { + ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH)); + REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR, 0x1); + schedule_recovery = true; + } + + /* This must be done after interrupts are cleared at the source. */ + REGB_WR32(VPU_HW_BTRS_LNL_INTERRUPT_STAT, status); + + if (schedule_recovery) + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); + + return true; +} + +int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW); + u32 cmd = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, CMD, val); + u32 param1 = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, PARAM1, val); + + if (cmd != DCT_REQ) { + ivpu_err_ratelimited(vdev, "Unsupported PCODE command: 0x%x\n", cmd); + return -EBADR; + } + + switch (param1) { + case DCT_ENABLE: + *enable = true; + return 0; + case DCT_DISABLE: + *enable = false; + return 0; + default: + ivpu_err_ratelimited(vdev, "Invalid PARAM1 value: %u\n", param1); + return -EINVAL; + } +} + +void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u8 active_percent) +{ + u32 val = 0; + u32 cmd = enable ? 
DCT_ENABLE : DCT_DISABLE; + + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, CMD, DCT_REQ, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM1, cmd, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); + + REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); +} + +u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET); +} + +u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE); +} + +u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE); +} + +void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1); + else + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1); +} + +void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0); + else + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0); +} + +void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, (u32)(~BTRS_MTL_IRQ_MASK)); + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0); + } else { + REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, (u32)(~BTRS_LNL_IRQ_MASK)); + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0); + } +} + +void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1); + REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, BTRS_IRQ_DISABLE_MASK); + } else { + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1); + REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, BTRS_IRQ_DISABLE_MASK); + } +} + +static void diagnose_failure_mtl(struct ivpu_device *vdev) +{ + u32 reg = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK; + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, reg)) + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, reg)) { + u32 log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, log)); + } +} + +static void diagnose_failure_lnl(struct ivpu_device *vdev) +{ + u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK; + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, reg)) { + ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1), + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2)); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, reg)) + ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR,
reg)) + ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, reg)) + ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, reg)) + ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, reg)) + ivpu_err(vdev, "Survivability IRQ\n"); +} + +void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return diagnose_failure_mtl(vdev); + else + return diagnose_failure_lnl(vdev); +} + +int ivpu_hw_btrs_platform_read(struct ivpu_device *vdev) +{ + u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + return REG_GET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PLATFORM, reg); +} diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h new file mode 100644 index 000000000000..c4c10e22f30f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2025 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_H__ +#define __IVPU_HW_BTRS_H__ + +#include "ivpu_drv.h" +#include "ivpu_hw_37xx_reg.h" +#include "ivpu_hw_40xx_reg.h" +#include "ivpu_hw_reg_io.h" + +#define PLL_PROFILING_FREQ_DEFAULT 38400000 +#define PLL_PROFILING_FREQ_HIGH 400000000 + +#define DCT_DEFAULT_ACTIVE_PERCENT 30u +#define DCT_PERIOD_US 35300u + +int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); +void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev); +int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev); +int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable); +int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev); +int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev); +int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev); +bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev); +int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev); +int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev); +void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); +void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); +void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); +bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); +bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); +int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); +void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u8 active_percent); +u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); +void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev); +void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev); +int ivpu_hw_btrs_platform_read(struct 
ivpu_device *vdev); + +#endif /* __IVPU_HW_BTRS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h new file mode 100644 index 000000000000..a81a9ba540fa --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_LNL_REG_H__ +#define __IVPU_HW_BTRS_LNL_REG_H__ + +#include <linux/bits.h> + +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT 0x00000000u +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6) + +#define VPU_HW_BTRS_LNL_LOCAL_INT_MASK 0x00000004u +#define VPU_HW_BTRS_LNL_GLOBAL_INT_MASK 0x00000008u + +#define VPU_HW_BTRS_LNL_HM_ATS 0x0000000cu + +#define VPU_HW_BTRS_LNL_ATS_ERR_LOG1 0x00000010u +#define VPU_HW_BTRS_LNL_ATS_ERR_LOG2 0x00000014u +#define VPU_HW_BTRS_LNL_ATS_ERR_CLEAR 0x00000018u + +#define VPU_HW_BTRS_LNL_CFI0_ERR_LOG 0x0000001cu +#define VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR 0x00000020u + +#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u + +#define VPU_HW_BTRS_LNL_CFI1_ERR_LOG 0x00000040u +#define VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR 0x00000044u + +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW 0x00000048u +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH 0x0000004cu +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR 0x00000050u + +#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS 0x00000054u + +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW 0x00000058u +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH 0x0000005cu +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR 0x00000060u + +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS 0x00000070u +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_CMD_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM1_MASK GENMASK(15, 8) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM2_MASK GENMASK(23, 16) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM3_MASK GENMASK(31, 24) + +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW 0x00000074u +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_CMD_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM1_MASK GENMASK(15, 8) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM2_MASK GENMASK(23, 16) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM3_MASK GENMASK(31, 24) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0 0x00000130u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1 0x00000134u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2 0x00000138u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_CMD 0x0000013cu +#define VPU_HW_BTRS_LNL_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_LNL_PLL_FREQ 0x00000148u +#define VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_LNL_CDYN 0x0000014cu +#define 
VPU_HW_BTRS_LNL_CDYN_CDYN_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_LNL_TILE_FUSE 0x00000150u +#define VPU_HW_BTRS_LNL_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_TILE_FUSE_CONFIG_MASK GENMASK(6, 1) + +#define VPU_HW_BTRS_LNL_VPU_STATUS 0x00000154u +#define VPU_HW_BTRS_LNL_VPU_STATUS_READY_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_VPU_STATUS_IDLE_MASK BIT_MASK(1) +#define VPU_HW_BTRS_LNL_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2) +#define VPU_HW_BTRS_LNL_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6) +#define VPU_HW_BTRS_LNL_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) +#define VPU_HW_BTRS_LNL_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) +#define VPU_HW_BTRS_LNL_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) +#define VPU_HW_BTRS_LNL_VPU_STATUS_PLATFORM_MASK GENMASK(31, 29) + +#define VPU_HW_BTRS_LNL_IP_RESET 0x00000160u +#define VPU_HW_BTRS_LNL_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_LNL_D0I3_CONTROL 0x00000164u +#define VPU_HW_BTRS_LNL_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET 0x00000168u +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE 0x0000016cu +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE 0x00000170u + +#define VPU_HW_BTRS_LNL_FMIN_FUSE 0x00000174u +#define VPU_HW_BTRS_LNL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define VPU_HW_BTRS_LNL_FMAX_FUSE 0x00000178u +#define VPU_HW_BTRS_LNL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#endif /* __IVPU_HW_BTRS_LNL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h new file mode 100644 index 000000000000..e93d539e066f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_MTL_REG_H__ +#define __IVPU_HW_BTRS_MTL_REG_H__ + +#include <linux/bits.h> + +#define VPU_HW_BTRS_MTL_INTERRUPT_TYPE 0x00000000u + +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT 0x00000004u +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0 0x00000008u +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1 0x0000000cu +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2 0x00000010u +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_WP_REQ_CMD 0x00000014u +#define VPU_HW_BTRS_MTL_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_MTL_WP_DOWNLOAD 0x00000018u +#define VPU_HW_BTRS_MTL_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_CURRENT_PLL 0x0000001cu +#define VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_PLL_ENABLE 0x00000020u + +#define VPU_HW_BTRS_MTL_FMIN_FUSE 0x00000024u +#define VPU_HW_BTRS_MTL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_MTL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define VPU_HW_BTRS_MTL_FMAX_FUSE 0x00000028u +#define VPU_HW_BTRS_MTL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#define 
VPU_HW_BTRS_MTL_TILE_FUSE 0x0000002cu +#define VPU_HW_BTRS_MTL_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_TILE_FUSE_SKU_MASK GENMASK(3, 2) + +#define VPU_HW_BTRS_MTL_LOCAL_INT_MASK 0x00000030u +#define VPU_HW_BTRS_MTL_GLOBAL_INT_MASK 0x00000034u + +#define VPU_HW_BTRS_MTL_PLL_STATUS 0x00000040u +#define VPU_HW_BTRS_MTL_PLL_STATUS_LOCK_MASK BIT_MASK(1) + +#define VPU_HW_BTRS_MTL_VPU_STATUS 0x00000044u +#define VPU_HW_BTRS_MTL_VPU_STATUS_READY_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_VPU_STATUS_IDLE_MASK BIT_MASK(1) + +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL 0x00000060u +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_MTL_VPU_IP_RESET 0x00000050u +#define VPU_HW_BTRS_MTL_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET 0x00000080u +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE 0x00000084u +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE 0x00000088u + +#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_0 0x000000a0u +#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_1 0x000000a4u +#define VPU_HW_BTRS_MTL_ATS_ERR_CLEAR 0x000000a8u + +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG 0x000000b0u +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) + +#define VPU_HW_BTRS_MTL_UFI_ERR_CLEAR 0x000000b4u + +#endif /* __IVPU_HW_BTRS_MTL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c new file mode 100644 index 000000000000..06aa1e7dc50b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -0,0 +1,1199 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_hw.h" +#include "ivpu_hw_37xx_reg.h" +#include "ivpu_hw_40xx_reg.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_mmu.h" +#include "ivpu_pm.h" + +#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) + +#define TIM_SAFE_ENABLE 0xf1d0dead +#define TIM_WATCHDOG_RESET_VALUE 0xffffffff + +#define ICB_0_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK_37XX ((((u64)ICB_1_IRQ_MASK_37XX) << 32) | ICB_0_IRQ_MASK_37XX) + +#define ICB_0_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK_40XX 
((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK_40XX ((((u64)ICB_1_IRQ_MASK_40XX) << 32) | ICB_0_IRQ_MASK_40XX) + +#define ITF_FIREWALL_VIOLATION_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +#define ITF_FIREWALL_VIOLATION_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +static int wait_for_ip_bar(struct ivpu_device *vdev) +{ + return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100); +} + +static void host_ss_rst_clr(struct ivpu_device *vdev) +{ + u32 val = 0; + + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val); + + REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val); +} + +static int host_ss_noc_qreqn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qreqn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qreqn_check_37xx(vdev, exp_val); + else + return host_ss_noc_qreqn_check_40xx(vdev, exp_val); +} + +static int host_ss_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qacceptn_check_37xx(vdev, exp_val); + else + return host_ss_noc_qacceptn_check_40xx(vdev, exp_val); +} + +static int host_ss_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY); + 
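+ /* QDENY asserted here means the fabric denied the most recent quiesce request */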
+ if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qdeny_check_37xx(vdev, exp_val); + else + return host_ss_noc_qdeny_check_40xx(vdev, exp_val); +} + +static int top_noc_qreqn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qreqn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qreqn_check_37xx(vdev, exp_val); + else + return top_noc_qreqn_check_40xx(vdev, exp_val); +} + +int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev) +{ + int ret; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + ret = wait_for_ip_bar(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for NPU IP bar\n"); + return ret; + } + host_ss_rst_clr(vdev); + } + + ret = host_ss_noc_qreqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qreqn check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qacceptn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static void idle_gen_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, val); +} + +static void idle_gen_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val); +} + +void ivpu_hw_ip_idle_gen_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + idle_gen_drive_37xx(vdev, true); + else + idle_gen_drive_40xx(vdev, true); +} + +void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + idle_gen_drive_37xx(vdev, false); + else + idle_gen_drive_40xx(vdev, false); +} + +static void +pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status) +{ + u32 val; + + val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val); + REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val); + + val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY); + val =
REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, STATUS_DLY, status, val);
+	REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, val);
+}
+
+static void pwr_island_trickle_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
+
+	REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
+}
+
+static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
+
+	REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
+}
+
+static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
+
+	REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
+}
+
+static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
+
+	REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
+}
+
+static void pwr_island_enable(struct ivpu_device *vdev)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+		pwr_island_trickle_drive_37xx(vdev, true);
+		ndelay(500);
+		pwr_island_drive_37xx(vdev, true);
+	} else {
+		pwr_island_trickle_drive_40xx(vdev, true);
+		ndelay(500);
+		pwr_island_drive_40xx(vdev, true);
+	}
+}
+
+static int wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
+{
+	if (IVPU_WA(punit_disabled))
+		return 0;
+
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, exp_val,
+				     PWR_ISLAND_STATUS_TIMEOUT_US);
+	else
+		return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU, exp_val,
+				     PWR_ISLAND_STATUS_TIMEOUT_US);
+}
+
+static void pwr_island_isolation_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val);
+
+	REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val);
+}
+
+static void pwr_island_isolation_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val);
+
+	REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val);
+}
+
+static void pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		pwr_island_isolation_drive_37xx(vdev, enable);
+	else
+		pwr_island_isolation_drive_40xx(vdev, enable);
+}
+
+static void pwr_island_isolation_disable(struct ivpu_device *vdev)
+{
+	pwr_island_isolation_drive(vdev, false);
+}
+
+static void 
host_ss_clk_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } + + REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val); +} + +static void host_ss_clk_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); + } + + REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val); +} + +static void host_ss_clk_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_clk_drive_37xx(vdev, enable); + else + host_ss_clk_drive_40xx(vdev, enable); +} + +static void host_ss_clk_enable(struct ivpu_device *vdev) +{ + host_ss_clk_drive(vdev, true); +} + +static void host_ss_rst_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } + + REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val); +} + +static void host_ss_rst_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); + } + + REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val); +} + +static void host_ss_rst_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_rst_drive_37xx(vdev, enable); + else + host_ss_rst_drive_40xx(vdev, enable); +} + +static void host_ss_rst_enable(struct ivpu_device *vdev) +{ + host_ss_rst_drive(vdev, true); +} + +static void host_ss_noc_qreqn_top_socmmio_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val); +} + +static void host_ss_noc_qreqn_top_socmmio_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); + + if (enable) + val = 
REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val); +} + +static void host_ss_noc_qreqn_top_socmmio_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_noc_qreqn_top_socmmio_drive_37xx(vdev, enable); + else + host_ss_noc_qreqn_top_socmmio_drive_40xx(vdev, enable); +} + +static int host_ss_axi_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + + host_ss_noc_qreqn_top_socmmio_drive(vdev, enable); + + ret = host_ss_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed HOST SS NOC QACCEPTN check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed HOST SS NOC QDENY check: %d\n", ret); + + return ret; +} + +static void top_noc_qreqn_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + + REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val); +} + +static void top_noc_qreqn_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + + REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val); +} + +static void top_noc_qreqn_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + top_noc_qreqn_drive_37xx(vdev, enable); + else + top_noc_qreqn_drive_40xx(vdev, enable); +} + +int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev) +{ + return host_ss_axi_drive(vdev, true); +} + +static int top_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qacceptn_check_37xx(vdev, exp_val); + else + return top_noc_qacceptn_check_40xx(vdev, exp_val); +} + +static int top_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + 
!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qdeny_check_37xx(vdev, exp_val); + else + return top_noc_qdeny_check_40xx(vdev, exp_val); +} + +static int top_noc_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + + top_noc_qreqn_drive(vdev, enable); + + ret = top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed TOP NOC QACCEPTN check: %d\n", ret); + return ret; + } + + ret = top_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed TOP NOC QDENY check: %d\n", ret); + + return ret; +} + +int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev) +{ + return top_noc_drive(vdev, true); +} + +static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val); +} + +static void pwr_island_delay_set(struct ivpu_device *vdev) +{ + bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH; + u32 post, post1, post2, status; + + if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX) + return; + + switch (ivpu_device_id(vdev)) { + case PCI_DEVICE_ID_WCL: + case PCI_DEVICE_ID_PTL_P: + post = high ? 18 : 0; + post1 = 0; + post2 = 0; + status = high ? 46 : 3; + break; + + case PCI_DEVICE_ID_NVL: + post = high ? 198 : 17; + post1 = 0; + post2 = high ? 198 : 17; + status = 0; + break; + + default: + dump_stack(); + ivpu_err(vdev, "Unknown device ID\n"); + return; + } + + pwr_island_delay_set_50xx(vdev, post, post1, post2, status); +} + +int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev) +{ + int ret; + + pwr_island_delay_set(vdev); + pwr_island_enable(vdev); + + ret = wait_for_pwr_island_status(vdev, 0x1); + if (ret) { + ivpu_err(vdev, "Timed out waiting for power island status\n"); + return ret; + } + + ret = top_noc_qreqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed TOP NOC QREQN check %d\n", ret); + return ret; + } + + host_ss_clk_enable(vdev); + pwr_island_isolation_disable(vdev); + host_ss_rst_enable(vdev); + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + dpu_active_drive_37xx(vdev, true); + + return ret; +} + +u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT); + else + return REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT); +} + +static void ivpu_hw_ip_snoop_disable_37xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + + if (ivpu_is_force_snoop_enabled(vdev)) + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + else + val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + + REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +static void ivpu_hw_ip_snoop_disable_40xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); + val = 
REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); + + if (ivpu_is_force_snoop_enabled(vdev)) + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); + + REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ivpu_hw_ip_snoop_disable_37xx(vdev); + else + return ivpu_hw_ip_snoop_disable_40xx(vdev); +} + +static void ivpu_hw_ip_tbu_mmu_enable_37xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV); + + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + + REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val); +} + +static void ivpu_hw_ip_tbu_mmu_enable_40xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV); + + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + + REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val); +} + +void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ivpu_hw_ip_tbu_mmu_enable_37xx(vdev); + else + return ivpu_hw_ip_tbu_mmu_enable_40xx(vdev); +} + +static int soc_cpu_boot_37xx(struct ivpu_device *vdev) +{ + u32 val; + + val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); + + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = vdev->fw->entry_point >> 9; + REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); + + val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? 
"cold boot" : "resume"); + + return 0; +} + +static int cpu_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int cpu_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static void cpu_noc_top_mmio_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); + else + val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); + REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val); +} + +static int soc_cpu_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + int ret; + + cpu_noc_top_mmio_drive_40xx(vdev, enable); + + ret = cpu_noc_qacceptn_check_40xx(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = cpu_noc_qdeny_check_40xx(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int soc_cpu_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_60XX) + return 0; + + return soc_cpu_drive_40xx(vdev, true); +} + +static int soc_cpu_boot_40xx(struct ivpu_device *vdev) +{ + int ret; + u32 val; + u64 val64; + + ret = soc_cpu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret); + return ret; + } + + val64 = vdev->fw->entry_point; + val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; + REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); + + val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); + val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + ivpu_fw_is_cold_boot(vdev) ? 
"cold boot" : "resume"); + + return 0; +} + +int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return soc_cpu_boot_37xx(vdev); + else + return soc_cpu_boot_40xx(vdev); +} + +static void wdt_disable_37xx(struct ivpu_device *vdev) +{ + u32 val; + + /* Enable writing and set non-zero WDT value */ + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + /* Enable writing and disable watchdog timer */ + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0); + + /* Now clear the timeout interrupt */ + val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); +} + +static void wdt_disable_40xx(struct ivpu_device *vdev) +{ + u32 val; + + REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0); + + val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val); +} + +void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return wdt_disable_37xx(vdev); + else + return wdt_disable_40xx(vdev); +} + +static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev) +{ + u32 count = readl(vdev->regv + VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev) +{ + u32 count = readl(vdev->regv + VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ipc_rx_count_get_37xx(vdev); + else + return ipc_rx_count_get_40xx(vdev); +} + +void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_37XX); + REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_37XX); + } else { + REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_40XX); + REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_40XX); + } +} + +void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0); + } else { + REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul); + } +} + +static void diagnose_failure_37xx(struct ivpu_device *vdev) +{ + u32 reg = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX; + + if (ipc_rx_count_get_37xx(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); +} + +static void 
diagnose_failure_40xx(struct ivpu_device *vdev) +{ + u32 reg = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX; + + if (ipc_rx_count_get_40xx(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); +} + +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + diagnose_failure_37xx(vdev); + else + diagnose_failure_40xx(vdev); +} + +void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_37XX); + else + REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_40XX); +} + +static void irq_wdt_nce_handler(struct ivpu_device *vdev) +{ + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); +} + +static void irq_wdt_mss_handler(struct ivpu_device *vdev) +{ + ivpu_hw_ip_wdt_disable(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); +} + +static void irq_noc_firewall_handler(struct ivpu_device *vdev) +{ + atomic_inc(&vdev->hw->firewall_irq_counter); + + ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n", + atomic_read(&vdev->hw->firewall_irq_counter)); +} + +/* Handler for IRQs from NPU core */ +bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX; + + if (!status) + return false; + + REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) + irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + irq_noc_firewall_handler(vdev); + + return true; +} + +/* Handler for IRQs from NPU core */ +bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX; + + if (!status) + return false; + + REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, 
CPU_INT_REDIRECT_1_INT, status))
+		irq_wdt_nce_handler(vdev);
+
+	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
+		irq_noc_firewall_handler(vdev);
+
+	return true;
+}
+
+static void db_set_37xx(struct ivpu_device *vdev, u32 db_id)
+{
+	u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0;
+	u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET);
+
+	REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+}
+
+static void db_set_40xx(struct ivpu_device *vdev, u32 db_id)
+{
+	u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0;
+	u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET);
+
+	REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+}
+
+void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		db_set_37xx(vdev, db_id);
+	else
+		db_set_40xx(vdev, db_id);
+}
+
+u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM);
+	else
+		return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM);
+}
+
+void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+	else
+		REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+}
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.h b/drivers/accel/ivpu/ivpu_hw_ip.h
new file mode 100644
index 000000000000..5b1b391aa577
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_ip.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_IP_H__
+#define __IVPU_HW_IP_H__
+
+#include "ivpu_drv.h"
+
+int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev);
+void ivpu_hw_ip_idle_gen_enable(struct ivpu_device *vdev);
+void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev);
+int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev);
+int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev);
+int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev);
+u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev);
+void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev);
+void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev);
+int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev);
+void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev);
+void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev);
+u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev);
+void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev);
+bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq);
+bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq);
+void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id);
+u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev);
+void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr);
+void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev);
+void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev);
+void ivpu_hw_ip_fabric_req_override_enable_50xx(struct ivpu_device *vdev);
+void ivpu_hw_ip_fabric_req_override_disable_50xx(struct ivpu_device *vdev);
+
+#endif /* __IVPU_HW_IP_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c
deleted file mode 100644
index 62bfaa9081c4..000000000000
--- a/drivers/accel/ivpu/ivpu_hw_mtl.c
+++ /dev/null
@@ -1,1084 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 
(C) 2020-2023 Intel Corporation - */ - -#include "ivpu_drv.h" -#include "ivpu_fw.h" -#include "ivpu_hw_mtl_reg.h" -#include "ivpu_hw_reg_io.h" -#include "ivpu_hw.h" -#include "ivpu_ipc.h" -#include "ivpu_mmu.h" -#include "ivpu_pm.h" - -#define TILE_FUSE_ENABLE_BOTH 0x0 -#define TILE_FUSE_ENABLE_UPPER 0x1 -#define TILE_FUSE_ENABLE_LOWER 0x2 - -#define TILE_SKU_BOTH_MTL 0x3630 -#define TILE_SKU_LOWER_MTL 0x3631 -#define TILE_SKU_UPPER_MTL 0x3632 - -/* Work point configuration values */ -#define WP_CONFIG_1_TILE_5_3_RATIO 0x0101 -#define WP_CONFIG_1_TILE_4_3_RATIO 0x0102 -#define WP_CONFIG_2_TILE_5_3_RATIO 0x0201 -#define WP_CONFIG_2_TILE_4_3_RATIO 0x0202 -#define WP_CONFIG_0_TILE_PLL_OFF 0x0000 - -#define PLL_REF_CLK_FREQ (50 * 1000000) -#define PLL_SIMULATION_FREQ (10 * 1000000) -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) -#define PLL_DEFAULT_EPP_VALUE 0x80 - -#define TIM_SAFE_ENABLE 0xf1d0dead -#define TIM_WATCHDOG_RESET_VALUE 0xffffffff - -#define TIMEOUT_US (150 * USEC_PER_MSEC) -#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) -#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) -#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC) - -#define ICB_0_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) - -#define ICB_1_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ - (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) - -#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) - -#define BUTTRESS_IRQ_MASK ((REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ - (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ - (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) - -#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) -#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) - -#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ - (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ - (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ - (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ - (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ - (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ - (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) - -static char *ivpu_platform_to_str(u32 platform) -{ - switch (platform) { - case IVPU_PLATFORM_SILICON: - return "IVPU_PLATFORM_SILICON"; - case IVPU_PLATFORM_SIMICS: - return "IVPU_PLATFORM_SIMICS"; - case IVPU_PLATFORM_FPGA: - return "IVPU_PLATFORM_FPGA"; - default: - return "Invalid platform"; - } -} - -static void ivpu_hw_read_platform(struct ivpu_device *vdev) -{ - u32 gen_ctrl = REGV_RD32(MTL_VPU_HOST_SS_GEN_CTRL); - u32 platform = REG_GET_FLD(MTL_VPU_HOST_SS_GEN_CTRL, PS, gen_ctrl); - - if (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA) - vdev->platform = platform; - else - vdev->platform = IVPU_PLATFORM_SILICON; - - ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", - ivpu_platform_to_str(vdev->platform), vdev->platform); -} - -static void ivpu_hw_wa_init(struct ivpu_device *vdev) -{ - vdev->wa.punit_disabled = ivpu_is_fpga(vdev); - 
vdev->wa.clear_runtime_mem = false; -} - -static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) -{ - if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) { - vdev->timeout.boot = 100000; - vdev->timeout.jsm = 50000; - vdev->timeout.tdr = 2000000; - vdev->timeout.reschedule_suspend = 1000; - } else { - vdev->timeout.boot = 1000; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 2000; - vdev->timeout.reschedule_suspend = 10; - } -} - -static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); -} - -/* Send KMD initiated workpoint change */ -static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, - u16 target_ratio, u16 config) -{ - int ret; - u32 val; - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) { - ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); - return ret; - } - - val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD0); - val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); - val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); - REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD0, val); - - val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD1); - val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); - val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val); - REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD1, val); - - val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD2); - val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); - REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD2, val); - - val = REGB_RD32(MTL_BUTTRESS_WP_REQ_CMD); - val = REG_SET_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, val); - REGB_WR32(MTL_BUTTRESS_WP_REQ_CMD, val); - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) - ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); - - return ret; -} - -static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable) -{ - u32 exp_val = enable ? 
0x1 : 0x0; - - if (IVPU_WA(punit_disabled)) - return 0; - - return REGB_POLL_FLD(MTL_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); -} - -static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) -{ - if (IVPU_WA(punit_disabled)) - return 0; - - return REGB_POLL_FLD(MTL_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); -} - -static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u8 fuse_min_ratio, fuse_max_ratio, fuse_pn_ratio; - u32 fmin_fuse, fmax_fuse; - - fmin_fuse = REGB_RD32(MTL_BUTTRESS_FMIN_FUSE); - fuse_min_ratio = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); - fuse_pn_ratio = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); - - fmax_fuse = REGB_RD32(MTL_BUTTRESS_FMAX_FUSE); - fuse_max_ratio = REG_GET_FLD(MTL_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); - - hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); - hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); - hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); -} - -static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) -{ - struct ivpu_hw_info *hw = vdev->hw; - u16 target_ratio; - u16 config; - int ret; - - if (IVPU_WA(punit_disabled)) { - ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n", - ivpu_platform_to_str(vdev->platform)); - return 0; - } - - if (enable) { - target_ratio = hw->pll.pn_ratio; - config = hw->config; - } else { - target_ratio = 0; - config = 0; - } - - ivpu_dbg(vdev, PM, "PLL workpoint request: %d Hz\n", PLL_RATIO_TO_FREQ(target_ratio)); - - ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config); - if (ret) { - ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); - return ret; - } - - ret = ivpu_pll_wait_for_lock(vdev, enable); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL lock\n"); - return ret; - } - - if (enable) { - ret = ivpu_pll_wait_for_status_ready(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); - return ret; - } - } - - return 0; -} - -static int ivpu_pll_enable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, true); -} - -static int ivpu_pll_disable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, false); -} - -static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_CLR); - - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, TOP_NOC, val); - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, DSS_MAS, val); - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, MSS_MAS, val); - - REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_CLR, val); -} - -static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_SET); - - if (enable) { - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, val); - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, val); - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, val); - } else { - val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, val); - val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, val); - val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, val); - } - - REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_SET, val); -} - -static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_CLK_SET); - - if (enable) { - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, val); 
- val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, val); - val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, val); - } else { - val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, val); - val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, val); - val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, val); - } - - REGV_WR32(MTL_VPU_HOST_SS_CPR_CLK_SET, val); -} - -static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev) -{ - ivpu_boot_host_ss_rst_clr_assert(vdev); - - return ivpu_boot_noc_qreqn_check(vdev, 0x0); -} - -static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev) -{ - REGV_WR32(MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN, 0x0); -} - -static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN); - if (enable) - val = REG_SET_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - else - val = REG_CLR_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - REGV_WR32(MTL_VPU_HOST_SS_NOC_QREQN, val); - - ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 
0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, true); -} - -static int ivpu_boot_host_ss_axi_disable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, false); -} - -static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); - if (enable) { - val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } else { - val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } - REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val); - - ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, true); -} - -static int ivpu_boot_host_ss_top_noc_disable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, false); -} - -static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); - - if (enable) - val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); - - REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); -} - -static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0); - - if (enable) - val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); - - REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, val); -} - -static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) -{ - /* FPGA model (UPF) is not power aware, skipped Power Island polling */ - if (ivpu_is_fpga(vdev)) - return 0; - - return REGV_POLL_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, - exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); -} - -static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0); - - if (enable) - val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); - - REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, val); -} - -static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE); - - if (enable) - val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); - else - val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); - - REGV_WR32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, val); -} - -static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) -{ - ivpu_boot_dpu_active_drive(vdev, false); - ivpu_boot_pwr_island_isolation_drive(vdev, true); - ivpu_boot_pwr_island_trickle_drive(vdev, false); - 
ivpu_boot_pwr_island_drive(vdev, false); - - return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); -} - -static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) -{ - int ret; - - ivpu_boot_pwr_island_trickle_drive(vdev, true); - ivpu_boot_pwr_island_drive(vdev, true); - - ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); - if (ret) { - ivpu_err(vdev, "Timed out waiting for power island status\n"); - return ret; - } - - ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qrenqn check %d\n", ret); - return ret; - } - - ivpu_boot_host_ss_clk_drive(vdev, true); - ivpu_boot_pwr_island_isolation_drive(vdev, false); - ivpu_boot_host_ss_rst_drive(vdev, true); - ivpu_boot_dpu_active_drive(vdev, true); - - return ret; -} - -static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES); - - val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); - - REGV_WR32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, val); -} - -static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(MTL_VPU_HOST_IF_TBU_MMUSSIDV); - - if (ivpu_is_fpga(vdev)) { - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); - } else { - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_AWMMUSSIDV, val); - val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_ARMMUSSIDV, val); - } - - REGV_WR32(MTL_VPU_HOST_IF_TBU_MMUSSIDV, val); -} - -static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) -{ - u32 val; - - val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); - val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); - - val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); - REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = vdev->fw->entry_point >> 9; - REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val); - - val = REG_SET_FLD(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, DONE, val); - REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val); - - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? 
"cold boot" : "resume"); -} - -static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); - return ret; - } - - val = REGB_RD32(MTL_BUTTRESS_VPU_D0I3_CONTROL); - if (enable) - val = REG_SET_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, val); - else - val = REG_CLR_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, val); - REGB_WR32(MTL_BUTTRESS_VPU_D0I3_CONTROL, val); - - ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); - - return ret; -} - -static int ivpu_hw_mtl_info_init(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u32 tile_fuse; - - tile_fuse = REGB_RD32(MTL_BUTTRESS_TILE_FUSE); - if (!REG_TEST_FLD(MTL_BUTTRESS_TILE_FUSE, VALID, tile_fuse)) - ivpu_warn(vdev, "Tile Fuse: Invalid (0x%x)\n", tile_fuse); - - hw->tile_fuse = REG_GET_FLD(MTL_BUTTRESS_TILE_FUSE, SKU, tile_fuse); - switch (hw->tile_fuse) { - case TILE_FUSE_ENABLE_LOWER: - hw->sku = TILE_SKU_LOWER_MTL; - hw->config = WP_CONFIG_1_TILE_5_3_RATIO; - ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Lower\n"); - break; - case TILE_FUSE_ENABLE_UPPER: - hw->sku = TILE_SKU_UPPER_MTL; - hw->config = WP_CONFIG_1_TILE_4_3_RATIO; - ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Upper\n"); - break; - case TILE_FUSE_ENABLE_BOTH: - hw->sku = TILE_SKU_BOTH_MTL; - hw->config = WP_CONFIG_2_TILE_5_3_RATIO; - ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Both\n"); - break; - default: - hw->config = WP_CONFIG_0_TILE_PLL_OFF; - ivpu_dbg(vdev, MISC, "Tile Fuse: Disable\n"); - break; - } - - ivpu_pll_init_frequency_ratios(vdev); - - ivpu_hw_init_range(&hw->ranges.global_low, 0x80000000, SZ_512M); - ivpu_hw_init_range(&hw->ranges.global_high, 0x180000000, SZ_2M); - ivpu_hw_init_range(&hw->ranges.user_low, 0xc0000000, 255 * SZ_1M); - ivpu_hw_init_range(&hw->ranges.user_high, 0x180000000, SZ_2G); - hw->ranges.global_aliased_pio = hw->ranges.user_low; - - return 0; -} - -static int ivpu_hw_mtl_reset(struct ivpu_device *vdev) -{ - int ret; - u32 val; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); - return ret; - } - - val = REGB_RD32(MTL_BUTTRESS_VPU_IP_RESET); - val = REG_SET_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, val); - REGB_WR32(MTL_BUTTRESS_VPU_IP_RESET, val); - - ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); - - return ret; -} - -static int ivpu_hw_mtl_d0i3_enable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_d0i3_drive(vdev, true); - if (ret) - ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); - - udelay(5); /* VPU requires 5 us to complete the transition */ - - return ret; -} - -static int ivpu_hw_mtl_d0i3_disable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_d0i3_drive(vdev, false); - if (ret) - ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); - - return ret; -} - -static int ivpu_hw_mtl_power_up(struct ivpu_device *vdev) -{ - int ret; - - ivpu_hw_read_platform(vdev); - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - - ret = ivpu_hw_mtl_reset(vdev); - if (ret) - ivpu_warn(vdev, "Failed to reset HW: %d\n", ret); - - ret = ivpu_hw_mtl_d0i3_disable(vdev); - if (ret) - 
ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); - - ret = ivpu_pll_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_configure(vdev); - if (ret) { - ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); - return ret; - } - - /* - * The control circuitry for vpu_idle indication logic powers up active. - * To ensure unnecessary low power mode signal from LRT during bring up, - * KMD disables the circuitry prior to bringing up the Main Power island. - */ - ivpu_boot_vpu_idle_gen_disable(vdev); - - ret = ivpu_boot_pwr_domain_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_axi_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_top_noc_enable(vdev); - if (ret) - ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); - - return ret; -} - -static int ivpu_hw_mtl_boot_fw(struct ivpu_device *vdev) -{ - ivpu_boot_no_snoop_enable(vdev); - ivpu_boot_tbu_mmu_enable(vdev); - ivpu_boot_soc_cpu_boot(vdev); - - return 0; -} - -static bool ivpu_hw_mtl_is_idle(struct ivpu_device *vdev) -{ - u32 val; - - if (IVPU_WA(punit_disabled)) - return true; - - val = REGB_RD32(MTL_BUTTRESS_VPU_STATUS); - return REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, READY, val) && - REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, IDLE, val); -} - -static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev) -{ - int ret = 0; - - /* FPGA requires manual clearing of IP_Reset bit by enabling quiescent state */ - if (ivpu_is_fpga(vdev)) { - if (ivpu_boot_host_ss_top_noc_disable(vdev)) { - ivpu_err(vdev, "Failed to disable TOP NOC\n"); - ret = -EIO; - } - - if (ivpu_boot_host_ss_axi_disable(vdev)) { - ivpu_err(vdev, "Failed to disable AXI\n"); - ret = -EIO; - } - } - - if (ivpu_boot_pwr_domain_disable(vdev)) { - ivpu_err(vdev, "Failed to disable power domain\n"); - ret = -EIO; - } - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - if (ivpu_hw_mtl_d0i3_enable(vdev)) - ivpu_warn(vdev, "Failed to enable D0I3\n"); - - return ret; -} - -static void ivpu_hw_mtl_wdt_disable(struct ivpu_device *vdev) -{ - u32 val; - - /* Enable writing and set non-zero WDT value */ - REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); - - /* Enable writing and disable watchdog timer */ - REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0); - - /* Now clear the timeout interrupt */ - val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG); - val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); - REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val); -} - -/* Register indirect accesses */ -static u32 ivpu_hw_mtl_reg_pll_freq_get(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(MTL_BUTTRESS_CURRENT_PLL); - pll_curr_ratio &= MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -static u32 ivpu_hw_mtl_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_OFFSET); -} - -static u32 ivpu_hw_mtl_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_SIZE); -} - -static u32 ivpu_hw_mtl_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return 
REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_ENABLE); -} - -static void ivpu_hw_mtl_reg_db_set(struct ivpu_device *vdev, u32 db_id) -{ - u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0; - u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET); - - REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val); -} - -static u32 ivpu_hw_mtl_reg_ipc_rx_addr_get(struct ivpu_device *vdev) -{ - return REGV_RD32(MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM); -} - -static u32 ivpu_hw_mtl_reg_ipc_rx_count_get(struct ivpu_device *vdev) -{ - u32 count = REGV_RD32_SILENT(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT); - - return REG_GET_FLD(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); -} - -static void ivpu_hw_mtl_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) -{ - REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr); -} - -static void ivpu_hw_mtl_irq_clear(struct ivpu_device *vdev) -{ - REGV_WR64(MTL_VPU_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); -} - -static void ivpu_hw_mtl_irq_enable(struct ivpu_device *vdev) -{ - REGV_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); - REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); - REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); - REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); -} - -static void ivpu_hw_mtl_irq_disable(struct ivpu_device *vdev) -{ - REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1); - REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); - REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, 0x0ull); - REGB_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, 0x0); -} - -static void ivpu_hw_mtl_irq_wdt_nce_handler(struct ivpu_device *vdev) -{ - ivpu_err_ratelimited(vdev, "WDT NCE irq\n"); - - ivpu_pm_schedule_recovery(vdev); -} - -static void ivpu_hw_mtl_irq_wdt_mss_handler(struct ivpu_device *vdev) -{ - ivpu_err_ratelimited(vdev, "WDT MSS irq\n"); - - ivpu_hw_wdt_disable(vdev); - ivpu_pm_schedule_recovery(vdev); -} - -static void ivpu_hw_mtl_irq_noc_firewall_handler(struct ivpu_device *vdev) -{ - ivpu_err_ratelimited(vdev, "NOC Firewall irq\n"); - - ivpu_pm_schedule_recovery(vdev); -} - -/* Handler for IRQs from VPU core (irqV) */ -static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) -{ - u32 status = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - - REGV_WR32(MTL_VPU_HOST_SS_ICB_CLEAR_0, status); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) - ivpu_mmu_irq_evtq_handler(vdev); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) - ivpu_ipc_irq_handler(vdev); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) - ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) - ivpu_mmu_irq_gerr_handler(vdev); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) - ivpu_hw_mtl_irq_wdt_mss_handler(vdev); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) - ivpu_hw_mtl_irq_wdt_nce_handler(vdev); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) - ivpu_hw_mtl_irq_noc_firewall_handler(vdev); - - return status; -} - -/* Handler for IRQs from Buttress core (irqB) */ -static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) -{ - u32 status = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - bool schedule_recovery = false; - - if (status == 0) - return 0; - - /* Disable global interrupt before handling local buttress interrupts */ - 
REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1); - - if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(MTL_BUTTRESS_CURRENT_PLL)); - - if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { - ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); - REGB_WR32(MTL_BUTTRESS_ATS_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) { - u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG); - - ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", - ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), - REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), - REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); - REGB_WR32(MTL_BUTTRESS_UFI_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - /* - * Clear local interrupt status by writing 0 to all bits. - * This must be done after interrupts are cleared at the source. - * Writing 1 triggers an interrupt, so we can't perform read update write. - */ - REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, 0x0); - - /* Re-enable global interrupt */ - REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); - - if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); - - return status; -} - -static irqreturn_t ivpu_hw_mtl_irq_handler(int irq, void *ptr) -{ - struct ivpu_device *vdev = ptr; - u32 ret_irqv, ret_irqb; - - ret_irqv = ivpu_hw_mtl_irqv_handler(vdev, irq); - ret_irqb = ivpu_hw_mtl_irqb_handler(vdev, irq); - - return IRQ_RETVAL(ret_irqb | ret_irqv); -} - -static void ivpu_hw_mtl_diagnose_failure(struct ivpu_device *vdev) -{ - u32 irqv = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - u32 irqb = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (ivpu_hw_mtl_reg_ipc_rx_count_get(vdev)) - ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) - ivpu_err(vdev, "WDT MSS timeout detected\n"); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) - ivpu_err(vdev, "WDT NCE timeout detected\n"); - - if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) - ivpu_err(vdev, "NOC Firewall irq detected\n"); - - if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) - ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); - - if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) { - u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG); - - ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", - ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), - REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), - REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); - } -} - -const struct ivpu_hw_ops ivpu_hw_mtl_ops = { - .info_init = ivpu_hw_mtl_info_init, - .power_up = ivpu_hw_mtl_power_up, - .is_idle = ivpu_hw_mtl_is_idle, - .power_down = ivpu_hw_mtl_power_down, - .boot_fw = ivpu_hw_mtl_boot_fw, - .wdt_disable = ivpu_hw_mtl_wdt_disable, - .diagnose_failure = ivpu_hw_mtl_diagnose_failure, - .reg_pll_freq_get = ivpu_hw_mtl_reg_pll_freq_get, - .reg_telemetry_offset_get = ivpu_hw_mtl_reg_telemetry_offset_get, - .reg_telemetry_size_get = ivpu_hw_mtl_reg_telemetry_size_get, - .reg_telemetry_enable_get = ivpu_hw_mtl_reg_telemetry_enable_get, - .reg_db_set = ivpu_hw_mtl_reg_db_set, - .reg_ipc_rx_addr_get = ivpu_hw_mtl_reg_ipc_rx_addr_get, - 
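
ivpu_hw_mtl_irq_handler() above fans the shared interrupt line out to the VPU-core and buttress sub-handlers and reports the line as handled if either one saw a pending source. A condensed sketch of that combine-and-return pattern; the device type and the stubbed sub-handlers are illustrative:

#include <linux/interrupt.h>
#include <linux/types.h>

struct my_dev {
	void __iomem *regs;
};

/* Each sub-handler returns the status bits it serviced (stubbed here) */
static u32 handle_core_sources(struct my_dev *d) { return 0; }
static u32 handle_buttress_sources(struct my_dev *d) { return 0; }

static irqreturn_t my_irq_handler(int irq, void *ptr)
{
	struct my_dev *d = ptr;
	u32 v = handle_core_sources(d);
	u32 b = handle_buttress_sources(d);

	/* Non-zero status from either source means the IRQ was ours */
	return IRQ_RETVAL(v | b);
}
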
.reg_ipc_rx_count_get = ivpu_hw_mtl_reg_ipc_rx_count_get, - .reg_ipc_tx_set = ivpu_hw_mtl_reg_ipc_tx_set, - .irq_clear = ivpu_hw_mtl_irq_clear, - .irq_enable = ivpu_hw_mtl_irq_enable, - .irq_disable = ivpu_hw_mtl_irq_disable, - .irq_handler = ivpu_hw_mtl_irq_handler, -}; diff --git a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h deleted file mode 100644 index d83ccfd9a871..000000000000 --- a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h +++ /dev/null @@ -1,280 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2020-2023 Intel Corporation - */ - -#ifndef __IVPU_HW_MTL_REG_H__ -#define __IVPU_HW_MTL_REG_H__ - -#include <linux/bits.h> - -#define MTL_BUTTRESS_INTERRUPT_TYPE 0x00000000u - -#define MTL_BUTTRESS_INTERRUPT_STAT 0x00000004u -#define MTL_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) -#define MTL_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) -#define MTL_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) - -#define MTL_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u -#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) -#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) - -#define MTL_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu -#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) -#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) - -#define MTL_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u -#define MTL_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) - -#define MTL_BUTTRESS_WP_REQ_CMD 0x00000014u -#define MTL_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) - -#define MTL_BUTTRESS_WP_DOWNLOAD 0x00000018u -#define MTL_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) - -#define MTL_BUTTRESS_CURRENT_PLL 0x0000001cu -#define MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) - -#define MTL_BUTTRESS_PLL_ENABLE 0x00000020u - -#define MTL_BUTTRESS_FMIN_FUSE 0x00000024u -#define MTL_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) -#define MTL_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) - -#define MTL_BUTTRESS_FMAX_FUSE 0x00000028u -#define MTL_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) - -#define MTL_BUTTRESS_TILE_FUSE 0x0000002cu -#define MTL_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) -#define MTL_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) - -#define MTL_BUTTRESS_LOCAL_INT_MASK 0x00000030u -#define MTL_BUTTRESS_GLOBAL_INT_MASK 0x00000034u - -#define MTL_BUTTRESS_PLL_STATUS 0x00000040u -#define MTL_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1) - -#define MTL_BUTTRESS_VPU_STATUS 0x00000044u -#define MTL_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) -#define MTL_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) - -#define MTL_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u -#define MTL_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) -#define MTL_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) - -#define MTL_BUTTRESS_VPU_IP_RESET 0x00000050u -#define MTL_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) - -#define MTL_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u -#define MTL_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u -#define MTL_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u - -#define MTL_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u -#define MTL_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u -#define MTL_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u - -#define MTL_BUTTRESS_UFI_ERR_LOG 0x000000b0u -#define MTL_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) -#define MTL_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) -#define MTL_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) - -#define MTL_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u - -#define MTL_VPU_HOST_SS_CPR_CLK_SET 
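
The header being deleted here encodes every register as a bare offset plus GENMASK()/BIT_MASK() field masks named <REG>_<FIELD>_MASK, which is what lets the REG_*_FLD macros synthesize accesses by token pasting. A small example of consuming such defines with FIELD_GET(); the FOO_* names are hypothetical, though the bit layout mirrors the UFI error log above:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/printk.h>

#define FOO_ERR_LOG             0x00b0u
#define FOO_ERR_LOG_CQ_ID_MASK  GENMASK(11, 0)
#define FOO_ERR_LOG_AXI_ID_MASK GENMASK(19, 12)
#define FOO_ERR_LOG_OPCODE_MASK GENMASK(24, 20)

static void decode_err_log(u32 log)
{
	/* FIELD_GET() shifts each field down by its mask's low bit */
	pr_err("err log: opcode 0x%02lx axi_id 0x%02lx cq_id 0x%03lx\n",
	       FIELD_GET(FOO_ERR_LOG_OPCODE_MASK, log),
	       FIELD_GET(FOO_ERR_LOG_AXI_ID_MASK, log),
	       FIELD_GET(FOO_ERR_LOG_CQ_ID_MASK, log));
}
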
0x00000084u -#define MTL_VPU_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1) -#define MTL_VPU_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) -#define MTL_VPU_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK BIT_MASK(11) - -#define MTL_VPU_HOST_SS_CPR_RST_SET 0x00000094u -#define MTL_VPU_HOST_SS_CPR_RST_SET_TOP_NOC_MASK BIT_MASK(1) -#define MTL_VPU_HOST_SS_CPR_RST_SET_DSS_MAS_MASK BIT_MASK(10) -#define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11) - -#define MTL_VPU_HOST_SS_CPR_RST_CLR 0x00000098u -#define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1) -#define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10) -#define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11) - -#define MTL_VPU_HOST_SS_HW_VERSION 0x00000108u -#define MTL_VPU_HOST_SS_HW_VERSION_SOC_REVISION_MASK GENMASK(7, 0) -#define MTL_VPU_HOST_SS_HW_VERSION_SOC_NUMBER_MASK GENMASK(15, 8) -#define MTL_VPU_HOST_SS_HW_VERSION_VPU_GENERATION_MASK GENMASK(23, 16) - -#define MTL_VPU_HOST_SS_GEN_CTRL 0x00000118u -#define MTL_VPU_HOST_SS_GEN_CTRL_PS_MASK GENMASK(31, 29) - -#define MTL_VPU_HOST_SS_NOC_QREQN 0x00000154u -#define MTL_VPU_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK BIT_MASK(0) - -#define MTL_VPU_HOST_SS_NOC_QACCEPTN 0x00000158u -#define MTL_VPU_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK BIT_MASK(0) - -#define MTL_VPU_HOST_SS_NOC_QDENY 0x0000015cu -#define MTL_VPU_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0) - -#define MTL_VPU_TOP_NOC_QREQN 0x00000160u -#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1) - -#define MTL_VPU_TOP_NOC_QACCEPTN 0x00000164u -#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1) - -#define MTL_VPU_TOP_NOC_QDENY 0x00000168u -#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1) - -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN 0x00000170u -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0) -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK BIT_MASK(1) -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK BIT_MASK(2) -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK BIT_MASK(3) -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK BIT_MASK(4) -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK BIT_MASK(5) -#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK BIT_MASK(6) - -#define MTL_VPU_HOST_SS_ICB_STATUS_0 0x00010210u -#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK BIT_MASK(0) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK BIT_MASK(1) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30) -#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31) - -#define MTL_VPU_HOST_SS_ICB_STATUS_1 0x00010214u -#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK BIT_MASK(0) -#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK BIT_MASK(1) -#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK BIT_MASK(2) - -#define MTL_VPU_HOST_SS_ICB_CLEAR_0 
0x00010220u -#define MTL_VPU_HOST_SS_ICB_CLEAR_1 0x00010224u -#define MTL_VPU_HOST_SS_ICB_ENABLE_0 0x00010240u - -#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM 0x000200f4u - -#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT 0x000200fcu -#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_READ_POINTER_MASK GENMASK(7, 0) -#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_WRITE_POINTER_MASK GENMASK(15, 8) -#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16) -#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK GENMASK(31, 24) - -#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0 0x00030020u -#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK BIT_MASK(3) - -#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u -#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3) - -#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u -#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3) - -#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu -#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK BIT_MASK(3) - -#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN 0x00030200u -#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN_EN_MASK BIT_MASK(0) - -#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE 0x00030204u -#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) - -#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO 0x00041040u -#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_DONE_MASK BIT_MASK(0) -#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) -#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3) - -#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u -#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0) -#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16) - -#define MTL_VPU_HOST_MMU_IDR0 0x00200000u -#define MTL_VPU_HOST_MMU_IDR1 0x00200004u -#define MTL_VPU_HOST_MMU_IDR3 0x0020000cu -#define MTL_VPU_HOST_MMU_IDR5 0x00200014u -#define MTL_VPU_HOST_MMU_CR0 0x00200020u -#define MTL_VPU_HOST_MMU_CR0ACK 0x00200024u -#define MTL_VPU_HOST_MMU_CR1 0x00200028u -#define MTL_VPU_HOST_MMU_CR2 0x0020002cu -#define MTL_VPU_HOST_MMU_IRQ_CTRL 0x00200050u -#define MTL_VPU_HOST_MMU_IRQ_CTRLACK 0x00200054u - -#define MTL_VPU_HOST_MMU_GERROR 0x00200060u -#define MTL_VPU_HOST_MMU_GERROR_CMDQ_MASK BIT_MASK(0) -#define MTL_VPU_HOST_MMU_GERROR_EVTQ_ABT_MASK BIT_MASK(2) -#define MTL_VPU_HOST_MMU_GERROR_PRIQ_ABT_MASK BIT_MASK(3) -#define MTL_VPU_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4) -#define MTL_VPU_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5) -#define MTL_VPU_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6) -#define MTL_VPU_HOST_MMU_GERROR_MSI_ABT_MASK BIT_MASK(7) - -#define MTL_VPU_HOST_MMU_GERRORN 0x00200064u - -#define MTL_VPU_HOST_MMU_STRTAB_BASE 0x00200080u -#define MTL_VPU_HOST_MMU_STRTAB_BASE_CFG 0x00200088u -#define MTL_VPU_HOST_MMU_CMDQ_BASE 0x00200090u -#define MTL_VPU_HOST_MMU_CMDQ_PROD 0x00200098u -#define MTL_VPU_HOST_MMU_CMDQ_CONS 0x0020009cu -#define MTL_VPU_HOST_MMU_EVTQ_BASE 0x002000a0u -#define MTL_VPU_HOST_MMU_EVTQ_PROD 0x002000a8u -#define MTL_VPU_HOST_MMU_EVTQ_CONS 0x002000acu -#define MTL_VPU_HOST_MMU_EVTQ_PROD_SEC (0x002000a8u + SZ_64K) -#define MTL_VPU_HOST_MMU_EVTQ_CONS_SEC (0x002000acu + SZ_64K) - -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK 
BIT_MASK(2) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK BIT_MASK(3) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK BIT_MASK(4) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK BIT_MASK(5) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK GENMASK(10, 6) -#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK GENMASK(15, 11) - -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV 0x00360004u -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK BIT_MASK(0) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK BIT_MASK(1) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK BIT_MASK(2) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK BIT_MASK(3) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK BIT_MASK(4) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK BIT_MASK(5) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK BIT_MASK(6) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK BIT_MASK(7) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8) -#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9) - -#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE 0x04000000u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u - -#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1) - -#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1) - -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4) - -#define MTL_VPU_CPU_SS_TIM_WATCHDOG 0x0602009cu -#define MTL_VPU_CPU_SS_TIM_WDOG_EN 0x060200a4u -#define MTL_VPU_CPU_SS_TIM_SAFE 0x060200a8u -#define MTL_VPU_CPU_SS_TIM_IPC_FIFO 0x060200f0u - -#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG 0x06021008u -#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) - -#define MTL_VPU_CPU_SS_DOORBELL_0 0x06300000u -#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) - -#define MTL_VPU_CPU_SS_DOORBELL_1 0x06301000u - -#endif /* __IVPU_HW_MTL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h index 43c2c0c2d050..66259b0ead02 100644 --- a/drivers/accel/ivpu/ivpu_hw_reg_io.h +++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h @@ -7,6 +7,7 @@ #define __IVPU_HW_REG_IO_H__ #include <linux/bitfield.h> +#include <linux/fault-inject.h> #include <linux/io.h> #include <linux/iopoll.h> @@ -16,13 +17,11 @@ #define REG_IO_ERROR 0xffffffff #define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__) -#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg)) #define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__) #define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__) #define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__) #define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, 
(reg), #reg, __func__) -#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg)) #define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__) #define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__) #define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__) @@ -47,23 +46,42 @@ #define REG_TEST_FLD_NUM(REG, FLD, num, val) \ ((num) == FIELD_GET(REG##_##FLD##_MASK, val)) -#define REGB_POLL(reg, var, cond, timeout_us) \ - read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) +#define REGB_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \ + ivpu_hw_reg_poll_fld(vdev, vdev->regb, reg, reg##_##fld##_MASK, \ + FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \ + __func__, #reg, #fld) -#define REGV_POLL(reg, var, cond, timeout_us) \ - read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) +#define REGV_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \ + ivpu_hw_reg_poll_fld(vdev, vdev->regv, reg, reg##_##fld##_MASK, \ + FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \ + __func__, #reg, #fld) -#define REGB_POLL_FLD(reg, fld, val, timeout_us) \ -({ \ - u32 var; \ - REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ -}) +extern struct fault_attr ivpu_hw_failure; -#define REGV_POLL_FLD(reg, fld, val, timeout_us) \ -({ \ - u32 var; \ - REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ -}) +static inline int __must_check +ivpu_hw_reg_poll_fld(struct ivpu_device *vdev, void __iomem *base, + u32 reg_offset, u32 reg_mask, u32 exp_masked_val, u32 timeout_us, + const char *func_name, const char *reg_name, const char *fld_name) +{ + u32 reg_val; + int ret; + + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s started (exp_val 0x%x)\n", + func_name, reg_name, reg_offset, fld_name, exp_masked_val); + + ret = read_poll_timeout(readl, reg_val, (reg_val & reg_mask) == exp_masked_val, + REG_POLL_SLEEP_US, timeout_us, false, base + reg_offset); + +#ifdef CONFIG_FAULT_INJECTION + if (should_fail(&ivpu_hw_failure, 1)) + ret = -ETIMEDOUT; +#endif + + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s %s (reg_val 0x%08x)\n", + func_name, reg_name, reg_offset, fld_name, ret ? 
"ETIMEDOUT" : "OK", reg_val); + + return ret; +} static inline u32 ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg, @@ -71,7 +89,7 @@ ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg, { u32 val = readl(base + reg); - ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) RD: 0x%08x\n", func, name, reg, val); return val; } @@ -81,7 +99,7 @@ ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg, { u64 val = readq(base + reg); - ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) RD: 0x%016llx\n", func, name, reg, val); return val; } @@ -89,7 +107,7 @@ static inline void ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val, const char *name, const char *func) { - ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) WR: 0x%08x\n", func, name, reg, val); writel(val, base + reg); } @@ -97,7 +115,7 @@ static inline void ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val, const char *name, const char *func) { - ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) WR: 0x%016llx\n", func, name, reg, val); writeq(val, base + reg); } diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 3adcfa80fc0e..1f13bf95b2b3 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/genalloc.h> #include <linux/highmem.h> -#include <linux/kthread.h> +#include <linux/pm_runtime.h> #include <linux/wait.h> #include "ivpu_drv.h" @@ -15,21 +15,15 @@ #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" #define IPC_MAX_RX_MSG 128 -#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD) struct ivpu_ipc_tx_buf { struct ivpu_ipc_hdr ipc; struct vpu_jsm_msg jsm; }; -struct ivpu_ipc_rx_msg { - struct list_head link; - struct ivpu_ipc_hdr *ipc_hdr; - struct vpu_jsm_msg *jsm_msg; -}; - static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c, struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr) { @@ -45,8 +39,9 @@ static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c, u32 *payload = (u32 *)&jsm_msg->payload; ivpu_dbg(vdev, JSM, - "%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n", - c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result, + "%s: vpu:0x%08x (type:%s, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n", + c, vpu_addr, ivpu_jsm_msg_type_to_str(jsm_msg->type), + jsm_msg->status, jsm_msg->request_id, jsm_msg->result, payload[0], payload[1], payload[2], payload[3], payload[4]); } @@ -64,8 +59,8 @@ static void ivpu_ipc_mem_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - ivpu_bo_free_internal(ipc->mem_rx); - ivpu_bo_free_internal(ipc->mem_tx); + ivpu_bo_free(ipc->mem_rx); + ivpu_bo_free(ipc->mem_tx); } static int @@ -79,8 +74,8 @@ ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf)); if (!tx_buf_vpu_addr) { - ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n", - 
sizeof(*tx_buf)); + ivpu_err_ratelimited(vdev, "Failed to reserve IPC buffer, size %ld\n", + sizeof(*tx_buf)); return -ENOMEM; } @@ -93,12 +88,12 @@ ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm); if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE) - ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n", - tx_buf_vpu_addr); + ivpu_warn_ratelimited(vdev, "IPC message vpu:0x%x not released by firmware\n", + tx_buf_vpu_addr); if (tx_buf->jsm.status != VPU_JSM_MSG_FREE) - ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n", - jsm_vpu_addr); + ivpu_warn_ratelimited(vdev, "JSM message vpu:0x%x not released by firmware\n", + jsm_vpu_addr); memset(tx_buf, 0, sizeof(*tx_buf)); tx_buf->ipc.data_addr = jsm_vpu_addr; @@ -135,11 +130,51 @@ static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr) static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr) { - ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr); + ivpu_hw_ipc_tx_set(vdev, vpu_addr); +} + +static void +ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg; + + lockdep_assert_held(&ipc->cons_lock); + + rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); + if (!rx_msg) { + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + return; + } + + atomic_inc(&ipc->rx_msg_count); + + rx_msg->ipc_hdr = ipc_hdr; + rx_msg->jsm_msg = jsm_msg; + rx_msg->callback = cons->rx_callback; + + if (rx_msg->callback) { + list_add_tail(&rx_msg->link, &ipc->cb_msg_list); + } else { + spin_lock(&cons->rx_lock); + list_add_tail(&rx_msg->link, &cons->rx_msg_list); + spin_unlock(&cons->rx_lock); + wake_up(&cons->rx_msg_wq); + } +} + +static void +ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg) +{ + list_del(&rx_msg->link); + ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + atomic_dec(&vdev->ipc->rx_msg_count); + kfree(rx_msg); } -void -ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel) +void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + u32 channel, ivpu_ipc_rx_callback_t rx_callback) { struct ivpu_ipc_info *ipc = vdev->ipc; @@ -147,13 +182,15 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, cons->channel = channel; cons->tx_vpu_addr = 0; cons->request_id = 0; - spin_lock_init(&cons->rx_msg_lock); + cons->aborted = false; + cons->rx_callback = rx_callback; + spin_lock_init(&cons->rx_lock); INIT_LIST_HEAD(&cons->rx_msg_list); init_waitqueue_head(&cons->rx_msg_wq); - spin_lock_irq(&ipc->cons_list_lock); + spin_lock_irq(&ipc->cons_lock); list_add_tail(&cons->link, &ipc->cons_list); - spin_unlock_irq(&ipc->cons_list_lock); + spin_unlock_irq(&ipc->cons_lock); } void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons) @@ -161,31 +198,24 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg, *r; - spin_lock_irq(&ipc->cons_list_lock); + spin_lock_irq(&ipc->cons_lock); list_del(&cons->link); - spin_unlock_irq(&ipc->cons_list_lock); - - spin_lock_irq(&cons->rx_msg_lock); - list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) { - list_del(&rx_msg->link); - ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, 
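
ivpu_ipc_rx_msg_add() above runs under the consumer lock in interrupt context, so it must allocate with GFP_ATOMIC and, when allocation fails, return the buffers to the firmware rather than leak them. A reduced sketch of its dispatch-by-callback shape; the types are simplified placeholders for the real IPC structures:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct rx_msg {
	struct list_head link;
	void *payload;
	void (*callback)(void *payload);
};

struct consumer {
	spinlock_t rx_lock;
	struct list_head rx_msg_list;
	wait_queue_head_t rx_msg_wq;
};

/* IRQ path: sleeping allocations are not allowed here */
static void rx_msg_add(struct consumer *cons, struct list_head *cb_list,
		       void (*cb)(void *), void *payload)
{
	struct rx_msg *msg = kzalloc(sizeof(*msg), GFP_ATOMIC);

	if (!msg)
		return;		/* the real driver hands buffers back to FW */

	msg->payload = payload;
	msg->callback = cb;

	if (cb) {
		/* Async consumers: drained later by a work item */
		list_add_tail(&msg->link, cb_list);
	} else {
		/* Sync consumers: queue the message and wake the waiter */
		spin_lock(&cons->rx_lock);
		list_add_tail(&msg->link, &cons->rx_msg_list);
		spin_unlock(&cons->rx_lock);
		wake_up(&cons->rx_msg_wq);
	}
}
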
rx_msg->jsm_msg); - atomic_dec(&ipc->rx_msg_count); - kfree(rx_msg); - } - spin_unlock_irq(&cons->rx_msg_lock); + spin_unlock_irq(&ipc->cons_lock); + + spin_lock_irq(&cons->rx_lock); + list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) + ivpu_ipc_rx_msg_del(vdev, rx_msg); + spin_unlock_irq(&cons->rx_lock); ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr); } -static int -ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) +int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) { struct ivpu_ipc_info *ipc = vdev->ipc; int ret; - ret = mutex_lock_interruptible(&ipc->lock); - if (ret) - return ret; + mutex_lock(&ipc->lock); if (!ipc->on) { ret = -EAGAIN; @@ -197,89 +227,100 @@ ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct v goto unlock; ivpu_ipc_tx(vdev, cons->tx_vpu_addr); + trace_jsm("[tx]", req); unlock: mutex_unlock(&ipc->lock); return ret; } +static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons) +{ + bool ret; + + spin_lock_irq(&cons->rx_lock); + ret = !list_empty(&cons->rx_msg_list) || cons->aborted; + spin_unlock_irq(&cons->rx_lock); + + return ret; +} + int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_buf, - struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms) + struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms) { - struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg; int wait_ret, ret = 0; - wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq, - (IS_KTHREAD() && kthread_should_stop()) || - !list_empty(&cons->rx_msg_list), - msecs_to_jiffies(timeout_ms)); + if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n")) + return -EINVAL; - if (IS_KTHREAD() && kthread_should_stop()) - return -EINTR; + wait_ret = wait_event_timeout(cons->rx_msg_wq, + ivpu_ipc_rx_need_wakeup(cons), + msecs_to_jiffies(timeout_ms)); if (wait_ret == 0) return -ETIMEDOUT; - if (wait_ret < 0) - return -ERESTARTSYS; - - spin_lock_irq(&cons->rx_msg_lock); + spin_lock_irq(&cons->rx_lock); + if (cons->aborted) { + spin_unlock_irq(&cons->rx_lock); + return -ECANCELED; + } rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link); if (!rx_msg) { - spin_unlock_irq(&cons->rx_msg_lock); + spin_unlock_irq(&cons->rx_lock); return -EAGAIN; } - list_del(&rx_msg->link); - spin_unlock_irq(&cons->rx_msg_lock); if (ipc_buf) memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf)); if (rx_msg->jsm_msg) { - u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload)); + u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg)); if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { - ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); ret = -EBADMSG; } - if (ipc_payload) - memcpy(ipc_payload, rx_msg->jsm_msg, size); + if (jsm_msg) + memcpy(jsm_msg, rx_msg->jsm_msg, size); + trace_jsm("[rx]", rx_msg->jsm_msg); } - ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); - atomic_dec(&ipc->rx_msg_count); - kfree(rx_msg); - + ivpu_ipc_rx_msg_del(vdev, rx_msg); + spin_unlock_irq(&cons->rx_lock); return ret; } -static int +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms) + struct 
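
ivpu_ipc_receive() above switched from an interruptible wait to wait_event_timeout() with a helper that evaluates the wake condition under the consumer lock, and an aborted flag now turns device teardown into -ECANCELED instead of a stuck waiter. A sketch of that pattern, reusing the simplified struct consumer from the previous example plus an external aborted flag:

#include <linux/jiffies.h>
#include <linux/wait.h>

/* The wait macro calls this repeatedly; it takes the lock itself */
static bool rx_ready(struct consumer *cons, bool *aborted)
{
	bool ret;

	spin_lock_irq(&cons->rx_lock);
	ret = !list_empty(&cons->rx_msg_list) || *aborted;
	spin_unlock_irq(&cons->rx_lock);

	return ret;
}

static int rx_wait(struct consumer *cons, bool *aborted,
		   unsigned long timeout_ms)
{
	long ret = wait_event_timeout(cons->rx_msg_wq,
				      rx_ready(cons, aborted),
				      msecs_to_jiffies(timeout_ms));

	if (ret == 0)
		return -ETIMEDOUT;

	/* Simplified: the real code re-checks aborted under rx_lock */
	if (*aborted)
		return -ECANCELED;

	return 0;
}
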
vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms) { struct ivpu_ipc_consumer cons; int ret; - ivpu_ipc_consumer_add(vdev, &cons, channel); + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) && + pm_runtime_enabled(vdev->drm.dev)); + + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); ret = ivpu_ipc_send(vdev, &cons, req); if (ret) { - ivpu_warn(vdev, "IPC send failed: %d\n", ret); + ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); goto consumer_del; } ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms); if (ret) { - ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret); + ivpu_warn_ratelimited(vdev, "IPC receive failed: type %s, ret %d\n", + ivpu_jsm_msg_type_to_str(req->type), ret); goto consumer_del; } if (resp->type != expected_resp_type) { - ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type); + ivpu_warn_ratelimited(vdev, "Invalid JSM response type: 0x%x\n", resp->type); ret = -EBADE; } @@ -289,9 +330,8 @@ consumer_del: } int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms) + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms) { struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; struct vpu_jsm_msg hb_resp; @@ -301,24 +341,47 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, if (ret < 0) return ret; - ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp, - channel, timeout_ms); + ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms); if (ret != -ETIMEDOUT) goto rpm_put; hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (hb_ret == -ETIMEDOUT) { - ivpu_hw_diagnose_failure(vdev); - ivpu_pm_schedule_recovery(vdev); - } + if (hb_ret == -ETIMEDOUT) + ivpu_pm_trigger_recovery(vdev, "IPC timeout"); rpm_put: ivpu_rpm_put(vdev); return ret; } +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms) +{ + struct ivpu_ipc_consumer cons; + int ret; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); + + ret = ivpu_ipc_send(vdev, &cons, req); + if (ret) { + ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); + goto consumer_del; + } + + msleep(timeout_ms); + +consumer_del: + ivpu_ipc_consumer_del(vdev, &cons); + ivpu_rpm_put(vdev); + return ret; +} + static bool ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) @@ -332,35 +395,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons return false; } -static void -ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, - struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) -{ - struct ivpu_ipc_info *ipc = vdev->ipc; - struct ivpu_ipc_rx_msg *rx_msg; - unsigned long flags; - - lockdep_assert_held(&ipc->cons_list_lock); - - rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); - if (!rx_msg) { - ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); - return; - } - - atomic_inc(&ipc->rx_msg_count); - - rx_msg->ipc_hdr = ipc_hdr; - rx_msg->jsm_msg = jsm_msg; - - spin_lock_irqsave(&cons->rx_msg_lock, flags); - 
list_add_tail(&rx_msg->link, &cons->rx_msg_list); - spin_unlock_irqrestore(&cons->rx_msg_lock, flags); - - wake_up(&cons->rx_msg_wq); -} - -int ivpu_ipc_irq_handler(struct ivpu_device *vdev) +void ivpu_ipc_irq_handler(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_consumer *cons; @@ -374,16 +409,16 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) * Driver needs to purge all messages from IPC FIFO to clear IPC interrupt. * Without purge IPC FIFO to 0 next IPC interrupts won't be generated. */ - while (ivpu_hw_reg_ipc_rx_count_get(vdev)) { - vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); + while (ivpu_hw_ipc_rx_count_get(vdev)) { + vpu_addr = ivpu_hw_ipc_rx_addr_get(vdev); if (vpu_addr == REG_IO_ERROR) { - ivpu_err(vdev, "Failed to read IPC rx addr register\n"); - return -EIO; + ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n"); + return; } ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr); if (!ipc_hdr) { - ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr); + ivpu_warn_ratelimited(vdev, "IPC msg 0x%x out of range\n", vpu_addr); continue; } ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr); @@ -392,7 +427,8 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) { jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr); if (!jsm_msg) { - ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr); + ivpu_warn_ratelimited(vdev, "JSM msg 0x%x out of range\n", + ipc_hdr->data_addr); ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL); continue; } @@ -400,21 +436,22 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) } if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) { - ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG); + ivpu_warn_ratelimited(vdev, "IPC RX msg dropped, msg count %d\n", + IPC_MAX_RX_MSG); ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); continue; } dispatched = false; - spin_lock_irqsave(&ipc->cons_list_lock, flags); + spin_lock_irqsave(&ipc->cons_lock, flags); list_for_each_entry(cons, &ipc->cons_list, link) { if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) { - ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg); + ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg); dispatched = true; break; } } - spin_unlock_irqrestore(&ipc->cons_list_lock, flags); + spin_unlock_irqrestore(&ipc->cons_lock, flags); if (!dispatched) { ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr); @@ -422,21 +459,45 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) } } - return 0; + queue_work(system_percpu_wq, &vdev->irq_ipc_work); +} + +void ivpu_ipc_irq_work_fn(struct work_struct *work) +{ + struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_ipc_work); + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg, *r; + struct list_head cb_msg_list; + + INIT_LIST_HEAD(&cb_msg_list); + + spin_lock_irq(&ipc->cons_lock); + list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list); + spin_unlock_irq(&ipc->cons_lock); + + list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) { + rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + ivpu_ipc_rx_msg_del(vdev, rx_msg); + } } int ivpu_ipc_init(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - int ret = -ENOMEM; + int ret; - ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); - if (!ipc->mem_tx) - return ret; + ipc->mem_tx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); + if (!ipc->mem_tx) { + 
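
Callback-style messages are not run inside the interrupt handler; instead the handler queues a work item, and ivpu_ipc_irq_work_fn() splices the shared list onto a private one in a single locked step so the callbacks themselves run with no lock held. A sketch of that splice-then-drain idiom, reusing the rx_msg type sketched earlier:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static void drain_cb_list(spinlock_t *lock, struct list_head *shared)
{
	struct rx_msg *msg, *next;
	LIST_HEAD(local);

	/* Take everything off the shared list in one locked operation */
	spin_lock_irq(lock);
	list_splice_tail_init(shared, &local);
	spin_unlock_irq(lock);

	/* Run callbacks unlocked; the IRQ path can keep queueing meanwhile */
	list_for_each_entry_safe(msg, next, &local, link) {
		msg->callback(msg->payload);
		list_del(&msg->link);
		kfree(msg);
	}
}
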
ivpu_err(vdev, "Failed to allocate mem_tx\n"); + return -ENOMEM; + } - ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); - if (!ipc->mem_rx) + ipc->mem_rx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); + if (!ipc->mem_rx) { + ivpu_err(vdev, "Failed to allocate mem_rx\n"); + ret = -ENOMEM; goto err_free_tx; + } ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT), -1, "TX_IPC_JSM"); @@ -446,28 +507,38 @@ int ivpu_ipc_init(struct ivpu_device *vdev) goto err_free_rx; } - ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1); + ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ivpu_bo_size(ipc->mem_tx), -1); if (ret) { ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret); goto err_free_rx; } + spin_lock_init(&ipc->cons_lock); INIT_LIST_HEAD(&ipc->cons_list); - spin_lock_init(&ipc->cons_list_lock); - drmm_mutex_init(&vdev->drm, &ipc->lock); - + INIT_LIST_HEAD(&ipc->cb_msg_list); + ret = drmm_mutex_init(&vdev->drm, &ipc->lock); + if (ret) { + ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret); + goto err_free_rx; + } ivpu_ipc_reset(vdev); return 0; err_free_rx: - ivpu_bo_free_internal(ipc->mem_rx); + ivpu_bo_free(ipc->mem_rx); err_free_tx: - ivpu_bo_free_internal(ipc->mem_tx); + ivpu_bo_free(ipc->mem_tx); return ret; } void ivpu_ipc_fini(struct ivpu_device *vdev) { + struct ivpu_ipc_info *ipc = vdev->ipc; + + drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list)); + drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); + drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); + ivpu_ipc_mem_fini(vdev); } @@ -484,16 +555,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_consumer *cons, *c; - unsigned long flags; + struct ivpu_ipc_rx_msg *rx_msg, *r; + + drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); mutex_lock(&ipc->lock); ipc->on = false; mutex_unlock(&ipc->lock); - spin_lock_irqsave(&ipc->cons_list_lock, flags); - list_for_each_entry_safe(cons, c, &ipc->cons_list, link) + spin_lock_irq(&ipc->cons_lock); + list_for_each_entry_safe(cons, c, &ipc->cons_list, link) { + spin_lock(&cons->rx_lock); + if (!cons->rx_callback) + cons->aborted = true; + list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) + ivpu_ipc_rx_msg_del(vdev, rx_msg); + spin_unlock(&cons->rx_lock); wake_up(&cons->rx_msg_wq); - spin_unlock_irqrestore(&ipc->cons_list_lock, flags); + } + spin_unlock_irq(&ipc->cons_lock); + + drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); } void ivpu_ipc_reset(struct ivpu_device *vdev) @@ -501,9 +583,10 @@ void ivpu_ipc_reset(struct ivpu_device *vdev) struct ivpu_ipc_info *ipc = vdev->ipc; mutex_lock(&ipc->lock); + drm_WARN_ON(&vdev->drm, ipc->on); - memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size); - memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size); + memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx)); + memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx)); wmb(); /* Flush WC buffers for TX and RX rings */ mutex_unlock(&ipc->lock); diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index 9838202ecfad..b524a1985b9d 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_IPC_H__ @@ -21,7 +21,7 @@ struct ivpu_bo; #define 
IVPU_IPC_ALIGNMENT 64 #define IVPU_IPC_HDR_FREE 0 -#define IVPU_IPC_HDR_ALLOCATED 0 +#define IVPU_IPC_HDR_ALLOCATED 1 /** * struct ivpu_ipc_hdr - The IPC message header structure, exchanged @@ -42,13 +42,26 @@ struct ivpu_ipc_hdr { u8 status; } __packed __aligned(IVPU_IPC_ALIGNMENT); +typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev, + struct ivpu_ipc_hdr *ipc_hdr, + struct vpu_jsm_msg *jsm_msg); + +struct ivpu_ipc_rx_msg { + struct list_head link; + struct ivpu_ipc_hdr *ipc_hdr; + struct vpu_jsm_msg *jsm_msg; + ivpu_ipc_rx_callback_t callback; +}; + struct ivpu_ipc_consumer { struct list_head link; u32 channel; u32 tx_vpu_addr; u32 request_id; + bool aborted; + ivpu_ipc_rx_callback_t rx_callback; - spinlock_t rx_msg_lock; /* Protects rx_msg_list */ + spinlock_t rx_lock; /* Protects rx_msg_list and aborted */ struct list_head rx_msg_list; wait_queue_head_t rx_msg_wq; }; @@ -60,8 +73,9 @@ struct ivpu_ipc_info { atomic_t rx_msg_count; - spinlock_t cons_list_lock; /* Protects cons_list */ + spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */ struct list_head cons_list; + struct list_head cb_msg_list; atomic_t request_id; struct mutex lock; /* Lock on status */ @@ -75,19 +89,25 @@ void ivpu_ipc_enable(struct ivpu_device *vdev); void ivpu_ipc_disable(struct ivpu_device *vdev); void ivpu_ipc_reset(struct ivpu_device *vdev); -int ivpu_ipc_irq_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_work_fn(struct work_struct *work); void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, - u32 channel); + u32 channel, ivpu_ipc_rx_callback_t callback); void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons); +int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct vpu_jsm_msg *req); int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, - struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload, + struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms); - +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms); + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms); #endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 3276bd9107b4..4f8564e2878a 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -1,162 +1,388 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include <drm/drm_file.h> #include <linux/bitfield.h> #include <linux/highmem.h> -#include <linux/kthread.h> #include <linux/pci.h> +#include <linux/pm_runtime.h> #include <linux/module.h> #include <uapi/drm/ivpu_accel.h> #include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" +#include "ivpu_mmu.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" #define 
CMD_BUF_IDX 0 -#define JOB_ID_JOB_MASK GENMASK(7, 0) -#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535 -static unsigned int ivpu_tdr_timeout_ms; -module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); -MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); - static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) { - ivpu_hw_reg_db_set(vdev, cmdq->db_id); + ivpu_hw_db_set(vdev, cmdq->db_id); +} + +static int ivpu_preemption_buffers_create(struct ivpu_device *vdev, + struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (ivpu_fw_preempt_buf_size(vdev) == 0) + return 0; + + cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user, + vdev->fw->primary_preempt_buf_size, + DRM_IVPU_BO_WC); + if (!cmdq->primary_preempt_buf) { + ivpu_err(vdev, "Failed to create primary preemption buffer\n"); + return -ENOMEM; + } + + cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma, + vdev->fw->secondary_preempt_buf_size, + DRM_IVPU_BO_WC); + if (!cmdq->secondary_preempt_buf) { + ivpu_err(vdev, "Failed to create secondary preemption buffer\n"); + goto err_free_primary; + } + + return 0; + +err_free_primary: + ivpu_bo_free(cmdq->primary_preempt_buf); + cmdq->primary_preempt_buf = NULL; + return -ENOMEM; +} + +static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, + struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (cmdq->primary_preempt_buf) + ivpu_bo_free(cmdq->primary_preempt_buf); + if (cmdq->secondary_preempt_buf) + ivpu_bo_free(cmdq->secondary_preempt_buf); +} + +static int ivpu_preemption_job_init(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv, + struct ivpu_cmdq *cmdq, struct ivpu_job *job) +{ + int ret; + + /* Use preemption buffer provided by the user space */ + if (job->primary_preempt_buf) + return 0; + + if (!cmdq->primary_preempt_buf) { + /* Allocate per command queue preemption buffers */ + ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq); + if (ret) + return ret; + } + + /* Use preemption buffers allocated by the kernel */ + job->primary_preempt_buf = cmdq->primary_preempt_buf; + job->secondary_preempt_buf = cmdq->secondary_preempt_buf; + + return 0; } -static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) +static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { struct ivpu_device *vdev = file_priv->vdev; - struct vpu_job_queue_header *jobq_header; struct ivpu_cmdq *cmdq; cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); if (!cmdq) return NULL; - cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); + cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!cmdq->mem) - goto cmdq_free; - - cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); - cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / - sizeof(struct vpu_job_queue_entry)); - - cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; - jobq_header = &cmdq->jobq->header; - jobq_header->engine_idx = engine; - jobq_header->head = 0; - jobq_header->tail = 0; - wmb(); /* Flush WC buffer for jobq->header */ + goto err_free_cmdq; return cmdq; -cmdq_free: +err_free_cmdq: kfree(cmdq); return NULL; } -static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +/** + * ivpu_cmdq_get_entry_count - Calculate the number of entries in 
the command queue. + * @cmdq: Pointer to the command queue structure. + * + * Returns the number of entries that can fit in the command queue memory. + */ +static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq) { - if (!cmdq) - return; + size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header); + + return size / sizeof(struct vpu_job_queue_entry); +} + +/** + * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode. + * @vdev: Pointer to the ivpu device structure. + * @flags: Input flags to determine the command queue flags. + * + * Returns the calculated command queue flags, considering both the input flags + * and the current test mode settings. + */ +static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags) +{ + u32 cmdq_flags = 0; - ivpu_bo_free_internal(cmdq->mem); + if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX)) + cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + + /* Test mode can override the TURBO flag coming from the application */ + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE) + cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE) + cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + + return cmdq_flags; +} + +static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq); + ivpu_bo_free(cmdq->mem); kfree(cmdq); } -static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) +static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags) { struct ivpu_device *vdev = file_priv->vdev; - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + struct ivpu_cmdq *cmdq = NULL; int ret; lockdep_assert_held(&file_priv->lock); + cmdq = ivpu_cmdq_alloc(file_priv); if (!cmdq) { - cmdq = ivpu_cmdq_alloc(file_priv, engine); - if (!cmdq) - return NULL; - file_priv->cmdq[engine] = cmdq; + ivpu_err(vdev, "Failed to allocate command queue\n"); + return NULL; } + ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, + &file_priv->cmdq_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate command queue ID: %d\n", ret); + goto err_free_cmdq; + } + + cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq); + cmdq->priority = priority; - if (cmdq->db_registered) - return cmdq; + cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); + cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE; + cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags); - ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, - cmdq->mem->vpu_addr, cmdq->mem->base.size); + ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n", + cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags); + return cmdq; + +err_free_cmdq: + ivpu_cmdq_free(file_priv, cmdq); + return NULL; +} + +static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, + u8 priority) +{ + struct ivpu_device *vdev = file_priv->vdev; + int ret; + + ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id, + task_pid_nr(current), engine, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (ret) - return NULL; + return ret; - cmdq->db_registered = true; + ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id, + priority); + if (ret) + return ret; - return cmdq; + return 0; } -static void 
ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + struct ivpu_device *vdev = file_priv->vdev; + int ret; + + ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, + GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate doorbell ID: %d\n", ret); + return ret; + } + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + else + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + + if (!ret) { + ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n", + cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority); + } else { + xa_erase(&vdev->db_xa, cmdq->db_id); + cmdq->db_id = 0; + } + + return ret; +} + +static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq) +{ + jobq->header.head = 0; + jobq->header.tail = 0; + + wmb(); /* Flush WC buffer for jobq->header */ +} + +static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + struct ivpu_device *vdev = file_priv->vdev; + int ret; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - file_priv->cmdq[engine] = NULL; - if (cmdq->db_registered) - ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); + if (cmdq->db_id) + return 0; + + ivpu_cmdq_jobq_reset(vdev, cmdq->jobq); - ivpu_cmdq_free(file_priv, cmdq); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority); + if (ret) + return ret; } + + ret = ivpu_register_db(file_priv, cmdq); + if (ret) + return ret; + + return 0; } -void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) +static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - int i; + struct ivpu_device *vdev = file_priv->vdev; + int ret; - mutex_lock(&file_priv->lock); + lockdep_assert_held(&file_priv->lock); - for (i = 0; i < IVPU_NUM_ENGINES; i++) - ivpu_cmdq_release_locked(file_priv, i); + if (!cmdq->db_id) + return 0; - mutex_unlock(&file_priv->lock); + ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); + if (!ret) + ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n", + cmdq->id, file_priv->ctx.id); + } + + xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); + cmdq->db_id = 0; + + return 0; } -/* - * Mark the doorbell as unregistered and reset job queue pointers. - * This function needs to be called when the VPU hardware is restarted - * and FW looses job queue state. The next time job queue is used it - * will be registered again. 
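
Both command-queue IDs and doorbell IDs in the new code are allocated with xa_alloc_cyclic(), which resumes scanning after the last ID handed out so freshly freed IDs are not immediately recycled, making stale-ID bugs easier to spot. A minimal sketch of the allocator; the [1, 255] range is an illustrative limit, not the device's real doorbell count:

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(db_xa);
static u32 db_next;

static int db_id_alloc(void *entry, u32 *id)
{
	/* Cycle through [1, 255]; 0 stays reserved as "no doorbell" */
	int ret = xa_alloc_cyclic(&db_xa, id, entry, XA_LIMIT(1, 255),
				  &db_next, GFP_KERNEL);

	/* ret == 1 only means the search wrapped around; not an error */
	return ret < 0 ? ret : 0;
}

static void db_id_free(u32 id)
{
	xa_erase(&db_xa, id);
}
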
- */ -static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) +static inline u8 ivpu_job_to_jsm_priority(u8 priority) +{ + if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT) + return VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL; + + return priority - 1; +} + +static void ivpu_cmdq_destroy(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + ivpu_cmdq_unregister(file_priv, cmdq); + xa_erase(&file_priv->cmdq_xa, cmdq->id); + ivpu_cmdq_free(file_priv, cmdq); +} + +static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_priv, u8 priority) +{ + struct ivpu_cmdq *cmdq; + unsigned long id; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - cmdq->db_registered = false; - cmdq->jobq->header.head = 0; - cmdq->jobq->header.tail = 0; - wmb(); /* Flush WC buffer for jobq header */ + xa_for_each(&file_priv->cmdq_xa, id, cmdq) + if (cmdq->is_legacy && cmdq->priority == priority) + break; + + if (!cmdq) { + cmdq = ivpu_cmdq_create(file_priv, priority, 0); + if (!cmdq) + return NULL; + cmdq->is_legacy = true; } + + return cmdq; } -static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32 cmdq_id) { - int i; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq; + + lockdep_assert_held(&file_priv->lock); + + cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id); + if (!cmdq) { + ivpu_dbg(vdev, IOCTL, "Failed to find command queue with ID: %u\n", cmdq_id); + return NULL; + } + + return cmdq; +} + +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) +{ + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; + + lockdep_assert_held(&file_priv->lock); + + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_destroy(file_priv, cmdq); +} + +/* + * Mark the doorbell as unregistered + * This function needs to be called when the VPU hardware is restarted + * and FW loses job queue state. The next time job queue is used it + * will be registered again. 
+ */ +static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv) +{ + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; mutex_lock(&file_priv->lock); - for (i = 0; i < IVPU_NUM_ENGINES; i++) - ivpu_cmdq_reset_locked(file_priv, i); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { + xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); + cmdq->db_id = 0; + } mutex_unlock(&file_priv->lock); } @@ -166,15 +392,32 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) struct ivpu_file_priv *file_priv; unsigned long ctx_id; - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); - if (!file_priv) - continue; + mutex_lock(&vdev->context_list_lock); - ivpu_cmdq_reset_all(file_priv); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + ivpu_cmdq_reset(file_priv); - ivpu_file_priv_put(&file_priv); - } + mutex_unlock(&vdev->context_list_lock); +} + +void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; + + lockdep_assert_held(&file_priv->lock); + ivpu_dbg(vdev, JOB, "Context ID: %u abort\n", file_priv->ctx.id); + + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_unregister(file_priv, cmdq); + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) + ivpu_jsm_context_release(vdev, file_priv->ctx.id); + + ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id); + + file_priv->aborted = true; } static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) @@ -187,15 +430,28 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) /* Check if there is space left in job queue */ if (next_entry == header->head) { - ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", - job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n", + job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail); return -EBUSY; } - entry = &cmdq->jobq->job[tail]; + entry = &cmdq->jobq->slot[tail].job; entry->batch_buf_addr = job->cmd_buf_vpu_addr; entry->job_id = job->job_id; entry->flags = 0; + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION)) + entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK; + + if (job->primary_preempt_buf) { + entry->primary_preempt_buf_addr = job->primary_preempt_buf->vpu_addr; + entry->primary_preempt_buf_size = ivpu_bo_size(job->primary_preempt_buf); + } + + if (job->secondary_preempt_buf) { + entry->secondary_preempt_buf_addr = job->secondary_preempt_buf->vpu_addr; + entry->secondary_preempt_buf_size = ivpu_bo_size(job->secondary_preempt_buf); + } + wmb(); /* Ensure that tail is updated after filling entry */ header->tail = next_entry; wmb(); /* Flush WC buffer for jobq header */ @@ -246,190 +502,267 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) return &fence->base; } -static void job_get(struct ivpu_job *job, struct ivpu_job **link) +static void ivpu_job_destroy(struct ivpu_job *job) { struct ivpu_device *vdev = job->vdev; - - kref_get(&job->ref); - *link = job; - - ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); -} - -static void job_release(struct kref *ref) -{ - struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); - struct ivpu_device *vdev = job->vdev; u32 i; + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", + job->job_id, 
job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); + for (i = 0; i < job->bo_count; i++) if (job->bos[i]) - drm_gem_object_put(&job->bos[i]->base); + drm_gem_object_put(&job->bos[i]->base.base); dma_fence_put(job->done_fence); ivpu_file_priv_put(&job->file_priv); - - ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); kfree(job); - - /* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */ - ivpu_rpm_put(vdev); -} - -static void job_put(struct ivpu_job *job) -{ - struct ivpu_device *vdev = job->vdev; - - ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); - kref_put(&job->ref, job_release); } static struct ivpu_job * -ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) +ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) { struct ivpu_device *vdev = file_priv->vdev; struct ivpu_job *job; - size_t buf_size; - int ret; - - ret = ivpu_rpm_get(vdev); - if (ret < 0) - return NULL; - buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); - job = kzalloc(buf_size, GFP_KERNEL); + job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL); if (!job) - goto err_rpm_put; - - kref_init(&job->ref); + return NULL; job->vdev = vdev; job->engine_idx = engine_idx; job->bo_count = bo_count; job->done_fence = ivpu_fence_create(vdev); if (!job->done_fence) { - ivpu_warn_ratelimited(vdev, "Failed to create a fence\n"); + ivpu_err(vdev, "Failed to create a fence\n"); goto err_free_job; } job->file_priv = ivpu_file_priv_get(file_priv); + trace_job("create", job); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); - return job; err_free_job: kfree(job); -err_rpm_put: - ivpu_rpm_put(vdev); return NULL; } -static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) +static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id) { struct ivpu_job *job; + lockdep_assert_held(&vdev->submitted_jobs_lock); + job = xa_erase(&vdev->submitted_jobs_xa, job_id); + if (xa_empty(&vdev->submitted_jobs_xa) && job) { + vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts), + vdev->busy_time); + } + + return job; +} + +bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + lockdep_assert_held(&vdev->submitted_jobs_lock); + + switch (job_status) { + case VPU_JSM_STATUS_PROCESSING_ERR: + case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... 
VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX: + { + struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id); + + if (!job) + return false; + + /* Trigger an engine reset */ + guard(mutex)(&job->file_priv->lock); + + job->job_status = job_status; + + if (job->file_priv->has_mmu_faults) + return false; + + /* + * Mark the context as faulty and defer destruction of the job to the context + * abort worker, so that MMU faults and jobs returning a context-violation + * status are synchronized and handled in the same way + */ + job->file_priv->has_mmu_faults = true; + queue_work(system_percpu_wq, &vdev->context_abort_work); + return true; + } + default: + /* Complete job with error status, engine reset not required */ + break; + } + + return false; +} + +static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + struct ivpu_job *job; + + lockdep_assert_held(&vdev->submitted_jobs_lock); + + job = xa_load(&vdev->submitted_jobs_xa, job_id); if (!job) return -ENOENT; - if (job->file_priv->has_mmu_faults) - job_status = VPU_JSM_STATUS_ABORTED; + ivpu_job_remove_from_submitted_jobs(vdev, job_id); + + if (job->job_status == VPU_JSM_STATUS_SUCCESS) { + if (job->file_priv->has_mmu_faults) + job->job_status = DRM_IVPU_JOB_STATUS_ABORTED; + else + job->job_status = job_status; + } - job->bos[CMD_BUF_IDX]->job_status = job_status; + job->bos[CMD_BUF_IDX]->job_status = job->job_status; dma_fence_signal(job->done_fence); - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + trace_job("done", job); + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, + job->job_status); + + ivpu_job_destroy(job); + ivpu_stop_job_timeout_detection(vdev); + + ivpu_rpm_put(vdev); + + if (!xa_empty(&vdev->submitted_jobs_xa)) + ivpu_start_job_timeout_detection(vdev); - job_put(job); return 0; } -static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg) +void ivpu_jobs_abort_all(struct ivpu_device *vdev) { - struct vpu_ipc_msg_payload_job_done *payload; - struct vpu_jsm_msg *job_ret_msg = msg; - int ret; + struct ivpu_job *job; + unsigned long id; - payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; + mutex_lock(&vdev->submitted_jobs_lock); - ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); - if (ret) - ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); + xa_for_each(&vdev->submitted_jobs_xa, id, job) + ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); + + mutex_unlock(&vdev->submitted_jobs_lock); } -void ivpu_jobs_abort_all(struct ivpu_device *vdev) +void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) { struct ivpu_job *job; unsigned long id; + mutex_lock(&vdev->submitted_jobs_lock); + xa_for_each(&vdev->submitted_jobs_xa, id, job) - ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); + if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id) + ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); + + mutex_unlock(&vdev->submitted_jobs_lock); } -static int ivpu_direct_job_submission(struct ivpu_job *job) +static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; - struct xa_limit job_id_range; struct ivpu_cmdq *cmdq; + bool is_first_job; int
ret; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&vdev->submitted_jobs_lock); mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); + if (cmdq_id == 0) + cmdq = ivpu_cmdq_acquire_legacy(file_priv, priority); + else + cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id); if (!cmdq) { - ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", - file_priv->ctx.id, job->engine_idx); ret = -EINVAL; goto err_unlock; } - job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); - job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; + ret = ivpu_cmdq_register(file_priv, cmdq); + if (ret) { + ivpu_err(vdev, "Failed to register command queue: %d\n", ret); + goto err_unlock; + } - job_get(job, &job); - ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + ret = ivpu_preemption_job_init(vdev, file_priv, cmdq, job); if (ret) { - ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); - goto err_job_put; + ivpu_err(vdev, "Failed to initialize preemption buffers for job %d: %d\n", + job->job_id, ret); + goto err_unlock; + } + + job->cmdq_id = cmdq->id; + + is_first_job = xa_empty(&vdev->submitted_jobs_xa); + ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", + file_priv->ctx.id); + ret = -EBUSY; + goto err_unlock; } ret = ivpu_cmdq_push_job(cmdq, job); if (ret) - goto err_xa_erase; + goto err_erase_xa; - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail); + ivpu_start_job_timeout_detection(vdev); - if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { - ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { cmdq->jobq->header.head = cmdq->jobq->header.tail; wmb(); /* Flush WC buffer for jobq header */ } else { ivpu_cmdq_ring_db(vdev, cmdq); + if (is_first_job) + vdev->busy_start_ts = ktime_get(); } + trace_job("submit", job); + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, + job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); + mutex_unlock(&file_priv->lock); + + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { + ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + } + + mutex_unlock(&vdev->submitted_jobs_lock); + return 0; -err_xa_erase: +err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_job_put: - job_put(job); err_unlock: mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); + ivpu_rpm_put(vdev); return ret; } static int ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, - u32 buf_count, u32 commands_offset) + u32 buf_count, u32 commands_offset, u32 preempt_buffer_index) { - struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = file_priv->vdev; struct ww_acquire_ctx acquire_ctx; + enum dma_resv_usage usage; struct ivpu_bo *bo; int ret; u32 i; @@ -437,46 +770,66 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 for (i = 0; i < buf_count; i++) { struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]); - if 
(!obj) + if (!obj) { + ivpu_dbg(vdev, IOCTL, "Failed to lookup GEM object with handle %u\n", + buf_handles[i]); return -ENOENT; + } job->bos[i] = to_ivpu_bo(obj); - ret = ivpu_bo_pin(job->bos[i]); + ret = ivpu_bo_bind(job->bos[i]); if (ret) return ret; } bo = job->bos[CMD_BUF_IDX]; - if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { - ivpu_warn(vdev, "Buffer is already in use\n"); + if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) { + ivpu_dbg(vdev, IOCTL, "Buffer is already in use by another job\n"); return -EBUSY; } - if (commands_offset >= bo->base.size) { - ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); + if (commands_offset >= ivpu_bo_size(bo)) { + ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u for buffer size %zu\n", + commands_offset, ivpu_bo_size(bo)); return -EINVAL; } job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; + if (preempt_buffer_index) { + struct ivpu_bo *preempt_bo = job->bos[preempt_buffer_index]; + + if (ivpu_bo_size(preempt_bo) < ivpu_fw_preempt_buf_size(vdev)) { + ivpu_dbg(vdev, IOCTL, "Preemption buffer is too small\n"); + return -EINVAL; + } + if (ivpu_bo_is_mappable(preempt_bo)) { + ivpu_dbg(vdev, IOCTL, "Preemption buffer cannot be mappable\n"); + return -EINVAL; + } + job->primary_preempt_buf = preempt_bo; + } + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); if (ret) { - ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); + ivpu_warn_ratelimited(vdev, "Failed to lock reservations: %d\n", ret); return ret; } for (i = 0; i < buf_count; i++) { - ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); + ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1); if (ret) { - ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + ivpu_warn_ratelimited(vdev, "Failed to reserve fences: %d\n", ret); goto unlock_reservations; } } - for (i = 0; i < buf_count; i++) - dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + for (i = 0; i < buf_count; i++) { + usage = (i == CMD_BUF_IDX) ? 
DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP; + dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage); + } unlock_reservations: drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); @@ -486,129 +839,315 @@ unlock_reservations: return ret; } -int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id, + u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset, + u32 preempt_buffer_index, u8 priority) { - int ret = 0; - struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; - struct drm_ivpu_submit *params = data; struct ivpu_job *job; u32 *buf_handles; + int idx, ret; + + buf_handles = kcalloc(buffer_count, sizeof(u32), GFP_KERNEL); + if (!buf_handles) + return -ENOMEM; + + ret = copy_from_user(buf_handles, buffers_ptr, buffer_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto err_free_handles; + } + + if (!drm_dev_enter(&vdev->drm, &idx)) { + ret = -ENODEV; + goto err_free_handles; + } + + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", + file_priv->ctx.id, cmdq_id, buffer_count); + + job = ivpu_job_create(file_priv, engine, buffer_count); + if (!job) { + ret = -ENOMEM; + goto err_exit_dev; + } + + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset, + preempt_buffer_index); + if (ret) + goto err_destroy_job; + + down_read(&vdev->pm->reset_lock); + ret = ivpu_job_submit(job, priority, cmdq_id); + up_read(&vdev->pm->reset_lock); + if (ret) + goto err_signal_fence; + + drm_dev_exit(idx); + kfree(buf_handles); + return ret; + +err_signal_fence: + dma_fence_signal(job->done_fence); +err_destroy_job: + ivpu_job_destroy(job); +err_exit_dev: + drm_dev_exit(idx); +err_free_handles: + kfree(buf_handles); + return ret; +} + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_submit *args = data; + u8 priority; + + if (args->engine != DRM_IVPU_ENGINE_COMPUTE) { + ivpu_dbg(vdev, IOCTL, "Invalid engine %d\n", args->engine); + return -EINVAL; + } - if (params->engine > DRM_IVPU_ENGINE_COPY) + if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) { + ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority); return -EINVAL; + } - if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) + if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) { + ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count); return -EINVAL; + } - if (!IS_ALIGNED(params->commands_offset, 8)) + if (!IS_ALIGNED(args->commands_offset, 8)) { + ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset); return -EINVAL; + } - if (!file_priv->ctx.id) + if (!file_priv->ctx.id) { + ivpu_dbg(vdev, IOCTL, "Context not initialized\n"); return -EINVAL; + } - if (file_priv->has_mmu_faults) + if (file_priv->has_mmu_faults) { + ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id); return -EBADFD; + } - buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); - if (!buf_handles) - return -ENOMEM; + priority = ivpu_job_to_jsm_priority(args->priority); - ret = copy_from_user(buf_handles, - (void __user *)params->buffers_ptr, - params->buffer_count * sizeof(u32)); - if (ret) { - ret = -EFAULT; - goto 
free_handles; + return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine, + (void __user *)args->buffers_ptr, args->commands_offset, 0, priority); +} + +int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_cmdq_submit *args = data; + + if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) { + ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n"); + return -ENODEV; } - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", - file_priv->ctx.id, params->buffer_count); + if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID) { + ivpu_dbg(vdev, IOCTL, "Invalid command queue ID %u\n", args->cmdq_id); + return -EINVAL; + } - job = ivpu_create_job(file_priv, params->engine, params->buffer_count); - if (!job) { - ivpu_err(vdev, "Failed to create job\n"); - ret = -ENOMEM; - goto free_handles; + if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) { + ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count); + return -EINVAL; } - ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, - params->commands_offset); - if (ret) { - ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); - goto job_put; + if (args->preempt_buffer_index >= args->buffer_count) { + ivpu_dbg(vdev, IOCTL, "Invalid preemption buffer index %u\n", + args->preempt_buffer_index); + return -EINVAL; } - ret = ivpu_direct_job_submission(job); - if (ret) { - dma_fence_signal(job->done_fence); - ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); + if (!IS_ALIGNED(args->commands_offset, 8)) { + ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset); + return -EINVAL; } -job_put: - job_put(job); -free_handles: - kfree(buf_handles); + if (!file_priv->ctx.id) { + ivpu_dbg(vdev, IOCTL, "Context not initialized\n"); + return -EINVAL; + } - return ret; + if (file_priv->has_mmu_faults) { + ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id); + return -EBADFD; + } + + return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE, + (void __user *)args->buffers_ptr, args->commands_offset, + args->preempt_buffer_index, 0); } -static int ivpu_job_done_thread(void *arg) +int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct ivpu_device *vdev = (struct ivpu_device *)arg; - struct ivpu_ipc_consumer cons; - struct vpu_jsm_msg jsm_msg; - bool jobs_submitted; - unsigned int timeout; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_cmdq_create *args = data; + struct ivpu_cmdq *cmdq; int ret; - ivpu_dbg(vdev, JOB, "Started %s\n", __func__); - - ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); - - while (!kthread_should_stop()) { - timeout = ivpu_tdr_timeout_ms ? 
ivpu_tdr_timeout_ms : vdev->timeout.tdr; - jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); - ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); - if (!ret) { - ivpu_job_done_message(vdev, &jsm_msg); - } else if (ret == -ETIMEDOUT) { - if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { - ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); - ivpu_hw_diagnose_failure(vdev); - ivpu_pm_schedule_recovery(vdev); - } - } + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) { + ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n"); + return -ENODEV; + } + + if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) { + ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority); + return -EINVAL; } - ivpu_ipc_consumer_del(vdev, &cons); + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; - ivpu_jobs_abort_all(vdev); + mutex_lock(&file_priv->lock); - ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); - return 0; + cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags); + if (cmdq) + args->cmdq_id = cmdq->id; + + mutex_unlock(&file_priv->lock); + + ivpu_rpm_put(vdev); + + return cmdq ? 0 : -ENOMEM; +} + +int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_cmdq_destroy *args = data; + struct ivpu_cmdq *cmdq; + u32 cmdq_id = 0; + int ret; + + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) { + ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n"); + return -ENODEV; + } + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&file_priv->lock); + + cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); + if (!cmdq || cmdq->is_legacy) { + ret = -ENOENT; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; + } + + mutex_unlock(&file_priv->lock); + + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + + return ret; } -int ivpu_job_done_thread_init(struct ivpu_device *vdev) +static void +ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, + struct vpu_jsm_msg *jsm_msg) { - struct task_struct *thread; + struct vpu_ipc_msg_payload_job_done *payload; - thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); - if (IS_ERR(thread)) { - ivpu_err(vdev, "Failed to start job completion thread\n"); - return -EIO; + if (!jsm_msg) { + ivpu_err(vdev, "IPC message has no JSM payload\n"); + return; + } + + if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { + ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result); + return; } - get_task_struct(thread); - wake_up_process(thread); + payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; - vdev->job_done_thread = thread; + mutex_lock(&vdev->submitted_jobs_lock); + if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status)) + /* No engine error, complete the job normally */ + ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); + mutex_unlock(&vdev->submitted_jobs_lock); +} - return 0; +void ivpu_job_done_consumer_init(struct ivpu_device *vdev) +{ + ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer, + VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback); +} + +void ivpu_job_done_consumer_fini(struct ivpu_device *vdev) +{ + ivpu_ipc_consumer_del(vdev, 
&vdev->job_done_consumer); } -void ivpu_job_done_thread_fini(struct ivpu_device *vdev) +void ivpu_context_abort_work_fn(struct work_struct *work) { - kthread_stop(vdev->job_done_thread); - put_task_struct(vdev->job_done_thread); + struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work); + struct ivpu_file_priv *file_priv; + struct ivpu_job *job; + unsigned long ctx_id; + unsigned long id; + + if (drm_WARN_ON(&vdev->drm, pm_runtime_get_if_active(vdev->drm.dev) <= 0)) + return; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + if (ivpu_jsm_reset_engine(vdev, 0)) + goto runtime_put; + + mutex_lock(&vdev->context_list_lock); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + if (!file_priv->has_mmu_faults || file_priv->aborted) + continue; + + mutex_lock(&file_priv->lock); + ivpu_context_abort_locked(file_priv); + mutex_unlock(&file_priv->lock); + } + mutex_unlock(&vdev->context_list_lock); + + /* + * We will not receive new MMU event interrupts until the existing events are + * discarded; however, we want to discard them only after aborting the faulty + * context to avoid generating new faults from that context + */ + ivpu_mmu_discard_events(vdev); + + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + goto runtime_put; + + if (ivpu_jsm_hws_resume_engine(vdev, 0)) + goto runtime_put; + /* + * In hardware scheduling mode the NPU has already stopped processing jobs and + * won't send us any further notifications, so we have to free job-related + * resources and notify userspace + */ + mutex_lock(&vdev->submitted_jobs_lock); + xa_for_each(&vdev->submitted_jobs_xa, id, job) + if (job->file_priv->aborted) + ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED); + mutex_unlock(&vdev->submitted_jobs_lock); + +runtime_put: + pm_runtime_put_autosuspend(vdev->drm.dev); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index aa1f0b9479b0..3ab61e6a5616 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_JOB_H__ @@ -15,52 +15,78 @@ struct ivpu_device; struct ivpu_file_priv;
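struct ivpu_cmdq below wraps a vpu_job_queue ring that is shared with the firmware: the driver advances the tail as it pushes jobs and the firmware advances the head as it consumes them. The occupancy rule used by ivpu_cmdq_push_job() earlier in this patch can be sketched in isolation; example_ring and example_ring_push are hypothetical stand-ins, not driver code:

#include <linux/errno.h>
#include <linux/types.h>

struct example_ring {
	u32 head;		/* consumer index, advanced by the firmware */
	u32 tail;		/* producer index, advanced by the driver */
	u32 entry_count;	/* number of slots in the ring */
};

/*
 * The ring is full when advancing the tail would collide with the head,
 * so one slot is always left unused.
 */
static int example_ring_push(struct example_ring *r)
{
	u32 next_entry = (r->tail + 1) % r->entry_count;

	if (next_entry == r->head)
		return -EBUSY;	/* mirrors the next_entry == header->head check */

	/* ...fill the slot at r->tail, then publish the new tail... */
	r->tail = next_entry;
	return 0;
}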
/** - * struct ivpu_cmdq - Object representing device queue used to send jobs. - * @jobq: Pointer to job queue memory shared with the device - * @mem: Memory allocated for the job queue, shared with device - * @entry_count Number of job entries in the queue - * @db_id: Doorbell assigned to this job queue - * @db_registered: True if doorbell is registered in device + * struct ivpu_cmdq - Represents a command queue for submitting jobs to the VPU. + * Tracks queue memory, preemption buffers, and metadata for job management. + * @jobq: Pointer to job queue memory shared with the device + * @primary_preempt_buf: Primary preemption buffer for this queue (optional) + * @secondary_preempt_buf: Secondary preemption buffer for this queue (optional) + * @mem: Memory allocated for the job queue, shared with device + * @entry_count: Number of job entries in the queue + * @id: Unique command queue ID + * @db_id: Doorbell ID assigned to this job queue + * @priority: Priority level of the command queue + * @is_legacy: True if this is a legacy command queue */ struct ivpu_cmdq { struct vpu_job_queue *jobq; + struct ivpu_bo *primary_preempt_buf; + struct ivpu_bo *secondary_preempt_buf; struct ivpu_bo *mem; u32 entry_count; + u32 id; u32 db_id; - bool db_registered; + u8 priority; + bool is_legacy; }; /** - * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer. - * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW. - * This is a unit of execution, and be tracked by the job_id for - * any status reporting from VPU FW through IPC JOB RET/DONE message. - * @file_priv: The client that submitted this job - * @job_id: Job ID for KMD tracking and job status reporting from VPU FW - * @status: Status of the Job from IPC JOB RET/DONE message - * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job - * @submit_status_offset: Offset within batch buffer where job completion handler - will update the job status + * struct ivpu_job - Represents a batch or DMA buffer submitted to the VPU. + * Each job is a unit of execution, tracked by job_id for status reporting from VPU FW. + * The structure holds all resources and metadata needed for job submission, execution, + * and completion handling. + * @vdev: Pointer to the VPU device + * @file_priv: The client context that submitted this job + * @done_fence: Fence signaled when job completes + * @cmd_buf_vpu_addr: VPU address of the command buffer for this job + * @cmdq_id: Command queue ID used for submission + * @job_id: Unique job ID for tracking and status reporting + * @engine_idx: Engine index for job execution + * @job_status: Status reported by firmware for this job + * @primary_preempt_buf: Primary preemption buffer for this job + * @secondary_preempt_buf: Secondary preemption buffer for this job (optional) + * @bo_count: Number of buffer objects associated with this job + * @bos: Array of buffer objects used by the job (batch buffer is at index 0) */ struct ivpu_job { - struct kref ref; struct ivpu_device *vdev; struct ivpu_file_priv *file_priv; struct dma_fence *done_fence; u64 cmd_buf_vpu_addr; + u32 cmdq_id; u32 job_id; u32 engine_idx; + u32 job_status; + struct ivpu_bo *primary_preempt_buf; + struct ivpu_bo *secondary_preempt_buf; size_t bo_count; - struct ivpu_bo *bos[]; + struct ivpu_bo *bos[] __counted_by(bo_count); }; int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); +void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv); + +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); +void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id); -int
ivpu_job_done_thread_init(struct ivpu_device *vdev); -void ivpu_job_done_thread_fini(struct ivpu_device *vdev); +void ivpu_job_done_consumer_init(struct ivpu_device *vdev); +void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); +bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status); +void ivpu_context_abort_work_fn(struct work_struct *work); void ivpu_jobs_abort_all(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index af77dafac97e..0256b2dfefc1 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -1,11 +1,96 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include "ivpu_drv.h" +#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" +#include "vpu_jsm_api.h" + +const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) +{ + #define IVPU_CASE_TO_STR(x) case x: return #x + switch (type) { + IVPU_CASE_TO_STR(VPU_JSM_MSG_UNKNOWN); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_UNREGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_QUERY_ENGINE_HB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL_COUNT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_POWER_LEVEL); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_OPEN); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_CLOSE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_SET_CONFIG); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CONFIG); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CAPABILITY); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_NAME); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SSID_RELEASE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_START); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_STOP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_UPDATE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_INFO); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_CREATE_CMD_QUEUE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); + IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_UNREGISTER_DB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_QUERY_ENGINE_HB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL_DONE); + 
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_POWER_LEVEL_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_SET_CONFIG_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CONFIG_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_NAME_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SSID_RELEASE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_START_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER); + IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE); + } + #undef IVPU_CASE_TO_STR + + return "Unknown JSM message type"; +} int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, u64 jobq_base, u32 jobq_size) @@ -21,14 +106,10 @@ int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) { - ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); - return ret; - } - - ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret); - return 0; + return ret; } int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) @@ -41,14 +122,10 @@ int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) { - ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); - return ret; - } - - ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %u: %d\n", db_id, ret); - return 0; + return ret; } int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) @@ -57,7 +134,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.query_engine_hb.engine_idx = engine; @@ -65,7 +142,8 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) { - ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, ret); + ivpu_err_ratelimited(vdev, "Failed to get heartbeat from engine %d: %d\n", + engine, ret); return ret; } @@ -79,15 +157,17 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if 
(engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_reset.engine_idx = engine; ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) - ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, ret); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret); + ivpu_pm_trigger_recovery(vdev, "Engine reset failed"); + } return ret; } @@ -98,7 +178,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_preempt.engine_idx = engine; @@ -107,7 +187,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) - ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, ret); + ivpu_err_ratelimited(vdev, "Failed to preempt engine %d: %d\n", engine, ret); return ret; } @@ -118,13 +198,13 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size struct vpu_jsm_msg resp; int ret; - if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1)) - return -ENOMEM; + strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN); ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, - VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm); if (ret) - ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); + ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n", + command, ret); return ret; } @@ -139,7 +219,7 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) { - ivpu_warn(vdev, "Failed to get trace capability: %d\n", ret); + ivpu_warn_ratelimited(vdev, "Failed to get trace capability: %d\n", ret); return ret; } @@ -163,7 +243,320 @@ int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 tra ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) - ivpu_warn(vdev, "Failed to set config: %d\n", ret); + ivpu_warn_ratelimited(vdev, "Failed to set config: %d\n", ret); + + return ret; +} + +int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.ssid_release.host_ssid = host_ssid; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to release context: %d\n", ret); + + return ret; +} + +int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER }; + struct vpu_jsm_msg resp; + int ret; + + if (IVPU_WA(disable_d0i3_msg)) + return 0; + + req.payload.pwr_d0i3_enter.send_response = 1; + + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, &resp, + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.d0i3_entry_msg); + if (ret) + return ret; + + return ivpu_hw_wait_for_idle(vdev); +} + +int 
ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id, + u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_CREATE_CMD_QUEUE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_create_cmdq.host_ssid = ctx_id; + req.payload.hws_create_cmdq.process_id = pid; + req.payload.hws_create_cmdq.engine_idx = engine; + req.payload.hws_create_cmdq.cmdq_group = cmdq_group; + req.payload.hws_create_cmdq.cmdq_id = cmdq_id; + req.payload.hws_create_cmdq.cmdq_base = cmdq_base; + req.payload.hws_create_cmdq.cmdq_size = cmdq_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to create command queue: %d\n", ret); return ret; } + +int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DESTROY_CMD_QUEUE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_destroy_cmdq.host_ssid = ctx_id; + req.payload.hws_destroy_cmdq.cmdq_id = cmdq_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to destroy command queue: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id, + u64 cmdq_base, u32 cmdq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_REGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.hws_register_db.db_id = db_id; + req.payload.hws_register_db.host_ssid = ctx_id; + req.payload.hws_register_db.cmdq_id = cmdq_id; + req.payload.hws_register_db.cmdq_base = cmdq_base; + req.payload.hws_register_db.cmdq_size = cmdq_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret); + + return ret; +} + +int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_ENGINE_RESUME }; + struct vpu_jsm_msg resp; + int ret; + + if (engine != VPU_ENGINE_COMPUTE) + return -EINVAL; + + req.payload.hws_resume_engine.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret); + ivpu_pm_trigger_recovery(vdev, "Engine resume failed"); + } + + return ret; +} + +int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, + u32 priority) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_set_context_sched_properties.host_ssid = ctx_id; + req.payload.hws_set_context_sched_properties.cmdq_id = cmdq_id; + req.payload.hws_set_context_sched_properties.priority_band = priority; + req.payload.hws_set_context_sched_properties.realtime_priority_level = 0; + req.payload.hws_set_context_sched_properties.in_process_priority = 0; + req.payload.hws_set_context_sched_properties.context_quantum = 20000; + req.payload.hws_set_context_sched_properties.grace_period_same_priority = 10000; + req.payload.hws_set_context_sched_properties.grace_period_lower_priority = 0; + + ret = 
ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set context sched properties: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, + u64 vpu_log_buffer_va) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_set_scheduling_log.engine_idx = engine_idx; + req.payload.hws_set_scheduling_log.host_ssid = host_ssid; + req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va; + req.payload.hws_set_scheduling_log.notify_index = 0; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set scheduling log: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP }; + struct vpu_jsm_msg resp; + struct ivpu_hw_info *hw = vdev->hw; + struct vpu_ipc_msg_payload_hws_priority_band_setup *setup = + &req.payload.hws_priority_band_setup; + int ret; + + for (int band = VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE; + band < VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT; band++) { + setup->grace_period[band] = hw->hws.grace_period[band]; + setup->process_grace_period[band] = hw->hws.process_grace_period[band]; + setup->process_quantum[band] = hw->hws.process_quantum[band]; + } + setup->normal_band_percentage = 10; + + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP, + &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set priority bands: %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_START }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.sampling_rate = sampling_rate; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_START_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to start metric streamer: ret %d\n", ret); + return ret; + } + + return ret; +} + +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_STOP }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_stop.metric_group_mask = metric_group_mask; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to stop metric streamer: ret %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_UPDATE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_update.metric_group_mask = 
metric_group_mask; + req.payload.metric_streamer_update.buffer_addr = buffer_addr; + req.payload.metric_streamer_update.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to update metric streamer: ret %d\n", ret); + return ret; + } + + if (buffer_size && resp.payload.metric_streamer_done.bytes_written > buffer_size) { + ivpu_warn_ratelimited(vdev, "MS buffer overflow: bytes_written %#llx > buffer_size %#llx\n", + resp.payload.metric_streamer_done.bytes_written, buffer_size); + return -EOVERFLOW; + } + + *bytes_written = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_INFO }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to get metric streamer info: ret %d\n", ret); + return ret; + } + + if (!resp.payload.metric_streamer_done.sample_size) { + ivpu_warn_ratelimited(vdev, "Invalid sample size\n"); + return -EBADMSG; + } + + if (sample_size) + *sample_size = resp.payload.metric_streamer_done.sample_size; + if (info_size) + *info_size = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_ENABLE }; + struct vpu_jsm_msg resp; + + req.payload.pwr_dct_control.dct_active_us = active_us; + req.payload.pwr_dct_control.dct_inactive_us = inactive_us; + + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_dct_disable(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_DISABLE }; + struct vpu_jsm_msg resp; + + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_state_dump(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP }; + + return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD, + vdev->timeout.state_dump_msg); +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index 1a3e2e2740bd..9e84d3526a14 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_JSM_MSG_H__ @@ -8,6 +8,8 @@ #include "vpu_jsm_api.h" +const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type); + int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, u64 jobq_base, u32 jobq_size); int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id); @@ -19,5 +21,28 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati u64 *trace_hw_component_mask); 
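All of the JSM helpers declared in this header wrap the same request/response exchange implemented in ivpu_jsm_msg.c: build a vpu_jsm_msg, send it over the IPC channel, and wait for the matching *_DONE or *_RSP reply. A minimal sketch of that shape, assuming a hypothetical VPU_JSM_MSG_EXAMPLE message type and payload member that are not part of this patch:

int ivpu_jsm_example(struct ivpu_device *vdev, u32 arg)
{
	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_EXAMPLE }; /* hypothetical type */
	struct vpu_jsm_msg resp;
	int ret;

	req.payload.example.field = arg; /* hypothetical payload member */

	/* Blocks until the firmware returns the matching response or the timeout expires */
	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_EXAMPLE_DONE, &resp,
				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
	if (ret)
		ivpu_warn_ratelimited(vdev, "Example request failed: %d\n", ret);

	return ret;
}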
int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask, u64 trace_hw_component_mask); +int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid); +int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev); +int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id, + u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size); +int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id); +int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id, + u64 cmdq_base, u32 cmdq_size); +int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine); +int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, + u32 priority); +int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, + u64 vpu_log_buffer_va); +int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev); +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size); +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask); +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written); +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size); +int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us); +int ivpu_jsm_dct_disable(struct ivpu_device *vdev); +int ivpu_jsm_state_dump(struct ivpu_device *vdev); #endif diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 694e978aba66..e1baf6b64935 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -1,18 +1,61 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/circ_buf.h> #include <linux/highmem.h> #include "ivpu_drv.h" -#include "ivpu_hw_mtl_reg.h" +#include "ivpu_hw.h" #include "ivpu_hw_reg_io.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" #include "ivpu_pm.h" +#define IVPU_MMU_REG_IDR0 0x00200000u +#define IVPU_MMU_REG_IDR1 0x00200004u +#define IVPU_MMU_REG_IDR3 0x0020000cu +#define IVPU_MMU_REG_IDR5 0x00200014u +#define IVPU_MMU_REG_CR0 0x00200020u +#define IVPU_MMU_REG_CR0ACK 0x00200024u +#define IVPU_MMU_REG_CR0ACK_VAL_MASK GENMASK(31, 0) +#define IVPU_MMU_REG_CR0_ATSCHK_MASK BIT(4) +#define IVPU_MMU_REG_CR0_CMDQEN_MASK BIT(3) +#define IVPU_MMU_REG_CR0_EVTQEN_MASK BIT(2) +#define IVPU_MMU_REG_CR0_PRIQEN_MASK BIT(1) +#define IVPU_MMU_REG_CR0_SMMUEN_MASK BIT(0) + +#define IVPU_MMU_REG_CR1 0x00200028u +#define IVPU_MMU_REG_CR2 0x0020002cu +#define IVPU_MMU_REG_IRQ_CTRL 0x00200050u +#define IVPU_MMU_REG_IRQ_CTRLACK 0x00200054u +#define IVPU_MMU_REG_IRQ_CTRLACK_VAL_MASK GENMASK(31, 0) + +#define IVPU_MMU_REG_GERROR 0x00200060u +#define IVPU_MMU_REG_GERROR_CMDQ_MASK BIT_MASK(0) +#define IVPU_MMU_REG_GERROR_EVTQ_ABT_MASK BIT_MASK(2) +#define IVPU_MMU_REG_GERROR_PRIQ_ABT_MASK BIT_MASK(3) +#define IVPU_MMU_REG_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4) +#define IVPU_MMU_REG_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5) +#define IVPU_MMU_REG_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6) +#define IVPU_MMU_REG_GERROR_MSI_ABT_MASK BIT_MASK(7) + +#define IVPU_MMU_REG_GERRORN 0x00200064u + +#define IVPU_MMU_REG_STRTAB_BASE 0x00200080u 
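Several of the queue registers defined in this block pack two fields into one 32-bit value; the CMDQ consumer register, for instance, carries the consumer index in bits 23:0 and an error code in bits 30:24 (see the VAL/ERR masks below). A short sketch of pulling those fields apart with the generic bitfield helpers, assuming only the masks shown in this file; the driver itself does the same thing through its REG_GET_FLD wrappers:

#include <linux/bitfield.h>

/* Illustrative only: split a CMDQ_CONS register value into its two fields */
static inline void example_decode_cmdq_cons(u32 val, u32 *cons, u32 *err)
{
	*cons = FIELD_GET(GENMASK(23, 0), val);	/* IVPU_MMU_REG_CMDQ_CONS_VAL_MASK */
	*err = FIELD_GET(GENMASK(30, 24), val);	/* IVPU_MMU_REG_CMDQ_CONS_ERR_MASK */
}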
+#define IVPU_MMU_REG_STRTAB_BASE_CFG 0x00200088u +#define IVPU_MMU_REG_CMDQ_BASE 0x00200090u +#define IVPU_MMU_REG_CMDQ_PROD 0x00200098u +#define IVPU_MMU_REG_CMDQ_CONS 0x0020009cu +#define IVPU_MMU_REG_CMDQ_CONS_VAL_MASK GENMASK(23, 0) +#define IVPU_MMU_REG_CMDQ_CONS_ERR_MASK GENMASK(30, 24) +#define IVPU_MMU_REG_EVTQ_BASE 0x002000a0u +#define IVPU_MMU_REG_EVTQ_PROD 0x002000a8u +#define IVPU_MMU_REG_EVTQ_CONS 0x002000acu +#define IVPU_MMU_REG_EVTQ_PROD_SEC (0x002000a8u + SZ_64K) +#define IVPU_MMU_REG_EVTQ_CONS_SEC (0x002000acu + SZ_64K) + #define IVPU_MMU_IDR0_REF 0x080f3e0f #define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f #define IVPU_MMU_IDR1_REF 0x0e739d18 @@ -35,10 +78,10 @@ #define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */ #define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2) -#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1) -#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1) -#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) +#define IVPU_MMU_Q_WRAP_MASK GENMASK(IVPU_MMU_Q_COUNT_LOG2, 0) +#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) #define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK) +#define IVPU_MMU_Q_WRP(val) ((val) & IVPU_MMU_Q_COUNT) #define IVPU_MMU_CMDQ_CMD_SIZE 16 #define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE) @@ -104,12 +147,6 @@ #define IVPU_MMU_IRQ_EVTQ_EN BIT(2) #define IVPU_MMU_IRQ_GERROR_EN BIT(0) -#define IVPU_MMU_CR0_ATSCHK BIT(4) -#define IVPU_MMU_CR0_CMDQEN BIT(3) -#define IVPU_MMU_CR0_EVTQEN BIT(2) -#define IVPU_MMU_CR0_PRIQEN BIT(1) -#define IVPU_MMU_CR0_SMMUEN BIT(0) - #define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10) #define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8) #define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6) @@ -143,6 +180,16 @@ #define IVPU_MMU_CD_0_ASET BIT(47) #define IVPU_MMU_CD_0_ASID GENMASK_ULL(63, 48) +#define IVPU_MMU_T0SZ_48BIT 16 +#define IVPU_MMU_T0SZ_38BIT 26 + +#define IVPU_MMU_IPS_48BIT 5 +#define IVPU_MMU_IPS_44BIT 4 +#define IVPU_MMU_IPS_42BIT 3 +#define IVPU_MMU_IPS_40BIT 2 +#define IVPU_MMU_IPS_36BIT 1 +#define IVPU_MMU_IPS_32BIT 0 + #define IVPU_MMU_CD_1_TTB0_MASK GENMASK_ULL(51, 4) #define IVPU_MMU_STE_0_S1CDMAX GENMASK_ULL(63, 59) @@ -176,15 +223,20 @@ #define IVPU_MMU_REG_TIMEOUT_US (10 * USEC_PER_MSEC) #define IVPU_MMU_QUEUE_TIMEOUT_US (100 * USEC_PER_MSEC) -#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ)) | \ - (REG_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT)) | \ - (REG_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT)) | \ - (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \ - (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \ - (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \ - (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT))) +#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(IVPU_MMU_REG_GERROR, CMDQ)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT))) + +#define IVPU_MMU_CERROR_NONE 0x0 +#define IVPU_MMU_CERROR_ILL 0x1 +#define IVPU_MMU_CERROR_ABT 0x2 +#define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3 -static char *ivpu_mmu_event_to_str(u32 cmd) +static const char *ivpu_mmu_event_to_str(u32 cmd) { switch (cmd) { case IVPU_MMU_EVT_F_UUT: @@ -226,7 +278,23 @@ static char *ivpu_mmu_event_to_str(u32 cmd) case IVPU_MMU_EVT_F_VMS_FETCH: return "Fetch of VMS caused external abort"; default: - return "Unknown CMDQ command"; + return "Unknown event"; + } +} + +static 
const char *ivpu_mmu_cmdq_err_to_str(u32 err) +{ + switch (err) { + case IVPU_MMU_CERROR_NONE: + return "No error"; + case IVPU_MMU_CERROR_ILL: + return "Illegal command"; + case IVPU_MMU_CERROR_ABT: + return "External abort on command queue read"; + case IVPU_MMU_CERROR_ATC_INV_SYNC: + return "Sync failed to complete ATS invalidation"; + default: + return "Unknown error"; } } @@ -240,15 +308,15 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev) else val_ref = IVPU_MMU_IDR0_REF; - val = REGV_RD32(MTL_VPU_HOST_MMU_IDR0); + val = REGV_RD32(IVPU_MMU_REG_IDR0); if (val != val_ref) ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref); - val = REGV_RD32(MTL_VPU_HOST_MMU_IDR1); + val = REGV_RD32(IVPU_MMU_REG_IDR1); if (val != IVPU_MMU_IDR1_REF) ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF); - val = REGV_RD32(MTL_VPU_HOST_MMU_IDR3); + val = REGV_RD32(IVPU_MMU_REG_IDR3); if (val != IVPU_MMU_IDR3_REF) ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF); @@ -259,7 +327,7 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev) else val_ref = IVPU_MMU_IDR5_REF; - val = REGV_RD32(MTL_VPU_HOST_MMU_IDR5); + val = REGV_RD32(IVPU_MMU_REG_IDR5); if (val != val_ref) ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref); } @@ -366,19 +434,18 @@ static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev) return ret; } -static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val) +static int ivpu_mmu_reg_write_cr0(struct ivpu_device *vdev, u32 val) { - u32 reg_ack = reg + 4; /* ACK register is 4B after base register */ - u32 val_ack; - int ret; + REGV_WR32(IVPU_MMU_REG_CR0, val); - REGV_WR32(reg, val); + return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US); +} - ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Failed to write register 0x%x\n", reg); +static int ivpu_mmu_reg_write_irq_ctrl(struct ivpu_device *vdev, u32 val) +{ + REGV_WR32(IVPU_MMU_REG_IRQ_CTRL, val); - return ret; + return REGV_POLL_FLD(IVPU_MMU_REG_IRQ_CTRLACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US); } static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) @@ -386,35 +453,54 @@ static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN; int ret; - ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, 0); + ret = ivpu_mmu_reg_write_irq_ctrl(vdev, 0); if (ret) return ret; - return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, irq_ctrl); + return ivpu_mmu_reg_write_irq_ctrl(vdev, irq_ctrl); } static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev) { struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; + int ret; - return REGV_POLL(MTL_VPU_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons), - IVPU_MMU_QUEUE_TIMEOUT_US); + ret = REGV_POLL_FLD(IVPU_MMU_REG_CMDQ_CONS, VAL, cmdq->prod, + IVPU_MMU_QUEUE_TIMEOUT_US); + if (ret) + return ret; + + cmdq->cons = cmdq->prod; + + return 0; +} + +static bool ivpu_mmu_queue_is_full(struct ivpu_mmu_queue *q) +{ + return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) && + (IVPU_MMU_Q_WRP(q->prod) != IVPU_MMU_Q_WRP(q->cons))); +} + +static bool ivpu_mmu_queue_is_empty(struct ivpu_mmu_queue *q) +{ + return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) && + (IVPU_MMU_Q_WRP(q->prod) == IVPU_MMU_Q_WRP(q->cons))); } static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1) { - struct 
ivpu_mmu_queue *q = &vdev->mmu->cmdq; - u64 *queue_buffer = q->base; - int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); + struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; + u64 *queue_buffer = cmdq->base; + int idx = IVPU_MMU_Q_IDX(cmdq->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); - if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) { + if (ivpu_mmu_queue_is_full(cmdq)) { ivpu_err(vdev, "Failed to write MMU CMD %s\n", name); return -EBUSY; } queue_buffer[idx] = data0; queue_buffer[idx + 1] = data1; - q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK; + cmdq->prod = (cmdq->prod + 1) & IVPU_MMU_Q_WRAP_MASK; ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1); @@ -427,21 +513,27 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) u64 val; int ret; - val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) | - FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) | - FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) | - FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf); + val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC); ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0); if (ret) return ret; - clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); - REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, q->prod); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); + REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod); ret = ivpu_mmu_cmdq_wait_for_cons(vdev); - if (ret) - ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret); + if (ret) { + u32 err; + + val = REGV_RD32(IVPU_MMU_REG_CMDQ_CONS); + err = REG_GET_FLD(IVPU_MMU_REG_CMDQ_CONS, ERR, val); + + ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret, + ivpu_mmu_cmdq_err_to_str(err)); + ivpu_hw_diagnose_failure(vdev); + } return ret; } @@ -476,16 +568,16 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) int ret; memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE); - clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); mmu->cmdq.prod = 0; mmu->cmdq.cons = 0; memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); - clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); mmu->evtq.prod = 0; mmu->evtq.cons = 0; - ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, 0); + ret = ivpu_mmu_reg_write_cr0(vdev, 0); if (ret) return ret; @@ -495,17 +587,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) | FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) | FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB); - REGV_WR32(MTL_VPU_HOST_MMU_CR1, val); + REGV_WR32(IVPU_MMU_REG_CR1, val); - REGV_WR64(MTL_VPU_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q); - REGV_WR32(MTL_VPU_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg); + REGV_WR64(IVPU_MMU_REG_STRTAB_BASE, mmu->strtab.dma_q); + REGV_WR32(IVPU_MMU_REG_STRTAB_BASE_CFG, mmu->strtab.base_cfg); - REGV_WR64(MTL_VPU_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q); - REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, 0); - REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_CONS, 0); + REGV_WR64(IVPU_MMU_REG_CMDQ_BASE, mmu->cmdq.dma_q); + REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0); + REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0); - val = IVPU_MMU_CR0_CMDQEN; - ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + val = REG_SET_FLD(IVPU_MMU_REG_CR0, CMDQEN, 0); + ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; @@ -521,17 +613,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) if 
(ret) return ret; - REGV_WR64(MTL_VPU_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q); - REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC, 0); - REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, 0); + REGV_WR64(IVPU_MMU_REG_EVTQ_BASE, mmu->evtq.dma_q); + REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0); + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0); - val |= IVPU_MMU_CR0_EVTQEN; - ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val); + ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; - val |= IVPU_MMU_CR0_ATSCHK; - ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + val = REG_SET_FLD(IVPU_MMU_REG_CR0, ATSCHK, val); + ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; @@ -539,8 +631,8 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) if (ret) return ret; - val |= IVPU_MMU_CR0_SMMUEN; - return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + val = REG_SET_FLD(IVPU_MMU_REG_CR0, SMMUEN, val); + return ivpu_mmu_reg_write_cr0(vdev, val); } static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) @@ -571,7 +663,8 @@ static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) WRITE_ONCE(entry[1], str[1]); WRITE_ONCE(entry[0], str[0]); - clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]); } @@ -587,16 +680,11 @@ static int ivpu_mmu_strtab_init(struct ivpu_device *vdev) int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid) { struct ivpu_mmu_info *mmu = vdev->mmu; - int ret; - - ret = mutex_lock_interruptible(&mmu->lock); - if (ret) - return ret; + int ret = 0; - if (!mmu->on) { - ret = 0; + mutex_lock(&mmu->lock); + if (!mmu->on) goto unlock; - } ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid); if (ret) @@ -608,96 +696,71 @@ unlock: return ret; } -static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) +static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid) { struct ivpu_mmu_info *mmu = vdev->mmu; struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; u64 *entry; u64 cd[4]; - int ret; + int ret = 0; if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) return -EINVAL; entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); - - if (cd_dma != 0) { - cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) | - FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | - IVPU_MMU_CD_0_TCR_EPD1 | - IVPU_MMU_CD_0_AA64 | - IVPU_MMU_CD_0_R | - IVPU_MMU_CD_0_ASET | - IVPU_MMU_CD_0_V; - cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; - cd[2] = 0; - cd[3] = 0x0000000000007444; - - /* For global context generate memory fault on VPU */ - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - cd[0] |= IVPU_MMU_CD_0_A; - } else { - memset(cd, 0, sizeof(cd)); - } + drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid); + + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | + IVPU_MMU_CD_0_TCR_EPD1 | + IVPU_MMU_CD_0_AA64 | + IVPU_MMU_CD_0_R | + 
IVPU_MMU_CD_0_ASET; + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; + cd[2] = 0; + cd[3] = 0x0000000000007444; + + /* For global and reserved contexts generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID || ssid == IVPU_RESERVED_CONTEXT_MMU_SSID) + cd[0] |= IVPU_MMU_CD_0_A; + + if (valid) + cd[0] |= IVPU_MMU_CD_0_V; WRITE_ONCE(entry[1], cd[1]); WRITE_ONCE(entry[2], cd[2]); WRITE_ONCE(entry[3], cd[3]); WRITE_ONCE(entry[0], cd[0]); - clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); - - ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", - cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); - ret = mutex_lock_interruptible(&mmu->lock); - if (ret) - return ret; + ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + valid ? "valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); - if (!mmu->on) { - ret = 0; + mutex_lock(&mmu->lock); + if (!mmu->on) goto unlock; - } ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); if (ret) - goto unlock; + goto err_invalidate; ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + goto err_invalidate; unlock: mutex_unlock(&mmu->lock); - return ret; -} - -static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); - if (ret) - ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); - - return ret; -} - -static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) -{ - int ret; - - if (ssid == 0) { - ivpu_err(vdev, "Invalid SSID: %u\n", ssid); - return -EINVAL; - } - - ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); - if (ret) - ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); + return 0; +err_invalidate: + WRITE_ONCE(entry[0], 0); + mutex_unlock(&mmu->lock); return ret; } @@ -708,20 +771,17 @@ int ivpu_mmu_init(struct ivpu_device *vdev) ivpu_dbg(vdev, MMU, "Init..\n"); - drmm_mutex_init(&vdev->drm, &mmu->lock); ivpu_mmu_config_check(vdev); - ret = ivpu_mmu_structs_alloc(vdev); + ret = drmm_mutex_init(&vdev->drm, &mmu->lock); if (ret) return ret; - ret = ivpu_mmu_strtab_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); + ret = ivpu_mmu_structs_alloc(vdev); + if (ret) return ret; - } - ret = ivpu_mmu_cd_add_gbl(vdev); + ret = ivpu_mmu_strtab_init(vdev); if (ret) { ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); return ret; @@ -791,8 +851,9 @@ static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event) u64 in_addr = ((u64)event[5]) << 32 | event[4]; u32 sid = event[1]; - ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", - op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr); + ivpu_err_ratelimited(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", + op, ivpu_mmu_event_to_str(op), ssid, sid, + event[2], event[3], in_addr, fetch_addr); } static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) @@ -801,38 +862,123 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) u32 idx = IVPU_MMU_Q_IDX(evtq->cons); u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE); - evtq->prod = REGV_RD32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC); - if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) + evtq->prod = 
REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); + if (ivpu_mmu_queue_is_empty(evtq)) return NULL; - clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); - evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; - REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, evtq->cons); - return evt; } +static int ivpu_mmu_evtq_set(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(IVPU_MMU_REG_CR0); + + if (enable) + val = REG_SET_FLD(IVPU_MMU_REG_CR0, EVTQEN, val); + else + val = REG_CLR_FLD(IVPU_MMU_REG_CR0, EVTQEN, val); + REGV_WR32(IVPU_MMU_REG_CR0, val); + + return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US); +} + +static int ivpu_mmu_evtq_enable(struct ivpu_device *vdev) +{ + return ivpu_mmu_evtq_set(vdev, true); +} + +static int ivpu_mmu_evtq_disable(struct ivpu_device *vdev) +{ + return ivpu_mmu_evtq_set(vdev, false); +} + +void ivpu_mmu_discard_events(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + + mutex_lock(&mmu->lock); + /* + * Disable event queue (stop MMU from updating the producer) + * to allow synchronization of consumer and producer indexes + */ + ivpu_mmu_evtq_disable(vdev); + + vdev->mmu->evtq.cons = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); + vdev->mmu->evtq.prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); + + ivpu_mmu_evtq_enable(vdev); + + drm_WARN_ON_ONCE(&vdev->drm, vdev->mmu->evtq.cons != vdev->mmu->evtq.prod); + + mutex_unlock(&mmu->lock); +} + +int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + u64 *entry; + u64 val; + + if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) + return -EINVAL; + + mutex_lock(&mmu->lock); + + entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); + + val = READ_ONCE(entry[0]); + val &= ~IVPU_MMU_CD_0_R; + WRITE_ONCE(entry[0], val); + + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); + + ivpu_mmu_cmdq_write_cfgi_all(vdev); + ivpu_mmu_cmdq_sync(vdev); + + mutex_unlock(&mmu->lock); + + return 0; +} + void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) { - bool schedule_recovery = false; + struct ivpu_file_priv *file_priv; u32 *event; u32 ssid; ivpu_dbg(vdev, IRQ, "MMU event queue\n"); - while ((event = ivpu_mmu_get_event(vdev)) != NULL) { - ivpu_mmu_dump_event(vdev, event); - - ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - schedule_recovery = true; - else - ivpu_mmu_user_context_mark_invalid(vdev, ssid); + while ((event = ivpu_mmu_get_event(vdev))) { + ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, *event); + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID || + ssid == IVPU_RESERVED_CONTEXT_MMU_SSID) { + ivpu_mmu_dump_event(vdev, event); + ivpu_pm_trigger_recovery(vdev, "MMU event"); + return; + } + + file_priv = xa_load(&vdev->context_xa, ssid); + if (file_priv) { + if (!READ_ONCE(file_priv->has_mmu_faults)) { + ivpu_mmu_dump_event(vdev, event); + WRITE_ONCE(file_priv->has_mmu_faults, true); + } + } } - if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + queue_work(system_percpu_wq, &vdev->context_abort_work); +} + +void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) +{ + u32 *event; + + while ((event = ivpu_mmu_get_event(vdev)) != NULL) + ivpu_mmu_dump_event(vdev, event); } void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) @@ -841,43 +987,43 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) ivpu_dbg(vdev, IRQ, "MMU error\n"); - gerror_val = 
REGV_RD32(MTL_VPU_HOST_MMU_GERROR); - gerrorn_val = REGV_RD32(MTL_VPU_HOST_MMU_GERRORN); + gerror_val = REGV_RD32(IVPU_MMU_REG_GERROR); + gerrorn_val = REGV_RD32(IVPU_MMU_REG_GERRORN); active = gerror_val ^ gerrorn_val; if (!(active & IVPU_MMU_GERROR_ERR_MASK)) return; - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n"); - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n"); - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n"); - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n"); - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT, active)) ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n"); - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT, active)) ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n"); - if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, CMDQ, active)) ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n"); - REGV_WR32(MTL_VPU_HOST_MMU_GERRORN, gerror_val); + REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val); } -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) { - return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); + return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true); } -void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid) { - ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ + ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false); } diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index cb551126806b..1ce7529746ad 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -40,11 +40,14 @@ struct ivpu_mmu_info { int ivpu_mmu_init(struct ivpu_device *vdev); void ivpu_mmu_disable(struct ivpu_device *vdev); int ivpu_mmu_enable(struct ivpu_device *vdev); -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); -void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid); +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid); int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); +void ivpu_mmu_evtq_dump(struct ivpu_device *vdev); +void ivpu_mmu_discard_events(struct ivpu_device *vdev); +int ivpu_mmu_disable_ssid_events(struct ivpu_device *vdev, u32 ssid); #endif /* __IVPU_MMU_H__ */ diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 8ce9b12ac356..87ad593ef47d 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -5,27 +5,38 @@ #include 
<linux/bitfield.h> #include <linux/highmem.h> +#include <linux/set_memory.h> +#include <linux/vmalloc.h> + +#include <drm/drm_cache.h> #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" -#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30) +#define IVPU_MMU_VPU_ADDRESS_MASK GENMASK(47, 12) +#define IVPU_MMU_PGD_INDEX_MASK GENMASK(47, 39) +#define IVPU_MMU_PUD_INDEX_MASK GENMASK(38, 30) #define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21) #define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12) -#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0) +#define IVPU_MMU_ENTRY_FLAGS_MASK (BIT(52) | GENMASK(11, 0)) +#define IVPU_MMU_ENTRY_FLAG_CONT BIT(52) #define IVPU_MMU_ENTRY_FLAG_NG BIT(11) #define IVPU_MMU_ENTRY_FLAG_AF BIT(10) +#define IVPU_MMU_ENTRY_FLAG_RO BIT(7) #define IVPU_MMU_ENTRY_FLAG_USER BIT(6) #define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2) #define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1) #define IVPU_MMU_ENTRY_FLAG_VALID BIT(0) -#define IVPU_MMU_PAGE_SIZE SZ_4K -#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE) -#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE) -#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64)) +#define IVPU_MMU_PAGE_SIZE SZ_4K +#define IVPU_MMU_CONT_PAGES_SIZE (IVPU_MMU_PAGE_SIZE * 16) +#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE) +#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE) +#define IVPU_MMU_PUD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PMD_MAP_SIZE) +#define IVPU_MMU_PGD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PUD_MAP_SIZE) +#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64)) #define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000 #define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID) @@ -33,172 +44,378 @@ #define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \ IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID) -static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma) { - dma_addr_t pgd_dma; - u64 *pgd; + dma_addr_t dma_addr; + struct page *page; + void *cpu; - pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL); - if (!pgd) - return -ENOMEM; + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!page) + return NULL; - pgtable->pgd = pgd; - pgtable->pgd_dma = pgd_dma; + set_pages_array_wc(&page, 1); - return 0; + dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(vdev->drm.dev, dma_addr)) + goto err_free_page; + + cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (!cpu) + goto err_dma_unmap_page; + + + *dma = dma_addr; + return cpu; + +err_dma_unmap_page: + dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + +err_free_page: + put_page(page); + return NULL; +} + +static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr) +{ + struct page *page; + + if (cpu_addr) { + page = vmalloc_to_page(cpu_addr); + vunmap(cpu_addr); + dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE, + DMA_BIDIRECTIONAL); + set_pages_array_wb(&page, 1); + put_page(page); + } } -static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable) { - int pgd_index, pmd_index; + int pgd_idx, pud_idx, pmd_idx; + dma_addr_t pud_dma, pmd_dma, pte_dma; + u64 *pud_dma_ptr, *pmd_dma_ptr, *pte_dma_ptr; - for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) { - u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index]; - u64 *pmd = pgtable->pgd_entries[pgd_index]; + for (pgd_idx = 0; pgd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_idx) { + pud_dma_ptr = pgtable->pud_ptrs[pgd_idx]; + pud_dma = pgtable->pgd_dma_ptr[pgd_idx]; - if (!pmd_entries) + if (!pud_dma_ptr) continue; - for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) { - if (pmd_entries[pmd_index]) - dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, - pmd_entries[pmd_index], - pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); + for (pud_idx = 0; pud_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pud_idx) { + pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx]; + pmd_dma = pgtable->pud_ptrs[pgd_idx][pud_idx]; + + if (!pmd_dma_ptr) + continue; + + for (pmd_idx = 0; pmd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_idx) { + pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx]; + pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx]; + + ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma); + } + + kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]); + ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma); } - kfree(pmd_entries); - dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index], - pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); + kfree(pgtable->pmd_ptrs[pgd_idx]); + kfree(pgtable->pte_ptrs[pgd_idx]); + ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma); } - dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd, - pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK); + ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma); + pgtable->pgd_dma_ptr = NULL; + pgtable->pgd_dma = 0; +} + +static u64* +ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + u64 *pgd_dma_ptr = pgtable->pgd_dma_ptr; + dma_addr_t pgd_dma; + + if (pgd_dma_ptr) + return pgd_dma_ptr; + + pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); + if (!pgd_dma_ptr) + return NULL; + + pgtable->pgd_dma_ptr = pgd_dma_ptr; + pgtable->pgd_dma = pgd_dma; + + return pgd_dma_ptr; +} + +static u64* +ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx) +{ + u64 *pud_dma_ptr = pgtable->pud_ptrs[pgd_idx]; + dma_addr_t pud_dma; + + if (pud_dma_ptr) + return pud_dma_ptr; + + pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma); + if (!pud_dma_ptr) + return NULL; + + drm_WARN_ON(&vdev->drm, pgtable->pmd_ptrs[pgd_idx]); + pgtable->pmd_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); + if (!pgtable->pmd_ptrs[pgd_idx]) + goto err_free_pud_dma_ptr; + + drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx]); + pgtable->pte_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); + if (!pgtable->pte_ptrs[pgd_idx]) + goto err_free_pmd_ptrs; + + pgtable->pud_ptrs[pgd_idx] = pud_dma_ptr; + pgtable->pgd_dma_ptr[pgd_idx] = pud_dma | IVPU_MMU_ENTRY_VALID; + + return pud_dma_ptr; + +err_free_pmd_ptrs: + kfree(pgtable->pmd_ptrs[pgd_idx]); + +err_free_pud_dma_ptr: + ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma); + return NULL; } static u64* -ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index) +ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx, + int pud_idx) { - u64 **pmd_entries; + u64 *pmd_dma_ptr = 
pgtable->pmd_ptrs[pgd_idx][pud_idx]; dma_addr_t pmd_dma; - u64 *pmd; - if (pgtable->pgd_entries[pgd_index]) - return pgtable->pgd_entries[pgd_index]; + if (pmd_dma_ptr) + return pmd_dma_ptr; - pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL); - if (!pmd) + pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma); + if (!pmd_dma_ptr) return NULL; - pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); - if (!pmd_entries) - goto err_free_pgd; + drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx][pud_idx]); + pgtable->pte_ptrs[pgd_idx][pud_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); + if (!pgtable->pte_ptrs[pgd_idx][pud_idx]) + goto err_free_pmd_dma_ptr; - pgtable->pgd_entries[pgd_index] = pmd; - pgtable->pgd_cpu_entries[pgd_index] = pmd_entries; - pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID; + pgtable->pmd_ptrs[pgd_idx][pud_idx] = pmd_dma_ptr; + pgtable->pud_ptrs[pgd_idx][pud_idx] = pmd_dma | IVPU_MMU_ENTRY_VALID; - return pmd; + return pmd_dma_ptr; -err_free_pgd: - dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma); +err_free_pmd_dma_ptr: + ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma); return NULL; } static u64* ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, - int pgd_index, int pmd_index) + int pgd_idx, int pud_idx, int pmd_idx) { + u64 *pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx]; dma_addr_t pte_dma; - u64 *pte; - if (pgtable->pgd_cpu_entries[pgd_index][pmd_index]) - return pgtable->pgd_cpu_entries[pgd_index][pmd_index]; + if (pte_dma_ptr) + return pte_dma_ptr; - pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL); - if (!pte) + pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma); + if (!pte_dma_ptr) return NULL; - pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte; - pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID; + pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma_ptr; + pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma | IVPU_MMU_ENTRY_VALID; - return pte; + return pte_dma_ptr; } static int ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, dma_addr_t dma_addr, int prot) + u64 vpu_addr, dma_addr_t dma_addr, u64 prot) { u64 *pte; - int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); - int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); - int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID); - /* Allocate PMD - second level page table if needed */ - if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index)) + /* Allocate PGD - first level page table if needed */ + if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable)) return -ENOMEM; - /* Allocate PTE - third level page table if needed */ - pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index); + /* Allocate PUD - second level page table if needed */ + if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx)) + return -ENOMEM; + + /* Allocate PMD - third level page table if needed */ + if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_idx, pud_idx)) + return -ENOMEM; + + /* Allocate PTE - fourth level page table if needed */ + pte = ivpu_mmu_ensure_pte(vdev, 
&ctx->pgtable, pgd_idx, pud_idx, pmd_idx); if (!pte) return -ENOMEM; - /* Update PTE - third level page table with DMA address */ - pte[pte_index] = dma_addr | prot; + /* Update PTE */ + pte[pte_idx] = dma_addr | prot; return 0; } -static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr) +static int +ivpu_mmu_context_map_cont_64k(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, + dma_addr_t dma_addr, u64 prot) { - int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); - int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); - int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + size_t size = IVPU_MMU_CONT_PAGES_SIZE; - /* Update PTE with dummy physical address and clear flags */ - ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID; -} - -static void -ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) -{ - u64 end_addr = vpu_addr + size; - u64 *pgd = ctx->pgtable.pgd; + drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr, size)); + drm_WARN_ON(&vdev->drm, !IS_ALIGNED(dma_addr, size)); - /* Align to PMD entry (2 MB) */ - vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1); + prot |= IVPU_MMU_ENTRY_FLAG_CONT; - while (vpu_addr < end_addr) { - int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); - u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE; - u64 *pmd = ctx->pgtable.pgd_entries[pgd_index]; + while (size) { + int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot); - while (vpu_addr < end_addr && vpu_addr < pmd_end) { - int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); - u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index]; + if (ret) + return ret; - clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE); - vpu_addr += IVPU_MMU_PTE_MAP_SIZE; - } - clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE); + size -= IVPU_MMU_PAGE_SIZE; + vpu_addr += IVPU_MMU_PAGE_SIZE; + dma_addr += IVPU_MMU_PAGE_SIZE; } - clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE); + + return 0; +} + +static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + /* Update PTE with dummy physical address and clear flags */ + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID; } static int ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot) + u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot) { + int map_size; + int ret; + while (size) { - int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot); + if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE && + IS_ALIGNED(vpu_addr | dma_addr, IVPU_MMU_CONT_PAGES_SIZE)) { + ret = ivpu_mmu_context_map_cont_64k(vdev, ctx, vpu_addr, dma_addr, prot); + map_size = IVPU_MMU_CONT_PAGES_SIZE; + } else { + ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot); + map_size = IVPU_MMU_PAGE_SIZE; + } if (ret) return ret; + vpu_addr += map_size; + dma_addr += map_size; + size -= map_size; + } + + return 0; +} + +static void ivpu_mmu_context_set_page_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, 
vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] |= IVPU_MMU_ENTRY_FLAG_RO; +} + +static void ivpu_mmu_context_split_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] &= ~IVPU_MMU_ENTRY_FLAG_CONT; +} + +static void ivpu_mmu_context_split_64k_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + u64 start = ALIGN_DOWN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE); + u64 end = ALIGN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE); + u64 offset = 0; + + ivpu_dbg(vdev, MMU_MAP, "Split 64K page ctx: %u vpu_addr: 0x%llx\n", ctx->id, vpu_addr); + + while (start + offset < end) { + ivpu_mmu_context_split_page(vdev, ctx, start + offset); + offset += IVPU_MMU_PAGE_SIZE; + } +} + +int +ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, + size_t size) +{ + u64 end = vpu_addr + size; + size_t size_left = size; + int ret; + + if (size == 0) + return 0; + + if (drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr | size, IVPU_MMU_PAGE_SIZE))) + return -EINVAL; + + mutex_lock(&ctx->lock); + + ivpu_dbg(vdev, MMU_MAP, "Set read-only pages ctx: %u vpu_addr: 0x%llx size: %lu\n", + ctx->id, vpu_addr, size); + + if (!ivpu_disable_mmu_cont_pages) { + /* Split 64K contiguous page at the beginning if needed */ + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE)) + ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr); + + /* Split 64K contiguous page at the end if needed */ + if (!IS_ALIGNED(vpu_addr + size, IVPU_MMU_CONT_PAGES_SIZE)) + ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr + size); + } + + while (size_left) { + if (vpu_addr < end) + ivpu_mmu_context_set_page_ro(vdev, ctx, vpu_addr); + vpu_addr += IVPU_MMU_PAGE_SIZE; - dma_addr += IVPU_MMU_PAGE_SIZE; - size -= IVPU_MMU_PAGE_SIZE; + size_left -= IVPU_MMU_PAGE_SIZE; } + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); + + mutex_unlock(&ctx->lock); + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + return 0; } @@ -213,48 +430,71 @@ static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_a int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only) { + size_t start_vpu_addr = vpu_addr; struct scatterlist *sg; - int prot; int ret; + u64 prot; u64 i; + if (drm_WARN_ON(&vdev->drm, !ctx)) + return -EINVAL; + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) return -EINVAL; - /* - * VPU is only 32 bit, but DMA engine is 38 bit - * Ranges < 2 GB are reserved for VPU internal registers - * Limit range to 8 GB - */ - if (vpu_addr < SZ_2G || vpu_addr > SZ_8G) + + if (vpu_addr & ~IVPU_MMU_VPU_ADDRESS_MASK) return -EINVAL; prot = IVPU_MMU_ENTRY_MAPPED; if (llc_coherent) prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT; + if (read_only) + prot |= IVPU_MMU_ENTRY_FLAG_RO; mutex_lock(&ctx->lock); 
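+	/*
+	 * Editorial sketch (not part of this patch): each DMA segment below is
+	 * walked through the four-level table, indexed as PGD[47:39],
+	 * PUD[38:30], PMD[29:21], PTE[20:12]; 64K-aligned runs additionally
+	 * get the contiguous-PTE hint. The page tables live in write-combined
+	 * pages, hence the wmb() before the TLB invalidation further down.
+	 */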
for_each_sgtable_dma_sg(sgt, sg, i) { - u64 dma_addr = sg_dma_address(sg) - sg->offset; + dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset; size_t size = sg_dma_len(sg) + sg->offset; + ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", + ctx->id, dma_addr, vpu_addr, size); + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); - mutex_unlock(&ctx->lock); - return ret; + goto err_unmap_pages; } - ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); vpu_addr += size; } - mutex_unlock(&ctx->lock); + if (!ctx->is_cd_valid) { + ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n", + ctx->id, ret); + goto err_unmap_pages; + } + ctx->is_cd_valid = true; + } + + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + goto err_unmap_pages; + } + + mutex_unlock(&ctx->lock); + return 0; + +err_unmap_pages: + ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr); + mutex_unlock(&ctx->lock); return ret; } @@ -266,133 +506,135 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct int ret; u64 i; - if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) - ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr); + if (drm_WARN_ON(&vdev->drm, !ctx)) + return; mutex_lock(&ctx->lock); for_each_sgtable_dma_sg(sgt, sg, i) { + dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset; size_t size = sg_dma_len(sg) + sg->offset; + ivpu_dbg(vdev, MMU_MAP, "Unmap ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", + ctx->id, dma_addr, vpu_addr, size); + ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); - ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); vpu_addr += size; } + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); + mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); if (ret) - ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + ivpu_warn_ratelimited(vdev, "Failed to invalidate TLB for ctx %u: %d\n", + ctx->id, ret); } int -ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, - const struct ivpu_addr_range *range, - u64 size, struct drm_mm_node *node) +ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node) { - lockdep_assert_held(&ctx->lock); + int ret; + + WARN_ON(!range); - return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, - 0, range->start, range->end, DRM_MM_INSERT_BEST); + mutex_lock(&ctx->lock); + if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) { + ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0, + range->start, range->end, DRM_MM_INSERT_BEST); + if (!ret) + goto unlock; + } + + ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0, + range->start, range->end, DRM_MM_INSERT_BEST); +unlock: + mutex_unlock(&ctx->lock); + return ret; } void -ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) +ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) { - lockdep_assert_held(&ctx->lock); - + mutex_lock(&ctx->lock); 
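+	/*
+	 * Editorial note: with the _locked suffix dropped, locking now lives
+	 * in the helper itself; callers must no longer hold ctx->lock.
+	 */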
drm_mm_remove_node(node); + mutex_unlock(&ctx->lock); } -static int -ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) { u64 start, end; - int ret; mutex_init(&ctx->lock); - INIT_LIST_HEAD(&ctx->bo_list); - - ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); - if (ret) - return ret; if (!context_id) { - start = vdev->hw->ranges.global_low.start; - end = vdev->hw->ranges.global_high.end; + start = vdev->hw->ranges.runtime.start; + end = vdev->hw->ranges.shave.end; } else { - start = vdev->hw->ranges.user_low.start; - end = vdev->hw->ranges.user_high.end; + start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start); + end = max_t(u64, vdev->hw->ranges.user.end, vdev->hw->ranges.dma.end); } drm_mm_init(&ctx->mm, start, end - start); ctx->id = context_id; - - return 0; } -static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { - drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd); + if (ctx->is_cd_valid) { + ivpu_mmu_cd_clear(vdev, ctx->id); + ctx->is_cd_valid = false; + } mutex_destroy(&ctx->lock); - ivpu_mmu_pgtable_free(vdev, &ctx->pgtable); + ivpu_mmu_pgtables_free(vdev, &ctx->pgtable); drm_mm_takedown(&ctx->mm); } -int ivpu_mmu_global_context_init(struct ivpu_device *vdev) +void ivpu_mmu_global_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); + ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); } void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_context_fini(vdev, &vdev->gctx); + ivpu_mmu_context_fini(vdev, &vdev->gctx); } -void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) -{ - struct ivpu_file_priv *file_priv; - - xa_lock(&vdev->context_xa); - - file_priv = xa_load(&vdev->context_xa, ssid); - if (file_priv) - file_priv->has_mmu_faults = true; - - xa_unlock(&vdev->context_xa); -} - -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) +int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev) { int ret; - drm_WARN_ON(&vdev->drm, !ctx_id); + ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); - ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); - if (ret) { - ivpu_err(vdev, "Failed to initialize context: %d\n", ret); - return ret; + mutex_lock(&vdev->rctx.lock); + + if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) { + ivpu_err(vdev, "Failed to allocate root page table for reserved context\n"); + ret = -ENOMEM; + goto err_ctx_fini; } - ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); + ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable); if (ret) { - ivpu_err(vdev, "Failed to set page table: %d\n", ret); - goto err_context_fini; + ivpu_err(vdev, "Failed to set context descriptor for reserved context\n"); + goto err_ctx_fini; } - return 0; + mutex_unlock(&vdev->rctx.lock); + return ret; -err_context_fini: - ivpu_mmu_context_fini(vdev, ctx); +err_ctx_fini: + mutex_unlock(&vdev->rctx.lock); + ivpu_mmu_context_fini(vdev, &vdev->rctx); return ret; } -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) { - drm_WARN_ON(&vdev->drm, !ctx->id); - - ivpu_mmu_clear_pgtable(vdev, ctx->id); - 
ivpu_mmu_context_fini(vdev, ctx); + ivpu_mmu_cd_clear(vdev, vdev->rctx.id); + ivpu_mmu_context_fini(vdev, &vdev->rctx); } diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index ddf11b95023a..663a11a9db11 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -12,39 +12,40 @@ struct ivpu_device; struct ivpu_file_priv; struct ivpu_addr_range; -#define IVPU_MMU_PGTABLE_ENTRIES 512 +#define IVPU_MMU_PGTABLE_ENTRIES 512ull struct ivpu_mmu_pgtable { - u64 **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES]; - u64 *pgd_entries[IVPU_MMU_PGTABLE_ENTRIES]; - u64 *pgd; + u64 ***pte_ptrs[IVPU_MMU_PGTABLE_ENTRIES]; + u64 **pmd_ptrs[IVPU_MMU_PGTABLE_ENTRIES]; + u64 *pud_ptrs[IVPU_MMU_PGTABLE_ENTRIES]; + u64 *pgd_dma_ptr; dma_addr_t pgd_dma; }; struct ivpu_mmu_context { - struct mutex lock; /* protects: mm, pgtable, bo_list */ + struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */ struct drm_mm mm; struct ivpu_mmu_pgtable pgtable; - struct list_head bo_list; + bool is_cd_valid; u32 id; }; -int ivpu_mmu_global_context_init(struct ivpu_device *vdev); +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id); +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_mmu_global_context_init(struct ivpu_device *vdev); void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); +int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev); +void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev); -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); -void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); - -int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, - const struct ivpu_addr_range *range, - u64 size, struct drm_mm_node *node); -void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, - struct drm_mm_node *node); +int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node); +void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node); int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only); void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt); +int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, size_t size); #endif /* __IVPU_MMU_CONTEXT_H__ */ diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c new file mode 100644 index 000000000000..1d9c1cb17924 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <drm/drm_file.h> +#include <linux/pm_runtime.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ms.h" +#include "ivpu_pm.h" + +#define MS_INFO_BUFFER_SIZE SZ_64K +#define MS_NUM_BUFFERS 2 +#define MS_READ_PERIOD_MULTIPLIER 2 +#define MS_MIN_SAMPLE_PERIOD_NS 1000000 + +static struct ivpu_ms_instance * +get_instance_by_mask(struct ivpu_file_priv *file_priv, 
u64 metric_mask) +{ + struct ivpu_ms_instance *ms; + + lockdep_assert_held(&file_priv->ms_lock); + + list_for_each_entry(ms, &file_priv->ms_instance_list, ms_instance_node) + if (ms->mask == metric_mask) + return ms; + + return NULL; +} + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_start *args = data; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u32 sample_size; + u64 buf_size; + int ret; + + if (!args->metric_group_mask || !args->read_period_samples || + args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) + return -EINVAL; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&file_priv->ms_lock); + + if (get_instance_by_mask(file_priv, args->metric_group_mask)) { + ivpu_dbg(vdev, IOCTL, "Instance already exists (mask %#llx)\n", + args->metric_group_mask); + ret = -EALREADY; + goto unlock; + } + + ms = kzalloc(sizeof(*ms), GFP_KERNEL); + if (!ms) { + ret = -ENOMEM; + goto unlock; + } + + ms->mask = args->metric_group_mask; + + ret = ivpu_jsm_metric_streamer_info(vdev, ms->mask, 0, 0, &sample_size, NULL); + if (ret) + goto err_free_ms; + + buf_size = PAGE_ALIGN((u64)args->read_period_samples * sample_size * + MS_READ_PERIOD_MULTIPLIER * MS_NUM_BUFFERS); + if (buf_size > ivpu_hw_range_size(&vdev->hw->ranges.global)) { + ivpu_dbg(vdev, IOCTL, "Requested MS buffer size %llu exceeds range size %llu\n", + buf_size, ivpu_hw_range_size(&vdev->hw->ranges.global)); + ret = -EINVAL; + goto err_free_ms; + } + + ms->bo = ivpu_bo_create_global(vdev, buf_size, DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + if (!ms->bo) { + ivpu_dbg(vdev, IOCTL, "Failed to allocate MS buffer (size %llu)\n", buf_size); + ret = -ENOMEM; + goto err_free_ms; + } + + ms->buff_size = ivpu_bo_size(ms->bo) / MS_NUM_BUFFERS; + ms->active_buff_vpu_addr = ms->bo->vpu_addr; + ms->inactive_buff_vpu_addr = ms->bo->vpu_addr + ms->buff_size; + ms->active_buff_ptr = ivpu_bo_vaddr(ms->bo); + ms->inactive_buff_ptr = ivpu_bo_vaddr(ms->bo) + ms->buff_size; + + ret = ivpu_jsm_metric_streamer_start(vdev, ms->mask, args->sampling_period_ns, + ms->active_buff_vpu_addr, ms->buff_size); + if (ret) + goto err_free_bo; + + args->sample_size = sample_size; + args->max_data_size = ivpu_bo_size(ms->bo); + list_add_tail(&ms->ms_instance_node, &file_priv->ms_instance_list); + goto unlock; + +err_free_bo: + ivpu_bo_free(ms->bo); +err_free_ms: + kfree(ms); +unlock: + mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); + return ret; +} + +static int +copy_leftover_bytes(struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 copy_bytes; + + if (ms->leftover_bytes) { + copy_bytes = min(user_size - *user_bytes_copied, ms->leftover_bytes); + if (copy_to_user(user_ptr + *user_bytes_copied, ms->leftover_addr, copy_bytes)) + return -EFAULT; + + ms->leftover_bytes -= copy_bytes; + ms->leftover_addr += copy_bytes; + *user_bytes_copied += copy_bytes; + } + + return 0; +} + +static int +copy_samples_to_user(struct ivpu_device *vdev, struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 bytes_written; + int ret; + + *user_bytes_copied = 0; + + ret = copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); + if (ret) + return ret; + + if (*user_bytes_copied == user_size) + return 0; + + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, ms->inactive_buff_vpu_addr, + 
ms->buff_size, &bytes_written); + if (ret) + return ret; + + swap(ms->active_buff_vpu_addr, ms->inactive_buff_vpu_addr); + swap(ms->active_buff_ptr, ms->inactive_buff_ptr); + + ms->leftover_bytes = bytes_written; + ms->leftover_addr = ms->inactive_buff_ptr; + + return copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); +} + +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 bytes_written; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (!ms) { + ivpu_dbg(vdev, IOCTL, "Instance doesn't exist for mask: %#llx\n", + args->metric_group_mask); + ret = -EINVAL; + goto unlock; + } + + if (!args->buffer_size) { + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, 0, 0, &bytes_written); + if (ret) + goto unlock; + args->data_size = bytes_written + ms->leftover_bytes; + goto unlock; + } + + if (!args->buffer_ptr) { + ret = -EINVAL; + goto unlock; + } + + ret = copy_samples_to_user(vdev, ms, u64_to_user_ptr(args->buffer_ptr), + args->buffer_size, &args->data_size); +unlock: + mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); + return ret; +} + +static void free_instance(struct ivpu_file_priv *file_priv, struct ivpu_ms_instance *ms) +{ + lockdep_assert_held(&file_priv->ms_lock); + + list_del(&ms->ms_instance_node); + ivpu_jsm_metric_streamer_stop(file_priv->vdev, ms->mask); + ivpu_bo_free(ms->bo); + kfree(ms); +} + +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (ms) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); + return ms ? 
0 : -EINVAL; +} + +static inline struct ivpu_bo *get_ms_info_bo(struct ivpu_file_priv *file_priv) +{ + lockdep_assert_held(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) + return file_priv->ms_info_bo; + + file_priv->ms_info_bo = ivpu_bo_create_global(file_priv->vdev, MS_INFO_BUFFER_SIZE, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + return file_priv->ms_info_bo; +} + +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_bo *bo; + u64 info_size; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + if (!args->buffer_size) + return ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, + 0, 0, NULL, &args->data_size); + if (!args->buffer_ptr) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + bo = get_ms_info_bo(file_priv); + if (!bo) { + ret = -ENOMEM; + goto unlock; + } + + ret = ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, bo->vpu_addr, + ivpu_bo_size(bo), NULL, &info_size); + if (ret) + goto unlock; + + if (args->buffer_size < info_size) { + ret = -ENOSPC; + goto unlock; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer_ptr), ivpu_bo_vaddr(bo), info_size)) + ret = -EFAULT; + + args->data_size = info_size; +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) +{ + struct ivpu_ms_instance *ms, *tmp; + struct ivpu_device *vdev = file_priv->vdev; + + pm_runtime_get_sync(vdev->drm.dev); + + mutex_lock(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) { + ivpu_bo_free(file_priv->ms_info_bo); + file_priv->ms_info_bo = NULL; + } + + list_for_each_entry_safe(ms, tmp, &file_priv->ms_instance_list, ms_instance_node) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); + + pm_runtime_put_autosuspend(vdev->drm.dev); +} + +void ivpu_ms_cleanup_all(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + ivpu_ms_cleanup(file_priv); + + mutex_unlock(&vdev->context_list_lock); +} diff --git a/drivers/accel/ivpu/ivpu_ms.h b/drivers/accel/ivpu/ivpu_ms.h new file mode 100644 index 000000000000..fbd5ebebc3d9 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ +#ifndef __IVPU_MS_H__ +#define __IVPU_MS_H__ + +#include <linux/list.h> + +struct drm_device; +struct drm_file; +struct ivpu_bo; +struct ivpu_device; +struct ivpu_file_priv; + +struct ivpu_ms_instance { + struct ivpu_bo *bo; + struct list_head ms_instance_node; + u64 mask; + u64 buff_size; + u64 active_buff_vpu_addr; + u64 inactive_buff_vpu_addr; + void *active_buff_ptr; + void *inactive_buff_ptr; + u64 leftover_bytes; + void *leftover_addr; +}; + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv); +void ivpu_ms_cleanup_all(struct ivpu_device *vdev); + +#endif /* __IVPU_MS_H__ */ diff --git 
a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 553bcbd787b3..480c075d87f6 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/highmem.h> @@ -9,18 +9,33 @@ #include <linux/pm_runtime.h> #include <linux/reboot.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" #include "ivpu_drv.h" -#include "ivpu_hw.h" #include "ivpu_fw.h" +#include "ivpu_fw_log.h" +#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" +#include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" static bool ivpu_disable_recovery; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); -MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected"); +MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected"); +#endif + +static unsigned long ivpu_tdr_timeout_ms; +module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); +MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); + +static unsigned long ivpu_inference_timeout_ms; +module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644); +MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default"); #define PM_RESCHEDULE_LIMIT 5 @@ -30,14 +45,16 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_cmdq_reset_all_contexts(vdev); ivpu_ipc_reset(vdev); + ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; + fw->last_heartbeat = 0; } static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; - struct vpu_boot_params *bp = fw->mem->kvaddr; + struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem_bp); if (!bp->save_restore_ret_address) { ivpu_pm_prepare_cold_boot(vdev); @@ -52,11 +69,11 @@ static int ivpu_suspend(struct ivpu_device *vdev) { int ret; + ivpu_prepare_for_reset(vdev); + ret = ivpu_shutdown(vdev); - if (ret) { - ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); - return ret; - } + if (ret) + ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret); return ret; } @@ -66,100 +83,186 @@ static int ivpu_resume(struct ivpu_device *vdev) int ret; retry: + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); + pci_restore_state(to_pci_dev(vdev->drm.dev)); + ret = ivpu_hw_power_up(vdev); if (ret) { ivpu_err(vdev, "Failed to power up HW: %d\n", ret); - return ret; + goto err_power_down; } ret = ivpu_mmu_enable(vdev); if (ret) { ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); - ivpu_hw_power_down(vdev); - return ret; + goto err_power_down; } ret = ivpu_boot(vdev); - if (ret) { - ivpu_mmu_disable(vdev); - ivpu_hw_power_down(vdev); - if (!ivpu_fw_is_cold_boot(vdev)) { - ivpu_warn(vdev, "Failed to resume the FW: %d. 
Retrying cold boot..\n", ret); - ivpu_pm_prepare_cold_boot(vdev); - goto retry; - } else { - ivpu_err(vdev, "Failed to resume the FW: %d\n", ret); - } + if (ret) + goto err_mmu_disable; + + return 0; + +err_mmu_disable: + ivpu_mmu_disable(vdev); +err_power_down: + ivpu_hw_power_down(vdev); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + + if (!ivpu_fw_is_cold_boot(vdev)) { + ivpu_pm_prepare_cold_boot(vdev); + goto retry; + } else { + ivpu_err(vdev, "Failed to resume the FW: %d\n", ret); } return ret; } +static void ivpu_pm_reset_begin(struct ivpu_device *vdev) +{ + pm_runtime_disable(vdev->drm.dev); + + atomic_inc(&vdev->pm->reset_counter); + atomic_set(&vdev->pm->reset_pending, 1); + down_write(&vdev->pm->reset_lock); +} + +static void ivpu_pm_reset_complete(struct ivpu_device *vdev) +{ + int ret; + + ivpu_pm_prepare_cold_boot(vdev); + ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); + + ret = ivpu_resume(vdev); + if (ret) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } else { + pm_runtime_set_active(vdev->drm.dev); + } + + up_write(&vdev->pm->reset_lock); + atomic_set(&vdev->pm->reset_pending, 0); + + pm_runtime_mark_last_busy(vdev->drm.dev); + pm_runtime_enable(vdev->drm.dev); +} + static void ivpu_pm_recovery_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device *vdev = pm->vdev; + struct ivpu_device *vdev = pm->vdev; char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; - int ret; - ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); - if (ret) - ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); + ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + ivpu_suspend(vdev); + } + + ivpu_pm_reset_complete(vdev); kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); } -void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) +void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) { - struct ivpu_pm_info *pm = vdev->pm; + ivpu_err(vdev, "Recovery triggered by %s\n", reason); if (ivpu_disable_recovery) { ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n"); return; } - if (ivpu_is_fpga(vdev)) { - ivpu_err(vdev, "Recovery not available on FPGA\n"); - return; + /* Trigger recovery if it's not in progress */ + if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) { + ivpu_hw_diagnose_failure(vdev); + ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */ + queue_work(system_dfl_wq, &vdev->pm->recovery_work); + } +} + +static void ivpu_job_timeout_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); + struct ivpu_device *vdev = pm->vdev; + unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? 
ivpu_inference_timeout_ms : + vdev->timeout.inference; + u64 inference_max_retries; + u64 heartbeat; + + if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) { + ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n"); + goto recovery; } - /* Schedule recovery if it's not in progress */ - if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) { - ivpu_hw_irq_disable(vdev); - queue_work(system_long_wq, &pm->recovery_work); + inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms); + if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) { + ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n", + inference_max_retries); + goto recovery; } + + vdev->fw->last_heartbeat = heartbeat; + ivpu_start_job_timeout_detection(vdev); + return; + +recovery: + atomic_set(&vdev->job_timeout_counter, 0); + ivpu_pm_trigger_recovery(vdev, "TDR"); +} + +void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) +{ + unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + + /* No-op if already queued */ + queue_delayed_work(system_percpu_wq, &vdev->pm->job_timeout_work, + msecs_to_jiffies(timeout_ms)); +} + +void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev) +{ + cancel_delayed_work_sync(&vdev->pm->job_timeout_work); + atomic_set(&vdev->job_timeout_counter, 0); } int ivpu_pm_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); - int ret; + unsigned long timeout; + trace_pm("suspend"); ivpu_dbg(vdev, PM, "Suspend..\n"); - ret = ivpu_suspend(vdev); - if (ret && vdev->pm->suspend_reschedule_counter) { - ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", - vdev->pm->suspend_reschedule_counter); - pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); - vdev->pm->suspend_reschedule_counter--; - return -EBUSY; - } else if (!vdev->pm->suspend_reschedule_counter) { - ivpu_warn(vdev, "Failed to enter idle, force suspend\n"); - ivpu_pm_prepare_cold_boot(vdev); - } else { - ivpu_pm_prepare_warm_boot(vdev); + timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); + while (!ivpu_hw_is_idle(vdev)) { + cond_resched(); + if (time_after_eq(jiffies, timeout)) { + ivpu_err(vdev, "Failed to enter idle on system suspend\n"); + return -EBUSY; + } } - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + ivpu_jsm_pwr_d0i3_enter(vdev); - pci_save_state(to_pci_dev(dev)); - pci_set_power_state(to_pci_dev(dev), PCI_D3hot); + ivpu_suspend(vdev); + ivpu_pm_prepare_warm_boot(vdev); ivpu_dbg(vdev, PM, "Suspend done.\n"); + trace_pm("suspend done"); - return ret; + return 0; } int ivpu_pm_resume_cb(struct device *dev) @@ -168,16 +271,15 @@ int ivpu_pm_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("resume"); ivpu_dbg(vdev, PM, "Resume..\n"); - pci_set_power_state(to_pci_dev(dev), PCI_D0); - pci_restore_state(to_pci_dev(dev)); - ret = ivpu_resume(vdev); if (ret) ivpu_err(vdev, "Failed to resume: %d\n", ret); ivpu_dbg(vdev, PM, "Resume done.\n"); + trace_pm("resume done"); return ret; } @@ -186,32 +288,40 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); - int ret; + int ret, ret_d0i3; + bool is_idle; + + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); 
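
As a sanity check, the hang-detection budget computed in ivpu_job_timeout_work() above can be reproduced in isolation. A minimal standalone sketch, assuming illustrative timeout values (the real defaults come from vdev->timeout and may be overridden by the tdr_timeout_ms and inference_timeout_ms module parameters):

/* Standalone sketch of the TDR retry arithmetic; values are assumptions. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long tdr_ms = 2000;        /* assumed vdev->timeout.tdr */
	unsigned long inference_ms = 60000; /* assumed vdev->timeout.inference */

	/*
	 * One heartbeat check runs per TDR period; recovery is forced once
	 * this many checks have passed, even if the heartbeat keeps
	 * progressing.
	 */
	unsigned long max_retries = DIV_ROUND_UP(inference_ms, tdr_ms);

	printf("heartbeat checks before forced recovery: %lu\n", max_retries);
	return 0;
}

With these assumed values a stalled heartbeat triggers recovery after a single 2000 ms TDR period, while a job whose heartbeat keeps progressing is tolerated for roughly 30 checks, i.e. the 60 s inference budget.
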
+ trace_pm("runtime suspend"); ivpu_dbg(vdev, PM, "Runtime suspend..\n"); - if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { - ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", - vdev->pm->suspend_reschedule_counter); - pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); - vdev->pm->suspend_reschedule_counter--; - return -EAGAIN; - } + ivpu_mmu_disable(vdev); + + is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent; + if (!is_idle) + ivpu_err(vdev, "NPU is not idle before autosuspend\n"); + + ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev); + if (ret_d0i3) + ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3); ret = ivpu_suspend(vdev); if (ret) - ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); + ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret); - if (!vdev->pm->suspend_reschedule_counter) { - ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n"); + if (!is_idle || ret_d0i3) { + ivpu_err(vdev, "Forcing cold boot due to previous errors\n"); + atomic_inc(&vdev->pm->reset_counter); + ivpu_dev_coredump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); } - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); + trace_pm("runtime suspend done"); return 0; } @@ -222,6 +332,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("runtime resume"); ivpu_dbg(vdev, PM, "Runtime resume..\n"); ret = ivpu_resume(vdev); @@ -229,6 +340,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); ivpu_dbg(vdev, PM, "Runtime resume done.\n"); + trace_pm("runtime resume done"); return ret; } @@ -237,20 +349,17 @@ int ivpu_rpm_get(struct ivpu_device *vdev) { int ret; - ivpu_dbg(vdev, RPM, "rpm_get count %d\n", atomic_read(&vdev->drm.dev->power.usage_count)); - ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (!drm_WARN_ON(&vdev->drm, ret < 0)) - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + if (ret < 0) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } return ret; } void ivpu_rpm_put(struct ivpu_device *vdev) { - ivpu_dbg(vdev, RPM, "rpm_put count %d\n", atomic_read(&vdev->drm.dev->power.usage_count)); - - pm_runtime_mark_last_busy(vdev->drm.dev); pm_runtime_put_autosuspend(vdev->drm.dev); } @@ -258,72 +367,145 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - pm_runtime_get_sync(vdev->drm.dev); - ivpu_dbg(vdev, PM, "Pre-reset..\n"); - atomic_set(&vdev->pm->in_reset, 1); - ivpu_shutdown(vdev); - ivpu_pm_prepare_cold_boot(vdev); - ivpu_jobs_abort_all(vdev); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); + } + ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } void ivpu_pm_reset_done_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - int ret; ivpu_dbg(vdev, PM, "Post-reset..\n"); - ret = ivpu_resume(vdev); - if (ret) - ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - atomic_set(&vdev->pm->in_reset, 0); - ivpu_dbg(vdev, PM, "Post-reset done.\n"); - pm_runtime_put_autosuspend(vdev->drm.dev); + ivpu_pm_reset_complete(vdev); + + ivpu_dbg(vdev, PM, "Post-reset done.\n"); } -int ivpu_pm_init(struct ivpu_device *vdev) +void ivpu_pm_init(struct ivpu_device *vdev) { struct 
device *dev = vdev->drm.dev; struct ivpu_pm_info *pm = vdev->pm; + int delay; pm->vdev = vdev; - pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - atomic_set(&pm->in_reset, 0); - INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); + init_rwsem(&pm->reset_lock); + atomic_set(&pm->reset_pending, 0); + atomic_set(&pm->reset_counter, 0); - pm_runtime_use_autosuspend(dev); + INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); + INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work); if (ivpu_disable_recovery) - pm_runtime_set_autosuspend_delay(dev, -1); - else if (ivpu_is_silicon(vdev)) - pm_runtime_set_autosuspend_delay(dev, 100); + delay = -1; else - pm_runtime_set_autosuspend_delay(dev, 60000); + delay = vdev->timeout.autosuspend; - return 0; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, delay); + pm_runtime_set_active(dev); + + ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay); +} + +void ivpu_pm_disable_recovery(struct ivpu_device *vdev) +{ + drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work)); + disable_work_sync(&vdev->pm->recovery_work); } void ivpu_pm_enable(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; - pm_runtime_set_active(dev); pm_runtime_allow(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); - - ivpu_dbg(vdev, RPM, "Enable RPM count %d\n", atomic_read(&dev->power.usage_count)); } void ivpu_pm_disable(struct ivpu_device *vdev) { - struct device *dev = vdev->drm.dev; - - ivpu_dbg(vdev, RPM, "Disable RPM count %d\n", atomic_read(&dev->power.usage_count)); - pm_runtime_get_noresume(vdev->drm.dev); pm_runtime_forbid(vdev->drm.dev); } + +int ivpu_pm_dct_init(struct ivpu_device *vdev) +{ + if (vdev->pm->dct_active_percent) + return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent); + + return 0; +} + +int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) +{ + u32 active_us, inactive_us; + int ret; + + if (active_percent == 0 || active_percent > 100) + return -EINVAL; + + active_us = (DCT_PERIOD_US * active_percent) / 100; + inactive_us = DCT_PERIOD_US - active_us; + + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); + return ret; + } + + return 0; +} + +int ivpu_pm_dct_disable(struct ivpu_device *vdev) +{ + int ret; + + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + + ret = ivpu_jsm_dct_disable(vdev); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); + return ret; + } + + return 0; +} + +void ivpu_pm_irq_dct_work_fn(struct work_struct *work) +{ + struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_dct_work); + bool enable; + int ret; + + if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) + return; + + if (enable) + ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); + else + ret = ivpu_pm_dct_disable(vdev); + + if (!ret) { + /* Convert percent to U1.7 format */ + u8 val = DIV_ROUND_CLOSEST(vdev->pm->dct_active_percent * 128, 100); + + ivpu_hw_btrs_dct_set_status(vdev, enable, val); + } + +} diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index dc1b3758e13f..a2aa7a27f32e 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -1,26 +1,31 @@ /* 
SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_PM_H__ #define __IVPU_PM_H__ +#include <linux/rwsem.h> #include <linux/types.h> struct ivpu_device; struct ivpu_pm_info { struct ivpu_device *vdev; + struct delayed_work job_timeout_work; struct work_struct recovery_work; - atomic_t in_reset; + struct rw_semaphore reset_lock; + atomic_t reset_counter; + atomic_t reset_pending; bool is_warmboot; - u32 suspend_reschedule_counter; + u8 dct_active_percent; }; -int ivpu_pm_init(struct ivpu_device *vdev); +void ivpu_pm_init(struct ivpu_device *vdev); void ivpu_pm_enable(struct ivpu_device *vdev); void ivpu_pm_disable(struct ivpu_device *vdev); +void ivpu_pm_disable_recovery(struct ivpu_device *vdev); int ivpu_pm_suspend_cb(struct device *dev); int ivpu_pm_resume_cb(struct device *dev); @@ -33,6 +38,13 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev); int __must_check ivpu_rpm_get(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); -void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); +void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason); +void ivpu_start_job_timeout_detection(struct ivpu_device *vdev); +void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); + +int ivpu_pm_dct_init(struct ivpu_device *vdev); +int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent); +int ivpu_pm_dct_disable(struct ivpu_device *vdev); +void ivpu_pm_irq_dct_work_fn(struct work_struct *work); #endif /* __IVPU_PM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c new file mode 100644 index 000000000000..d250a10caca9 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024-2025 Intel Corporation + */ + +#include <linux/device.h> +#include <linux/err.h> +#include <linux/pm_runtime.h> +#include <linux/units.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_fw.h" +#include "ivpu_hw.h" +#include "ivpu_sysfs.h" + +/** + * DOC: npu_busy_time_us + * + * npu_busy_time_us is the time that the device spent executing jobs. + * The time is counted only while there are jobs submitted to the firmware. + * + * This time can be used to measure the utilization of the NPU, either by calculating + * the npu_busy_time_us difference between two time points (i.e. measuring the time + * that the NPU was active during some workload) or by monitoring the utilization + * percentage through periodic reads of npu_busy_time_us. + * + * When reading the value periodically, it shouldn't be read too often as it may have + * an impact on job submission performance. The recommended period is 1 second. + */ +static ssize_t +npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + ktime_t total, now = 0; + + mutex_lock(&vdev->submitted_jobs_lock); + + total = vdev->busy_time; + if (!xa_empty(&vdev->submitted_jobs_xa)) + now = ktime_sub(ktime_get(), vdev->busy_start_ts); + mutex_unlock(&vdev->submitted_jobs_lock); + + return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now))); +} + +static DEVICE_ATTR_RO(npu_busy_time_us); + +/** + * DOC: npu_memory_utilization + * + * The npu_memory_utilization attribute reports the current NPU memory utilization, in bytes.
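
The npu_busy_time_us attribute documented above lends itself to a simple userspace utilization monitor: sample it twice and divide the busy delta by the wall-clock delta. A minimal sketch; the sysfs path is a hypothetical example and must be adjusted to the actual NPU PCI device:

/* Hypothetical attribute path; substitute the real NPU PCI device. */
#define BUSY_ATTR "/sys/bus/pci/devices/0000:00:0b.0/npu_busy_time_us"

#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t read_busy_us(void)
{
	uint64_t val = 0;
	FILE *f = fopen(BUSY_ATTR, "r");

	if (!f)
		return 0;
	if (fscanf(f, "%" SCNu64, &val) != 1)
		val = 0;
	fclose(f);
	return val;
}

int main(void)
{
	uint64_t busy0 = read_busy_us();

	sleep(1); /* the 1 s sampling period recommended above */

	/* Busy microseconds over a ~1,000,000 us window, as a percentage. */
	printf("NPU utilization: %.1f%%\n", (read_busy_us() - busy0) / 10000.0);
	return 0;
}
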
+ * + */ +static ssize_t +npu_memory_utilization_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + struct ivpu_bo *bo; + u64 total_npu_memory = 0; + + mutex_lock(&vdev->bo_list_lock); + list_for_each_entry(bo, &vdev->bo_list, bo_list_node) + if (ivpu_bo_is_resident(bo)) + total_npu_memory += ivpu_bo_size(bo); + mutex_unlock(&vdev->bo_list_lock); + + return sysfs_emit(buf, "%lld\n", total_npu_memory); +} + +static DEVICE_ATTR_RO(npu_memory_utilization); + +/** + * DOC: sched_mode + * + * The sched_mode is used to report current NPU scheduling mode. + * + * It returns following strings: + * - "HW" - Hardware Scheduler mode + * - "OS" - Operating System Scheduler mode + * + */ +static ssize_t +sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + + return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS"); +} + +static DEVICE_ATTR_RO(sched_mode); + +/** + * DOC: npu_max_frequency + * + * The npu_max_frequency shows maximum frequency in MHz of the NPU's data + * processing unit + */ +static ssize_t +npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_max_frequency_mhz); + +/** + * DOC: npu_current_frequency_mhz + * + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = 0; + + /* Read frequency only if device is active, otherwise frequency is 0 */ + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { + freq = ivpu_hw_dpu_freq_get(vdev); + + pm_runtime_put_autosuspend(vdev->drm.dev); + } + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_current_frequency_mhz); + +static struct attribute *ivpu_dev_attrs[] = { + &dev_attr_npu_busy_time_us.attr, + &dev_attr_npu_memory_utilization.attr, + &dev_attr_sched_mode.attr, + &dev_attr_npu_max_frequency_mhz.attr, + &dev_attr_npu_current_frequency_mhz.attr, + NULL, +}; + +static struct attribute_group ivpu_dev_attr_group = { + .attrs = ivpu_dev_attrs, +}; + +void ivpu_sysfs_init(struct ivpu_device *vdev) +{ + int ret; + + ret = devm_device_add_group(vdev->drm.dev, &ivpu_dev_attr_group); + if (ret) + ivpu_warn(vdev, "Failed to add group to device, ret %d", ret); +} diff --git a/drivers/accel/ivpu/ivpu_sysfs.h b/drivers/accel/ivpu/ivpu_sysfs.h new file mode 100644 index 000000000000..9836f09b35a3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Intel Corporation + */ + +#ifndef __IVPU_SYSFS_H__ +#define __IVPU_SYSFS_H__ + +#include "ivpu_drv.h" + +void ivpu_sysfs_init(struct ivpu_device *vdev); + +#endif /* __IVPU_SYSFS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h new file mode 100644 index 000000000000..eb792038e701 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace.h @@ -0,0 +1,73 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __IVPU_TRACE_H__ + +#include <linux/tracepoint.h> +#include "ivpu_drv.h" +#include "ivpu_job.h" +#include "vpu_jsm_api.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ipc.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vpu +#define TRACE_INCLUDE_FILE ivpu_trace + +TRACE_EVENT(pm, + TP_PROTO(const char *event), + TP_ARGS(event), + TP_STRUCT__entry(__field(const char *, event)), + TP_fast_assign(__entry->event = event;), + TP_printk("%s", __entry->event) +); + +TRACE_EVENT(job, + TP_PROTO(const char *event, struct ivpu_job *job), + TP_ARGS(event, job), + TP_STRUCT__entry(__field(const char *, event) + __field(u32, ctx_id) + __field(u32, engine_id) + __field(u32, job_id) + ), + TP_fast_assign(__entry->event = event; + __entry->ctx_id = job->file_priv->ctx.id; + __entry->engine_id = job->engine_idx; + __entry->job_id = job->job_id;), + TP_printk("%s context:%d engine:%d job:%d", + __entry->event, + __entry->ctx_id, + __entry->engine_id, + __entry->job_id) +); + +TRACE_EVENT(jsm, + TP_PROTO(const char *event, struct vpu_jsm_msg *msg), + TP_ARGS(event, msg), + TP_STRUCT__entry(__field(const char *, event) + __field(const char *, type) + __field(enum vpu_ipc_msg_status, status) + __field(u32, request_id) + __field(u32, result) + ), + TP_fast_assign(__entry->event = event; + __entry->type = ivpu_jsm_msg_type_to_str(msg->type); + __entry->status = msg->status; + __entry->request_id = msg->request_id; + __entry->result = msg->result;), + TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x", + __entry->event, + __entry->type, + __entry->status, + __entry->request_id, + __entry->result) +); + +#endif /* __IVPU_TRACE_H__ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c new file mode 100644 index 000000000000..f8fb99de0de3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "ivpu_trace.h" +#endif diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 6b71be92ba65..218468bbbcad 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -1,17 +1,19 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (c) 2020-2024, Intel Corporation. */ #ifndef VPU_BOOT_API_H #define VPU_BOOT_API_H /* - * =========== FW API version information beginning ================ - * The bellow values will be used to construct the version info this way: + * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; - * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes. + * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes + * This information is collected by using vpuip_2/application/vpuFirmware/make_std_fw_image.py + * If a header is missing this info we ignore the header, if a header is missing or contains + * partial info a build error will be generated. 
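
Given the version-word layout described above (major in the upper 16 bits, minor in the lower 16, patch not encoded), a host-side compatibility check reduces to comparing the upper half of the word. A minimal sketch with hypothetical helper names and illustrative version numbers; the driver open-codes its own checks:

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only; the real ones live in vpu_boot_api.h. */
#define BOOT_API_VER_MAJOR 3
#define BOOT_API_VER_MINOR 28

static uint32_t boot_api_ver_pack(uint16_t major, uint16_t minor)
{
	return ((uint32_t)major << 16) | minor; /* patch is not encoded */
}

static int boot_api_major_matches(uint32_t fw_ver_word)
{
	/* Only a major-version mismatch breaks compatibility. */
	return (fw_ver_word >> 16) == BOOT_API_VER_MAJOR;
}

int main(void)
{
	uint32_t word = boot_api_ver_pack(BOOT_API_VER_MAJOR, BOOT_API_VER_MINOR);

	printf("api_version word %#x, compatible: %d\n",
	       word, boot_api_major_matches(word));
	return 0;
}
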
*/ /* @@ -24,21 +26,20 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 12 +#define VPU_BOOT_API_VER_MINOR 28 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 2 +#define VPU_BOOT_API_VER_PATCH 3 /* * Index in the API version table * Must be unique for each API */ #define VPU_BOOT_API_VER_INDEX 0 -/* ------------ FW API version information end ---------------------*/ -#pragma pack(push, 1) +#pragma pack(push, 4) /* * Firmware image header format @@ -63,6 +64,32 @@ struct vpu_firmware_header { /* Size of memory require for firmware execution */ u32 runtime_size; u32 shave_nn_fw_size; + /* + * Size of primary preemption buffer, assuming a 2-job submission queue. + * NOTE: host driver is expected to adapt size accordingly to actual + * submission queue size and device capabilities. + */ + u32 preemption_buffer_1_size; + /* + * Size of secondary preemption buffer, assuming a 2-job submission queue. + * NOTE: host driver is expected to adapt size accordingly to actual + * submission queue size and device capabilities. + */ + u32 preemption_buffer_2_size; + /* + * Maximum preemption buffer size that the FW can use: no need for the host + * driver to allocate more space than that specified by these fields. + * A value of 0 means no declared limit. + */ + u32 preemption_buffer_1_max_size; + u32 preemption_buffer_2_max_size; + /* Space reserved for future preemption-related fields. */ + u32 preemption_reserved[4]; + /* FW image read only section start address, 4KB aligned */ + u64 ro_section_start_address; + /* FW image read only section size, 4KB aligned */ + u32 ro_section_size; + u32 reserved; }; /* @@ -89,6 +116,14 @@ enum VPU_BOOT_L2_CACHE_CFG_TYPE { VPU_BOOT_L2_CACHE_CFG_NUM = 2 }; +/** VPU MCA ECC signalling mode. By default, no signalling is used */ +enum VPU_BOOT_MCA_ECC_SIGNAL_TYPE { + VPU_BOOT_MCA_ECC_NONE = 0, + VPU_BOOT_MCA_ECC_CORR = 1, + VPU_BOOT_MCA_ECC_FATAL = 2, + VPU_BOOT_MCA_ECC_BOTH = 3 +}; + /** * Logging destinations. * @@ -106,7 +141,7 @@ enum vpu_trace_destination { /* * Processor bit shifts (for loggable HW components). */ -#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_RESERVED 0 #define VPU_TRACE_PROC_BIT_LRT 1 #define VPU_TRACE_PROC_BIT_LNN 2 #define VPU_TRACE_PROC_BIT_SHV_0 3 @@ -131,9 +166,9 @@ enum vpu_trace_destination { #define VPU_TRACE_PROC_BIT_ACT_SHV_3 22 #define VPU_TRACE_PROC_NO_OF_HW_DEVS 23 -/* KMB HW component IDs are sequential, so define first and last IDs. */ -#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT -#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15 +/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ +#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT +#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 struct vpu_boot_l2_cache_config { u8 use; @@ -148,6 +183,36 @@ struct vpu_warm_boot_section { u32 is_clear_op; }; +/* + * When HW scheduling mode is enabled, a present period is defined. + * It will be used by VPU to swap between normal and focus priorities + * to prevent starving of normal priority band (when implemented). + * Host must provide a valid value at boot time in + * `vpu_focus_present_timer_ms`. If the value provided by the host is not within the + * defined range a default value will be used. Here we define the min. and max. 
+ allowed values and the default value of the present period. Units are milliseconds. + */ +#define VPU_PRESENT_CALL_PERIOD_MS_DEFAULT 50 +#define VPU_PRESENT_CALL_PERIOD_MS_MIN 16 +#define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000 + +/** + * Macros to enable various power profiles within the NPU. + * To be defined as part of 32 bit mask. + */ +#define POWER_PROFILE_SURVIVABILITY 0x1 + +/** + * Enum for dvfs_mode boot param. + */ +enum vpu_governor { + VPU_GOV_DEFAULT = 0, /* Default Governor for the system */ + VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */ + VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */ + VPU_GOV_POWER_SAVE = 3, /* Power save governor */ + VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */ +}; + struct vpu_boot_params { u32 magic; u32 vpu_id; @@ -218,6 +283,7 @@ struct vpu_boot_params { * the threshold will not be logged); applies to every enabled logging * destination and loggable HW component. See 'mvLog_t' enum for acceptable * values. + * TODO: EISW-33556: Move log level definition (mvLog_t) to this file. */ u32 default_trace_level; u32 boot_type; @@ -249,7 +315,58 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - u32 pad4[28]; + /** + * DVFS Mode: + * 0 - Default, DVFS mode selected by the firmware + * 1 - Max Performance + * 2 - On Demand + * 3 - Power Save + * 4 - On Demand Priority Aware + */ + u32 dvfs_mode; + /** + * Depending on DVFS Mode: + * On-demand: Default if 0. + * Bit 0-7 - uint8_t: Highest residency percent + * Bit 8-15 - uint8_t: High residency percent + * Bit 16-23 - uint8_t: Low residency percent + * Bit 24-31 - uint8_t: Lowest residency percent + * Bit 32-35 - unsigned 4b: PLL Ratio increase amount on highest residency + * Bit 36-39 - unsigned 4b: PLL Ratio increase amount on high residency + * Bit 40-43 - unsigned 4b: PLL Ratio decrease amount on low residency + * Bit 44-47 - unsigned 4b: PLL Ratio decrease amount on lowest frequency + * Bit 48-55 - uint8_t: Period (ms) for residency decisions + * Bit 56-63 - uint8_t: Averaging windows (as multiples of period. Max: 30 decimal) + * Power Save/Max Performance: Unused + */ + u64 dvfs_param; + /** + * D0i3 delayed entry + * Bit 0: Disable CPU state save on D0i2 entry flow. + * 0: Every D0i2 entry saves state. Save state IPC message ignored. + * 1: IPC message required to save state on D0i3 entry flow. + */ + u32 d0i3_delayed_entry; + /* Time spent by VPU in D0i3 state */ + u64 d0i3_residency_time_us; + /* Value of VPU perf counter at the time of entering D0i3 state. */ + u64 d0i3_entry_vpu_ts; + /* + * The system time of the host operating system in microseconds. + * E.g. the number of microseconds since 1st of January 1970, or whatever + * date the host operating system uses to maintain system time. + * This value will be used to track system time on the VPU. + * The KMD is required to update this value on every VPU reset. + */ + u64 system_time_us; + u32 pad4[2]; + /* + * The delta between device monotonic time and the current value of the + * HW timestamp register, in ticks. Written by the firmware during boot. + * Can be used by the KMD to calculate device time. + */ + u64 device_time_delta_ticks; + u32 pad7[14]; /* Warm boot information: 0x400 - 0x43F */ u32 warm_boot_sections_count; u32 warm_boot_start_address_reference; @@ -274,14 +391,19 @@ struct vpu_boot_params { u32 vpu_scheduling_mode; /* Present call period in milliseconds. 
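
The dvfs_param layout above packs four u8 residency percentages, four 4-bit PLL-ratio steps and two u8 timing fields into a single u64. A minimal packing sketch under that layout; the helper name is hypothetical, and the driver currently leaves this field to firmware defaults:

#include <stdint.h>

/* Hypothetical helper mirroring the documented dvfs_param bit layout. */
static uint64_t dvfs_param_pack(uint8_t highest_pct, uint8_t high_pct,
				uint8_t low_pct, uint8_t lowest_pct,
				uint8_t inc_highest, uint8_t inc_high,
				uint8_t dec_low, uint8_t dec_lowest,
				uint8_t period_ms, uint8_t avg_windows)
{
	return (uint64_t)highest_pct |                 /* bits 0-7 */
	       ((uint64_t)high_pct << 8) |             /* bits 8-15 */
	       ((uint64_t)low_pct << 16) |             /* bits 16-23 */
	       ((uint64_t)lowest_pct << 24) |          /* bits 24-31 */
	       ((uint64_t)(inc_highest & 0xf) << 32) | /* 4-bit fields */
	       ((uint64_t)(inc_high & 0xf) << 36) |
	       ((uint64_t)(dec_low & 0xf) << 40) |
	       ((uint64_t)(dec_lowest & 0xf) << 44) |
	       ((uint64_t)period_ms << 48) |           /* bits 48-55 */
	       ((uint64_t)avg_windows << 56);          /* bits 56-63 */
}
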
*/ u32 vpu_focus_present_timer_ms; - /* Unused/reserved: 0x478 - 0xFFF */ - u32 pad6[738]; + /* VPU ECC Signaling */ + u32 vpu_uses_ecc_mca_signal; + /* Values defined by POWER_PROFILE* macros */ + u32 power_profile; + /* Microsecond value for DCT active cycle */ + u32 dct_active_us; + /* Microsecond value for DCT inactive cycle */ + u32 dct_inactive_us; + /* Unused/reserved: 0x488 - 0xFFF */ + u32 pad6[734]; }; -/* - * Magic numbers set between host and vpu to detect corruptio of tracing init - */ - +/* Magic numbers set between host and vpu to detect corruption of tracing init */ #define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) /* Tracing buffer message format definitions */ @@ -301,7 +423,9 @@ struct vpu_tracing_buffer_header { u32 host_canary_start; /* offset from start of buffer for trace entries */ u32 read_index; - u32 pad_to_cache_line_size_0[14]; + /* keeps track of wrapping on the reader side */ + u32 read_wrap_count; + u32 pad_to_cache_line_size_0[13]; /* End of first cache line */ /** diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 1096cab0334e..bca6a44dc041 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -1,15 +1,16 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (c) 2020-2025, Intel Corporation. + */ + +/** + * @addtogroup Jsm + * @{ */ /** * @file * @brief JSM shared definitions - * - * @ingroup Jsm - * @brief JSM shared definitions - * @{ */ #ifndef VPU_JSM_API_H #define VPU_JSM_API_H @@ -17,17 +18,17 @@ /* * Major version changes that break backward compatibility */ -#define VPU_JSM_API_VER_MAJOR 2 +#define VPU_JSM_API_VER_MAJOR 3 /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 10 +#define VPU_JSM_API_VER_MINOR 33 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_JSM_API_VER_PATCH 1 +#define VPU_JSM_API_VER_PATCH 0 /* * Index in the API version table @@ -36,22 +37,24 @@ /* * Number of Priority Bands for Hardware Scheduling - * Bands: RealTime, Focus, Normal, Idle + * Bands: Idle(0), Normal(1), Focus(2), RealTime(3) */ #define VPU_HWS_NUM_PRIORITY_BANDS 4 /* Max number of impacted contexts that can be dealt with the engine reset command */ #define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3 -/** Pack the API structures for now, once alignment issues are fixed this can be removed */ -#pragma pack(push, 1) +/* + * Pack the API structures to enforce binary compatibility + * Align to 8 bytes for optimal performance + */ +#pragma pack(push, 8) /* * Engine indexes. */ #define VPU_ENGINE_COMPUTE 0 -#define VPU_ENGINE_COPY 1 -#define VPU_ENGINE_NB 2 +#define VPU_ENGINE_NB 1 /* * VPU status values. 
@@ -69,8 +72,15 @@ #define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES 0xAU #define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED 0xBU #define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU -/* Job status returned when the job was preempted mid-inference */ +/* @deprecated (use VPU_JSM_STATUS_PREEMPTED_MID_COMMAND instead) */ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU +/* Job status returned when the job was preempted mid-command */ +#define VPU_JSM_STATUS_PREEMPTED_MID_COMMAND 0xDU +/* Range of status codes that require engine reset */ +#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN 0xEU +#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU +#define VPU_JSM_STATUS_MVNCI_PREEMPTION_TIMED_OUT 0xFU +#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX 0x1FU /* * Host <-> VPU IPC channels. @@ -83,16 +93,70 @@ /* * Job flags bit masks. */ -#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 +enum { + /* + * Null submission mask. + * When set, batch buffer's commands are not processed but returned as + * successful immediately, except fences and timestamps. + * When cleared, batch buffer's commands are processed normally. + * Used for testing and profiling purposes. + */ + VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U), + /* + * Inline command mask. + * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below). + * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below). + */ + VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U), + /* + * VPU private data mask. + * Reserved for the VPU to store private data about the job (or inline command) + * while being processed. + */ + VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U +}; /* - * Sizes of the reserved areas in jobs, in bytes. + * Job queue flags bit masks. */ -#define VPU_JOB_RESERVED_BYTES 16 -/* - * Sizes of the reserved areas in job queues, in bytes. - */ -#define VPU_JOB_QUEUE_RESERVED_BYTES 52 +enum { + /* + * No job done notification mask. + * When set, indicates that no job done notification should be sent for any + * job from this queue. When cleared, indicates that job done notification + * should be sent for every job completed from this queue. + */ + VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U), + /* + * Native fence usage mask. + * When set, indicates that job queue uses native fences (as inline commands + * in job queue). Such queues may also use legacy fences (as commands in batch buffers). + * When cleared, indicates the job queue only uses legacy fences. + * NOTES: + * 1. For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + * 2. Native fence queues are only supported on VPU 40xx onwards. + */ + VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), + /* + * Enable turbo mode for testing NPU performance; not recommended for regular usage. + */ + VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U), + /* + * Queue error detection mode flag + * For 'interactive' queues (this bit not set), the FW will identify queues that have not + * completed a job inside the TDR timeout as in error as part of engine reset sequence. 
+ * For 'non-interactive' queues (this bit set), the FW will identify queues that have not + * progressed the heartbeat inside the non-interactive no-progress timeout as in error as + * part of engine reset sequence. Additionally, there is an upper limit applied to these + * queues: even if they progress the heartbeat, if they run longer than non-interactive + * timeout, then the FW will also identify them as in error. + */ + VPU_JOB_QUEUE_FLAGS_NON_INTERACTIVE = (1 << 3U) +}; /* * Max length (including trailing NULL char) of trace entity name (e.g., the @@ -103,33 +167,173 @@ /* * Max length (including trailing NULL char) of a dyndbg command. * - * NOTE: 112 is used so that the size of 'struct vpu_ipc_msg' in the JSM API is + * NOTE: 96 is used so that the size of 'struct vpu_ipc_msg' in the JSM API is * 128 bytes (multiple of 64 bytes, the cache line size). */ -#define VPU_DYNDBG_CMD_MAX_LEN 112 +#define VPU_DYNDBG_CMD_MAX_LEN 96 + +/* + * For HWS command queue scheduling, we can prioritise command queues inside the + * same process with a relative in-process priority. Valid values for relative + * priority are given below - max and min. + */ +#define VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY 7 +#define VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY -7 + +/* + * For HWS priority scheduling, we can have multiple realtime priority bands. + * They are numbered 0 to a MAX. + */ +#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U + +/* + * vpu_jsm_engine_reset_context flag definitions + */ +#define VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK BIT(0) +#define VPU_ENGINE_RESET_CONTEXT_HANG_PRIMARY_CAUSE 0 +#define VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE 1 /* + * Invalid command queue handle identifier. Applies to cmdq_id and cmdq_group + * in this API. + */ +#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL + +/* + * Inline commands types. + */ +/* + * NOP. + * VPU does nothing other than consuming the inline command object. + */ +#define VPU_INLINE_CMD_TYPE_NOP 0x0 +/* + * Fence wait. + * VPU waits for the fence current value to reach monitored value. + * Fence wait operations are executed upon job dispatching. While waiting for + * the fence to be satisfied, VPU blocks fetching of the next objects in the queue. + * Jobs present in the queue prior to the fence wait object may be processed + * concurrently. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1 +/* + * Fence signal. + * VPU sets the fence current value to the provided value. If new current value + * is equal to or higher than monitored value, VPU sends fence signalled notification + * to the host. Fence signal operations are executed upon completion of all the jobs + * present in the queue prior to them, and in-order relative to each other in the queue. + * But jobs in-between them may be processed concurrently and may complete out-of-order. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2 + +/** + * Job scheduling priority bands for both hardware scheduling and OS scheduling. + */ +enum vpu_job_scheduling_priority_band { + VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0, + VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1, + VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2, + VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3, + VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4, +}; + +/** * Job format. + * Jobs defines the actual workloads to be executed by a given engine. 
*/ struct vpu_job_queue_entry { - u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ - u32 job_id; /**< Job ID */ - u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ - u64 root_page_table_addr; /**< Address of root page table to use for this job */ - u64 root_page_table_update_counter; /**< Page tables update events counter */ - u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */ - u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */ - u8 reserved[VPU_JOB_RESERVED_BYTES]; + /** Address of VPU commands batch buffer */ + u64 batch_buf_addr; + /** Job ID */ + u32 job_id; + /** Flags bit field, see VPU_JOB_FLAGS_* above */ + u32 flags; + /** + * Doorbell ring timestamp taken by KMD from SoC's global system clock, in + * microseconds. NPU can convert this value to its own fixed clock's timebase, + * to match other profiling timestamps. + */ + u64 doorbell_timestamp; + /** Extra id for job tracking, used only in the firmware perf traces */ + u64 host_tracking_id; + /** Address of the primary preemption buffer to use for this job */ + u64 primary_preempt_buf_addr; + /** Size of the primary preemption buffer to use for this job */ + u32 primary_preempt_buf_size; + /** Size of secondary preemption buffer to use for this job */ + u32 secondary_preempt_buf_size; + /** Address of secondary preemption buffer to use for this job */ + u64 secondary_preempt_buf_addr; + u64 reserved_0; }; -/* +/** + * Inline command format. + * Inline commands are the commands executed at scheduler level (typically, + * synchronization directives). Inline command and job objects must be of + * the same size and have flags field at same offset. + */ +struct vpu_inline_cmd { + u64 reserved_0; + /** Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */ + u32 type; + /** Flags bit field, see VPU_JOB_FLAGS_* above. */ + u32 flags; + /** Inline command payload. Depends on inline command type. */ + union payload { + /** Fence (wait and signal) commands' payload. */ + struct fence { + /** Fence object handle. */ + u64 fence_handle; + /** User VA of the current fence value. */ + u64 current_value_va; + /** User VA of the monitored fence value (read-only). */ + u64 monitored_value_va; + /** Value to wait for or write in fence location. */ + u64 value; + /** User VA of the log buffer in which to add log entry on completion. */ + u64 log_buffer_va; + /** NPU private data. */ + u64 npu_private_data; + } fence; + /** + * Other commands do not have a payload: + * Payload definition for future inline commands can be inserted here. + */ + u64 reserved_1[6]; + } payload; +}; + +/** + * Job queue slots can be populated either with job objects or inline command objects. + */ +union vpu_jobq_slot { + struct vpu_job_queue_entry job; + struct vpu_inline_cmd inline_cmd; +}; + +/** * Job queue control registers. */ struct vpu_job_queue_header { u32 engine_idx; u32 head; u32 tail; - u8 reserved[VPU_JOB_QUEUE_RESERVED_BYTES]; + u32 flags; + /** Set to 1 to indicate priority_band field is valid */ + u32 priority_band_valid; + /** + * Priority for the work of this job queue, valid only if the HWS is NOT used + * and the @ref priority_band_valid is set to 1. It is applied only during + * the @ref VPU_JSM_MSG_REGISTER_DB message processing. + * The device firmware might use the priority_band to optimize the power + * management logic, but it will not affect the order of jobs. 
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ + u32 priority_band; + /** Inside realtime band assigns a further priority, limited to 0..31 range */ + u32 realtime_priority_level; + u32 reserved_0[9]; }; /* @@ -137,7 +341,7 @@ struct vpu_job_queue_header { */ struct vpu_job_queue { struct vpu_job_queue_header header; - struct vpu_job_queue_entry job[]; + union vpu_jobq_slot slot[]; }; /** @@ -152,17 +356,145 @@ enum vpu_trace_entity_type { VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2, }; +/** + * HWS specific log buffer header details. + * Total size is 32 bytes. + */ +struct vpu_hws_log_buffer_header { + /** Written by VPU after adding a log entry. Initialised by host to 0. */ + u32 first_free_entry_index; + /** Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */ + u32 wraparound_count; + /** + * This is the number of buffers that can be stored in the log buffer provided by the host. + * It is written by host before passing buffer to VPU. VPU should consider it read-only. + */ + u64 num_of_entries; + u64 reserved[2]; +}; + +/** + * HWS specific log buffer entry details. + * Total size is 32 bytes. + */ +struct vpu_hws_log_buffer_entry { + /** VPU timestamp must be an invariant timer tick (not impacted by DVFS) */ + u64 vpu_timestamp; + /** + * Operation type: + * 0 - context state change + * 1 - queue new work + * 2 - queue unwait sync object + * 3 - queue no more work + * 4 - queue wait sync object + */ + u32 operation_type; + u32 reserved; + /** Operation data depends on operation type */ + u64 operation_data[2]; +}; + +/* Native fence log buffer types. */ +enum vpu_hws_native_fence_log_type { + VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1, + VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2 +}; + +/** HWS native fence log buffer header. */ +struct vpu_hws_native_fence_log_header { + union { + struct { + /** Index of the first free entry in buffer. */ + u32 first_free_entry_idx; + /** + * Incremented whenever the NPU wraps around the buffer and writes + * to the first entry again. + */ + u32 wraparound_count; + }; + /** Field allowing atomic update of both fields above. */ + u64 atomic_wraparound_and_entry_idx; + }; + /** Log buffer type, see enum vpu_hws_native_fence_log_type. */ + u64 type; + /** Allocated number of entries in the log buffer. */ + u64 entry_nb; + u64 reserved[2]; +}; + +/** Native fence log operation types. */ +enum vpu_hws_native_fence_log_op { + VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0, + VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1 +}; + +/** HWS native fence log entry. */ +struct vpu_hws_native_fence_log_entry { + /** Newly signaled/unblocked fence value. */ + u64 fence_value; + /** Native fence object handle to which this operation belongs. */ + u64 fence_handle; + /** Operation type, see enum vpu_hws_native_fence_log_op. */ + u64 op_type; + u64 reserved_0; + /** + * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence + * wait was started (in NPU SysTime). + */ + u64 fence_wait_start_ts; + u64 reserved_1; + /** Timestamp at which fence operation was completed (in NPU SysTime). */ + u64 fence_end_ts; +}; + +/** Native fence log buffer. */ +struct vpu_hws_native_fence_log_buffer { + struct vpu_hws_native_fence_log_header header; + struct vpu_hws_native_fence_log_entry entry[]; +}; + /* * Host <-> VPU IPC messages types. 
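
The head/tail pair in vpu_job_queue_header above behaves like a single-producer ring: the host writes union vpu_jobq_slot entries at tail and advances it modulo the slot count, while the device consumes from head. A minimal host-side enqueue sketch; the index semantics, the omitted doorbell write and the omitted memory barriers are all simplifying assumptions:

#include <stdint.h>
#include <string.h>

/* Simplified stand-ins for the structures defined above. */
struct jobq_hdr { uint32_t engine_idx, head, tail, flags; };
struct jobq_slot { uint8_t raw[64]; }; /* union vpu_jobq_slot is 64 bytes */

/* Hypothetical enqueue helper; no doorbell, no barriers, no locking. */
static int jobq_push(struct jobq_hdr *hdr, struct jobq_slot *slots,
		     uint32_t nslots, const struct jobq_slot *job)
{
	uint32_t next = (hdr->tail + 1) % nslots;

	if (next == hdr->head)
		return -1; /* queue full */

	memcpy(&slots[hdr->tail], job, sizeof(*job));
	hdr->tail = next; /* real code must publish this after the copy */
	return 0;
}
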
*/ enum vpu_ipc_msg_type { + /** Unsupported command */ VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, - /* IPC Host -> Device, Async commands */ + + /** IPC Host -> Device, base id for async commands */ VPU_JSM_MSG_ASYNC_CMD = 0x1100, + /** + * Reset engine. The NPU cancels all the jobs currently executing on the target + * engine making the engine become idle and then does a HW reset, before returning + * to the host. + * @see struct vpu_ipc_msg_payload_engine_reset + */ VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + /** + * Preempt engine. The NPU stops (preempts) all the jobs currently + * executing on the target engine making the engine become idle and ready to + * execute new jobs. + * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of + * the target engine, but it stops processing them (until the queue doorbell + * is rung again); the host is responsible to reset the job queue, either + * after preemption or when resubmitting jobs to the queue. + * @see vpu_ipc_msg_payload_engine_preempt + */ VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, + /** + * OS scheduling doorbell register command + * @see vpu_ipc_msg_payload_register_db + */ VPU_JSM_MSG_REGISTER_DB = 0x1102, + /** + * OS scheduling doorbell unregister command + * @see vpu_ipc_msg_payload_unregister_db + */ VPU_JSM_MSG_UNREGISTER_DB = 0x1103, + /** + * Query engine heartbeat. Heartbeat is expected to increase monotonically + * and increase while work is being progressed by NPU. + * @see vpu_ipc_msg_payload_query_engine_hb + */ VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104, VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105, VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106, @@ -188,6 +520,7 @@ enum vpu_ipc_msg_type { * aborted and removed from internal scheduling queues. All doorbells assigned * to the host_ssid are unregistered and any internal FW resources belonging to * the host_ssid are released. + * @see vpu_ipc_msg_payload_ssid_release */ VPU_JSM_MSG_SSID_RELEASE = 0x110e, /** @@ -215,35 +548,114 @@ enum vpu_ipc_msg_type { * @see vpu_jsm_metric_streamer_start */ VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112, - /** Control command: Priority band setup */ + /** + * Control command: Priority band setup + * @see vpu_ipc_msg_payload_hws_priority_band_setup + */ VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113, - /** Control command: Create command queue */ + /** + * Control command: Create command queue + * @see vpu_ipc_msg_payload_hws_create_cmdq + */ VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114, - /** Control command: Destroy command queue */ + /** + * Control command: Destroy command queue + * @see vpu_ipc_msg_payload_hws_destroy_cmdq + */ VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115, - /** Control command: Set context scheduling properties */ + /** + * Control command: Set context scheduling properties + * @see vpu_ipc_msg_payload_hws_set_context_sched_properties + */ VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116, - /* + /** * Register a doorbell to notify VPU of new work. The doorbell may later be * deallocated or reassigned to another context. + * @see vpu_jsm_hws_register_db */ VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117, - /* IPC Host -> Device, General commands */ + /** + * Control command: Log buffer setting + * @see vpu_ipc_msg_payload_hws_set_scheduling_log + */ + VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118, + /** + * Control command: Suspend command queue. 
+ * @see vpu_ipc_msg_payload_hws_suspend_cmdq + */ + VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119, + /** + * Control command: Resume command queue + * @see vpu_ipc_msg_payload_hws_resume_cmdq + */ + VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a, + /** + * Control command: Resume engine after reset + * @see vpu_ipc_msg_payload_hws_resume_engine + */ + VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b, + /** + * Control command: Enable survivability/DCT mode + * @see vpu_ipc_msg_payload_pwr_dct_control + */ + VPU_JSM_MSG_DCT_ENABLE = 0x111c, + /** + * Control command: Disable survivability/DCT mode + * This command has no payload + */ + VPU_JSM_MSG_DCT_DISABLE = 0x111d, + /** + * Dump VPU state. To be used for debug purposes only. + * This command has no payload. + * NOTE: Please introduce new ASYNC commands before this one. + */ + VPU_JSM_MSG_STATE_DUMP = 0x11FF, + + /** IPC Host -> Device, base id for general commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, - VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + /** Unsupported command */ + VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD, /** * Control dyndbg behavior by executing a dyndbg command; equivalent to - * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. + * Linux command: + * @verbatim echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control @endverbatim + * @see vpu_ipc_msg_payload_dyndbg_control */ VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201, - /* IPC Device -> Host, Job completion */ + /** + * Perform the save procedure for the D0i3 entry + */ + VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, + + /** + * IPC Device -> Host, Job completion + * @see struct vpu_ipc_msg_payload_job_done + */ VPU_JSM_MSG_JOB_DONE = 0x2100, + /** + * IPC Device -> Host, Fence signalled + * @see vpu_ipc_msg_payload_native_fence_signalled + */ + VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101, + /* IPC Device -> Host, Async command completion */ VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, + /** + * IPC Device -> Host, engine reset complete + * @see vpu_ipc_msg_payload_engine_reset_done + */ VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, + /** + * Preempt complete message + * @see vpu_ipc_msg_payload_engine_preempt_done + */ VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201, VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202, VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203, + /** + * Response to query engine heartbeat. + * @see vpu_ipc_msg_payload_query_engine_hb_done + */ VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204, VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205, VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206, @@ -260,7 +672,10 @@ enum vpu_ipc_msg_type { VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c, /** Response to VPU_JSM_MSG_TRACE_GET_NAME. */ VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d, - /** Response to VPU_JSM_MSG_SSID_RELEASE. */ + /** + * Response to VPU_JSM_MSG_SSID_RELEASE. + * @see vpu_ipc_msg_payload_ssid_release + */ VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e, /** * Response to VPU_JSM_MSG_METRIC_STREAMER_START. @@ -290,73 +705,149 @@ enum vpu_ipc_msg_type { /** * Asynchronous event sent from the VPU to the host either when the current * metric buffer is full or when the VPU has collected a multiple of - * @notify_sample_count samples as indicated through the start command - * (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected - * metric data. + * @ref vpu_jsm_metric_streamer_start::notify_sample_count samples as indicated + * through the start command (VPU_JSM_MSG_METRIC_STREAMER_START). Returns + * information about collected metric data. 
* @see vpu_jsm_metric_streamer_done */ VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213, - /** Response to control command: Priority band setup */ + /** + * Response to control command: Priority band setup + * @see vpu_ipc_msg_payload_hws_priority_band_setup + */ VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214, - /** Response to control command: Create command queue */ + /** + * Response to control command: Create command queue + * @see vpu_ipc_msg_payload_hws_create_cmdq_rsp + */ VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215, - /** Response to control command: Destroy command queue */ + /** + * Response to control command: Destroy command queue + * @see vpu_ipc_msg_payload_hws_destroy_cmdq + */ VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216, - /** Response to control command: Set context scheduling properties */ + /** + * Response to control command: Set context scheduling properties + * @see vpu_ipc_msg_payload_hws_set_context_sched_properties + */ VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217, + /** + * Response to control command: Log buffer setting + * @see vpu_ipc_msg_payload_hws_set_scheduling_log + */ + VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218, + /** + * IPC Device -> Host, HWS notify index entry of log buffer written + * @see vpu_ipc_msg_payload_hws_scheduling_log_notification + */ + VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219, + /** + * IPC Device -> Host, HWS completion of a context suspend request + * @see vpu_ipc_msg_payload_hws_suspend_cmdq + */ + VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a, + /** + * Response to control command: Resume command queue + * @see vpu_ipc_msg_payload_hws_resume_cmdq + */ + VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b, + /** + * Response to control command: Resume engine command response + * @see vpu_ipc_msg_payload_hws_resume_engine + */ + VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c, + /** + * Response to control command: Enable survivability/DCT mode + * This command has no payload + */ + VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d, + /** + * Response to control command: Disable survivability/DCT mode + * This command has no payload + */ + VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e, + /** + * Response to state dump control command. + * This command has no payload. + * NOTE: Please introduce new ASYNC responses before this one. + */ + VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, + /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, /** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */ VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301, + /** + * Acknowledgment of completion of the save procedure initiated by + * VPU_JSM_MSG_PWR_D0I3_ENTER + */ + VPU_JSM_MSG_PWR_D0I3_ENTER_DONE = 0x2302, }; enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED }; -/* - * Host <-> LRT IPC message payload definitions +/** + * Engine reset request payload + * @see VPU_JSM_MSG_ENGINE_RESET */ struct vpu_ipc_msg_payload_engine_reset { - /* Engine to be reset. */ + /** Engine to be reset. */ u32 engine_idx; + /** Reserved */ + u32 reserved_0; }; +/** + * Engine preemption request struct + * @see VPU_JSM_MSG_ENGINE_PREEMPT + */ struct vpu_ipc_msg_payload_engine_preempt { - /* Engine to be preempted. */ + /** Engine to be preempted. */ u32 engine_idx; - /* ID of the preemption request. */ + /** ID of the preemption request. */ u32 preempt_id; }; -/* - * @brief Register doorbell command structure. +/** + * Register doorbell command structure. 
* This structure supports doorbell registration for OS scheduling only. * @see VPU_JSM_MSG_REGISTER_DB */ struct vpu_ipc_msg_payload_register_db { - /* Index of the doorbell to register. */ + /** Index of the doorbell to register. */ u32 db_idx; - /* Virtual address in Global GTT pointing to the start of job queue. */ + /** Reserved */ + u32 reserved_0; + /** Virtual address in Global GTT pointing to the start of job queue. */ u64 jobq_base; - /* Size of the job queue in bytes. */ + /** Size of the job queue in bytes. */ u32 jobq_size; - /* Host sub-stream ID for the context assigned to the doorbell. */ + /** Host sub-stream ID for the context assigned to the doorbell. */ u32 host_ssid; }; /** - * @brief Unregister doorbell command structure. + * Unregister doorbell command structure. * Request structure to unregister a doorbell for both HW and OS scheduling. * @see VPU_JSM_MSG_UNREGISTER_DB */ struct vpu_ipc_msg_payload_unregister_db { - /* Index of the doorbell to unregister. */ + /** Index of the doorbell to unregister. */ u32 db_idx; + /** Reserved */ + u32 reserved_0; }; +/** + * Heartbeat request structure + * @see VPU_JSM_MSG_QUERY_ENGINE_HB + */ struct vpu_ipc_msg_payload_query_engine_hb { - /* Engine to return heartbeat value. */ + /** Engine to return heartbeat value. */ u32 engine_idx; + /** Reserved */ + u32 reserved_0; }; struct vpu_ipc_msg_payload_power_level { @@ -371,11 +862,19 @@ struct vpu_ipc_msg_payload_power_level { * considered to be valid. */ u32 power_level; + /* Reserved */ + u32 reserved_0; }; +/** + * Structure for requesting SSID release + * @see VPU_JSM_MSG_SSID_RELEASE + */ struct vpu_ipc_msg_payload_ssid_release { - /* Host sub-stream ID for the context to be released. */ + /** Host sub-stream ID for the context to be released. */ u32 host_ssid; + /** Reserved */ + u32 reserved_0; }; /** @@ -400,7 +899,7 @@ struct vpu_jsm_metric_streamer_start { u64 sampling_rate; /** * If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message - * after every @notify_sample_count samples is collected or dropped by the VPU. + * after every @ref notify_sample_count samples are collected or dropped by the VPU. * If set to UINT_MAX the VPU will only generate a notification when the metric * buffer is full. If set to 0 the VPU will never generate a notification. */ u32 notify_sample_count; @@ -410,9 +909,9 @@ struct vpu_jsm_metric_streamer_start { * Address and size of the buffer where the VPU will write metric data. The * VPU writes all counters from enabled metric groups one after another. If * there is no space left to write data at the next sample period the VPU - * will switch to the next buffer (@see next_buffer_addr) and will optionally - * send a notification to the host driver if @notify_sample_count is non-zero. - * If @next_buffer_addr is NULL the VPU will stop collecting metric data. + * will switch to the next buffer (@ref next_buffer_addr) and will optionally + * send a notification to the host driver if @ref notify_sample_count is non-zero. + * If @ref next_buffer_addr is NULL the VPU will stop collecting metric data. */ u64 buffer_addr; u64 buffer_size; @@ -425,9 +924,6 @@ struct vpu_jsm_metric_streamer_start { u64 next_buffer_size; }; -static_assert(sizeof(struct vpu_jsm_metric_streamer_start) % 8 == 0, - "vpu_jsm_metric_streamer_start is misaligned"); - /** * @brief Metric streamer stop command structure.
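+ * The metric_group_mask below presumably identifies the streamer instance to + * stop and is expected to match the mask passed to the corresponding start + * command.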
* @see VPU_JSM_MSG_METRIC_STREAMER_STOP @@ -437,9 +933,6 @@ struct vpu_jsm_metric_streamer_stop { u64 metric_group_mask; }; -static_assert(sizeof(struct vpu_jsm_metric_streamer_stop) % 8 == 0, - "vpu_jsm_metric_streamer_stop is misaligned"); - /** * Provide VPU FW with buffers to write metric data. * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE @@ -448,12 +941,22 @@ struct vpu_jsm_metric_streamer_update { /** Metric group mask that identifies metric streamer instance. */ u64 metric_group_mask; /** - * Address and size of the buffer where the VPU will write metric data. If - * the buffer address is 0 or same as the currently used buffer the VPU will - * continue writing metric data to the current buffer. In this case the - * buffer size is ignored and the size of the current buffer is unchanged. - * If the address is non-zero and differs from the current buffer address the - * VPU will immediately switch data collection to the new buffer. + * Address and size of the buffer where the VPU will write metric data. + * This member determines how the update operation is performed: + * 1. the client needs information about the number of collected samples and the + * amount of data written to the current buffer + * 2. the client wants to switch to a new buffer + + * + * Case 1. is identified by the buffer address being 0 or the same as the + * currently used buffer address. In this case the buffer size is ignored and + * the size of the current buffer is unchanged. The VPU will return an update + * in the vpu_jsm_metric_streamer_done structure. The internal writing position + * into the buffer is not changed. + * + * Case 2. is identified by the address being non-zero and differing from the + * current buffer address. The VPU will immediately switch data collection to + * the new buffer. Then the VPU will return an update in the + * vpu_jsm_metric_streamer_done structure. */ u64 buffer_addr; u64 buffer_size; @@ -471,56 +974,80 @@ struct vpu_jsm_metric_streamer_update { u64 next_buffer_size; }; -static_assert(sizeof(struct vpu_jsm_metric_streamer_update) % 8 == 0, - "vpu_jsm_metric_streamer_update is misaligned"); - -struct vpu_ipc_msg_payload_blob_deinit { - /* 64-bit unique ID for the blob to be de-initialized. */ - u64 blob_id; -}; - +/** + * Device -> host job completion message. + * @see VPU_JSM_MSG_JOB_DONE + */ struct vpu_ipc_msg_payload_job_done { - /* Engine to which the job was submitted. */ + /** Engine to which the job was submitted. */ u32 engine_idx; - /* Index of the doorbell to which the job was submitted */ + /** Index of the doorbell to which the job was submitted */ u32 db_idx; - /* ID of the completed job */ + /** ID of the completed job */ u32 job_id; - /* Status of the completed job */ + /** Status of the completed job */ u32 job_status; - /* Host SSID */ + /** Host SSID */ u32 host_ssid; - /* Zero Padding */ - u32 reserved; - /* Command queue id */ + /** Zero Padding */ + u32 reserved_0; + /** Command queue id */ + u64 cmdq_id; +}; + +/** + * Notification message upon native fence signalling. + * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED + */ +struct vpu_ipc_msg_payload_native_fence_signalled { + /** Engine ID. */ + u32 engine_idx; + /** Host SSID. */ + u32 host_ssid; + /** CMDQ ID */ + u64 cmdq_id; + /** Fence object handle. */ + u64 fence_handle; }; + +/** + * vpu_ipc_msg_payload_engine_reset_done contains an array of this structure, + * identifying which queues caused the reset when the FW was able to detect an error.
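+ * For example (illustrative): if command queue 7 hung engine 0 while command + * queue 9 was also resident there, the done message would list both contexts, + * flagging queue 7 as the cause of the hang and queue 9 as collateral damage.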
+ * @see vpu_ipc_msg_payload_engine_reset_done + */ struct vpu_jsm_engine_reset_context { - /* Host SSID */ + /** Host SSID */ u32 host_ssid; - /* Zero Padding */ - u32 reserved; - /* Command queue id */ + /** Zero Padding */ + u32 reserved_0; + /** Command queue id */ u64 cmdq_id; - /* Flags: 0: cause of hang; 1: collateral damage of reset */ + /** See VPU_ENGINE_RESET_CONTEXT_* defines */ u64 flags; }; +/** + * Engine reset response. + * @see VPU_JSM_MSG_ENGINE_RESET_DONE + */ struct vpu_ipc_msg_payload_engine_reset_done { - /* Engine ordinal */ + /** Engine ordinal */ u32 engine_idx; - /* Number of impacted contexts */ + /** Number of impacted contexts */ u32 num_impacted_contexts; - /* Array of impacted command queue ids and their flags */ + /** Array of impacted command queue ids and their flags */ struct vpu_jsm_engine_reset_context impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS]; }; +/** + * Preemption response struct + * @see VPU_JSM_MSG_ENGINE_PREEMPT_DONE + */ struct vpu_ipc_msg_payload_engine_preempt_done { - /* Engine preempted. */ + /** Engine preempted. */ u32 engine_idx; - /* ID of the preemption request. */ + /** ID of the preemption request. */ u32 preempt_id; }; @@ -533,6 +1060,8 @@ struct vpu_ipc_msg_payload_engine_preempt_done { struct vpu_ipc_msg_payload_register_db_done { /* Index of the registered doorbell. */ u32 db_idx; + /* Reserved */ + u32 reserved_0; }; /** @@ -543,12 +1072,20 @@ struct vpu_ipc_msg_payload_register_db_done { struct vpu_ipc_msg_payload_unregister_db_done { /* Index of the unregistered doorbell. */ u32 db_idx; + /* Reserved */ + u32 reserved_0; }; +/** + * Structure for heartbeat response + * @see VPU_JSM_MSG_QUERY_ENGINE_HB_DONE + */ struct vpu_ipc_msg_payload_query_engine_hb_done { - /* Engine returning heartbeat value. */ + /** Engine returning heartbeat value. */ u32 engine_idx; - /* Heartbeat value. */ + /** Reserved */ + u32 reserved_0; + /** Heartbeat value. */ u64 heartbeat; }; @@ -559,6 +1096,8 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { * implementations. */ u32 power_level_count; + /* Reserved */ + u32 reserved_0; /** * Power consumption limit for each supported power level in * [0-100%] range relative to power level 0. @@ -566,99 +1105,140 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { u8 power_limit[16]; }; -struct vpu_ipc_msg_payload_blob_deinit_done { - /* 64-bit unique ID for the blob de-initialized. */ - u64 blob_id; -}; - -/* HWS priority band setup request / response */ +/** + * HWS priority band setup request / response + * @see VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP + */ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* * Grace period in 100ns units when preempting another priority band for * this priority band */ - u64 grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; /* * Default quantum in 100ns units for scheduling across processes * within a priority band + * Minimum value supported by NPU is 1ms (10000 in 100ns units). */ - u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; /* * Default grace period in 100ns units for processes that preempt each * other within a priority band */ - u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; /* * For normal priority band, specifies the target VPU percentage * in situations when it's starved by the focus band. */ u32 normal_band_percentage; + /* + * TDR timeout value in milliseconds. 
A value of 0 means no timeout. + */ + u32 tdr_timeout; + /* Non-interactive queue timeout, in milliseconds, for no heartbeat progress. + * A value of 0 means no timeout. + */ + u32 non_interactive_no_progress_timeout; + /* + * Non-interactive queue upper limit timeout value in milliseconds. A + * value of 0 means no timeout. + */ + u32 non_interactive_timeout; }; -/* HWS create command queue request */ +/** + * @brief HWS create command queue request. + * Host will create a command queue via this command. + * Note: Cmdq group is a handle of an object which + * may contain one or more command queues. + * @see VPU_JSM_MSG_CREATE_CMD_QUEUE + */ struct vpu_ipc_msg_payload_hws_create_cmdq { /* Process id */ u64 process_id; /* Host SSID */ u32 host_ssid; - /* Zero Padding */ - u32 reserved; + /* Engine for which queue is being created */ + u32 engine_idx; + /* Cmdq group: only used for HWS logging of state changes */ + u64 cmdq_group; /* Command queue id */ u64 cmdq_id; /* Command queue base */ u64 cmdq_base; /* Command queue size */ u32 cmdq_size; + /* Zero padding */ + u32 reserved_0; }; -/* HWS create command queue response */ +/** + * HWS create command queue response. + * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP + */ struct vpu_ipc_msg_payload_hws_create_cmdq_rsp { - /* Process id */ + /** Process id */ u64 process_id; - /* Host SSID */ + /** Host SSID */ u32 host_ssid; - /* Zero Padding */ - u32 reserved; - /* Command queue id */ + /** Engine for which queue is being created */ + u32 engine_idx; + /** Command queue group */ + u64 cmdq_group; + /** Command queue id */ u64 cmdq_id; }; -/* HWS destroy command queue request / response */ +/** + * HWS destroy command queue request / response + * @see VPU_JSM_MSG_DESTROY_CMD_QUEUE + * @see VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP + */ struct vpu_ipc_msg_payload_hws_destroy_cmdq { - /* Host SSID */ + /** Host SSID */ u32 host_ssid; - /* Zero Padding */ + /** Zero Padding */ u32 reserved; - /* Command queue id */ + /** Command queue id */ u64 cmdq_id; }; -/* HWS set context scheduling properties request / response */ +/** + * HWS set context scheduling properties request / response + * @see VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES + * @see VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP + */ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { - /* Host SSID */ + /** Host SSID */ u32 host_ssid; - /* Zero Padding */ + /** Zero Padding */ u32 reserved_0; - /* Command queue id */ + /** Command queue id */ u64 cmdq_id; - /* Priority band to assign to work of this context */ + /** + * Priority band to assign to work of this context. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ u32 priority_band; - /* Inside realtime band assigns a further priority */ + /** Inside realtime band assigns a further priority */ u32 realtime_priority_level; - /* Priority relative to other contexts in the same process */ - u32 in_process_priority; - /* Zero padding / Reserved */ + /** Priority relative to other contexts in the same process */ + s32 in_process_priority; + /** Zero padding / Reserved */ u32 reserved_1; - /* Context quantum relative to other contexts of same priority in the same process */ + /** + * Context quantum relative to other contexts of same priority in the same process. + * Minimum value supported by NPU is 1ms (10000 in 100ns units).
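+ * For example, a context_quantum of 50000 corresponds to 5 ms.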
+ */ u64 context_quantum; - /* Grace period when preempting context of the same priority within the same process */ + /** Grace period when preempting context of the same priority within the same process */ u64 grace_period_same_priority; - /* Grace period when preempting context of a lower priority within the same process */ + /** Grace period when preempting context of a lower priority within the same process */ u64 grace_period_lower_priority; }; -/* - * @brief Register doorbell command structure. +/** + * Register doorbell command structure. * This structure supports doorbell registration for both HW and OS scheduling. * Note: Queue base and size are added here so that the same structure can be used for * OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored @@ -667,19 +1247,142 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { * @see VPU_JSM_MSG_HWS_REGISTER_DB */ struct vpu_jsm_hws_register_db { - /* Index of the doorbell to register. */ + /** Index of the doorbell to register. */ u32 db_id; - /* Host sub-stream ID for the context assigned to the doorbell. */ + /** Host sub-stream ID for the context assigned to the doorbell. */ u32 host_ssid; - /* ID of the command queue associated with the doorbell. */ + /** ID of the command queue associated with the doorbell. */ u64 cmdq_id; - /* Virtual address pointing to the start of command queue. */ + /** Virtual address pointing to the start of command queue. */ u64 cmdq_base; - /* Size of the command queue in bytes. */ + /** Size of the command queue in bytes. */ u64 cmdq_size; }; /** + * Structure to set another buffer to be used for scheduling-related logging. + * The size of the logging buffer and the number of entries is defined as part of the + * buffer itself as described next. + * The log buffer received from the host is made up of; + * - header: 32 bytes in size, as shown in @ref vpu_hws_log_buffer_header. + * The header contains the number of log entries in the buffer. + * - log entry: 0 to n-1, each log entry is 32 bytes in size, as shown in + * @ref vpu_hws_log_buffer_entry. + * The entry contains the VPU timestamp, operation type and data. + * The host should provide the notify index value of log buffer to VPU. This is a + * value defined within the log buffer and when written to will generate the + * scheduling log notification. + * The host should set engine_idx and vpu_log_buffer_va to 0 to disable logging + * for a particular engine. + * VPU will handle one log buffer for each of supported engines. + * VPU should allow the logging to consume one host_ssid. + * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG + * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP + * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION + */ +struct vpu_ipc_msg_payload_hws_set_scheduling_log { + /** Engine ordinal */ + u32 engine_idx; + /** Host SSID */ + u32 host_ssid; + /** + * VPU log buffer virtual address. + * Set to 0 to disable logging for this engine. + */ + u64 vpu_log_buffer_va; + /** + * Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION + * is generated when an event log is written to this index. + */ + u64 notify_index; + /** + * Field is now deprecated, will be removed when KMD is updated to support removal + */ + u32 enable_extra_events; + /** Zero Padding */ + u32 reserved_0; +}; + +/** + * The scheduling log notification is generated by VPU when it writes + * an event into the log buffer at the notify_index. VPU notifies host with + * VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. 
This is an asynchronous + * message from VPU to host. + * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION + * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG + */ +struct vpu_ipc_msg_payload_hws_scheduling_log_notification { + /** Engine ordinal */ + u32 engine_idx; + /** Zero Padding */ + u32 reserved_0; +}; + +/** + * HWS suspend command queue request and done structure. + * Host will request the suspend of contexts and VPU will: + * - Suspend all work on this context + * - Preempt any running work + * - Asynchronously perform the above and return success immediately once + * all items above are started successfully + * - Notify the host of completion of these operations via + * VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE + * - Reject any other context operations on a context with an in-flight + * suspend request running + * The same structure is used when VPU notifies host of completion of a context + * suspend request. The ids and suspend fence value reported in this command will + * match those in the request from the host to suspend the context. Once suspend is + * complete, VPU will not access any data relating to this command queue until + * it is resumed. + * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ + * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE + */ +struct vpu_ipc_msg_payload_hws_suspend_cmdq { + /** Host SSID */ + u32 host_ssid; + /** Zero Padding */ + u32 reserved_0; + /** Command queue id */ + u64 cmdq_id; + /** + * Suspend fence value, reported by the VPU once the context + * suspend has completed. + */ + u64 suspend_fence_value; +}; + +/** + * HWS Resume command queue request / response structure. + * Host will request the resume of a context: + * - VPU will resume all work on this context + * - Scheduler will allow this context to be scheduled + * @see VPU_JSM_MSG_HWS_RESUME_CMDQ + * @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP + */ +struct vpu_ipc_msg_payload_hws_resume_cmdq { + /** Host SSID */ + u32 host_ssid; + /** Zero Padding */ + u32 reserved_0; + /** Command queue id */ + u64 cmdq_id; +}; + +/** + * HWS Resume engine request / response structure. + * After a HWS engine reset, all scheduling is stopped on VPU until an engine resume. + * Host shall send this command to resume scheduling of any valid queue. + * @see VPU_JSM_MSG_HWS_ENGINE_RESUME + * @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE + */ +struct vpu_ipc_msg_payload_hws_resume_engine { + /** Engine to be resumed */ + u32 engine_idx; + /** Reserved */ + u32 reserved_0; +}; + +/** * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages. * @@ -806,13 +1509,10 @@ struct vpu_jsm_metric_streamer_done { u64 bytes_written; }; -static_assert(sizeof(struct vpu_jsm_metric_streamer_done) % 8 == 0, - "vpu_jsm_metric_streamer_done is misaligned"); - /** * Metric group description placed in the metric buffer after successful completion * of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more - * @vpu_jsm_metric_counter_descriptor records. + * @ref vpu_jsm_metric_counter_descriptor records. * @see VPU_JSM_MSG_METRIC_STREAMER_INFO */ struct vpu_jsm_metric_group_descriptor { @@ -848,16 +1548,13 @@ struct vpu_jsm_metric_group_descriptor { u32 name_string_size; /** Counter description string size, @see name_string_size */ u32 description_string_size; - u32 reserved_0[2]; + u64 reserved_0; /** * Right after this structure, the VPU writes name and description of * the metric group.
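+ * For example (sizes illustrative): with name_string_size = 8 and + * description_string_size = 32, the two strings occupy the 40 bytes that + * immediately follow this descriptor.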
*/ }; -static_assert(sizeof(struct vpu_jsm_metric_group_descriptor) % 8 == 0, - "vpu_jsm_metric_group_descriptor is misaligned"); - /** * Metric counter description, placed in the buffer after vpu_jsm_metric_group_descriptor. * @see VPU_JSM_MSG_METRIC_STREAMER_INFO @@ -894,44 +1591,65 @@ struct vpu_jsm_metric_counter_descriptor { u32 component_string_size; /** Counter string size, @see name_string_size */ u32 units_string_size; - u32 reserved_0[2]; + u64 reserved_0; /** * Right after this structure, the VPU writes name, description * component and unit strings. */ }; -static_assert(sizeof(struct vpu_jsm_metric_counter_descriptor) % 8 == 0, - "vpu_jsm_metric_counter_descriptor is misaligned"); - /** - * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests. + * Payload for @ref VPU_JSM_MSG_DYNDBG_CONTROL requests. * - * VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug - * feature, which allows developers to selectively enable / disable MVLOG_DEBUG - * messages. This is equivalent to the Dynamic Debug functionality provided by - * Linux - * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html) - * The host can control Dynamic Debug behavior by sending dyndbg commands, which - * have the same syntax as Linux - * dyndbg commands. + * VPU_JSM_MSG_DYNDBG_CONTROL requests are used to control the VPU FW dynamic debug + * feature, which allows developers to selectively enable/disable code to obtain + * additional FW information. This is equivalent to the dynamic debug functionality + * provided by Linux. The host can control dynamic debug behavior by sending dyndbg + * commands, using the same syntax as for Linux dynamic debug commands. * - * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host - * still has to set the logging level to MVLOG_DEBUG, using the - * VPU_JSM_MSG_TRACE_SET_CONFIG command. + * @see https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html. * - * The host can see the current dynamic debug configuration by executing a - * special 'show' command. The dyndbg configuration will be printed to the - * configured logging destination using MVLOG_INFO logging level. + * NOTE: + * As the dynamic debug feature uses MVLOG messages to provide information, the host + * must first set the logging level to MVLOG_DEBUG, using the @ref VPU_JSM_MSG_TRACE_SET_CONFIG + * command. */ struct vpu_ipc_msg_payload_dyndbg_control { /** - * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated - * string. + * Dyndbg command to be executed. */ char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN]; }; +/** + * Payload for VPU_JSM_MSG_PWR_D0I3_ENTER + * + * This is a bi-directional payload. + */ +struct vpu_ipc_msg_payload_pwr_d0i3_enter { + /** + * 0: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is not sent to the host driver + * The driver will poll for D0i2 Idle state transitions. + * 1: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is sent after VPU state save is complete + */ + u32 send_response; + u32 reserved_0; +}; + +/** + * Payload for @ref VPU_JSM_MSG_DCT_ENABLE message. + * + * Default values for DCT active/inactive times are 5.3ms and 30ms respectively, + * corresponding to a 85% duty cycle. This payload allows the host to tune these + * values according to application requirements. 
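+ * As a worked illustration using the default times above: dct_active_us = 5300 + * and dct_inactive_us = 30000 give an active fraction of 5300 / (5300 + 30000), + * i.e. the NPU runs roughly 15% of each DCT cycle and is throttled for the + * remaining 85%.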
+ */ +struct vpu_ipc_msg_payload_pwr_dct_control { + /** Duty cycle active time in microseconds */ + u32 dct_active_us; + /** Duty cycle inactive time in microseconds */ + u32 dct_inactive_us; +}; + /* * Payloads union, used to define complete message format. */ @@ -945,10 +1663,10 @@ union vpu_ipc_msg_payload { struct vpu_jsm_metric_streamer_start metric_streamer_start; struct vpu_jsm_metric_streamer_stop metric_streamer_stop; struct vpu_jsm_metric_streamer_update metric_streamer_update; - struct vpu_ipc_msg_payload_blob_deinit blob_deinit; struct vpu_ipc_msg_payload_ssid_release ssid_release; struct vpu_jsm_hws_register_db hws_register_db; struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled; struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; struct vpu_ipc_msg_payload_register_db_done register_db_done; @@ -956,7 +1674,6 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; struct vpu_jsm_metric_streamer_done metric_streamer_done; - struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; struct vpu_ipc_msg_payload_trace_config trace_config; struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; struct vpu_ipc_msg_payload_trace_get_name trace_get_name; @@ -968,27 +1685,37 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq; struct vpu_ipc_msg_payload_hws_set_context_sched_properties hws_set_context_sched_properties; + struct vpu_ipc_msg_payload_hws_set_scheduling_log hws_set_scheduling_log; + struct vpu_ipc_msg_payload_hws_scheduling_log_notification hws_scheduling_log_notification; + struct vpu_ipc_msg_payload_hws_suspend_cmdq hws_suspend_cmdq; + struct vpu_ipc_msg_payload_hws_resume_cmdq hws_resume_cmdq; + struct vpu_ipc_msg_payload_hws_resume_engine hws_resume_engine; + struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter; + struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control; }; -/* - * Host <-> LRT IPC message base structure. +/** + * Host <-> NPU IPC message base structure. * * NOTE: All instances of this object must be aligned on a 64B boundary * to allow proper handling of VPU cache operations. */ struct vpu_jsm_msg { - /* Message type, see vpu_ipc_msg_type enum. */ + /** Reserved */ + u64 reserved_0; + /** Message type, see @ref vpu_ipc_msg_type. */ u32 type; - /* Buffer status, see vpu_ipc_msg_status enum. */ + /** Buffer status, see @ref vpu_ipc_msg_status. */ u32 status; - /* + /** * Request ID, provided by the host in a request message and passed * back by VPU in the response message. */ u32 request_id; - /* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */ + /** Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */ u32 result; - /* Message payload depending on message type, see vpu_ipc_msg_payload union. */ + u64 reserved_1; + /** Message payload depending on message type, see vpu_ipc_msg_payload union. 
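+ * As a host-side usage sketch (illustrative, not part of this header), an + * engine reset request could be framed as + * struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_RESET, + * .status = VPU_JSM_MSG_ALLOCATED, .request_id = id }; + * req.payload.engine_reset.engine_idx = engine; + * where id and engine are values chosen by the host driver.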
*/ union vpu_ipc_msg_payload payload; }; diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig new file mode 100644 index 000000000000..116e42d152ca --- /dev/null +++ b/drivers/accel/qaic/Kconfig @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Qualcomm Cloud AI accelerators driver +# + +config DRM_ACCEL_QAIC + tristate "Qualcomm Cloud AI accelerators" + depends on DRM_ACCEL + depends on PCI && HAS_IOMEM + depends on MHI_BUS + select CRC32 + select WANT_DEV_COREDUMP + help + Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are + designed to accelerate Deep Learning inference workloads. + + The driver manages the PCIe devices and provides an IOCTL interface + for users to submit workloads to the devices. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called qaic. diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile new file mode 100644 index 000000000000..71f727b74da3 --- /dev/null +++ b/drivers/accel/qaic/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for Qualcomm Cloud AI accelerators driver +# + +obj-$(CONFIG_DRM_ACCEL_QAIC) := qaic.o + +qaic-y := \ + mhi_controller.o \ + qaic_control.o \ + qaic_data.o \ + qaic_drv.o \ + qaic_ras.o \ + qaic_ssr.o \ + qaic_sysfs.o \ + qaic_timesync.o \ + sahara.o + +qaic-$(CONFIG_DEBUG_FS) += qaic_debugfs.o diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c new file mode 100644 index 000000000000..13a14c6c6168 --- /dev/null +++ b/drivers/accel/qaic/mhi_controller.c @@ -0,0 +1,976 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/memblock.h> +#include <linux/mhi.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/sizes.h> + +#include "mhi_controller.h" +#include "qaic.h" + +#define MAX_RESET_TIME_SEC 25 + +static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */ +module_param(mhi_timeout_ms, uint, 0600); +MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value"); + +static const char *fw_image_paths[FAMILY_MAX] = { + [FAMILY_AIC100] = "qcom/aic100/sbl.bin", + [FAMILY_AIC200] = "qcom/aic200/sbl.bin", +}; + +static const struct mhi_channel_config aic100_channels[] = { + { + .name = "QAIC_LOOPBACK", + .num = 0, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOOPBACK", + .num = 1, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 2, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 3, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DIAG", + .num = 4, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DIAG", + .num = 5, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 6, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 7, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_QDSS", + .num = 8, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + 
.ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_QDSS", + .num = 9, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 10, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 11, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 12, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 13, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 14, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 15, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 16, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 17, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DEBUG", + .num = 18, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = 
MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DEBUG", + .num = 19, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC", + .num = 20, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC", + .num = 21, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC_PERIODIC", + .num = 22, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC_PERIODIC", + .num = 23, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 24, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 25, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = true, + .wake_capable = false, + }, +}; + +static const struct mhi_channel_config aic200_channels[] = { + { + .name = "QAIC_LOOPBACK", + .num = 0, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOOPBACK", + .num = 1, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 2, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell 
= MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 3, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 6, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 7, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 10, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 11, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 12, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 13, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 14, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 15, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 16, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + 
.doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 17, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC_PERIODIC", + .num = 22, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC_PERIODIC", + .num = 23, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 24, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 25, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = true, + .wake_capable = false, + }, +}; + +static struct mhi_event_config aic100_events[] = { + { + .num_elements = 32, + .irq_moderation_ms = 0, + .irq = 0, + .channel = U32_MAX, + .priority = 1, + .mode = MHI_DB_BRST_DISABLE, + .data_type = MHI_ER_CTRL, + .hardware_event = false, + .client_managed = false, + .offload_channel = false, + }, +}; + +static struct mhi_event_config aic200_events[] = { + { + .num_elements = 32, + .irq_moderation_ms = 0, + .irq = 0, + .channel = U32_MAX, + .priority = 1, + .mode = MHI_DB_BRST_DISABLE, + .data_type = MHI_ER_CTRL, + .hardware_event = false, + .client_managed = false, + .offload_channel = false, + }, +}; + +static struct mhi_controller_config mhi_cntrl_configs[] = { + [FAMILY_AIC100] = { + .max_channels = 128, + .timeout_ms = 0, /* controlled by mhi_timeout */ + .buf_len = 0, + .num_channels = ARRAY_SIZE(aic100_channels), + .ch_cfg = aic100_channels, + .num_events = ARRAY_SIZE(aic100_events), + .event_cfg = aic100_events, + .use_bounce_buf = false, + .m2_no_db = false, + }, + [FAMILY_AIC200] = { + .max_channels = 128, + .timeout_ms = 0, /* controlled by mhi_timeout */ + .buf_len = 0, + .num_channels = ARRAY_SIZE(aic200_channels), + .ch_cfg = aic200_channels, + .num_events = ARRAY_SIZE(aic200_events), + .event_cfg = aic200_events, + .use_bounce_buf = false, + .m2_no_db = false, + }, +}; + +static int mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 *out) +{ + u32 tmp; + + /* + * SOC_HW_VERSION quirk + * The SOC_HW_VERSION register (offset 0x224) is not reliable and + * may contain uninitialized values, including 0xFFFFFFFF. This could + * cause a false positive link down error. Instead, intercept any + * reads and provide the correct value of the register. 
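+ * The check below compares the register offset (addr - mhi_cntrl->regs) + * against 0x224 (SOC_HW_VERSION) and returns a fixed, known-good value + * instead of reading the hardware.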
+ */ + if (addr - mhi_cntrl->regs == 0x224) { + *out = 0x60110200; + return 0; + } + + tmp = readl_relaxed(addr); + if (tmp == U32_MAX) + return -EIO; + + *out = tmp; + + return 0; +} + +static void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 val) +{ + writel_relaxed(val, addr); +} + +static int mhi_runtime_get(struct mhi_controller *mhi_cntrl) +{ + return 0; +} + +static void mhi_runtime_put(struct mhi_controller *mhi_cntrl) +{ +} + +static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback reason) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_cntrl->cntrl_dev)); + + /* this event occurs in atomic context */ + if (reason == MHI_CB_FATAL_ERROR) + pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n"); + /* this event occurs in non-atomic context */ + if (reason == MHI_CB_SYS_ERROR) + qaic_dev_reset_clean_local_state(qdev); +} + +static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl) +{ + u8 time_sec = 1; + int current_ee; + int ret; + + /* Reset the device to bring the device into PBL EE */ + mhi_soc_reset(mhi_cntrl); + + /* + * Keep checking the execution environment (EE) at 1 second + * intervals. + */ + do { + msleep(1000); + current_ee = mhi_get_exec_env(mhi_cntrl); + } while (current_ee != MHI_EE_PBL && time_sec++ <= MAX_RESET_TIME_SEC); + + /* If the device is in PBL EE, retry power up */ + if (current_ee == MHI_EE_PBL) + ret = mhi_async_power_up(mhi_cntrl); + else + ret = -EIO; + + return ret; +} + +struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar, + int mhi_irq, bool shared_msi, int family) +{ + struct mhi_controller_config mhi_config = mhi_cntrl_configs[family]; + struct mhi_controller *mhi_cntrl; + int ret; + + mhi_cntrl = devm_kzalloc(&pci_dev->dev, sizeof(*mhi_cntrl), GFP_KERNEL); + if (!mhi_cntrl) + return ERR_PTR(-ENOMEM); + + mhi_cntrl->cntrl_dev = &pci_dev->dev; + + /* + * Covers the entire possible physical RAM region. The remote side is + * going to calculate the size of this range, so subtract 1 to prevent + * rollover. + */ + mhi_cntrl->iova_start = 0; + mhi_cntrl->iova_stop = PHYS_ADDR_MAX - 1; + mhi_cntrl->status_cb = mhi_status_cb; + mhi_cntrl->runtime_get = mhi_runtime_get; + mhi_cntrl->runtime_put = mhi_runtime_put; + mhi_cntrl->read_reg = mhi_read_reg; + mhi_cntrl->write_reg = mhi_write_reg; + mhi_cntrl->regs = mhi_bar; + mhi_cntrl->reg_len = SZ_4K; + mhi_cntrl->nr_irqs = 1; + mhi_cntrl->irq = devm_kmalloc(&pci_dev->dev, sizeof(*mhi_cntrl->irq), GFP_KERNEL); + + if (!mhi_cntrl->irq) + return ERR_PTR(-ENOMEM); + + mhi_cntrl->irq[0] = mhi_irq; + + if (shared_msi) /* MSI shared with data path, no IRQF_NO_SUSPEND */ + mhi_cntrl->irq_flags = IRQF_SHARED; + + mhi_cntrl->fw_image = fw_image_paths[family]; + + if (family == FAMILY_AIC200) { + mhi_cntrl->name = "AIC200"; + mhi_cntrl->seg_len = SZ_512K; + } else { + mhi_cntrl->name = "AIC100"; + } + + /* use latest configured timeout */ + mhi_config.timeout_ms = mhi_timeout_ms; + ret = mhi_register_controller(mhi_cntrl, &mhi_config); + if (ret) { + pci_err(pci_dev, "mhi_register_controller failed %d\n", ret); + return ERR_PTR(ret); + } + + ret = mhi_prepare_for_power_up(mhi_cntrl); + if (ret) { + pci_err(pci_dev, "mhi_prepare_for_power_up failed %d\n", ret); + goto prepare_power_up_fail; + } + + ret = mhi_async_power_up(mhi_cntrl); + /* + * If EIO is returned, it is possible that the device is in SBL EE, which is + * undesired.
SOC reset the device and try to power up again. + */ + if (ret == -EIO && MHI_EE_SBL == mhi_get_exec_env(mhi_cntrl)) { + pci_err(pci_dev, "Found device in SBL at MHI init. Attempting a reset.\n"); + ret = mhi_reset_and_async_power_up(mhi_cntrl); + } + + if (ret) { + pci_err(pci_dev, "mhi_async_power_up failed %d\n", ret); + goto power_up_fail; + } + + return mhi_cntrl; + +power_up_fail: + mhi_unprepare_after_power_down(mhi_cntrl); +prepare_power_up_fail: + mhi_unregister_controller(mhi_cntrl); + return ERR_PTR(ret); +} + +void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up) +{ + mhi_power_down(mhi_cntrl, link_up); + mhi_unprepare_after_power_down(mhi_cntrl); + mhi_unregister_controller(mhi_cntrl); +} + +void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl) +{ + mhi_power_down(mhi_cntrl, true); +} + +void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl) +{ + struct pci_dev *pci_dev = container_of(mhi_cntrl->cntrl_dev, struct pci_dev, dev); + int ret; + + ret = mhi_async_power_up(mhi_cntrl); + if (ret) + pci_err(pci_dev, "mhi_async_power_up failed after reset %d\n", ret); +} diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h new file mode 100644 index 000000000000..8939f6ae185e --- /dev/null +++ b/drivers/accel/qaic/mhi_controller.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef MHICONTROLLERQAIC_H_ +#define MHICONTROLLERQAIC_H_ + +struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar, + int mhi_irq, bool shared_msi, int family); +void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up); +void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl); +void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl); + +#endif /* MHICONTROLLERQAIC_H_ */ diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h new file mode 100644 index 000000000000..fa7a8155658c --- /dev/null +++ b/drivers/accel/qaic/qaic.h @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef _QAIC_H_ +#define _QAIC_H_ + +#include <linux/interrupt.h> +#include <linux/kref.h> +#include <linux/mhi.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/srcu.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> + +#define QAIC_DBC_BASE SZ_128K +#define QAIC_DBC_SIZE SZ_4K +#define QAIC_SSR_DBC_SENTINEL U32_MAX /* No ongoing SSR sentinel */ + +#define QAIC_NO_PARTITION -1 + +#define QAIC_DBC_OFF(i) ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE) + +#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base) +#define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm) +#define to_drm(qddev) (&(qddev)->drm) +#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */ +#define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev) + +enum aic_families { + FAMILY_AIC100, + FAMILY_AIC200, + FAMILY_MAX, +}; + +enum __packed dev_states { + /* Device is offline or will be very soon */ + QAIC_OFFLINE, + /* Device is booting, not clear if it's in a usable state */ + QAIC_BOOT, + /* Device is fully operational */ + QAIC_ONLINE, +}; + +enum dbc_states { + /* DBC is free and can be activated */ + DBC_STATE_IDLE, + /* DBC is activated and a workload is running on device */ + DBC_STATE_ASSIGNED, + /* Sub-system associated with this workload has crashed and it will shut down soon */ + DBC_STATE_BEFORE_SHUTDOWN, + /* Sub-system associated with this workload has crashed and it has shut down */ + DBC_STATE_AFTER_SHUTDOWN, + /* Sub-system associated with this workload is shut down and will be powered up soon */ + DBC_STATE_BEFORE_POWER_UP, + /* Sub-system associated with this workload is now powered up */ + DBC_STATE_AFTER_POWER_UP, + DBC_STATE_MAX, +}; + +extern bool datapath_polling; + +struct qaic_user { + /* Uniquely identifies this user for the device */ + int handle; + struct kref ref_count; + /* Char device opened by this user */ + struct qaic_drm_device *qddev; + /* Node in list of users that opened this drm device */ + struct list_head node; + /* SRCU used to synchronize this user during cleanup */ + struct srcu_struct qddev_lock; + atomic_t chunk_id; +}; + +struct dma_bridge_chan { + /* Pointer to device struct maintained by driver */ + struct qaic_device *qdev; + /* ID of this DMA bridge channel (DBC) */ + unsigned int id; + /* Synchronizes access to xfer_list */ + spinlock_t xfer_lock; + /* Base address of request queue */ + void *req_q_base; + /* Base address of response queue */ + void *rsp_q_base; + /* + * Base bus address of request queue. Response queue bus address can be + * calculated by adding request queue size to this variable + */ + dma_addr_t dma_addr; + /* Total size of request and response queue in bytes */ + u32 total_size; + /* Capacity of request/response queue */ + u32 nelem; + /* The user that opened this DBC */ + struct qaic_user *usr; + /* + * Request ID of the next memory handle that goes in the request queue. One + * memory handle can enqueue more than one request element; all + * requests that belong to the same memory handle share the same request ID + */ + u16 next_req_id; + /* true: DBC is in use; false: DBC not in use */ + bool in_use; + /* + * Base address of device registers. Used to read/write the head and + * tail pointers of this DBC's request and response queues.
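+ * Per the QAIC_DBC_OFF() define above, the window for DBC i is expected to + * start at offset QAIC_DBC_BASE + i * QAIC_DBC_SIZE within the DBC BAR, e.g. + * DBC 2 at 128K + 2 * 4K (a layout sketch of the defines, not new ABI).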
+	void __iomem *dbc_base;
+	/* Synchronizes access to the request queue's head and tail pointers */
+	struct mutex req_lock;
+	/* Head of list where each node is a memory handle queued in request queue */
+	struct list_head xfer_list;
+	/* Synchronizes DBC readers during cleanup */
+	struct srcu_struct ch_lock;
+	/*
+	 * When this DBC is released, any thread waiting on this wait queue is
+	 * woken up
+	 */
+	wait_queue_head_t dbc_release;
+	/* Head of list where each node is a bo associated with this DBC */
+	struct list_head bo_lists;
+	/* The irq line for this DBC. Used for polling */
+	unsigned int irq;
+	/* Polling work item to simulate interrupts */
+	struct work_struct poll_work;
+	/* Represents various states of this DBC from enum dbc_states */
+	unsigned int state;
+};
+
+struct qaic_device {
+	/* Pointer to base PCI device struct of our physical device */
+	struct pci_dev *pdev;
+	/* Sequence number of the request that will be queued next on the MHI control device */
+	u32 next_seq_num;
+	/* Base address of the MHI bar */
+	void __iomem *bar_mhi;
+	/* Base address of the DBCs bar */
+	void __iomem *bar_dbc;
+	/* Controller structure for MHI devices */
+	struct mhi_controller *mhi_cntrl;
+	/* MHI control channel device */
+	struct mhi_device *cntl_ch;
+	/* List of requests queued in MHI control device */
+	struct list_head cntl_xfer_list;
+	/* Synchronizes MHI control device transactions and its xfer list */
+	struct mutex cntl_mutex;
+	/* Array of DBC structs of this device */
+	struct dma_bridge_chan *dbc;
+	/* Work queue for tasks related to MHI control device */
+	struct workqueue_struct *cntl_wq;
+	/* Synchronizes all the users of the device during cleanup */
+	struct srcu_struct dev_lock;
+	/* Track the state of the device during resets */
+	enum dev_states dev_state;
+	/* true: single MSI is used to operate device */
+	bool single_msi;
+	/*
+	 * true: A tx MHI transaction has failed and an rx buffer is still
+	 *	 queued in control device. Such a buffer is considered a lost
+	 *	 rx buffer
+	 * false: No rx buffer is lost in control device
+	 */
+	bool cntl_lost_buf;
+	/* Maximum number of DBCs supported by this device */
+	u32 num_dbc;
+	/* Reference to the drm_device for this device when it is created */
+	struct qaic_drm_device *qddev;
+	/* Generate the CRC of a control message */
+	u32 (*gen_crc)(void *msg);
+	/* Validate the CRC of a control message */
+	bool (*valid_crc)(void *msg);
+	/* MHI "QAIC_TIMESYNC" channel device */
+	struct mhi_device *qts_ch;
+	/* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */
+	struct workqueue_struct *qts_wq;
+	/* MHI "QAIC_TIMESYNC_PERIODIC" channel device */
+	struct mhi_device *mqts_ch;
+	/* Head of list of pages allocated by MHI bootlog device */
+	struct list_head bootlog;
+	/* MHI bootlog channel device */
+	struct mhi_device *bootlog_ch;
+	/* Work queue for tasks related to MHI bootlog device */
+	struct workqueue_struct *bootlog_wq;
+	/* Synchronizes access of pages in MHI bootlog device */
+	struct mutex bootlog_mutex;
+	/* MHI RAS channel device */
+	struct mhi_device *ras_ch;
+	/* Correctable error count */
+	unsigned int ce_count;
+	/* Uncorrectable error count */
+	unsigned int ue_count;
+	/* Uncorrectable non-fatal error count */
+	unsigned int ue_nf_count;
+	/* MHI SSR channel device */
+	struct mhi_device *ssr_ch;
+	/* Work queue for tasks related to MHI SSR device */
+	struct workqueue_struct *ssr_wq;
+	/* Buffer to collect SSR crashdump via SSR MHI channel */
+	void *ssr_mhi_buf;
+	/*
+	 * DBC which is under SSR. The sentinel value U32_MAX means that no
+	 * SSR is in progress
+	 */
+	u32 ssr_dbc;
+};
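The ssr_dbc member just above doubles as an "SSR active" flag through QAIC_SSR_DBC_SENTINEL. A minimal sketch of the implied check; the helper name is hypothetical and not part of this patch:

	static bool qaic_ssr_in_progress(struct qaic_device *qdev)
	{
		/* U32_MAX (QAIC_SSR_DBC_SENTINEL) marks "no subsystem restart active" */
		return qdev->ssr_dbc != QAIC_SSR_DBC_SENTINEL;
	}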
+
+struct qaic_drm_device {
+	/* The drm device struct for this device */
+	struct drm_device drm;
+	/* Pointer to the root device struct driven by this driver */
+	struct qaic_device *qdev;
+	/*
+	 * The physical device can be partitioned into a number of logical
+	 * devices, and each logical device is given a partition id. This
+	 * member stores that id. QAIC_NO_PARTITION is a sentinel used to mark
+	 * that this drm device is the actual physical device
+	 */
+	s32 partition_id;
+	/* Head in list of users who have opened this drm device */
+	struct list_head users;
+	/* Synchronizes access to users list */
+	struct mutex users_mutex;
+	/* Pointer to array of DBC sysfs attributes */
+	void *sysfs_attrs;
+};
+
+struct qaic_bo {
+	struct drm_gem_object base;
+	/* Scatter/gather table for allocated/imported BO */
+	struct sg_table *sgt;
+	/* Head in list of slices of this BO */
+	struct list_head slices;
+	/* Total nents, for all slices of this BO */
+	int total_slice_nents;
+	/*
+	 * Direction of transfer. It can take only two values: DMA_TO_DEVICE
+	 * and DMA_FROM_DEVICE.
+	 */
+	int dir;
+	/* Pointer to the DBC that operates on this BO */
+	struct dma_bridge_chan *dbc;
+	/* Number of slices that belong to this buffer */
+	u32 nr_slice;
+	/* Number of slices that have been transferred by the DMA engine */
+	u32 nr_slice_xfer_done;
+	/*
+	 * If true then the user has attached slicing information to this BO by
+	 * calling the DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl.
+	 */
+	bool sliced;
+	/* Request ID of this BO if it is queued for execution */
+	u16 req_id;
+	/* Wait on this for completion of DMA transfer of this BO */
+	struct completion xfer_done;
+	/*
+	 * Node in linked list where head is dbc->xfer_list.
+	 * This list contains BOs that are queued for DMA transfer.
+	 */
+	struct list_head xfer_list;
+	/*
+	 * Node in linked list where head is dbc->bo_lists.
+	 * This list contains BOs that are associated with the DBC this BO is
+	 * linked to.
+	 */
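The perf_stats block that follows records per-execution timestamps. A hedged sketch of how a consumer might derive a latency from them; the helper is hypothetical, not part of this patch:

	static u64 qaic_bo_hw_latency_ns(struct qaic_bo *bo)
	{
		/* time from enqueue into the DMA queue to the completion interrupt */
		return bo->perf_stats.req_processed_ts - bo->perf_stats.req_submit_ts;
	}

/*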
+ */ + struct list_head bo_list; + struct { + /* + * Latest timestamp(ns) at which kernel received a request to + * execute this BO + */ + u64 req_received_ts; + /* + * Latest timestamp(ns) at which kernel enqueued requests of + * this BO for execution in DMA queue + */ + u64 req_submit_ts; + /* + * Latest timestamp(ns) at which kernel received a completion + * interrupt for requests of this BO + */ + u64 req_processed_ts; + /* + * Number of elements already enqueued in DMA queue before + * enqueuing requests of this BO + */ + u32 queue_level_before; + } perf_stats; + /* Synchronizes BO operations */ + struct mutex lock; +}; + +struct bo_slice { + /* Mapped pages */ + struct sg_table *sgt; + /* Number of requests required to queue in DMA queue */ + int nents; + /* See enum dma_data_direction */ + int dir; + /* Actual requests that will be copied in DMA queue */ + struct dbc_req *reqs; + struct kref ref_count; + /* true: No DMA transfer required */ + bool no_xfer; + /* Pointer to the parent BO handle */ + struct qaic_bo *bo; + /* Node in list of slices maintained by parent BO */ + struct list_head slice; + /* Size of this slice in bytes */ + u64 size; + /* Offset of this slice in buffer */ + u64 offset; +}; + +int get_dbc_req_elem_size(void); +int get_dbc_rsp_elem_size(void); +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor); +int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result); + +void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result); + +int qaic_control_open(struct qaic_device *qdev); +void qaic_control_close(struct qaic_device *qdev); +void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr); + +irqreturn_t dbc_irq_threaded_fn(int irq, void *data); +irqreturn_t dbc_irq_handler(int irq, void *data); +int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); +void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); +void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); +void release_dbc(struct qaic_device *qdev, u32 dbc_id); +void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail); + +void wake_all_cntl(struct qaic_device *qdev); +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev); + +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); + +int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void qaic_irq_polling_work(struct work_struct *work); +void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id); +void qaic_dbc_exit_ssr(struct qaic_device *qdev); + +/* qaic_sysfs.c */ +int qaic_sysfs_init(struct qaic_drm_device *qddev); +void qaic_sysfs_remove(struct qaic_drm_device 
*qddev); +void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state); + +#endif /* _QAIC_H_ */ diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c new file mode 100644 index 000000000000..428d8f65bff3 --- /dev/null +++ b/drivers/accel/qaic/qaic_control.c @@ -0,0 +1,1560 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include <asm/byteorder.h> +#include <linux/completion.h> +#include <linux/crc32.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/mhi.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <linux/mutex.h> +#include <linux/overflow.h> +#include <linux/pci.h> +#include <linux/scatterlist.h> +#include <linux/sched/signal.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/workqueue.h> +#include <linux/wait.h> +#include <drm/drm_device.h> +#include <drm/drm_file.h> +#include <uapi/drm/qaic_accel.h> + +#include "qaic.h" + +#define MANAGE_MAGIC_NUMBER ((__force __le32)0x43494151) /* "QAIC" in little endian */ +#define QAIC_DBC_Q_GAP SZ_256 +#define QAIC_DBC_Q_BUF_ALIGN SZ_4K +#define QAIC_MANAGE_WIRE_MSG_LENGTH SZ_64K /* Max DMA message length */ +#define QAIC_WRAPPER_MAX_SIZE SZ_4K +#define QAIC_MHI_RETRY_WAIT_MS 100 +#define QAIC_MHI_RETRY_MAX 20 + +static unsigned int control_resp_timeout_s = 60; /* 60 sec default */ +module_param(control_resp_timeout_s, uint, 0600); +MODULE_PARM_DESC(control_resp_timeout_s, "Timeout for NNC responses from QSM"); + +struct manage_msg { + u32 len; + u32 count; + u8 data[]; +}; + +/* + * wire encoding structures for the manage protocol. 
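 */

MANAGE_MAGIC_NUMBER defined above is the ASCII string "QAIC" read as a little-endian u32. A quick illustration (assumes the put_unaligned_le32() helper from the kernel's unaligned-access header; values only, not driver code):

	u8 b[4];

	put_unaligned_le32(0x43494151, b);
	/* b[] == { 0x51, 0x41, 0x49, 0x43 } == { 'Q', 'A', 'I', 'C' } */

/*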
+ * All fields are little endian on the wire + */ +struct wire_msg_hdr { + __le32 crc32; /* crc of everything following this field in the message */ + __le32 magic_number; + __le32 sequence_number; + __le32 len; /* length of this message */ + __le32 count; /* number of transactions in this message */ + __le32 handle; /* unique id to track the resources consumed */ + __le32 partition_id; /* partition id for the request (signed) */ + __le32 padding; /* must be 0 */ +} __packed; + +struct wire_msg { + struct wire_msg_hdr hdr; + u8 data[]; +} __packed; + +struct wire_trans_hdr { + __le32 type; + __le32 len; +} __packed; + +/* Each message sent from driver to device are organized in a list of wrapper_msg */ +struct wrapper_msg { + struct list_head list; + struct kref ref_count; + u32 len; /* length of data to transfer */ + struct wrapper_list *head; + union { + struct wire_msg msg; + struct wire_trans_hdr trans; + }; +}; + +struct wrapper_list { + struct list_head list; + spinlock_t lock; /* Protects the list state during additions and removals */ +}; + +struct wire_trans_passthrough { + struct wire_trans_hdr hdr; + u8 data[]; +} __packed; + +struct wire_addr_size_pair { + __le64 addr; + __le64 size; +} __packed; + +struct wire_trans_dma_xfer { + struct wire_trans_hdr hdr; + __le32 tag; + __le32 count; + __le32 dma_chunk_id; + __le32 padding; + struct wire_addr_size_pair data[]; +} __packed; + +/* Initiated by device to continue the DMA xfer of a large piece of data */ +struct wire_trans_dma_xfer_cont { + struct wire_trans_hdr hdr; + __le32 dma_chunk_id; + __le32 padding; + __le64 xferred_size; +} __packed; + +struct wire_trans_activate_to_dev { + struct wire_trans_hdr hdr; + __le64 req_q_addr; + __le64 rsp_q_addr; + __le32 req_q_size; + __le32 rsp_q_size; + __le32 buf_len; + __le32 options; /* unused, but BIT(16) has meaning to the device */ +} __packed; + +struct wire_trans_activate_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 dbc_id; + __le64 options; /* unused */ +} __packed; + +struct wire_trans_deactivate_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 dbc_id; +} __packed; + +struct wire_trans_terminate_to_dev { + struct wire_trans_hdr hdr; + __le32 handle; + __le32 padding; +} __packed; + +struct wire_trans_terminate_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 padding; +} __packed; + +struct wire_trans_status_to_dev { + struct wire_trans_hdr hdr; +} __packed; + +struct wire_trans_status_from_dev { + struct wire_trans_hdr hdr; + __le16 major; + __le16 minor; + __le32 status; + __le64 status_flags; +} __packed; + +struct wire_trans_validate_part_to_dev { + struct wire_trans_hdr hdr; + __le32 part_id; + __le32 padding; +} __packed; + +struct wire_trans_validate_part_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 padding; +} __packed; + +struct xfer_queue_elem { + /* + * Node in list of ongoing transfer request on control channel. + * Maintained by root device struct. 
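 */

Every field in the wire structures above is fixed little-endian, so the host always goes through the cpu_to_le*()/le*_to_cpu() accessors. A small sketch of populating a bare header; field values are hypothetical, and the CRC is computed last by gen_crc() further down in this file:

	struct wire_msg_hdr hdr = {};

	hdr.magic_number = MANAGE_MAGIC_NUMBER;	/* already __le32 */
	hdr.sequence_number = cpu_to_le32(7);
	hdr.len = cpu_to_le32(sizeof(hdr));
	hdr.count = cpu_to_le32(1);
	hdr.padding = cpu_to_le32(0);
	/* hdr.crc32 covers everything after the crc32 field itself */

/*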
+ */ + struct list_head list; + /* Sequence number of this transfer request */ + u32 seq_num; + /* This is used to wait on until completion of transfer request */ + struct completion xfer_done; + /* Received data from device */ + void *buf; +}; + +struct dma_xfer { + /* Node in list of DMA transfers which is used for cleanup */ + struct list_head list; + /* SG table of memory used for DMA */ + struct sg_table *sgt; + /* Array pages used for DMA */ + struct page **page_list; + /* Number of pages used for DMA */ + unsigned long nr_pages; +}; + +struct ioctl_resources { + /* List of all DMA transfers which is used later for cleanup */ + struct list_head dma_xfers; + /* Base address of request queue which belongs to a DBC */ + void *buf; + /* + * Base bus address of request queue which belongs to a DBC. Response + * queue base bus address can be calculated by adding size of request + * queue to base bus address of request queue. + */ + dma_addr_t dma_addr; + /* Total size of request queue and response queue in byte */ + u32 total_size; + /* Total number of elements that can be queued in each of request and response queue */ + u32 nelem; + /* Base address of response queue which belongs to a DBC */ + void *rsp_q_base; + /* Status of the NNC message received */ + u32 status; + /* DBC id of the DBC received from device */ + u32 dbc_id; + /* + * DMA transfer request messages can be big in size and it may not be + * possible to send them in one shot. In such cases the messages are + * broken into chunks, this field stores ID of such chunks. + */ + u32 dma_chunk_id; + /* Total number of bytes transferred for a DMA xfer request */ + u64 xferred_dma_size; + /* Header of transaction message received from user. Used during DMA xfer request. */ + void *trans_hdr; +}; + +struct resp_work { + struct work_struct work; + struct qaic_device *qdev; + void *buf; +}; + +/* + * Since we're working with little endian messages, its useful to be able to + * increment without filling a whole line with conversions back and forth just + * to add one(1) to a message count. + */ +static __le32 incr_le32(__le32 val) +{ + return cpu_to_le32(le32_to_cpu(val) + 1); +} + +static u32 gen_crc(void *msg) +{ + struct wrapper_list *wrappers = msg; + struct wrapper_msg *w; + u32 crc = ~0; + + list_for_each_entry(w, &wrappers->list, list) + crc = crc32(crc, &w->msg, w->len); + + return crc ^ ~0; +} + +static u32 gen_crc_stub(void *msg) +{ + return 0; +} + +static bool valid_crc(void *msg) +{ + struct wire_msg_hdr *hdr = msg; + bool ret; + u32 crc; + + /* + * The output of this algorithm is always converted to the native + * endianness. 
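 */

gen_crc() above seeds with ~0, chains crc32() across every wrapper in the list, and xors the result with ~0. Because CRC-32 composes over concatenation, this equals the one-shot CRC of the assembled message. A minimal sketch of the convention, with hypothetical buffers:

	u32 crc = ~0;

	crc = crc32(crc, part_a, len_a);	/* chained across fragments... */
	crc = crc32(crc, part_b, len_b);
	crc ^= ~0;	/* == standard CRC-32 of part_a || part_b */

/*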
+ */ + crc = le32_to_cpu(hdr->crc32); + hdr->crc32 = 0; + ret = (crc32(~0, msg, le32_to_cpu(hdr->len)) ^ ~0) == crc; + hdr->crc32 = cpu_to_le32(crc); + return ret; +} + +static bool valid_crc_stub(void *msg) +{ + return true; +} + +static void free_wrapper(struct kref *ref) +{ + struct wrapper_msg *wrapper = container_of(ref, struct wrapper_msg, ref_count); + + list_del(&wrapper->list); + kfree(wrapper); +} + +static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resources, + struct qaic_user *usr) +{ + u32 dbc_id = resources->dbc_id; + + if (resources->buf) { + wait_event_interruptible(qdev->dbc[dbc_id].dbc_release, !qdev->dbc[dbc_id].in_use); + qdev->dbc[dbc_id].req_q_base = resources->buf; + qdev->dbc[dbc_id].rsp_q_base = resources->rsp_q_base; + qdev->dbc[dbc_id].dma_addr = resources->dma_addr; + qdev->dbc[dbc_id].total_size = resources->total_size; + qdev->dbc[dbc_id].nelem = resources->nelem; + enable_dbc(qdev, dbc_id, usr); + qdev->dbc[dbc_id].in_use = true; + resources->buf = NULL; + set_dbc_state(qdev, dbc_id, DBC_STATE_ASSIGNED); + } +} + +static void free_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resources) +{ + if (resources->buf) + dma_free_coherent(&qdev->pdev->dev, resources->total_size, resources->buf, + resources->dma_addr); + resources->buf = NULL; +} + +static void free_dma_xfers(struct qaic_device *qdev, struct ioctl_resources *resources) +{ + struct dma_xfer *xfer; + struct dma_xfer *x; + int i; + + list_for_each_entry_safe(xfer, x, &resources->dma_xfers, list) { + dma_unmap_sgtable(&qdev->pdev->dev, xfer->sgt, DMA_TO_DEVICE, 0); + sg_free_table(xfer->sgt); + kfree(xfer->sgt); + for (i = 0; i < xfer->nr_pages; ++i) + put_page(xfer->page_list[i]); + kfree(xfer->page_list); + list_del(&xfer->list); + kfree(xfer); + } +} + +static struct wrapper_msg *add_wrapper(struct wrapper_list *wrappers, u32 size) +{ + struct wrapper_msg *w = kzalloc(size, GFP_KERNEL); + + if (!w) + return NULL; + list_add_tail(&w->list, &wrappers->list); + kref_init(&w->ref_count); + w->head = wrappers; + return w; +} + +static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len) +{ + struct qaic_manage_trans_passthrough *in_trans = trans; + struct wire_trans_passthrough *out_trans; + struct wrapper_msg *trans_wrapper; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + u32 msg_hdr_len; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (in_trans->hdr.len % 8 != 0) + return -EINVAL; + + if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH) + return -ENOSPC; + + trans_wrapper = add_wrapper(wrappers, + offsetof(struct wrapper_msg, trans) + in_trans->hdr.len); + if (!trans_wrapper) + return -ENOMEM; + trans_wrapper->len = in_trans->hdr.len; + out_trans = (struct wire_trans_passthrough *)&trans_wrapper->trans; + + memcpy(out_trans->data, in_trans->data, in_trans->hdr.len - sizeof(in_trans->hdr)); + msg->hdr.len = cpu_to_le32(msg_hdr_len + in_trans->hdr.len); + msg->hdr.count = incr_le32(msg->hdr.count); + *user_len += in_trans->hdr.len; + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_PASSTHROUGH_TO_DEV); + out_trans->hdr.len = cpu_to_le32(in_trans->hdr.len); + + return 0; +} + +/* returns error code for failure, 0 if enough pages alloc'd, 1 if dma_cont is needed */ +static int find_and_map_user_pages(struct qaic_device *qdev, + struct qaic_manage_trans_dma_xfer *in_trans, + struct 
ioctl_resources *resources, struct dma_xfer *xfer) +{ + u64 xfer_start_addr, remaining, end, total; + unsigned long need_pages; + struct page **page_list; + unsigned long nr_pages; + struct sg_table *sgt; + int ret; + int i; + + if (check_add_overflow(in_trans->addr, resources->xferred_dma_size, &xfer_start_addr)) + return -EINVAL; + + if (in_trans->size < resources->xferred_dma_size) + return -EINVAL; + remaining = in_trans->size - resources->xferred_dma_size; + if (remaining == 0) + return -EINVAL; + + if (check_add_overflow(xfer_start_addr, remaining, &end)) + return -EINVAL; + + total = remaining + offset_in_page(xfer_start_addr); + if (total >= SIZE_MAX) + return -EINVAL; + + need_pages = DIV_ROUND_UP(total, PAGE_SIZE); + + nr_pages = need_pages; + + while (1) { + page_list = kmalloc_array(nr_pages, sizeof(*page_list), GFP_KERNEL | __GFP_NOWARN); + if (!page_list) { + nr_pages = nr_pages / 2; + if (!nr_pages) + return -ENOMEM; + } else { + break; + } + } + + ret = get_user_pages_fast(xfer_start_addr, nr_pages, 0, page_list); + if (ret < 0) + goto free_page_list; + if (ret != nr_pages) { + nr_pages = ret; + ret = -EFAULT; + goto put_pages; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto put_pages; + } + + ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages, + offset_in_page(xfer_start_addr), + remaining, GFP_KERNEL); + if (ret) { + ret = -ENOMEM; + goto free_sgt; + } + + ret = dma_map_sgtable(&qdev->pdev->dev, sgt, DMA_TO_DEVICE, 0); + if (ret) + goto free_table; + + xfer->sgt = sgt; + xfer->page_list = page_list; + xfer->nr_pages = nr_pages; + + return need_pages > nr_pages ? 1 : 0; + +free_table: + sg_free_table(sgt); +free_sgt: + kfree(sgt); +put_pages: + for (i = 0; i < nr_pages; ++i) + put_page(page_list[i]); +free_page_list: + kfree(page_list); + return ret; +} + +/* returns error code for failure, 0 if everything was encoded, 1 if dma_cont is needed */ +static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wrappers, + struct ioctl_resources *resources, u32 msg_hdr_len, u32 *size, + struct wire_trans_dma_xfer **out_trans) +{ + struct wrapper_msg *trans_wrapper; + struct sg_table *sgt = xfer->sgt; + struct wire_addr_size_pair *asp; + struct scatterlist *sg; + struct wrapper_msg *w; + unsigned int dma_len; + u64 dma_chunk_len; + void *boundary; + int nents_dma; + int nents; + int i; + + nents = sgt->nents; + nents_dma = nents; + *size = QAIC_MANAGE_WIRE_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans); + for_each_sgtable_dma_sg(sgt, sg, i) { + *size -= sizeof(*asp); + /* Save 1K for possible follow-up transactions. 
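 */

A worked trace of the page accounting in find_and_map_user_pages() above, with hypothetical numbers:

	/*
	 * xfer_start_addr = 0x10000300, remaining = 0x2000
	 * offset_in_page(0x10000300) = 0x300, total = 0x2300
	 * need_pages = DIV_ROUND_UP(0x2300, 0x1000) = 3
	 *
	 * If kmalloc_array() only succeeds after halving to nr_pages = 2, the
	 * first two pages are pinned and mapped, and the function returns 1 so
	 * the caller emits a DMA continuation for the remaining tail.
	 */

/*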
*/ + if (*size < SZ_1K) { + nents_dma = i; + break; + } + } + + trans_wrapper = add_wrapper(wrappers, QAIC_WRAPPER_MAX_SIZE); + if (!trans_wrapper) + return -ENOMEM; + *out_trans = (struct wire_trans_dma_xfer *)&trans_wrapper->trans; + + asp = (*out_trans)->data; + boundary = (void *)trans_wrapper + QAIC_WRAPPER_MAX_SIZE; + *size = 0; + + dma_len = 0; + w = trans_wrapper; + dma_chunk_len = 0; + for_each_sg(sgt->sgl, sg, nents_dma, i) { + asp->size = cpu_to_le64(dma_len); + dma_chunk_len += dma_len; + if (dma_len) { + asp++; + if ((void *)asp + sizeof(*asp) > boundary) { + w->len = (void *)asp - (void *)&w->msg; + *size += w->len; + w = add_wrapper(wrappers, QAIC_WRAPPER_MAX_SIZE); + if (!w) + return -ENOMEM; + boundary = (void *)w + QAIC_WRAPPER_MAX_SIZE; + asp = (struct wire_addr_size_pair *)&w->msg; + } + } + asp->addr = cpu_to_le64(sg_dma_address(sg)); + dma_len = sg_dma_len(sg); + } + /* finalize the last segment */ + asp->size = cpu_to_le64(dma_len); + w->len = (void *)asp + sizeof(*asp) - (void *)&w->msg; + *size += w->len; + dma_chunk_len += dma_len; + resources->xferred_dma_size += dma_chunk_len; + + return nents_dma < nents ? 1 : 0; +} + +static void cleanup_xfer(struct qaic_device *qdev, struct dma_xfer *xfer) +{ + int i; + + dma_unmap_sgtable(&qdev->pdev->dev, xfer->sgt, DMA_TO_DEVICE, 0); + sg_free_table(xfer->sgt); + kfree(xfer->sgt); + for (i = 0; i < xfer->nr_pages; ++i) + put_page(xfer->page_list[i]); + kfree(xfer->page_list); +} + +static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len, struct ioctl_resources *resources, struct qaic_user *usr) +{ + struct qaic_manage_trans_dma_xfer *in_trans = trans; + struct wire_trans_dma_xfer *out_trans; + struct wrapper_msg *wrapper; + struct dma_xfer *xfer; + struct wire_msg *msg; + bool need_cont_dma; + u32 msg_hdr_len; + u32 size; + int ret; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + /* There should be enough space to hold at least one ASP entry. 
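 */

The budget computed above bounds how many scatterlist segments fit in one wire message. Working the numbers from the struct definitions earlier in this file, for a message that so far contains only its header:

	/*
	 * sizeof(struct wire_addr_size_pair) == 16
	 * sizeof(struct wire_msg_hdr) == 32, sizeof(struct wire_trans_dma_xfer) == 24
	 * budget = 65536 (QAIC_MANAGE_WIRE_MSG_LENGTH) - 32 - 24 = 65480 bytes
	 * The loop stops once less than SZ_1K remains, so a single message can
	 * describe roughly (65480 - 1024) / 16 ~= 4000 segments.
	 */

/*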
*/ + if (size_add(msg_hdr_len, sizeof(*out_trans) + sizeof(struct wire_addr_size_pair)) > + QAIC_MANAGE_WIRE_MSG_LENGTH) + return -ENOMEM; + + xfer = kmalloc(sizeof(*xfer), GFP_KERNEL); + if (!xfer) + return -ENOMEM; + + ret = find_and_map_user_pages(qdev, in_trans, resources, xfer); + if (ret < 0) + goto free_xfer; + + need_cont_dma = (bool)ret; + + ret = encode_addr_size_pairs(xfer, wrappers, resources, msg_hdr_len, &size, &out_trans); + if (ret < 0) + goto cleanup_xfer; + + need_cont_dma = need_cont_dma || (bool)ret; + + msg->hdr.len = cpu_to_le32(msg_hdr_len + size); + msg->hdr.count = incr_le32(msg->hdr.count); + + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_DMA_XFER_TO_DEV); + out_trans->hdr.len = cpu_to_le32(size); + out_trans->tag = cpu_to_le32(in_trans->tag); + out_trans->count = cpu_to_le32((size - sizeof(*out_trans)) / + sizeof(struct wire_addr_size_pair)); + + *user_len += in_trans->hdr.len; + + if (resources->dma_chunk_id) { + out_trans->dma_chunk_id = cpu_to_le32(resources->dma_chunk_id); + } else if (need_cont_dma) { + while (resources->dma_chunk_id == 0) + resources->dma_chunk_id = atomic_inc_return(&usr->chunk_id); + + out_trans->dma_chunk_id = cpu_to_le32(resources->dma_chunk_id); + } + resources->trans_hdr = trans; + + list_add(&xfer->list, &resources->dma_xfers); + return 0; + +cleanup_xfer: + cleanup_xfer(qdev, xfer); +free_xfer: + kfree(xfer); + return ret; +} + +static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len, struct ioctl_resources *resources) +{ + struct qaic_manage_trans_activate_to_dev *in_trans = trans; + struct wire_trans_activate_to_dev *out_trans; + struct wrapper_msg *trans_wrapper; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + dma_addr_t dma_addr; + u32 msg_hdr_len; + void *buf; + u32 nelem; + u32 size; + int ret; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_WIRE_MSG_LENGTH) + return -ENOSPC; + + if (!in_trans->queue_size) + return -EINVAL; + + if (in_trans->pad) + return -EINVAL; + + nelem = in_trans->queue_size; + if (check_mul_overflow((u32)(get_dbc_req_elem_size() + get_dbc_rsp_elem_size()), + nelem, + &size)) + return -EINVAL; + + if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size) + return -EINVAL; + + size = ALIGN((size + QAIC_DBC_Q_GAP), QAIC_DBC_Q_BUF_ALIGN); + + buf = dma_alloc_coherent(&qdev->pdev->dev, size, &dma_addr, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + trans_wrapper = add_wrapper(wrappers, + offsetof(struct wrapper_msg, trans) + sizeof(*out_trans)); + if (!trans_wrapper) { + ret = -ENOMEM; + goto free_dma; + } + trans_wrapper->len = sizeof(*out_trans); + out_trans = (struct wire_trans_activate_to_dev *)&trans_wrapper->trans; + + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_ACTIVATE_TO_DEV); + out_trans->hdr.len = cpu_to_le32(sizeof(*out_trans)); + out_trans->buf_len = cpu_to_le32(size); + out_trans->req_q_addr = cpu_to_le64(dma_addr); + out_trans->req_q_size = cpu_to_le32(nelem); + out_trans->rsp_q_addr = cpu_to_le64(dma_addr + size - nelem * get_dbc_rsp_elem_size()); + out_trans->rsp_q_size = cpu_to_le32(nelem); + out_trans->options = cpu_to_le32(in_trans->options); + + *user_len += in_trans->hdr.len; + msg->hdr.len = cpu_to_le32(msg_hdr_len + sizeof(*out_trans)); + msg->hdr.count = incr_le32(msg->hdr.count); + + resources->buf = buf; + resources->dma_addr = dma_addr; + resources->total_size = 
size; + resources->nelem = nelem; + resources->rsp_q_base = buf + size - nelem * get_dbc_rsp_elem_size(); + return 0; + +free_dma: + dma_free_coherent(&qdev->pdev->dev, size, buf, dma_addr); + return ret; +} + +static int encode_deactivate(struct qaic_device *qdev, void *trans, + u32 *user_len, struct qaic_user *usr) +{ + struct qaic_manage_trans_deactivate *in_trans = trans; + + if (in_trans->dbc_id >= qdev->num_dbc || in_trans->pad) + return -EINVAL; + + *user_len += in_trans->hdr.len; + + return disable_dbc(qdev, in_trans->dbc_id, usr); +} + +static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len) +{ + struct qaic_manage_trans_status_to_dev *in_trans = trans; + struct wire_trans_status_to_dev *out_trans; + struct wrapper_msg *trans_wrapper; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + u32 msg_hdr_len; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_WIRE_MSG_LENGTH) + return -ENOSPC; + + trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper)); + if (!trans_wrapper) + return -ENOMEM; + + trans_wrapper->len = sizeof(*out_trans); + out_trans = (struct wire_trans_status_to_dev *)&trans_wrapper->trans; + + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_STATUS_TO_DEV); + out_trans->hdr.len = cpu_to_le32(in_trans->hdr.len); + msg->hdr.len = cpu_to_le32(msg_hdr_len + in_trans->hdr.len); + msg->hdr.count = incr_le32(msg->hdr.count); + *user_len += in_trans->hdr.len; + + return 0; +} + +static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg, + struct wrapper_list *wrappers, struct ioctl_resources *resources, + struct qaic_user *usr) +{ + struct qaic_manage_trans_hdr *trans_hdr; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + u32 user_len = 0; + int ret; + int i; + + if (!user_msg->count || + user_msg->len < sizeof(*trans_hdr)) { + ret = -EINVAL; + goto out; + } + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + + msg->hdr.len = cpu_to_le32(sizeof(msg->hdr)); + + if (resources->dma_chunk_id) { + ret = encode_dma(qdev, resources->trans_hdr, wrappers, &user_len, resources, usr); + msg->hdr.count = cpu_to_le32(1); + goto out; + } + + for (i = 0; i < user_msg->count; ++i) { + if (user_len > user_msg->len - sizeof(*trans_hdr)) { + ret = -EINVAL; + break; + } + trans_hdr = (struct qaic_manage_trans_hdr *)(user_msg->data + user_len); + if (trans_hdr->len < sizeof(trans_hdr) || + size_add(user_len, trans_hdr->len) > user_msg->len) { + ret = -EINVAL; + break; + } + + switch (trans_hdr->type) { + case QAIC_TRANS_PASSTHROUGH_FROM_USR: + ret = encode_passthrough(qdev, trans_hdr, wrappers, &user_len); + break; + case QAIC_TRANS_DMA_XFER_FROM_USR: + ret = encode_dma(qdev, trans_hdr, wrappers, &user_len, resources, usr); + break; + case QAIC_TRANS_ACTIVATE_FROM_USR: + ret = encode_activate(qdev, trans_hdr, wrappers, &user_len, resources); + break; + case QAIC_TRANS_DEACTIVATE_FROM_USR: + ret = encode_deactivate(qdev, trans_hdr, &user_len, usr); + break; + case QAIC_TRANS_STATUS_FROM_USR: + ret = encode_status(qdev, trans_hdr, wrappers, &user_len); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto out; + } + + if (user_len != user_msg->len) + ret = -EINVAL; +out: + if (ret) { + free_dma_xfers(qdev, resources); + free_dbc_buf(qdev, resources); + return ret; + } + + return 0; +} + +static 
int decode_passthrough(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg, + u32 *msg_len) +{ + struct qaic_manage_trans_passthrough *out_trans; + struct wire_trans_passthrough *in_trans = trans; + u32 len; + + out_trans = (void *)user_msg->data + user_msg->len; + + len = le32_to_cpu(in_trans->hdr.len); + if (len % 8 != 0) + return -EINVAL; + + if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + memcpy(out_trans->data, in_trans->data, len - sizeof(in_trans->hdr)); + user_msg->len += len; + *msg_len += len; + out_trans->hdr.type = le32_to_cpu(in_trans->hdr.type); + out_trans->hdr.len = len; + + return 0; +} + +static int decode_activate(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg, + u32 *msg_len, struct ioctl_resources *resources, struct qaic_user *usr) +{ + struct qaic_manage_trans_activate_from_dev *out_trans; + struct wire_trans_activate_from_dev *in_trans = trans; + u32 len; + + out_trans = (void *)user_msg->data + user_msg->len; + + len = le32_to_cpu(in_trans->hdr.len); + if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + user_msg->len += len; + *msg_len += len; + out_trans->hdr.type = le32_to_cpu(in_trans->hdr.type); + out_trans->hdr.len = len; + out_trans->status = le32_to_cpu(in_trans->status); + out_trans->dbc_id = le32_to_cpu(in_trans->dbc_id); + out_trans->options = le64_to_cpu(in_trans->options); + + if (!resources->buf) + /* how did we get an activate response without a request? */ + return -EINVAL; + + if (out_trans->dbc_id >= qdev->num_dbc) + /* + * The device assigned an invalid resource, which should never + * happen. Return an error so the user can try to recover. + */ + return -ENODEV; + + if (out_trans->status) + /* + * Allocating resources failed on device side. This is not an + * expected behaviour, user is expected to handle this situation. + */ + return -ECANCELED; + + resources->status = out_trans->status; + resources->dbc_id = out_trans->dbc_id; + save_dbc_buf(qdev, resources, usr); + + return 0; +} + +static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len, + struct qaic_user *usr) +{ + struct wire_trans_deactivate_from_dev *in_trans = trans; + u32 dbc_id = le32_to_cpu(in_trans->dbc_id); + u32 status = le32_to_cpu(in_trans->status); + + if (dbc_id >= qdev->num_dbc) + /* + * The device assigned an invalid resource, which should never + * happen. Inject an error so the user can try to recover. + */ + return -ENODEV; + + if (status) { + /* + * Releasing resources failed on the device side, which puts + * us in a bind since they may still be in use, so enable the + * dbc. User is expected to retry deactivation. 
+ */ + enable_dbc(qdev, dbc_id, usr); + return -ECANCELED; + } + + release_dbc(qdev, dbc_id); + set_dbc_state(qdev, dbc_id, DBC_STATE_IDLE); + *msg_len += sizeof(*in_trans); + + return 0; +} + +static int decode_status(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg, + u32 *user_len, struct wire_msg *msg) +{ + struct qaic_manage_trans_status_from_dev *out_trans; + struct wire_trans_status_from_dev *in_trans = trans; + u32 len; + + out_trans = (void *)user_msg->data + user_msg->len; + + len = le32_to_cpu(in_trans->hdr.len); + if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + out_trans->hdr.type = QAIC_TRANS_STATUS_FROM_DEV; + out_trans->hdr.len = len; + out_trans->major = le16_to_cpu(in_trans->major); + out_trans->minor = le16_to_cpu(in_trans->minor); + out_trans->status_flags = le64_to_cpu(in_trans->status_flags); + out_trans->status = le32_to_cpu(in_trans->status); + *user_len += le32_to_cpu(in_trans->hdr.len); + user_msg->len += len; + + if (out_trans->status) + return -ECANCELED; + if (out_trans->status_flags & BIT(0) && !valid_crc(msg)) + return -EPIPE; + + return 0; +} + +static int decode_message(struct qaic_device *qdev, struct manage_msg *user_msg, + struct wire_msg *msg, struct ioctl_resources *resources, + struct qaic_user *usr) +{ + u32 msg_hdr_len = le32_to_cpu(msg->hdr.len); + struct wire_trans_hdr *trans_hdr; + u32 msg_len = 0; + int ret; + int i; + + if (msg_hdr_len < sizeof(*trans_hdr) || + msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -EINVAL; + + user_msg->len = 0; + user_msg->count = le32_to_cpu(msg->hdr.count); + + for (i = 0; i < user_msg->count; ++i) { + u32 hdr_len; + + if (msg_len > msg_hdr_len - sizeof(*trans_hdr)) + return -EINVAL; + + trans_hdr = (struct wire_trans_hdr *)(msg->data + msg_len); + hdr_len = le32_to_cpu(trans_hdr->len); + if (hdr_len < sizeof(*trans_hdr) || + size_add(msg_len, hdr_len) > msg_hdr_len) + return -EINVAL; + + switch (le32_to_cpu(trans_hdr->type)) { + case QAIC_TRANS_PASSTHROUGH_FROM_DEV: + ret = decode_passthrough(qdev, trans_hdr, user_msg, &msg_len); + break; + case QAIC_TRANS_ACTIVATE_FROM_DEV: + ret = decode_activate(qdev, trans_hdr, user_msg, &msg_len, resources, usr); + break; + case QAIC_TRANS_DEACTIVATE_FROM_DEV: + ret = decode_deactivate(qdev, trans_hdr, &msg_len, usr); + break; + case QAIC_TRANS_STATUS_FROM_DEV: + ret = decode_status(qdev, trans_hdr, user_msg, &msg_len, msg); + break; + default: + return -EINVAL; + } + + if (ret) + return ret; + } + + if (msg_len != (msg_hdr_len - sizeof(msg->hdr))) + return -EINVAL; + + return 0; +} + +static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u32 seq_num, + bool ignore_signal) +{ + struct xfer_queue_elem elem; + struct wire_msg *out_buf; + struct wrapper_msg *w; + long ret = -EAGAIN; + int xfer_count = 0; + int retry_count; + + /* Allow QAIC_BOOT state since we need to check control protocol version */ + if (qdev->dev_state == QAIC_OFFLINE) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(-ENODEV); + } + + /* Attempt to avoid a partial commit of a message */ + list_for_each_entry(w, &wrappers->list, list) + xfer_count++; + + for (retry_count = 0; retry_count < QAIC_MHI_RETRY_MAX; retry_count++) { + if (xfer_count <= mhi_get_free_desc_count(qdev->cntl_ch, DMA_TO_DEVICE)) { + ret = 0; + break; + } + msleep_interruptible(QAIC_MHI_RETRY_WAIT_MS); + if (signal_pending(current)) + break; + } + + if (ret) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(ret); + } + + elem.seq_num = seq_num; + elem.buf = 
NULL; + init_completion(&elem.xfer_done); + if (likely(!qdev->cntl_lost_buf)) { + /* + * The max size of request to device is QAIC_MANAGE_WIRE_MSG_LENGTH. + * The max size of response from device is QAIC_MANAGE_MAX_MSG_LENGTH. + */ + out_buf = kmalloc(QAIC_MANAGE_MAX_MSG_LENGTH, GFP_KERNEL); + if (!out_buf) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(-ENOMEM); + } + + ret = mhi_queue_buf(qdev->cntl_ch, DMA_FROM_DEVICE, out_buf, + QAIC_MANAGE_MAX_MSG_LENGTH, MHI_EOT); + if (ret) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(ret); + } + } else { + /* + * we lost a buffer because we queued a recv buf, but then + * queuing the corresponding tx buf failed. To try to avoid + * a memory leak, lets reclaim it and use it for this + * transaction. + */ + qdev->cntl_lost_buf = false; + } + + list_for_each_entry(w, &wrappers->list, list) { + kref_get(&w->ref_count); + ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len, + list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN); + if (ret) { + qdev->cntl_lost_buf = true; + kref_put(&w->ref_count, free_wrapper); + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(ret); + } + } + + list_add_tail(&elem.list, &qdev->cntl_xfer_list); + mutex_unlock(&qdev->cntl_mutex); + + if (ignore_signal) + ret = wait_for_completion_timeout(&elem.xfer_done, control_resp_timeout_s * HZ); + else + ret = wait_for_completion_interruptible_timeout(&elem.xfer_done, + control_resp_timeout_s * HZ); + /* + * not using _interruptable because we have to cleanup or we'll + * likely cause memory corruption + */ + mutex_lock(&qdev->cntl_mutex); + if (!list_empty(&elem.list)) + list_del(&elem.list); + if (!ret && !elem.buf) + ret = -ETIMEDOUT; + else if (ret > 0 && !elem.buf) + ret = -EIO; + mutex_unlock(&qdev->cntl_mutex); + + if (ret < 0) { + kfree(elem.buf); + return ERR_PTR(ret); + } else if (!qdev->valid_crc(elem.buf)) { + kfree(elem.buf); + return ERR_PTR(-EPIPE); + } + + return elem.buf; +} + +/* Add a transaction to abort the outstanding DMA continuation */ +static int abort_dma_cont(struct qaic_device *qdev, struct wrapper_list *wrappers, u32 dma_chunk_id) +{ + struct wire_trans_dma_xfer *out_trans; + u32 size = sizeof(*out_trans); + struct wrapper_msg *wrapper; + struct wrapper_msg *w; + struct wire_msg *msg; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + + /* Remove all but the first wrapper which has the msg header */ + list_for_each_entry_safe(wrapper, w, &wrappers->list, list) + if (!list_is_first(&wrapper->list, &wrappers->list)) + kref_put(&wrapper->ref_count, free_wrapper); + + wrapper = add_wrapper(wrappers, sizeof(*wrapper)); + + if (!wrapper) + return -ENOMEM; + + out_trans = (struct wire_trans_dma_xfer *)&wrapper->trans; + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_DMA_XFER_TO_DEV); + out_trans->hdr.len = cpu_to_le32(size); + out_trans->tag = cpu_to_le32(0); + out_trans->count = cpu_to_le32(0); + out_trans->dma_chunk_id = cpu_to_le32(dma_chunk_id); + + msg->hdr.len = cpu_to_le32(size + sizeof(*msg)); + msg->hdr.count = cpu_to_le32(1); + wrapper->len = size; + + return 0; +} + +static struct wrapper_list *alloc_wrapper_list(void) +{ + struct wrapper_list *wrappers; + + wrappers = kmalloc(sizeof(*wrappers), GFP_KERNEL); + if (!wrappers) + return NULL; + INIT_LIST_HEAD(&wrappers->list); + spin_lock_init(&wrappers->lock); + + return wrappers; +} + +static int qaic_manage_msg_xfer(struct qaic_device *qdev, struct qaic_user *usr, + struct manage_msg *user_msg, struct 
ioctl_resources *resources, + struct wire_msg **rsp) +{ + struct wrapper_list *wrappers; + struct wrapper_msg *wrapper; + struct wrapper_msg *w; + bool all_done = false; + struct wire_msg *msg; + int ret; + + wrappers = alloc_wrapper_list(); + if (!wrappers) + return -ENOMEM; + + wrapper = add_wrapper(wrappers, sizeof(*wrapper)); + if (!wrapper) { + kfree(wrappers); + return -ENOMEM; + } + + msg = &wrapper->msg; + wrapper->len = sizeof(*msg); + + ret = encode_message(qdev, user_msg, wrappers, resources, usr); + if (ret && resources->dma_chunk_id) + ret = abort_dma_cont(qdev, wrappers, resources->dma_chunk_id); + if (ret) + goto encode_failed; + + ret = mutex_lock_interruptible(&qdev->cntl_mutex); + if (ret) + goto lock_failed; + + msg->hdr.magic_number = MANAGE_MAGIC_NUMBER; + msg->hdr.sequence_number = cpu_to_le32(qdev->next_seq_num++); + + if (usr) { + msg->hdr.handle = cpu_to_le32(usr->handle); + msg->hdr.partition_id = cpu_to_le32(usr->qddev->partition_id); + } else { + msg->hdr.handle = 0; + msg->hdr.partition_id = cpu_to_le32(QAIC_NO_PARTITION); + } + + msg->hdr.padding = cpu_to_le32(0); + msg->hdr.crc32 = cpu_to_le32(qdev->gen_crc(wrappers)); + + /* msg_xfer releases the mutex */ + *rsp = msg_xfer(qdev, wrappers, qdev->next_seq_num - 1, false); + if (IS_ERR(*rsp)) + ret = PTR_ERR(*rsp); + +lock_failed: + free_dma_xfers(qdev, resources); +encode_failed: + spin_lock(&wrappers->lock); + list_for_each_entry_safe(wrapper, w, &wrappers->list, list) + kref_put(&wrapper->ref_count, free_wrapper); + all_done = list_empty(&wrappers->list); + spin_unlock(&wrappers->lock); + if (all_done) + kfree(wrappers); + + return ret; +} + +static int qaic_manage(struct qaic_device *qdev, struct qaic_user *usr, struct manage_msg *user_msg) +{ + struct wire_trans_dma_xfer_cont *dma_cont = NULL; + struct ioctl_resources resources; + struct wire_msg *rsp = NULL; + int ret; + + memset(&resources, 0, sizeof(struct ioctl_resources)); + + INIT_LIST_HEAD(&resources.dma_xfers); + + if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH || + user_msg->count > QAIC_MANAGE_MAX_MSG_LENGTH / sizeof(struct qaic_manage_trans_hdr)) + return -EINVAL; + +dma_xfer_continue: + ret = qaic_manage_msg_xfer(qdev, usr, user_msg, &resources, &rsp); + if (ret) + return ret; + /* dma_cont should be the only transaction if present */ + if (le32_to_cpu(rsp->hdr.count) == 1) { + dma_cont = (struct wire_trans_dma_xfer_cont *)rsp->data; + if (le32_to_cpu(dma_cont->hdr.type) != QAIC_TRANS_DMA_XFER_CONT) + dma_cont = NULL; + } + if (dma_cont) { + if (le32_to_cpu(dma_cont->dma_chunk_id) == resources.dma_chunk_id && + le64_to_cpu(dma_cont->xferred_size) == resources.xferred_dma_size) { + kfree(rsp); + goto dma_xfer_continue; + } + + ret = -EINVAL; + goto dma_cont_failed; + } + + ret = decode_message(qdev, user_msg, rsp, &resources, usr); + +dma_cont_failed: + free_dbc_buf(qdev, &resources); + kfree(rsp); + return ret; +} + +int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_manage_msg *user_msg = data; + struct qaic_device *qdev; + struct manage_msg *msg; + struct qaic_user *usr; + u8 __user *user_data; + int qdev_rcu_id; + int usr_rcu_id; + int ret; + + if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -EINVAL; + + usr = file_priv->driver_priv; + + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return -ENODEV; + } + + qdev = usr->qddev->qdev; + + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != 
QAIC_ONLINE) { + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return -ENODEV; + } + + msg = kzalloc(QAIC_MANAGE_MAX_MSG_LENGTH + sizeof(*msg), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + msg->len = user_msg->len; + msg->count = user_msg->count; + + user_data = u64_to_user_ptr(user_msg->data); + + if (copy_from_user(msg->data, user_data, user_msg->len)) { + ret = -EFAULT; + goto free_msg; + } + + ret = qaic_manage(qdev, usr, msg); + + /* + * If the qaic_manage() is successful then we copy the message onto + * userspace memory but we have an exception for -ECANCELED. + * For -ECANCELED, it means that device has NACKed the message with a + * status error code which userspace would like to know. + */ + if (ret == -ECANCELED || !ret) { + if (copy_to_user(user_data, msg->data, msg->len)) { + ret = -EFAULT; + } else { + user_msg->len = msg->len; + user_msg->count = msg->count; + } + } + +free_msg: + kfree(msg); +out: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor) +{ + struct qaic_manage_trans_status_from_dev *status_result; + struct qaic_manage_trans_status_to_dev *status_query; + struct manage_msg *user_msg; + int ret; + + user_msg = kmalloc(sizeof(*user_msg) + sizeof(*status_result), GFP_KERNEL); + if (!user_msg) { + ret = -ENOMEM; + goto out; + } + user_msg->len = sizeof(*status_query); + user_msg->count = 1; + + status_query = (struct qaic_manage_trans_status_to_dev *)user_msg->data; + status_query->hdr.type = QAIC_TRANS_STATUS_FROM_USR; + status_query->hdr.len = sizeof(status_query->hdr); + + ret = qaic_manage(qdev, usr, user_msg); + if (ret) + goto kfree_user_msg; + status_result = (struct qaic_manage_trans_status_from_dev *)user_msg->data; + *major = status_result->major; + *minor = status_result->minor; + + if (status_result->status_flags & BIT(0)) { /* device is using CRC */ + /* By default qdev->gen_crc is programmed to generate CRC */ + qdev->valid_crc = valid_crc; + } else { + /* By default qdev->valid_crc is programmed to bypass CRC */ + qdev->gen_crc = gen_crc_stub; + } + +kfree_user_msg: + kfree(user_msg); +out: + return ret; +} + +static void resp_worker(struct work_struct *work) +{ + struct resp_work *resp = container_of(work, struct resp_work, work); + struct qaic_device *qdev = resp->qdev; + struct wire_msg *msg = resp->buf; + struct xfer_queue_elem *elem; + struct xfer_queue_elem *i; + bool found = false; + + mutex_lock(&qdev->cntl_mutex); + list_for_each_entry_safe(elem, i, &qdev->cntl_xfer_list, list) { + if (elem->seq_num == le32_to_cpu(msg->hdr.sequence_number)) { + found = true; + list_del_init(&elem->list); + elem->buf = msg; + complete_all(&elem->xfer_done); + break; + } + } + mutex_unlock(&qdev->cntl_mutex); + + if (!found) + /* request must have timed out, drop packet */ + kfree(msg); + + kfree(resp); +} + +static void free_wrapper_from_list(struct wrapper_list *wrappers, struct wrapper_msg *wrapper) +{ + bool all_done = false; + + spin_lock(&wrappers->lock); + kref_put(&wrapper->ref_count, free_wrapper); + all_done = list_empty(&wrappers->list); + spin_unlock(&wrappers->lock); + + if (all_done) + kfree(wrappers); +} + +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct wire_msg *msg = mhi_result->buf_addr; + struct wrapper_msg *wrapper = container_of(msg, struct wrapper_msg, msg); + + 
free_wrapper_from_list(wrapper->head, wrapper);
+}
+
+void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+	struct wire_msg *msg = mhi_result->buf_addr;
+	struct resp_work *resp;
+
+	if (mhi_result->transaction_status || msg->hdr.magic_number != MANAGE_MAGIC_NUMBER) {
+		kfree(msg);
+		return;
+	}
+
+	resp = kmalloc(sizeof(*resp), GFP_ATOMIC);
+	if (!resp) {
+		kfree(msg);
+		return;
+	}
+
+	INIT_WORK(&resp->work, resp_worker);
+	resp->qdev = qdev;
+	resp->buf = msg;
+	queue_work(qdev->cntl_wq, &resp->work);
+}
+
+int qaic_control_open(struct qaic_device *qdev)
+{
+	if (!qdev->cntl_ch)
+		return -ENODEV;
+
+	qdev->cntl_lost_buf = false;
+	/*
+	 * By default, qaic assumes the device has CRC enabled. Whether CRC is
+	 * actually enabled is learned during the device status transaction,
+	 * which is the first transaction performed on the control channel.
+	 *
+	 * CRC validation of that first status response is therefore skipped
+	 * here (by installing valid_crc_stub) and performed later during
+	 * decoding if the device reports CRC enabled.
+	 * Once qaic knows whether the device has CRC enabled or not, it acts
+	 * accordingly.
+	 */
+	qdev->gen_crc = gen_crc;
+	qdev->valid_crc = valid_crc_stub;
+
+	return mhi_prepare_for_transfer(qdev->cntl_ch);
+}
+
+void qaic_control_close(struct qaic_device *qdev)
+{
+	mhi_unprepare_from_transfer(qdev->cntl_ch);
+}
+
+void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr)
+{
+	struct wire_trans_terminate_to_dev *trans;
+	struct wrapper_list *wrappers;
+	struct wrapper_msg *wrapper;
+	struct wire_msg *msg;
+	struct wire_msg *rsp;
+
+	wrappers = alloc_wrapper_list();
+	if (!wrappers)
+		return;
+
+	wrapper = add_wrapper(wrappers, sizeof(*wrapper) + sizeof(*msg) + sizeof(*trans));
+	if (!wrapper)
+		return;
+
+	msg = &wrapper->msg;
+
+	trans = (struct wire_trans_terminate_to_dev *)msg->data;
+
+	trans->hdr.type = cpu_to_le32(QAIC_TRANS_TERMINATE_TO_DEV);
+	trans->hdr.len = cpu_to_le32(sizeof(*trans));
+	trans->handle = cpu_to_le32(usr->handle);
+
+	mutex_lock(&qdev->cntl_mutex);
+	wrapper->len = sizeof(msg->hdr) + sizeof(*trans);
+	msg->hdr.magic_number = MANAGE_MAGIC_NUMBER;
+	msg->hdr.sequence_number = cpu_to_le32(qdev->next_seq_num++);
+	msg->hdr.len = cpu_to_le32(wrapper->len);
+	msg->hdr.count = cpu_to_le32(1);
+	msg->hdr.handle = cpu_to_le32(usr->handle);
+	msg->hdr.padding = cpu_to_le32(0);
+	msg->hdr.crc32 = cpu_to_le32(qdev->gen_crc(wrappers));
+
+	/*
+	 * msg_xfer releases the mutex.
+	 * We don't care about the return of msg_xfer since we will not do
+	 * anything different based on what happens.
+	 * We ignore pending signals since one will be set if the user is
+	 * killed, and we need to give the device a chance to clean up,
+	 * otherwise DMA may still be in progress when we return.
+	 */
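The "true" passed to msg_xfer() just below is its ignore_signal flag; inside msg_xfer() (earlier in this file) it selects between the two wait flavors. In sketch form:

	if (ignore_signal)	/* terminate path: ride out the full timeout */
		ret = wait_for_completion_timeout(&elem.xfer_done, timeout);
	else			/* normal path: a pending signal aborts the wait early */
		ret = wait_for_completion_interruptible_timeout(&elem.xfer_done, timeout);

Since a killed process always has a signal pending, the interruptible variant would return immediately here and the terminate message could race ongoing DMA.

/*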
+ */ + rsp = msg_xfer(qdev, wrappers, qdev->next_seq_num - 1, true); + if (!IS_ERR(rsp)) + kfree(rsp); + free_wrapper_from_list(wrappers, wrapper); +} + +void wake_all_cntl(struct qaic_device *qdev) +{ + struct xfer_queue_elem *elem; + struct xfer_queue_elem *i; + + mutex_lock(&qdev->cntl_mutex); + list_for_each_entry_safe(elem, i, &qdev->cntl_xfer_list, list) { + list_del_init(&elem->list); + complete_all(&elem->xfer_done); + } + mutex_unlock(&qdev->cntl_mutex); +} diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c new file mode 100644 index 000000000000..60cb4d65d48e --- /dev/null +++ b/drivers/accel/qaic/qaic_data.c @@ -0,0 +1,2072 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/dma-buf.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/math64.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <linux/scatterlist.h> +#include <linux/spinlock.h> +#include <linux/srcu.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/wait.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_prime.h> +#include <drm/drm_print.h> +#include <uapi/drm/qaic_accel.h> + +#include "qaic.h" + +#define SEM_VAL_MASK GENMASK_ULL(11, 0) +#define SEM_INDEX_MASK GENMASK_ULL(4, 0) +#define BULK_XFER BIT(3) +#define GEN_COMPLETION BIT(4) +#define INBOUND_XFER 1 +#define OUTBOUND_XFER 2 +#define REQHP_OFF 0x0 /* we read this */ +#define REQTP_OFF 0x4 /* we write this */ +#define RSPHP_OFF 0x8 /* we write this */ +#define RSPTP_OFF 0xc /* we read this */ + +#define ENCODE_SEM(val, index, sync, cmd, flags) \ + ({ \ + FIELD_PREP(GENMASK(11, 0), (val)) | \ + FIELD_PREP(GENMASK(20, 16), (index)) | \ + FIELD_PREP(BIT(22), (sync)) | \ + FIELD_PREP(GENMASK(26, 24), (cmd)) | \ + FIELD_PREP(GENMASK(30, 29), (flags)) | \ + FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \ + }) +#define NUM_EVENTS 128 +#define NUM_DELAYS 10 +#define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size()) + +static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */ +module_param(wait_exec_default_timeout_ms, uint, 0600); +MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO"); + +static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */ +module_param(datapath_poll_interval_us, uint, 0600); +MODULE_PARM_DESC(datapath_poll_interval_us, + "Amount of time to sleep between activity when datapath polling is enabled"); + +struct dbc_req { + /* + * A request ID is assigned to each memory handle going in DMA queue. + * As a single memory handle can enqueue multiple elements in DMA queue + * all of them will have the same request ID. 
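 */

ENCODE_SEM() defined above packs one semaphore command word of a request element. A worked example with purely illustrative operands (the command semantics themselves are device-defined):

	u32 sem_cmd = ENCODE_SEM(12, 3, 1, 2, 0);
	/*
	 * bits 11:0  = 12  (semaphore value)
	 * bits 20:16 = 3   (semaphore index)
	 * bit  22    = 1   (sync)
	 * bits 26:24 = 2   (command)
	 * bit  31    = 1   (enable; set because cmd != 0)
	 * sem_cmd == 0x8243000c
	 */

/*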
+ */ + __le16 req_id; + /* Future use */ + __u8 seq_id; + /* + * Special encoded variable + * 7 0 - Do not force to generate MSI after DMA is completed + * 1 - Force to generate MSI after DMA is completed + * 6:5 Reserved + * 4 1 - Generate completion element in the response queue + * 0 - No Completion Code + * 3 0 - DMA request is a Link list transfer + * 1 - DMA request is a Bulk transfer + * 2 Reserved + * 1:0 00 - No DMA transfer involved + * 01 - DMA transfer is part of inbound transfer + * 10 - DMA transfer has outbound transfer + * 11 - NA + */ + __u8 cmd; + __le32 resv; + /* Source address for the transfer */ + __le64 src_addr; + /* Destination address for the transfer */ + __le64 dest_addr; + /* Length of transfer request */ + __le32 len; + __le32 resv2; + /* Doorbell address */ + __le64 db_addr; + /* + * Special encoded variable + * 7 1 - Doorbell(db) write + * 0 - No doorbell write + * 6:2 Reserved + * 1:0 00 - 32 bit access, db address must be aligned to 32bit-boundary + * 01 - 16 bit access, db address must be aligned to 16bit-boundary + * 10 - 8 bit access, db address must be aligned to 8bit-boundary + * 11 - Reserved + */ + __u8 db_len; + __u8 resv3; + __le16 resv4; + /* 32 bit data written to doorbell address */ + __le32 db_data; + /* + * Special encoded variable + * All the fields of sem_cmdX are passed from user and all are ORed + * together to form sem_cmd. + * 0:11 Semaphore value + * 15:12 Reserved + * 20:16 Semaphore index + * 21 Reserved + * 22 Semaphore Sync + * 23 Reserved + * 26:24 Semaphore command + * 28:27 Reserved + * 29 Semaphore DMA out bound sync fence + * 30 Semaphore DMA in bound sync fence + * 31 Enable semaphore command + */ + __le32 sem_cmd0; + __le32 sem_cmd1; + __le32 sem_cmd2; + __le32 sem_cmd3; +} __packed; + +struct dbc_rsp { + /* Request ID of the memory handle whose DMA transaction is completed */ + __le16 req_id; + /* Status of the DMA transaction. 0 : Success otherwise failure */ + __le16 status; +} __packed; + +static inline bool bo_queued(struct qaic_bo *bo) +{ + return !list_empty(&bo->xfer_list); +} + +inline int get_dbc_req_elem_size(void) +{ + return sizeof(struct dbc_req); +} + +inline int get_dbc_rsp_elem_size(void) +{ + return sizeof(struct dbc_rsp); +} + +static void free_slice(struct kref *kref) +{ + struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count); + + slice->bo->total_slice_nents -= slice->nents; + list_del(&slice->slice); + drm_gem_object_put(&slice->bo->base); + sg_free_table(slice->sgt); + kfree(slice->sgt); + kvfree(slice->reqs); + kfree(slice); +} + +static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out, + struct sg_table *sgt_in, u64 size, u64 offset) +{ + struct scatterlist *sg, *sgn, *sgf, *sgl; + unsigned int len, nents, offf, offl; + struct sg_table *sgt; + size_t total_len; + int ret, j; + + /* find out number of relevant nents needed for this mem */ + total_len = 0; + sgf = NULL; + sgl = NULL; + nents = 0; + offf = 0; + offl = 0; + + size = size ? 
size : PAGE_SIZE; + for_each_sgtable_dma_sg(sgt_in, sg, j) { + len = sg_dma_len(sg); + + if (!len) + continue; + if (offset >= total_len && offset < total_len + len) { + sgf = sg; + offf = offset - total_len; + } + if (sgf) + nents++; + if (offset + size >= total_len && + offset + size <= total_len + len) { + sgl = sg; + offl = offset + size - total_len; + break; + } + total_len += len; + } + + if (!sgf || !sgl) { + ret = -EINVAL; + goto out; + } + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto out; + } + + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); + if (ret) + goto free_sgt; + + /* copy relevant sg node and fix page and length */ + sgn = sgf; + for_each_sgtable_dma_sg(sgt, sg, j) { + memcpy(sg, sgn, sizeof(*sg)); + if (sgn == sgf) { + sg_dma_address(sg) += offf; + sg_dma_len(sg) -= offf; + sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf); + } else { + offf = 0; + } + if (sgn == sgl) { + sg_dma_len(sg) = offl - offf; + sg_set_page(sg, sg_page(sgn), offl - offf, offf); + sg_mark_end(sg); + break; + } + sgn = sg_next(sgn); + } + + *sgt_out = sgt; + return ret; + +free_sgt: + kfree(sgt); +out: + *sgt_out = NULL; + return ret; +} + +static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice, + struct qaic_attach_slice_entry *req) +{ + __le64 db_addr = cpu_to_le64(req->db_addr); + __le32 db_data = cpu_to_le32(req->db_data); + struct scatterlist *sg; + __u8 cmd = BULK_XFER; + int presync_sem; + u64 dev_addr; + __u8 db_len; + int i; + + if (!slice->no_xfer) + cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER); + + if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8)) + return -EINVAL; + + presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync; + if (presync_sem > 1) + return -EINVAL; + + presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 | + req->sem2.presync << 2 | req->sem3.presync << 3; + + switch (req->db_len) { + case 32: + db_len = BIT(7); + break; + case 16: + db_len = BIT(7) | 1; + break; + case 8: + db_len = BIT(7) | 2; + break; + case 0: + db_len = 0; /* doorbell is not active for this command */ + break; + default: + return -EINVAL; /* should never hit this */ + } + + /* + * When we end up splitting up a single request (ie a buf slice) into + * multiple DMA requests, we have to manage the sync data carefully. + * There can only be one presync sem. That needs to be on every xfer + * so that the DMA engine doesn't transfer data before the receiver is + * ready. We only do the doorbell and postsync sems after the xfer. + * To guarantee previous xfers for the request are complete, we use a + * fence. + */ + dev_addr = req->dev_addr; + for_each_sgtable_dma_sg(slice->sgt, sg, i) { + slice->reqs[i].cmd = cmd; + slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? + sg_dma_address(sg) : dev_addr); + slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? + dev_addr : sg_dma_address(sg)); + /* + * sg_dma_len(sg) returns size of a DMA segment, maximum DMA + * segment size is set to UINT_MAX by qaic and hence return + * values of sg_dma_len(sg) can never exceed u32 range. So, + * by down sizing we are not corrupting the value. 
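+ *
+ * The sem_cmdX words below are packed with ENCODE_SEM(). As an
+ * illustration (arbitrary example values), ENCODE_SEM(5, 3, 1, 1, 0)
+ * yields 0x81430005: value 5 in bits 11:0, index 3 in bits 20:16,
+ * presync in bit 22, command 1 in bits 26:24, and the enable bit 31 set
+ * because cmd is non-zero.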
+ */ + slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg)); + switch (presync_sem) { + case BIT(0): + slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, + req->sem0.index, + req->sem0.presync, + req->sem0.cmd, + req->sem0.flags)); + break; + case BIT(1): + slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, + req->sem1.index, + req->sem1.presync, + req->sem1.cmd, + req->sem1.flags)); + break; + case BIT(2): + slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, + req->sem2.index, + req->sem2.presync, + req->sem2.cmd, + req->sem2.flags)); + break; + case BIT(3): + slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, + req->sem3.index, + req->sem3.presync, + req->sem3.cmd, + req->sem3.flags)); + break; + } + dev_addr += sg_dma_len(sg); + } + /* add post transfer stuff to last segment */ + i--; + slice->reqs[i].cmd |= GEN_COMPLETION; + slice->reqs[i].db_addr = db_addr; + slice->reqs[i].db_len = db_len; + slice->reqs[i].db_data = db_data; + /* + * Add a fence if we have more than one request going to the hardware + * representing the entirety of the user request, and the user request + * has no presync condition. + * Fences are expensive, so we try to avoid them. We rely on the + * hardware behavior to avoid needing one when there is a presync + * condition. When a presync exists, all requests for that same + * presync will be queued into a fifo. Thus, since we queue the + * post xfer activity only on the last request we queue, the hardware + * will ensure that the last queued request is processed last, thus + * making sure the post xfer activity happens at the right time without + * a fence. + */ + if (i && !presync_sem) + req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ? + QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE); + slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index, + req->sem0.presync, req->sem0.cmd, + req->sem0.flags)); + slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index, + req->sem1.presync, req->sem1.cmd, + req->sem1.flags)); + slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index, + req->sem2.presync, req->sem2.cmd, + req->sem2.flags)); + slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index, + req->sem3.presync, req->sem3.cmd, + req->sem3.flags)); + + return 0; +} + +static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_entry *slice_ent) +{ + struct sg_table *sgt = NULL; + struct bo_slice *slice; + int ret; + + ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset); + if (ret) + goto out; + + slice = kmalloc(sizeof(*slice), GFP_KERNEL); + if (!slice) { + ret = -ENOMEM; + goto free_sgt; + } + + slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL); + if (!slice->reqs) { + ret = -ENOMEM; + goto free_slice; + } + + slice->no_xfer = !slice_ent->size; + slice->sgt = sgt; + slice->nents = sgt->nents; + slice->dir = bo->dir; + slice->bo = bo; + slice->size = slice_ent->size; + slice->offset = slice_ent->offset; + + ret = encode_reqs(qdev, slice, slice_ent); + if (ret) + goto free_req; + + bo->total_slice_nents += sgt->nents; + kref_init(&slice->ref_count); + drm_gem_object_get(&bo->base); + list_add_tail(&slice->slice, &bo->slices); + + return 0; + +free_req: + kvfree(slice->reqs); +free_slice: + kfree(slice); +free_sgt: + sg_free_table(sgt); + kfree(sgt); +out: + return ret; +} + +static int create_sgt(struct qaic_device *qdev, 
struct sg_table **sgt_out, u64 size) +{ + struct scatterlist *sg; + struct sg_table *sgt; + struct page **pages; + int *pages_order; + int buf_extra; + int max_order; + int nr_pages; + int ret = 0; + int i, j, k; + int order; + + if (size) { + nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); + /* + * calculate how much extra we are going to allocate, to remove + * later + */ + buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE; + max_order = min(MAX_PAGE_ORDER, get_order(size)); + } else { + /* allocate a single page for book keeping */ + nr_pages = 1; + buf_extra = 0; + max_order = 0; + } + + pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + goto out; + } + pages_order = (void *)pages + sizeof(*pages) * nr_pages; + + /* + * Allocate requested memory using alloc_pages. It is possible to allocate + * the requested memory in multiple chunks by calling alloc_pages + * multiple times. Use SG table to handle multiple allocated pages. + */ + i = 0; + while (nr_pages > 0) { + order = min(get_order(nr_pages * PAGE_SIZE), max_order); + while (1) { + pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER | + __GFP_NOWARN | __GFP_ZERO | + (order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL), + order); + if (pages[i]) + break; + if (!order--) { + ret = -ENOMEM; + goto free_partial_alloc; + } + } + + max_order = order; + pages_order[i] = order; + + nr_pages -= 1 << order; + if (nr_pages <= 0) + /* account for over allocation */ + buf_extra += abs(nr_pages) * PAGE_SIZE; + i++; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto free_partial_alloc; + } + + if (sg_alloc_table(sgt, i, GFP_KERNEL)) { + ret = -ENOMEM; + goto free_sgt; + } + + /* Populate the SG table with the allocated memory pages */ + sg = sgt->sgl; + for (k = 0; k < i; k++, sg = sg_next(sg)) { + /* Last entry requires special handling */ + if (k < i - 1) { + sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0); + } else { + sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0); + sg_mark_end(sg); + } + } + + kvfree(pages); + *sgt_out = sgt; + return ret; + +free_sgt: + kfree(sgt); +free_partial_alloc: + for (j = 0; j < i; j++) + __free_pages(pages[j], pages_order[j]); + kvfree(pages); +out: + *sgt_out = NULL; + return ret; +} + +static bool invalid_sem(struct qaic_sem *sem) +{ + if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK || + !(sem->presync == 0 || sem->presync == 1) || sem->pad || + sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) || + sem->cmd > QAIC_SEM_WAIT_GT_0) + return true; + return false; +} + +static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent, + u32 count, u64 total_size) +{ + u64 total; + int i; + + for (i = 0; i < count; i++) { + if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 || + slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) || + invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) || + invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3)) + return -EINVAL; + + if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) || + total > total_size) + return -EINVAL; + } + + return 0; +} + +static void qaic_free_sgt(struct sg_table *sgt) +{ + struct scatterlist *sg; + + if (!sgt) + return; + + for (sg = sgt->sgl; sg; sg = sg_next(sg)) + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + sg_free_table(sgt); + kfree(sgt); +} + +static void qaic_gem_print_info(struct 
drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + + drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir); +} + +static const struct vm_operations_struct drm_vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + unsigned long offset = 0; + struct scatterlist *sg; + int ret = 0; + + if (drm_gem_is_imported(obj)) + return -EINVAL; + + for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { + if (sg_page(sg)) { + ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)), + sg->length, vma->vm_page_prot); + if (ret) + goto out; + offset += sg->length; + } + } + +out: + return ret; +} + +static void qaic_free_object(struct drm_gem_object *obj) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + + if (drm_gem_is_imported(obj)) { + /* DMABUF/PRIME Path */ + drm_prime_gem_destroy(obj, NULL); + } else { + /* Private buffer allocation path */ + qaic_free_sgt(bo->sgt); + } + + mutex_destroy(&bo->lock); + drm_gem_object_release(obj); + kfree(bo); +} + +static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + struct scatterlist *sg, *sg_in; + struct sg_table *sgt, *sgt_in; + int i; + + sgt_in = bo->sgt; + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) { + kfree(sgt); + return ERR_PTR(-ENOMEM); + } + + sg = sgt->sgl; + for_each_sgtable_sg(sgt_in, sg_in, i) { + memcpy(sg, sg_in, sizeof(*sg)); + sg = sg_next(sg); + } + + return sgt; +} + +static const struct drm_gem_object_funcs qaic_gem_funcs = { + .free = qaic_free_object, + .get_sg_table = qaic_get_sg_table, + .print_info = qaic_gem_print_info, + .mmap = qaic_gem_object_mmap, + .vm_ops = &drm_vm_ops, +}; + +static void qaic_init_bo(struct qaic_bo *bo, bool reinit) +{ + if (reinit) { + bo->sliced = false; + reinit_completion(&bo->xfer_done); + } else { + mutex_init(&bo->lock); + init_completion(&bo->xfer_done); + } + complete_all(&bo->xfer_done); + INIT_LIST_HEAD(&bo->slices); + INIT_LIST_HEAD(&bo->xfer_list); +} + +static struct qaic_bo *qaic_alloc_init_bo(void) +{ + struct qaic_bo *bo; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + qaic_init_bo(bo, false); + + return bo; +} + +int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_create_bo *args = data; + int usr_rcu_id, qdev_rcu_id; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + struct qaic_bo *bo; + size_t size; + int ret; + + if (args->pad) + return -EINVAL; + + size = PAGE_ALIGN(args->size); + if (size == 0) + return -EINVAL; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + bo = qaic_alloc_init_bo(); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto unlock_dev_srcu; + } + obj = &bo->base; + + drm_gem_private_object_init(dev, obj, size); + + obj->funcs = &qaic_gem_funcs; + ret = create_sgt(qdev, &bo->sgt, size); + if (ret) + goto free_bo; + + ret = drm_gem_create_mmap_offset(obj); + if (ret) + goto free_bo; + + ret = 
drm_gem_handle_create(file_priv, obj, &args->handle); + if (ret) + goto free_bo; + + drm_gem_object_put(obj); + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + + return 0; + +free_bo: + drm_gem_object_put(obj); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_mmap_bo *args = data; + int usr_rcu_id, qdev_rcu_id; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + int ret = 0; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) { + ret = -ENOENT; + goto unlock_dev_srcu; + } + + args->offset = drm_vma_node_offset_addr(&obj->vma_node); + + drm_gem_object_put(obj); + +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct drm_gem_object *obj; + struct qaic_bo *bo; + int ret; + + bo = qaic_alloc_init_bo(); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto out; + } + + obj = &bo->base; + get_dma_buf(dma_buf); + + attach = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attach)) { + ret = PTR_ERR(attach); + goto attach_fail; + } + + if (!attach->dmabuf->size) { + ret = -EINVAL; + goto size_align_fail; + } + + drm_gem_private_object_init(dev, obj, attach->dmabuf->size); + /* + * skipping dma_buf_map_attachment() as we do not know the direction + * just yet. Once the direction is known in the subsequent IOCTL to + * attach slicing, we can do it then. 
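+ * (That deferred mapping is performed by qaic_prepare_import_bo(), using
+ * the direction supplied in the slice attach header.)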
+ */ + + obj->funcs = &qaic_gem_funcs; + obj->import_attach = attach; + obj->resv = dma_buf->resv; + + return obj; + +size_align_fail: + dma_buf_detach(dma_buf, attach); +attach_fail: + dma_buf_put(dma_buf); + kfree(bo); +out: + return ERR_PTR(ret); +} + +static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr) +{ + struct drm_gem_object *obj = &bo->base; + struct sg_table *sgt; + int ret; + + sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + return ret; + } + + bo->sgt = sgt; + + return 0; +} + +static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_hdr *hdr) +{ + int ret; + + ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0); + if (ret) + return -EFAULT; + + return 0; +} + +static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_hdr *hdr) +{ + int ret; + + if (drm_gem_is_imported(&bo->base)) + ret = qaic_prepare_import_bo(bo, hdr); + else + ret = qaic_prepare_export_bo(qdev, bo, hdr); + bo->dir = hdr->dir; + bo->dbc = &qdev->dbc[hdr->dbc_id]; + bo->nr_slice = hdr->count; + + return ret; +} + +static void qaic_unprepare_import_bo(struct qaic_bo *bo) +{ + dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir); + bo->sgt = NULL; +} + +static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo) +{ + dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0); +} + +static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) +{ + if (drm_gem_is_imported(&bo->base)) + qaic_unprepare_import_bo(bo); + else + qaic_unprepare_export_bo(qdev, bo); + + bo->dir = 0; + bo->dbc = NULL; + bo->nr_slice = 0; +} + +static void qaic_free_slices_bo(struct qaic_bo *bo) +{ + struct bo_slice *slice, *temp; + + list_for_each_entry_safe(slice, temp, &bo->slices, slice) + kref_put(&slice->ref_count, free_slice); + if (WARN_ON_ONCE(bo->total_slice_nents != 0)) + bo->total_slice_nents = 0; + bo->nr_slice = 0; +} + +static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_hdr *hdr, + struct qaic_attach_slice_entry *slice_ent) +{ + int ret, i; + + for (i = 0; i < hdr->count; i++) { + ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]); + if (ret) { + qaic_free_slices_bo(bo); + return ret; + } + } + + if (bo->total_slice_nents > bo->dbc->nelem) { + qaic_free_slices_bo(bo); + return -ENOSPC; + } + + return 0; +} + +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_attach_slice_entry *slice_ent; + struct qaic_attach_slice *args = data; + int rcu_id, usr_rcu_id, qdev_rcu_id; + struct dma_bridge_chan *dbc; + struct drm_gem_object *obj; + struct qaic_device *qdev; + unsigned long arg_size; + struct qaic_user *usr; + u8 __user *user_data; + struct qaic_bo *bo; + int ret; + + if (args->hdr.count == 0) + return -EINVAL; + + if (check_mul_overflow((unsigned long)args->hdr.count, + (unsigned long)sizeof(*slice_ent), + &arg_size)) + return -EINVAL; + + if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE)) + return -EINVAL; + + if (args->data == 0) + return -EINVAL; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto 
unlock_dev_srcu; + } + + if (args->hdr.dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + user_data = u64_to_user_ptr(args->data); + + slice_ent = memdup_user(user_data, arg_size); + if (IS_ERR(slice_ent)) { + ret = PTR_ERR(slice_ent); + goto unlock_dev_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->hdr.handle); + if (!obj) { + ret = -ENOENT; + goto free_slice_ent; + } + + ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size); + if (ret) + goto put_bo; + + bo = to_qaic_bo(obj); + ret = mutex_lock_interruptible(&bo->lock); + if (ret) + goto put_bo; + + if (bo->sliced) { + ret = -EINVAL; + goto unlock_bo; + } + + dbc = &qdev->dbc[args->hdr.dbc_id]; + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (dbc->usr != usr) { + ret = -EINVAL; + goto unlock_ch_srcu; + } + + if (dbc->id == qdev->ssr_dbc) { + ret = -EPIPE; + goto unlock_ch_srcu; + } + + ret = qaic_prepare_bo(qdev, bo, &args->hdr); + if (ret) + goto unlock_ch_srcu; + + ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent); + if (ret) + goto unprepare_bo; + + if (args->hdr.dir == DMA_TO_DEVICE) + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir); + + bo->sliced = true; + list_add_tail(&bo->bo_list, &bo->dbc->bo_lists); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + mutex_unlock(&bo->lock); + kfree(slice_ent); + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + + return 0; + +unprepare_bo: + qaic_unprepare_bo(qdev, bo); +unlock_ch_srcu: + srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_bo: + mutex_unlock(&bo->lock); +put_bo: + drm_gem_object_put(obj); +free_slice_ent: + kfree(slice_ent); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size) +{ + u32 avail = head - tail - 1; + + if (head <= tail) + avail += q_size; + + return avail; +} + +static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id, + u32 head, u32 *ptail) +{ + struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; + struct dbc_req *reqs = slice->reqs; + u32 tail = *ptail; + u32 avail; + + avail = fifo_space_avail(head, tail, dbc->nelem); + if (avail < slice->nents) + return -EAGAIN; + + if (tail + slice->nents > dbc->nelem) { + avail = dbc->nelem - tail; + avail = min_t(u32, avail, slice->nents); + memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail); + reqs += avail; + avail = slice->nents - avail; + if (avail) + memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail); + } else { + memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents); + } + + *ptail = (tail + slice->nents) % dbc->nelem; + + return 0; +} + +static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, + u64 resize, struct dma_bridge_chan *dbc, u32 head, + u32 *ptail) +{ + struct dbc_req *reqs = slice->reqs; + struct dbc_req *last_req; + u32 tail = *ptail; + u64 last_bytes; + u32 first_n; + u32 avail; + + avail = fifo_space_avail(head, tail, dbc->nelem); + + /* + * After this for loop is complete, first_n represents the index + * of the last DMA request of this slice that needs to be + * transferred after resizing and last_bytes represents DMA size + * of that request. 
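+ *
+ * For example (illustrative numbers): with 4096 byte requests and
+ * resize = 5000, the loop below exits with first_n = 1 and
+ * last_bytes = 904, so one request is copied whole and the final one is
+ * truncated to 904 bytes.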
+ */ + last_bytes = resize; + for (first_n = 0; first_n < slice->nents; first_n++) + if (last_bytes > le32_to_cpu(reqs[first_n].len)) + last_bytes -= le32_to_cpu(reqs[first_n].len); + else + break; + + if (avail < (first_n + 1)) + return -EAGAIN; + + if (first_n) { + if (tail + first_n > dbc->nelem) { + avail = dbc->nelem - tail; + avail = min_t(u32, avail, first_n); + memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail); + last_req = reqs + avail; + avail = first_n - avail; + if (avail) + memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail); + } else { + memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n); + } + } + + /* + * Copy over the last entry. Here we need to adjust len to the left over + * size, and set src and dst to the entry it is copied to. + */ + last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem); + memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs)); + + /* + * last_bytes holds size of a DMA segment, maximum DMA segment size is + * set to UINT_MAX by qaic and hence last_bytes can never exceed u32 + * range. So, by down sizing we are not corrupting the value. + */ + last_req->len = cpu_to_le32((u32)last_bytes); + last_req->src_addr = reqs[first_n].src_addr; + last_req->dest_addr = reqs[first_n].dest_addr; + if (!last_bytes) + /* Disable DMA transfer */ + last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd; + + *ptail = (tail + first_n + 1) % dbc->nelem; + + return 0; +} + +static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv, + struct qaic_execute_entry *exec, unsigned int count, + bool is_partial, struct dma_bridge_chan *dbc, u32 head, + u32 *tail) +{ + struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec; + struct drm_gem_object *obj; + struct bo_slice *slice; + unsigned long flags; + struct qaic_bo *bo; + int i, j; + int ret; + + for (i = 0; i < count; i++) { + /* + * ref count will be decremented when the transfer of this + * buffer is complete. It is inside dbc_irq_threaded_fn(). + */ + obj = drm_gem_object_lookup(file_priv, + is_partial ? 
pexec[i].handle : exec[i].handle); + if (!obj) { + ret = -ENOENT; + goto failed_to_send_bo; + } + + bo = to_qaic_bo(obj); + ret = mutex_lock_interruptible(&bo->lock); + if (ret) + goto failed_to_send_bo; + + if (!bo->sliced) { + ret = -EINVAL; + goto unlock_bo; + } + + if (is_partial && pexec[i].resize > bo->base.size) { + ret = -EINVAL; + goto unlock_bo; + } + + spin_lock_irqsave(&dbc->xfer_lock, flags); + if (bo_queued(bo)) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + ret = -EINVAL; + goto unlock_bo; + } + + bo->req_id = dbc->next_req_id++; + + list_for_each_entry(slice, &bo->slices, slice) { + for (j = 0; j < slice->nents; j++) + slice->reqs[j].req_id = cpu_to_le16(bo->req_id); + + if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset)) + /* Configure the slice for no DMA transfer */ + ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail); + else if (is_partial && pexec[i].resize < slice->offset + slice->size) + /* Configure the slice to be partially DMA transferred */ + ret = copy_partial_exec_reqs(qdev, slice, + pexec[i].resize - slice->offset, dbc, + head, tail); + else + ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail); + if (ret) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + goto unlock_bo; + } + } + reinit_completion(&bo->xfer_done); + list_add_tail(&bo->xfer_list, &dbc->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir); + mutex_unlock(&bo->lock); + } + + return 0; + +unlock_bo: + mutex_unlock(&bo->lock); +failed_to_send_bo: + if (likely(obj)) + drm_gem_object_put(obj); + for (j = 0; j < i; j++) { + spin_lock_irqsave(&dbc->xfer_lock, flags); + bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list); + obj = &bo->base; + list_del_init(&bo->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); + drm_gem_object_put(obj); + } + return ret; +} + +static void update_profiling_data(struct drm_file *file_priv, + struct qaic_execute_entry *exec, unsigned int count, + bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level) +{ + struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec; + struct drm_gem_object *obj; + struct qaic_bo *bo; + int i; + + for (i = 0; i < count; i++) { + /* + * Since we already committed the BO to hardware, the only way + * this should fail is a pending signal. We can't cancel the + * submit to hardware, so we have to just skip the profiling + * data. In case the signal is not fatal to the process, we + * return success so that the user doesn't try to resubmit. + */ + obj = drm_gem_object_lookup(file_priv, + is_partial ? pexec[i].handle : exec[i].handle); + if (!obj) + break; + bo = to_qaic_bo(obj); + bo->perf_stats.req_received_ts = received_ts; + bo->perf_stats.req_submit_ts = submit_ts; + bo->perf_stats.queue_level_before = queue_level; + queue_level += bo->total_slice_nents; + drm_gem_object_put(obj); + } +} + +static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv, + bool is_partial) +{ + struct qaic_execute *args = data; + struct qaic_execute_entry *exec; + struct dma_bridge_chan *dbc; + int usr_rcu_id, qdev_rcu_id; + struct qaic_device *qdev; + struct qaic_user *usr; + u64 received_ts; + u32 queue_level; + u64 submit_ts; + int rcu_id; + u32 head; + u32 tail; + u64 size; + int ret; + + received_ts = ktime_get_ns(); + + size = is_partial ? 
sizeof(struct qaic_partial_execute_entry) : sizeof(*exec); + if (args->hdr.count == 0) + return -EINVAL; + + exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size); + if (IS_ERR(exec)) + return PTR_ERR(exec); + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + if (args->hdr.dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + dbc = &qdev->dbc[args->hdr.dbc_id]; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (!dbc->usr || dbc->usr->handle != usr->handle) { + ret = -EPERM; + goto release_ch_rcu; + } + + if (dbc->id == qdev->ssr_dbc) { + ret = -EPIPE; + goto release_ch_rcu; + } + + ret = mutex_lock_interruptible(&dbc->req_lock); + if (ret) + goto release_ch_rcu; + + head = readl(dbc->dbc_base + REQHP_OFF); + tail = readl(dbc->dbc_base + REQTP_OFF); + + if (head == U32_MAX || tail == U32_MAX) { + /* PCI link error */ + ret = -ENODEV; + goto unlock_req_lock; + } + + queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail); + + ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc, + head, &tail); + if (ret) + goto unlock_req_lock; + + /* Finalize commit to hardware */ + submit_ts = ktime_get_ns(); + writel(tail, dbc->dbc_base + REQTP_OFF); + mutex_unlock(&dbc->req_lock); + + update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts, + submit_ts, queue_level); + + if (datapath_polling) + schedule_work(&dbc->poll_work); + +unlock_req_lock: + if (ret) + mutex_unlock(&dbc->req_lock); +release_ch_rcu: + srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + kfree(exec); + return ret; +} + +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + return __qaic_execute_bo_ioctl(dev, data, file_priv, false); +} + +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + return __qaic_execute_bo_ioctl(dev, data, file_priv, true); +} + +/* + * Our interrupt handling is a bit more complicated than a simple ideal, but + * sadly necessary. + * + * Each dbc has a completion queue. Entries in the queue correspond to DMA + * requests which the device has processed. The hardware already has a built + * in irq mitigation. When the device puts an entry into the queue, it will + * only trigger an interrupt if the queue was empty. Therefore, when adding + * the Nth event to a non-empty queue, the hardware doesn't trigger an + * interrupt. This means the host doesn't get additional interrupts signaling + * the same thing - the queue has something to process. + * This behavior can be overridden in the DMA request. + * This means that when the host receives an interrupt, it is required to + * drain the queue. + * + * This behavior is what NAPI attempts to accomplish, although we can't use + * NAPI as we don't have a netdev. We use threaded irqs instead. + * + * However, there is a situation where the host drains the queue fast enough + * that every event causes an interrupt. Typically this is not a problem as + * the rate of events would be low. However, that is not the case with + * lprnet for example. 
On an Intel Xeon D-2191 where we run 8 instances of + * lprnet, the host receives roughly 80k interrupts per second from the device + * (per /proc/interrupts). While NAPI documentation indicates the host should + * just chug along, sadly that behavior causes instability in some hosts. + * + * Therefore, we implement an interrupt disable scheme similar to NAPI. The + * key difference is that we will delay after draining the queue for a small + * time to allow additional events to come in via polling. Using the above + * lprnet workload, this reduces the number of interrupts processed from + * ~80k/sec to about 64 in 5 minutes and appears to solve the system + * instability. + */ +irqreturn_t dbc_irq_handler(int irq, void *data) +{ + struct dma_bridge_chan *dbc = data; + int rcu_id; + u32 head; + u32 tail; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + + if (datapath_polling) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + /* + * Normally datapath_polling will not have irqs enabled, but + * when running with only one MSI the interrupt is shared with + * MHI so it cannot be disabled. Return ASAP instead. + */ + return IRQ_HANDLED; + } + + if (!dbc->usr) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_HANDLED; + } + + head = readl(dbc->dbc_base + RSPHP_OFF); + if (head == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_NONE; + } + + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_NONE; + } + + if (head == tail) { /* queue empty */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_NONE; + } + + if (!dbc->qdev->single_msi) + disable_irq_nosync(irq); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_WAKE_THREAD; +} + +void qaic_irq_polling_work(struct work_struct *work) +{ + struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work); + unsigned long flags; + int rcu_id; + u32 head; + u32 tail; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + + while (1) { + if (dbc->qdev->dev_state != QAIC_ONLINE) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + if (!dbc->usr) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + spin_lock_irqsave(&dbc->xfer_lock, flags); + if (list_empty(&dbc->xfer_list)) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + + head = readl(dbc->dbc_base + RSPHP_OFF); + if (head == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + + if (head != tail) { + irq_wake_thread(dbc->irq, dbc); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + + cond_resched(); + usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us); + } +} + +irqreturn_t dbc_irq_threaded_fn(int irq, void *data) +{ + struct dma_bridge_chan *dbc = data; + int event_count = NUM_EVENTS; + int delay_count = NUM_DELAYS; + struct qaic_device *qdev; + struct qaic_bo *bo, *i; + struct dbc_rsp *rsp; + unsigned long flags; + int rcu_id; + u16 status; + u16 req_id; + u32 head; + u32 tail; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + qdev = dbc->qdev; + + head = readl(dbc->dbc_base + RSPHP_OFF); + if (head == U32_MAX) /* PCI link error */ + goto error_out; + +read_fifo: + + if (!event_count) { + event_count = NUM_EVENTS; + 
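/* Let other threads run between bursts of NUM_EVENTS completions. */
+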
cond_resched(); + } + + /* + * if this channel isn't assigned or gets unassigned during processing + * we have nothing further to do + */ + if (!dbc->usr) + goto error_out; + + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail == U32_MAX) /* PCI link error */ + goto error_out; + + if (head == tail) { /* queue empty */ + if (delay_count) { + --delay_count; + usleep_range(100, 200); + goto read_fifo; /* check for a new event */ + } + goto normal_out; + } + + delay_count = NUM_DELAYS; + while (head != tail) { + if (!event_count) + break; + --event_count; + rsp = dbc->rsp_q_base + head * sizeof(*rsp); + req_id = le16_to_cpu(rsp->req_id); + status = le16_to_cpu(rsp->status); + if (status) + pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status); + spin_lock_irqsave(&dbc->xfer_lock, flags); + /* + * A BO can receive multiple interrupts, since a BO can be + * divided into multiple slices and a buffer receives as many + * interrupts as slices. So until it receives interrupts for + * all the slices we cannot mark that buffer complete. + */ + list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) { + if (bo->req_id == req_id) + bo->nr_slice_xfer_done++; + else + continue; + + if (bo->nr_slice_xfer_done < bo->nr_slice) + break; + + /* + * At this point we have received all the interrupts for + * BO, which means BO execution is complete. + */ + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); + bo->nr_slice_xfer_done = 0; + list_del_init(&bo->xfer_list); + bo->perf_stats.req_processed_ts = ktime_get_ns(); + complete_all(&bo->xfer_done); + drm_gem_object_put(&bo->base); + break; + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + head = (head + 1) % dbc->nelem; + } + + /* + * Update the head pointer of response queue and let the device know + * that we have consumed elements from the queue. 
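+ * head was advanced modulo dbc->nelem for every element consumed above,
+ * so this single write publishes all of them at once.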
+ */ + writel(head, dbc->dbc_base + RSPHP_OFF); + + /* elements might have been put in the queue while we were processing */ + goto read_fifo; + +normal_out: + if (!qdev->single_msi && likely(!datapath_polling)) + enable_irq(irq); + else if (unlikely(datapath_polling)) + schedule_work(&dbc->poll_work); + /* checking the fifo and enabling irqs is a race, missed event check */ + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail != U32_MAX && head != tail) { + if (!qdev->single_msi && likely(!datapath_polling)) + disable_irq_nosync(irq); + goto read_fifo; + } + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_HANDLED; + +error_out: + srcu_read_unlock(&dbc->ch_lock, rcu_id); + if (!qdev->single_msi && likely(!datapath_polling)) + enable_irq(irq); + else if (unlikely(datapath_polling)) + schedule_work(&dbc->poll_work); + + return IRQ_HANDLED; +} + +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_wait *args = data; + int usr_rcu_id, qdev_rcu_id; + struct dma_bridge_chan *dbc; + struct drm_gem_object *obj; + struct qaic_device *qdev; + unsigned long timeout; + struct qaic_user *usr; + struct qaic_bo *bo; + int rcu_id; + int ret; + + if (args->pad != 0) + return -EINVAL; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + if (args->dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + dbc = &qdev->dbc[args->dbc_id]; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (dbc->usr != usr) { + ret = -EPERM; + goto unlock_ch_srcu; + } + + if (dbc->id == qdev->ssr_dbc) { + ret = -EPIPE; + goto unlock_ch_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) { + ret = -ENOENT; + goto unlock_ch_srcu; + } + + bo = to_qaic_bo(obj); + timeout = args->timeout ? 
args->timeout : wait_exec_default_timeout_ms;
+ timeout = msecs_to_jiffies(timeout);
+ ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout);
+ if (!ret) {
+ ret = -ETIMEDOUT;
+ goto put_obj;
+ }
+ if (ret > 0)
+ ret = 0;
+
+ if (!dbc->usr)
+ ret = -EPERM;
+
+ if (dbc->id == qdev->ssr_dbc)
+ ret = -EPIPE;
+
+put_obj:
+ drm_gem_object_put(obj);
+unlock_ch_srcu:
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_perf_stats_entry *ent = NULL;
+ struct qaic_perf_stats *args = data;
+ int usr_rcu_id, qdev_rcu_id;
+ struct drm_gem_object *obj;
+ struct qaic_device *qdev;
+ struct qaic_user *usr;
+ struct qaic_bo *bo;
+ int ret = 0;
+ int i;
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->dev_state != QAIC_ONLINE) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->hdr.dbc_id >= qdev->num_dbc) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
+ goto unlock_dev_srcu;
+ }
+
+ for (i = 0; i < args->hdr.count; i++) {
+ obj = drm_gem_object_lookup(file_priv, ent[i].handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto free_ent;
+ }
+ bo = to_qaic_bo(obj);
+ if (!bo->sliced) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+ if (bo->dbc->id != args->hdr.dbc_id) {
+ drm_gem_object_put(obj);
+ ret = -EINVAL;
+ goto free_ent;
+ }
+ /*
+ * If the perf stats ioctl is called before the wait ioctl has
+ * completed, the latency information is invalid.
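+ * For example (illustrative timestamps): req_received_ts = 1000000,
+ * req_submit_ts = 1250000 and req_processed_ts = 4250000 give
+ * submit_latency_us = 250 and device_latency_us = 3000.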
+ */ + if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) { + ent[i].device_latency_us = 0; + } else { + ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts - + bo->perf_stats.req_submit_ts), 1000); + } + ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts - + bo->perf_stats.req_received_ts), 1000); + ent[i].queue_level_before = bo->perf_stats.queue_level_before; + ent[i].num_queue_element = bo->total_slice_nents; + drm_gem_object_put(obj); + } + + if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent))) + ret = -EFAULT; + +free_ent: + kfree(ent); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo) +{ + qaic_free_slices_bo(bo); + qaic_unprepare_bo(qdev, bo); + qaic_init_bo(bo, true); + list_del(&bo->bo_list); + drm_gem_object_put(&bo->base); +} + +int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_detach_slice *args = data; + int rcu_id, usr_rcu_id, qdev_rcu_id; + struct dma_bridge_chan *dbc; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + unsigned long flags; + struct qaic_bo *bo; + int ret; + + if (args->pad != 0) + return -EINVAL; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) { + ret = -ENOENT; + goto unlock_dev_srcu; + } + + bo = to_qaic_bo(obj); + ret = mutex_lock_interruptible(&bo->lock); + if (ret) + goto put_bo; + + if (!bo->sliced) { + ret = -EINVAL; + goto unlock_bo; + } + + dbc = bo->dbc; + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (dbc->usr != usr) { + ret = -EINVAL; + goto unlock_ch_srcu; + } + + /* Check if BO is committed to H/W for DMA */ + spin_lock_irqsave(&dbc->xfer_lock, flags); + if (bo_queued(bo)) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + ret = -EBUSY; + goto unlock_ch_srcu; + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + + detach_slice_bo(qdev, bo); + +unlock_ch_srcu: + srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_bo: + mutex_unlock(&bo->lock); +put_bo: + drm_gem_object_put(obj); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc) +{ + unsigned long flags; + struct qaic_bo *bo; + + spin_lock_irqsave(&dbc->xfer_lock, flags); + while (!list_empty(&dbc->xfer_list)) { + bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list); + list_del_init(&bo->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + bo->nr_slice_xfer_done = 0; + bo->req_id = 0; + bo->perf_stats.req_received_ts = 0; + bo->perf_stats.req_submit_ts = 0; + bo->perf_stats.req_processed_ts = 0; + bo->perf_stats.queue_level_before = 0; + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); + complete_all(&bo->xfer_done); + drm_gem_object_put(&bo->base); + spin_lock_irqsave(&dbc->xfer_lock, flags); + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); +} + +static void sync_empty_xfer_list(struct 
qaic_device *qdev, struct dma_bridge_chan *dbc)
+{
+ empty_xfer_list(qdev, dbc);
+ synchronize_srcu(&dbc->ch_lock);
+ /*
+ * Threads that were holding the channel lock may have added more
+ * elements to the xfer_list. Flush those elements out as well.
+ */
+ empty_xfer_list(qdev, dbc);
+}
+
+int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
+{
+ if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
+ return -EPERM;
+
+ qdev->dbc[dbc_id].usr = NULL;
+ synchronize_srcu(&qdev->dbc[dbc_id].ch_lock);
+ return 0;
+}
+
+/**
+ * enable_dbc - Enable the DBC
+ * @qdev: qaic device handle
+ * @dbc_id: ID of the DBC
+ * @usr: User context
+ *
+ * DBCs are disabled by removing the user's context from them. Adding the
+ * user context back to a DBC enables it. This function trusts the DBC ID
+ * passed in and expects the DBC to be disabled.
+ */
+void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
+{
+ qdev->dbc[dbc_id].usr = usr;
+}
+
+void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
+{
+ struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
+
+ dbc->usr = NULL;
+ sync_empty_xfer_list(qdev, dbc);
+}
+
+void release_dbc(struct qaic_device *qdev, u32 dbc_id)
+{
+ struct qaic_bo *bo, *bo_temp;
+ struct dma_bridge_chan *dbc;
+
+ dbc = &qdev->dbc[dbc_id];
+ if (!dbc->in_use)
+ return;
+
+ wakeup_dbc(qdev, dbc_id);
+
+ dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr);
+ dbc->total_size = 0;
+ dbc->req_q_base = NULL;
+ dbc->dma_addr = 0;
+ dbc->nelem = 0;
+ dbc->usr = NULL;
+
+ list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
+ drm_gem_object_get(&bo->base);
+ mutex_lock(&bo->lock);
+ detach_slice_bo(qdev, bo);
+ mutex_unlock(&bo->lock);
+ drm_gem_object_put(&bo->base);
+ }
+
+ dbc->in_use = false;
+ wake_up(&dbc->dbc_release);
+}
+
+void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
+{
+ if (!dbc || !head || !tail)
+ return;
+
+ *head = readl(dbc->dbc_base + REQHP_OFF);
+ *tail = readl(dbc->dbc_base + REQTP_OFF);
+}
+
+/*
+ * qaic_dbc_enter_ssr - Prepare to enter subsystem reset (SSR) for the given DBC ID.
+ * @qdev: qaic device handle
+ * @dbc_id: ID of the DBC which will enter SSR
+ *
+ * The device will automatically deactivate the workload, as not all errors
+ * can be silently recovered. The user will be notified and will need to
+ * decide on the required recovery action to take.
+ */
+void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id)
+{
+ qdev->ssr_dbc = dbc_id;
+ release_dbc(qdev, dbc_id);
+}
+
+/*
+ * qaic_dbc_exit_ssr - Prepare to exit subsystem reset (SSR).
+ * @qdev: qaic device handle
+ *
+ * The DBC returns to an operational state and begins accepting work after
+ * exiting SSR.
+ */
+void qaic_dbc_exit_ssr(struct qaic_device *qdev)
+{
+ qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+}
diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c
new file mode 100644
index 000000000000..8dc4fe5bb560
--- /dev/null
+++ b/drivers/accel/qaic/qaic_debugfs.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/mhi.h> +#include <linux/mutex.h> +#include <linux/overflow.h> +#include <linux/pci.h> +#include <linux/seq_file.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "qaic.h" +#include "qaic_debugfs.h" + +#define BOOTLOG_POOL_SIZE 16 +#define BOOTLOG_MSG_SIZE 512 +#define QAIC_DBC_DIR_NAME 9 + +struct bootlog_msg { + /* Buffer for bootlog messages */ + char str[BOOTLOG_MSG_SIZE]; + /* Root struct of device, used to access device resources */ + struct qaic_device *qdev; + /* Work struct to schedule work coming on QAIC_LOGGING channel */ + struct work_struct work; +}; + +struct bootlog_page { + /* Node in list of bootlog pages maintained by root device struct */ + struct list_head node; + /* Total size of the buffer that holds the bootlogs. It is PAGE_SIZE */ + unsigned int size; + /* Offset for the next bootlog */ + unsigned int offset; +}; + +static int bootlog_show(struct seq_file *s, void *unused) +{ + struct bootlog_page *page; + struct qaic_device *qdev; + void *page_end; + void *log; + + qdev = s->private; + mutex_lock(&qdev->bootlog_mutex); + list_for_each_entry(page, &qdev->bootlog, node) { + log = page + 1; + page_end = (void *)page + page->offset; + while (log < page_end) { + seq_printf(s, "%s", (char *)log); + log += strlen(log) + 1; + } + } + mutex_unlock(&qdev->bootlog_mutex); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(bootlog); + +static int fifo_size_show(struct seq_file *s, void *unused) +{ + struct dma_bridge_chan *dbc = s->private; + + seq_printf(s, "%u\n", dbc->nelem); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(fifo_size); + +static int queued_show(struct seq_file *s, void *unused) +{ + struct dma_bridge_chan *dbc = s->private; + u32 tail = 0, head = 0; + + qaic_data_get_fifo_info(dbc, &head, &tail); + + if (head == U32_MAX || tail == U32_MAX) + seq_printf(s, "%u\n", 0); + else if (head > tail) + seq_printf(s, "%u\n", dbc->nelem - head + tail); + else + seq_printf(s, "%u\n", tail - head); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(queued); + +void qaic_debugfs_init(struct qaic_drm_device *qddev) +{ + struct qaic_device *qdev = qddev->qdev; + struct dentry *debugfs_root; + struct dentry *debugfs_dir; + char name[QAIC_DBC_DIR_NAME]; + u32 i; + + debugfs_root = to_drm(qddev)->debugfs_root; + + debugfs_create_file("bootlog", 0400, debugfs_root, qdev, &bootlog_fops); + /* + * 256 dbcs per device is likely the max we will ever see and lets static checking see a + * reasonable range. 
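+ * With i capped below 256, "dbc%03u" produces at most "dbc255", which
+ * fits in the QAIC_DBC_DIR_NAME (9) byte buffer including the NUL.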
+ */ + for (i = 0; i < qdev->num_dbc && i < 256; ++i) { + snprintf(name, QAIC_DBC_DIR_NAME, "dbc%03u", i); + debugfs_dir = debugfs_create_dir(name, debugfs_root); + debugfs_create_file("fifo_size", 0400, debugfs_dir, &qdev->dbc[i], &fifo_size_fops); + debugfs_create_file("queued", 0400, debugfs_dir, &qdev->dbc[i], &queued_fops); + } +} + +static struct bootlog_page *alloc_bootlog_page(struct qaic_device *qdev) +{ + struct bootlog_page *page; + + page = (struct bootlog_page *)devm_get_free_pages(&qdev->pdev->dev, GFP_KERNEL, 0); + if (!page) + return page; + + page->size = PAGE_SIZE; + page->offset = sizeof(*page); + list_add_tail(&page->node, &qdev->bootlog); + + return page; +} + +static int reset_bootlog(struct qaic_device *qdev) +{ + struct bootlog_page *page; + struct bootlog_page *i; + + mutex_lock(&qdev->bootlog_mutex); + list_for_each_entry_safe(page, i, &qdev->bootlog, node) { + list_del(&page->node); + devm_free_pages(&qdev->pdev->dev, (unsigned long)page); + } + + page = alloc_bootlog_page(qdev); + mutex_unlock(&qdev->bootlog_mutex); + if (!page) + return -ENOMEM; + + return 0; +} + +static void *bootlog_get_space(struct qaic_device *qdev, unsigned int size) +{ + struct bootlog_page *page; + + page = list_last_entry(&qdev->bootlog, struct bootlog_page, node); + + if (size_add(size, sizeof(*page)) > page->size) + return NULL; + + if (page->offset + size > page->size) { + page = alloc_bootlog_page(qdev); + if (!page) + return NULL; + } + + return (void *)page + page->offset; +} + +static void bootlog_commit(struct qaic_device *qdev, unsigned int size) +{ + struct bootlog_page *page; + + page = list_last_entry(&qdev->bootlog, struct bootlog_page, node); + + page->offset += size; +} + +static void bootlog_log(struct work_struct *work) +{ + struct bootlog_msg *msg = container_of(work, struct bootlog_msg, work); + unsigned int len = strlen(msg->str) + 1; + struct qaic_device *qdev = msg->qdev; + void *log; + + mutex_lock(&qdev->bootlog_mutex); + log = bootlog_get_space(qdev, len); + if (log) { + memcpy(log, msg, len); + bootlog_commit(qdev, len); + } + mutex_unlock(&qdev->bootlog_mutex); + + if (mhi_queue_buf(qdev->bootlog_ch, DMA_FROM_DEVICE, msg, BOOTLOG_MSG_SIZE, MHI_EOT)) + devm_kfree(&qdev->pdev->dev, msg); +} + +static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct bootlog_msg *msg; + int i, ret; + + qdev->bootlog_wq = alloc_ordered_workqueue("qaic_bootlog", 0); + if (!qdev->bootlog_wq) { + ret = -ENOMEM; + goto out; + } + + ret = reset_bootlog(qdev); + if (ret) + goto destroy_workqueue; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + goto destroy_workqueue; + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->bootlog_ch = mhi_dev; + + for (i = 0; i < BOOTLOG_POOL_SIZE; i++) { + msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto mhi_unprepare; + } + + msg->qdev = qdev; + INIT_WORK(&msg->work, bootlog_log); + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, msg, BOOTLOG_MSG_SIZE, MHI_EOT); + if (ret) + goto mhi_unprepare; + } + + return 0; + +mhi_unprepare: + mhi_unprepare_from_transfer(mhi_dev); +destroy_workqueue: + destroy_workqueue(qdev->bootlog_wq); +out: + return ret; +} + +static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + + mhi_unprepare_from_transfer(qdev->bootlog_ch); + 
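/* destroy_workqueue() also drains any bootlog_log work still queued. */
+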
destroy_workqueue(qdev->bootlog_wq); + qdev->bootlog_ch = NULL; +} + +static void qaic_bootlog_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ +} + +static void qaic_bootlog_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct bootlog_msg *msg = mhi_result->buf_addr; + + if (mhi_result->transaction_status) { + devm_kfree(&qdev->pdev->dev, msg); + return; + } + + /* Force a null at the end of the transferred string */ + msg->str[mhi_result->bytes_xferd - 1] = 0; + + queue_work(qdev->bootlog_wq, &msg->work); +} + +static const struct mhi_device_id qaic_bootlog_mhi_match_table[] = { + { .chan = "QAIC_LOGGING", }, + {}, +}; + +static struct mhi_driver qaic_bootlog_mhi_driver = { + .id_table = qaic_bootlog_mhi_match_table, + .remove = qaic_bootlog_mhi_remove, + .probe = qaic_bootlog_mhi_probe, + .ul_xfer_cb = qaic_bootlog_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_bootlog_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_bootlog", + }, +}; + +int qaic_bootlog_register(void) +{ + return mhi_driver_register(&qaic_bootlog_mhi_driver); +} + +void qaic_bootlog_unregister(void) +{ + mhi_driver_unregister(&qaic_bootlog_mhi_driver); +} diff --git a/drivers/accel/qaic/qaic_debugfs.h b/drivers/accel/qaic/qaic_debugfs.h new file mode 100644 index 000000000000..05e74f84cf9f --- /dev/null +++ b/drivers/accel/qaic/qaic_debugfs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#ifndef __QAIC_DEBUGFS_H__ +#define __QAIC_DEBUGFS_H__ + +#include <drm/drm_file.h> + +#ifdef CONFIG_DEBUG_FS +int qaic_bootlog_register(void); +void qaic_bootlog_unregister(void); +void qaic_debugfs_init(struct qaic_drm_device *qddev); +#else +static inline int qaic_bootlog_register(void) { return 0; } +static inline void qaic_bootlog_unregister(void) {} +static inline void qaic_debugfs_init(struct qaic_drm_device *qddev) {} +#endif /* CONFIG_DEBUG_FS */ +#endif /* __QAIC_DEBUGFS_H__ */ diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c new file mode 100644 index 000000000000..4c70bd949d53 --- /dev/null +++ b/drivers/accel/qaic/qaic_drv.c @@ -0,0 +1,860 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/kobject.h> +#include <linux/kref.h> +#include <linux/mhi.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/wait.h> +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_managed.h> +#include <uapi/drm/qaic_accel.h> + +#include "mhi_controller.h" +#include "qaic.h" +#include "qaic_debugfs.h" +#include "qaic_ras.h" +#include "qaic_ssr.h" +#include "qaic_timesync.h" +#include "sahara.h" + +MODULE_IMPORT_NS("DMA_BUF"); + +#define PCI_DEVICE_ID_QCOM_AIC080 0xa080 +#define PCI_DEVICE_ID_QCOM_AIC100 0xa100 +#define PCI_DEVICE_ID_QCOM_AIC200 0xa110 +#define QAIC_NAME "qaic" +#define QAIC_DESC "Qualcomm Cloud AI Accelerators" +#define CNTL_MAJOR 5 +#define CNTL_MINOR 0 + +struct qaic_device_config { + /* Indicates the AIC family the device belongs to */ + int family; + /* A bitmask representing the available BARs */ + int bar_mask; + /* An index value used to identify the MHI controller BAR */ + unsigned int mhi_bar_idx; + /* An index value used to identify the DBCs BAR */ + unsigned int dbc_bar_idx; +}; + +static const struct qaic_device_config aic080_config = { + .family = FAMILY_AIC100, + .bar_mask = BIT(0) | BIT(2) | BIT(4), + .mhi_bar_idx = 0, + .dbc_bar_idx = 2, +}; + +static const struct qaic_device_config aic100_config = { + .family = FAMILY_AIC100, + .bar_mask = BIT(0) | BIT(2) | BIT(4), + .mhi_bar_idx = 0, + .dbc_bar_idx = 2, +}; + +static const struct qaic_device_config aic200_config = { + .family = FAMILY_AIC200, + .bar_mask = BIT(0) | BIT(1) | BIT(2) | BIT(4), + .mhi_bar_idx = 1, + .dbc_bar_idx = 2, +}; + +bool datapath_polling; +module_param(datapath_polling, bool, 0400); +MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); +static bool link_up; +static DEFINE_IDA(qaic_usrs); + +static void qaicm_wq_release(struct drm_device *dev, void *res) +{ + struct workqueue_struct *wq = res; + + destroy_workqueue(wq); +} + +static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *name) +{ + struct workqueue_struct *wq; + int ret; + + wq = alloc_workqueue("%s", WQ_UNBOUND, 0, name); + if (!wq) + return ERR_PTR(-ENOMEM); + ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq); + if (ret) + return ERR_PTR(ret); + + return wq; +} + +static void qaicm_srcu_release(struct drm_device *dev, void *res) +{ + struct srcu_struct *lock = res; + + cleanup_srcu_struct(lock); +} + +static int qaicm_srcu_init(struct drm_device *dev, struct srcu_struct *lock) +{ + int ret; + + ret = init_srcu_struct(lock); + if (ret) + return ret; + + return drmm_add_action_or_reset(dev, qaicm_srcu_release, lock); +} + +static void qaicm_pci_release(struct drm_device *dev, void *res) +{ + struct qaic_device *qdev = to_qaic_device(dev); + + pci_set_drvdata(qdev->pdev, NULL); +} + +static void free_usr(struct kref *kref) +{ + struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count); + + cleanup_srcu_struct(&usr->qddev_lock); + ida_free(&qaic_usrs, usr->handle); + kfree(usr); +} + +static int qaic_open(struct drm_device *dev, struct drm_file *file) +{ + struct qaic_drm_device *qddev = to_qaic_drm_device(dev); + struct qaic_device *qdev = qddev->qdev; + struct qaic_user *usr; + int 
rcu_id; + int ret; + + rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + ret = -ENODEV; + goto dev_unlock; + } + + usr = kmalloc(sizeof(*usr), GFP_KERNEL); + if (!usr) { + ret = -ENOMEM; + goto dev_unlock; + } + + usr->handle = ida_alloc(&qaic_usrs, GFP_KERNEL); + if (usr->handle < 0) { + ret = usr->handle; + goto free_usr; + } + usr->qddev = qddev; + atomic_set(&usr->chunk_id, 0); + init_srcu_struct(&usr->qddev_lock); + kref_init(&usr->ref_count); + + ret = mutex_lock_interruptible(&qddev->users_mutex); + if (ret) + goto cleanup_usr; + + list_add(&usr->node, &qddev->users); + mutex_unlock(&qddev->users_mutex); + + file->driver_priv = usr; + + srcu_read_unlock(&qdev->dev_lock, rcu_id); + return 0; + +cleanup_usr: + cleanup_srcu_struct(&usr->qddev_lock); + ida_free(&qaic_usrs, usr->handle); +free_usr: + kfree(usr); +dev_unlock: + srcu_read_unlock(&qdev->dev_lock, rcu_id); + return ret; +} + +static void qaic_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct qaic_user *usr = file->driver_priv; + struct qaic_drm_device *qddev; + struct qaic_device *qdev; + int qdev_rcu_id; + int usr_rcu_id; + int i; + + qddev = usr->qddev; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (qddev) { + qdev = qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state == QAIC_ONLINE) { + qaic_release_usr(qdev, usr); + for (i = 0; i < qdev->num_dbc; ++i) + if (qdev->dbc[i].usr && qdev->dbc[i].usr->handle == usr->handle) + release_dbc(qdev, i); + } + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + + mutex_lock(&qddev->users_mutex); + if (!list_empty(&usr->node)) + list_del_init(&usr->node); + mutex_unlock(&qddev->users_mutex); + } + + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + kref_put(&usr->ref_count, free_usr); + + file->driver_priv = NULL; +} + +DEFINE_DRM_ACCEL_FOPS(qaic_accel_fops); + +static const struct drm_ioctl_desc qaic_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(QAIC_MANAGE, qaic_manage_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_CREATE_BO, qaic_create_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_MMAP_BO, qaic_mmap_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_ATTACH_SLICE_BO, qaic_attach_slice_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_EXECUTE_BO, qaic_execute_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_DETACH_SLICE_BO, qaic_detach_slice_bo_ioctl, 0), +}; + +static const struct drm_driver qaic_accel_driver = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, + + .name = QAIC_NAME, + .desc = QAIC_DESC, + + .fops = &qaic_accel_fops, + .open = qaic_open, + .postclose = qaic_postclose, + + .ioctls = qaic_drm_ioctls, + .num_ioctls = ARRAY_SIZE(qaic_drm_ioctls), + .gem_prime_import = qaic_gem_prime_import, +}; + +static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) +{ + struct qaic_drm_device *qddev = qdev->qddev; + struct drm_device *drm = to_drm(qddev); + int ret; + + /* Hold off implementing partitions until the uapi is determined */ + if (partition_id != QAIC_NO_PARTITION) + return -EINVAL; + + qddev->partition_id = partition_id; + + ret = drm_dev_register(drm, 0); + if (ret) { + pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); + return ret; + } + + ret = qaic_sysfs_init(qddev); + if (ret) { + drm_dev_unregister(drm); + pci_dbg(qdev->pdev, "qaic_sysfs_init failed %d\n", ret); + return ret; + } + + 
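+	/*
+	 * debugfs setup is best effort; qaic_debugfs_init() returns void and
+	 * any failure inside it leaves the device fully functional.
+	 */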
qaic_debugfs_init(qddev); + + return ret; +} + +static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) +{ + struct qaic_drm_device *qddev = qdev->qddev; + struct drm_device *drm = to_drm(qddev); + struct qaic_user *usr; + + qaic_sysfs_remove(qddev); + drm_dev_unregister(drm); + qddev->partition_id = 0; + /* + * Existing users get unresolvable errors till they close FDs. + * Need to sync carefully with users calling close(). The + * list of users can be modified elsewhere when the lock isn't + * held here, but the sync'ing the srcu with the mutex held + * could deadlock. Grab the mutex so that the list will be + * unmodified. The user we get will exist as long as the + * lock is held. Signal that the qcdev is going away, and + * grab a reference to the user so they don't go away for + * synchronize_srcu(). Then release the mutex to avoid + * deadlock and make sure the user has observed the signal. + * With the lock released, we cannot maintain any state of the + * user list. + */ + mutex_lock(&qddev->users_mutex); + while (!list_empty(&qddev->users)) { + usr = list_first_entry(&qddev->users, struct qaic_user, node); + list_del_init(&usr->node); + kref_get(&usr->ref_count); + usr->qddev = NULL; + mutex_unlock(&qddev->users_mutex); + synchronize_srcu(&usr->qddev_lock); + kref_put(&usr->ref_count, free_usr); + mutex_lock(&qddev->users_mutex); + } + mutex_unlock(&qddev->users_mutex); +} + +static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + u16 major = -1, minor = -1; + struct qaic_device *qdev; + int ret; + + /* + * Invoking this function indicates that the control channel to the + * device is available. We use that as a signal to indicate that + * the device side firmware has booted. The device side firmware + * manages the device resources, so we need to communicate with it + * via the control channel in order to utilize the device. Therefore + * we wait until this signal to create the drm dev that userspace will + * use to control the device, because without the device side firmware, + * userspace can't do anything useful. + */ + + qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->cntl_ch = mhi_dev; + + ret = qaic_control_open(qdev); + if (ret) { + pci_dbg(qdev->pdev, "%s: control_open failed %d\n", __func__, ret); + return ret; + } + + qdev->dev_state = QAIC_BOOT; + ret = get_cntl_version(qdev, NULL, &major, &minor); + if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) { + pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). 
Ret: %d\n", + __func__, major, minor, CNTL_MAJOR, CNTL_MINOR, ret); + ret = -EINVAL; + goto close_control; + } + qdev->dev_state = QAIC_ONLINE; + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE); + + return ret; + +close_control: + qaic_control_close(qdev); + return ret; +} + +static void qaic_mhi_remove(struct mhi_device *mhi_dev) +{ +/* This is redundant since we have already observed the device crash */ +} + +static void qaic_notify_reset(struct qaic_device *qdev) +{ + int i; + + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE); + qdev->dev_state = QAIC_OFFLINE; + /* wake up any waiters to avoid waiting for timeouts at sync */ + wake_all_cntl(qdev); + for (i = 0; i < qdev->num_dbc; ++i) + wakeup_dbc(qdev, i); + synchronize_srcu(&qdev->dev_lock); +} + +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) +{ + int i; + + qaic_notify_reset(qdev); + + /* start tearing things down */ + qaic_clean_up_ssr(qdev); + for (i = 0; i < qdev->num_dbc; ++i) + release_dbc(qdev, i); +} + +static struct qaic_device *create_qdev(struct pci_dev *pdev, + const struct qaic_device_config *config) +{ + struct device *dev = &pdev->dev; + struct qaic_drm_device *qddev; + struct qaic_device *qdev; + struct drm_device *drm; + int i, ret; + + qdev = devm_kzalloc(dev, sizeof(*qdev), GFP_KERNEL); + if (!qdev) + return NULL; + + qdev->dev_state = QAIC_OFFLINE; + qdev->num_dbc = 16; + qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); + if (!qdev->dbc) + return NULL; + + qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm); + if (IS_ERR(qddev)) + return NULL; + + drm = to_drm(qddev); + pci_set_drvdata(pdev, qdev); + + ret = drmm_mutex_init(drm, &qddev->users_mutex); + if (ret) + return NULL; + ret = drmm_add_action_or_reset(drm, qaicm_pci_release, NULL); + if (ret) + return NULL; + ret = drmm_mutex_init(drm, &qdev->cntl_mutex); + if (ret) + return NULL; + ret = drmm_mutex_init(drm, &qdev->bootlog_mutex); + if (ret) + return NULL; + + qdev->cntl_wq = qaicm_wq_init(drm, "qaic_cntl"); + if (IS_ERR(qdev->cntl_wq)) + return NULL; + qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts"); + if (IS_ERR(qdev->qts_wq)) + return NULL; + qdev->ssr_wq = qaicm_wq_init(drm, "qaic_ssr"); + if (IS_ERR(qdev->ssr_wq)) + return NULL; + + ret = qaicm_srcu_init(drm, &qdev->dev_lock); + if (ret) + return NULL; + + ret = qaic_ssr_init(qdev, drm); + if (ret) + pci_info(pdev, "QAIC SSR crashdump collection not supported.\n"); + + qdev->qddev = qddev; + qdev->pdev = pdev; + qddev->qdev = qdev; + + INIT_LIST_HEAD(&qdev->cntl_xfer_list); + INIT_LIST_HEAD(&qdev->bootlog); + INIT_LIST_HEAD(&qddev->users); + + for (i = 0; i < qdev->num_dbc; ++i) { + spin_lock_init(&qdev->dbc[i].xfer_lock); + qdev->dbc[i].qdev = qdev; + qdev->dbc[i].id = i; + INIT_LIST_HEAD(&qdev->dbc[i].xfer_list); + ret = qaicm_srcu_init(drm, &qdev->dbc[i].ch_lock); + if (ret) + return NULL; + init_waitqueue_head(&qdev->dbc[i].dbc_release); + INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); + ret = drmm_mutex_init(drm, &qdev->dbc[i].req_lock); + if (ret) + return NULL; + } + + return qdev; +} + +static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev, + const struct qaic_device_config *config) +{ + int bars; + int ret; + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & 0x3f; + + /* make sure the device has the expected BARs */ + if (bars != config->bar_mask) { + pci_dbg(pdev, "%s: expected BARs %#x not found in device. 
Found %#x\n", + __func__, config->bar_mask, bars); + return -EINVAL; + } + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return ret; + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + + qdev->bar_mhi = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->mhi_bar_idx]); + if (IS_ERR(qdev->bar_mhi)) + return PTR_ERR(qdev->bar_mhi); + + qdev->bar_dbc = devm_ioremap_resource(&pdev->dev, &pdev->resource[config->dbc_bar_idx]); + if (IS_ERR(qdev->bar_dbc)) + return PTR_ERR(qdev->bar_dbc); + + /* Managed release since we use pcim_enable_device above */ + pci_set_master(pdev); + + return 0; +} + +static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev) +{ + int irq_count = qdev->num_dbc + 1; + int mhi_irq; + int ret; + int i; + + /* Managed release since we use pcim_enable_device */ + ret = pci_alloc_irq_vectors(pdev, irq_count, irq_count, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret == -ENOSPC) { + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) + return ret; + + /* + * Operate in one MSI mode. All interrupts will be directed to + * MSI0; every interrupt will wake up all the interrupt handlers + * (MHI and DBC[0-15]). Since the interrupt is now shared, it is + * not disabled during DBC threaded handler, but only one thread + * will be allowed to run per DBC, so while it can be + * interrupted, it shouldn't race with itself. + */ + qdev->single_msi = true; + pci_info(pdev, "Allocating %d MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n", + irq_count); + } else if (ret < 0) { + return ret; + } + + mhi_irq = pci_irq_vector(pdev, 0); + if (mhi_irq < 0) + return mhi_irq; + + for (i = 0; i < qdev->num_dbc; ++i) { + ret = devm_request_threaded_irq(&pdev->dev, + pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1), + dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED, + "qaic_dbc", &qdev->dbc[i]); + if (ret) + return ret; + + if (datapath_polling) { + qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 
0 : i + 1); + if (!qdev->single_msi) + disable_irq_nosync(qdev->dbc[i].irq); + INIT_WORK(&qdev->dbc[i].poll_work, qaic_irq_polling_work); + } + } + + return mhi_irq; +} + +static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct qaic_device_config *config = (struct qaic_device_config *)id->driver_data; + struct qaic_device *qdev; + int mhi_irq; + int ret; + int i; + + qdev = create_qdev(pdev, config); + if (!qdev) + return -ENOMEM; + + ret = init_pci(qdev, pdev, config); + if (ret) + return ret; + + for (i = 0; i < qdev->num_dbc; ++i) + qdev->dbc[i].dbc_base = qdev->bar_dbc + QAIC_DBC_OFF(i); + + mhi_irq = init_msi(qdev, pdev); + if (mhi_irq < 0) + return mhi_irq; + + ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); + if (ret) + return ret; + + qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_mhi, mhi_irq, + qdev->single_msi, config->family); + if (IS_ERR(qdev->mhi_cntrl)) { + ret = PTR_ERR(qdev->mhi_cntrl); + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); + return ret; + } + + return 0; +} + +static void qaic_pci_remove(struct pci_dev *pdev) +{ + struct qaic_device *qdev = pci_get_drvdata(pdev); + + if (!qdev) + return; + + qaic_dev_reset_clean_local_state(qdev); + qaic_mhi_free_controller(qdev->mhi_cntrl, link_up); + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); +} + +static void qaic_pci_shutdown(struct pci_dev *pdev) +{ + /* see qaic_exit for what link_up is doing */ + link_up = true; + qaic_pci_remove(pdev); +} + +static pci_ers_result_t qaic_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t error) +{ + return PCI_ERS_RESULT_NEED_RESET; +} + +static void qaic_pci_reset_prepare(struct pci_dev *pdev) +{ + struct qaic_device *qdev = pci_get_drvdata(pdev); + + qaic_notify_reset(qdev); + qaic_mhi_start_reset(qdev->mhi_cntrl); + qaic_dev_reset_clean_local_state(qdev); +} + +static void qaic_pci_reset_done(struct pci_dev *pdev) +{ + struct qaic_device *qdev = pci_get_drvdata(pdev); + + qaic_mhi_reset_done(qdev->mhi_cntrl); +} + +static const struct mhi_device_id qaic_mhi_match_table[] = { + { .chan = "QAIC_CONTROL", }, + {}, +}; + +static struct mhi_driver qaic_mhi_driver = { + .id_table = qaic_mhi_match_table, + .remove = qaic_mhi_remove, + .probe = qaic_mhi_probe, + .ul_xfer_cb = qaic_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_mhi", + }, +}; + +static const struct pci_device_id qaic_ids[] = { + { PCI_DEVICE_DATA(QCOM, AIC080, (kernel_ulong_t)&aic080_config), }, + { PCI_DEVICE_DATA(QCOM, AIC100, (kernel_ulong_t)&aic100_config), }, + { PCI_DEVICE_DATA(QCOM, AIC200, (kernel_ulong_t)&aic200_config), }, + { } +}; +MODULE_DEVICE_TABLE(pci, qaic_ids); + +static const struct pci_error_handlers qaic_pci_err_handler = { + .error_detected = qaic_pci_error_detected, + .reset_prepare = qaic_pci_reset_prepare, + .reset_done = qaic_pci_reset_done, +}; + +static bool qaic_is_under_reset(struct qaic_device *qdev) +{ + int rcu_id; + bool ret; + + rcu_id = srcu_read_lock(&qdev->dev_lock); + ret = qdev->dev_state != QAIC_ONLINE; + srcu_read_unlock(&qdev->dev_lock, rcu_id); + return ret; +} + +static bool qaic_data_path_busy(struct qaic_device *qdev) +{ + bool ret = false; + int dev_rcu_id; + int i; + + dev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + srcu_read_unlock(&qdev->dev_lock, dev_rcu_id); + return false; + } + for (i = 0; i < qdev->num_dbc; i++) { + struct dma_bridge_chan *dbc = &qdev->dbc[i]; + unsigned long flags; + int ch_rcu_id; + + ch_rcu_id = 
srcu_read_lock(&dbc->ch_lock); + if (!dbc->usr || !dbc->in_use) { + srcu_read_unlock(&dbc->ch_lock, ch_rcu_id); + continue; + } + spin_lock_irqsave(&dbc->xfer_lock, flags); + ret = !list_empty(&dbc->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + srcu_read_unlock(&dbc->ch_lock, ch_rcu_id); + if (ret) + break; + } + srcu_read_unlock(&qdev->dev_lock, dev_rcu_id); + return ret; +} + +static int qaic_pm_suspend(struct device *dev) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + dev_dbg(dev, "Suspending..\n"); + if (qaic_data_path_busy(qdev)) { + dev_dbg(dev, "Device's datapath is busy. Aborting suspend..\n"); + return -EBUSY; + } + if (qaic_is_under_reset(qdev)) { + dev_dbg(dev, "Device is under reset. Aborting suspend..\n"); + return -EBUSY; + } + qaic_mqts_ch_stop_timer(qdev->mqts_ch); + qaic_pci_reset_prepare(qdev->pdev); + pci_save_state(qdev->pdev); + pci_disable_device(qdev->pdev); + pci_set_power_state(qdev->pdev, PCI_D3hot); + return 0; +} + +static int qaic_pm_resume(struct device *dev) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + int ret; + + dev_dbg(dev, "Resuming..\n"); + pci_set_power_state(qdev->pdev, PCI_D0); + pci_restore_state(qdev->pdev); + ret = pci_enable_device(qdev->pdev); + if (ret) { + dev_err(dev, "pci_enable_device failed on resume %d\n", ret); + return ret; + } + pci_set_master(qdev->pdev); + qaic_pci_reset_done(qdev->pdev); + return 0; +} + +static const struct dev_pm_ops qaic_pm_ops = { + SYSTEM_SLEEP_PM_OPS(qaic_pm_suspend, qaic_pm_resume) +}; + +static struct pci_driver qaic_pci_driver = { + .name = QAIC_NAME, + .id_table = qaic_ids, + .probe = qaic_pci_probe, + .remove = qaic_pci_remove, + .shutdown = qaic_pci_shutdown, + .err_handler = &qaic_pci_err_handler, + .driver = { + .pm = pm_sleep_ptr(&qaic_pm_ops), + }, +}; + +static int __init qaic_init(void) +{ + int ret; + + ret = pci_register_driver(&qaic_pci_driver); + if (ret) { + pr_debug("qaic: pci_register_driver failed %d\n", ret); + return ret; + } + + ret = mhi_driver_register(&qaic_mhi_driver); + if (ret) { + pr_debug("qaic: mhi_driver_register failed %d\n", ret); + goto free_pci; + } + + ret = sahara_register(); + if (ret) { + pr_debug("qaic: sahara_register failed %d\n", ret); + goto free_mhi; + } + + ret = qaic_timesync_init(); + if (ret) + pr_debug("qaic: qaic_timesync_init failed %d\n", ret); + + ret = qaic_bootlog_register(); + if (ret) + pr_debug("qaic: qaic_bootlog_register failed %d\n", ret); + + ret = qaic_ras_register(); + if (ret) + pr_debug("qaic: qaic_ras_register failed %d\n", ret); + ret = qaic_ssr_register(); + if (ret) { + pr_debug("qaic: qaic_ssr_register failed %d\n", ret); + goto free_bootlog; + } + + return 0; + +free_bootlog: + qaic_bootlog_unregister(); +free_mhi: + mhi_driver_unregister(&qaic_mhi_driver); +free_pci: + pci_unregister_driver(&qaic_pci_driver); + return ret; +} + +static void __exit qaic_exit(void) +{ + /* + * We assume that qaic_pci_remove() is called due to a hotplug event + * which would mean that the link is down, and thus + * qaic_mhi_free_controller() should not try to access the device during + * cleanup. + * We call pci_unregister_driver() below, which also triggers + * qaic_pci_remove(), but since this is module exit, we expect the link + * to the device to be up, in which case qaic_mhi_free_controller() + * should try to access the device during cleanup to put the device in + * a sane state. + * For that reason, we set link_up here to let qaic_mhi_free_controller + * know the expected link state. 
Since the module is going to be + * removed at the end of this, we don't need to worry about + * reinitializing the link_up state after the cleanup is done. + */ + link_up = true; + qaic_ssr_unregister(); + qaic_ras_unregister(); + qaic_bootlog_unregister(); + qaic_timesync_deinit(); + sahara_unregister(); + mhi_driver_unregister(&qaic_mhi_driver); + pci_unregister_driver(&qaic_pci_driver); +} + +module_init(qaic_init); +module_exit(qaic_exit); + +MODULE_AUTHOR(QAIC_DESC " Kernel Driver Team"); +MODULE_DESCRIPTION(QAIC_DESC " Accel Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c new file mode 100644 index 000000000000..f1d52a710136 --- /dev/null +++ b/drivers/accel/qaic/qaic_ras.c @@ -0,0 +1,642 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ +/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ + +#include <asm/byteorder.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/mhi.h> + +#include "qaic.h" +#include "qaic_ras.h" + +#define MAGIC 0x55AA +#define VERSION 0x2 +#define HDR_SZ 12 +#define NUM_TEMP_LVL 3 +#define POWER_BREAK BIT(0) + +enum msg_type { + MSG_PUSH, /* async push from device */ + MSG_REQ, /* sync request to device */ + MSG_RESP, /* sync response from device */ +}; + +enum err_type { + CE, /* correctable error */ + UE, /* uncorrectable error */ + UE_NF, /* uncorrectable error that is non-fatal, expect a disruption */ + ERR_TYPE_MAX, +}; + +static const char * const err_type_str[] = { + [CE] = "Correctable", + [UE] = "Uncorrectable", + [UE_NF] = "Uncorrectable Non-Fatal", +}; + +static const char * const err_class_str[] = { + [CE] = "Warning", + [UE] = "Fatal", + [UE_NF] = "Warning", +}; + +enum err_source { + SOC_MEM, + PCIE, + DDR, + SYS_BUS1, + SYS_BUS2, + NSP_MEM, + TSENS, +}; + +static const char * const err_src_str[TSENS + 1] = { + [SOC_MEM] = "SoC Memory", + [PCIE] = "PCIE", + [DDR] = "DDR", + [SYS_BUS1] = "System Bus source 1", + [SYS_BUS2] = "System Bus source 2", + [NSP_MEM] = "NSP Memory", + [TSENS] = "Temperature Sensors", +}; + +struct ras_data { + /* header start */ + /* Magic number to validate the message */ + u16 magic; + /* RAS version number */ + u16 ver; + u32 seq_num; + /* RAS message type */ + u8 type; + u8 id; + /* Size of RAS message without the header in byte */ + u16 len; + /* header end */ + s32 result; + /* + * Error source + * 0 : SoC Memory + * 1 : PCIE + * 2 : DDR + * 3 : System Bus source 1 + * 4 : System Bus source 2 + * 5 : NSP Memory + * 6 : Temperature Sensors + */ + u32 source; + /* + * Stores the error type, there are three types of error in RAS + * 0 : correctable error (CE) + * 1 : uncorrectable error (UE) + * 2 : uncorrectable error that is non-fatal (UE_NF) + */ + u32 err_type; + u32 err_threshold; + u32 ce_count; + u32 ue_count; + u32 intr_num; + /* Data specific to error source */ + u8 syndrome[64]; +} __packed; + +struct soc_mem_syndrome { + u64 error_address[8]; +} __packed; + +struct nsp_mem_syndrome { + u32 error_address[8]; + u8 nsp_id; +} __packed; + +struct ddr_syndrome { + u32 count; + u32 irq_status; + u32 data_31_0[2]; + u32 data_63_32[2]; + u32 data_95_64[2]; + u32 data_127_96[2]; + u32 addr_lsb; + u16 addr_msb; + u16 parity_bits; + u16 instance; + u16 err_type; +} __packed; + +struct tsens_syndrome { + u32 threshold_type; + s32 temp; +} __packed; + +struct 
sysbus1_syndrome { + u32 slave; + u32 err_type; + u16 addr[8]; + u8 instance; +} __packed; + +struct sysbus2_syndrome { + u32 lsb3; + u32 msb3; + u32 lsb2; + u32 msb2; + u32 ext_id; + u16 path; + u16 op_type; + u16 len; + u16 redirect; + u8 valid; + u8 word_error; + u8 non_secure; + u8 opc; + u8 error_code; + u8 trans_type; + u8 addr_space; + u8 instance; +} __packed; + +struct pcie_syndrome { + /* CE info */ + u32 bad_tlp; + u32 bad_dllp; + u32 replay_rollover; + u32 replay_timeout; + u32 rx_err; + u32 internal_ce_count; + /* UE_NF info */ + u32 fc_timeout; + u32 poison_tlp; + u32 ecrc_err; + u32 unsupported_req; + u32 completer_abort; + u32 completion_timeout; + /* UE info */ + u32 addr; + u8 index; + /* + * Flag to indicate specific event of PCIe + * BIT(0): Power break (low power) + * BIT(1) to BIT(7): Reserved + */ + u8 flag; +} __packed; + +static const char * const threshold_type_str[NUM_TEMP_LVL] = { + [0] = "lower", + [1] = "upper", + [2] = "critical", +}; + +static void ras_msg_to_cpu(struct ras_data *msg) +{ + struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; + struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; + struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; + struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; + struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; + struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; + struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; + int i; + + le16_to_cpus(&msg->magic); + le16_to_cpus(&msg->ver); + le32_to_cpus(&msg->seq_num); + le16_to_cpus(&msg->len); + le32_to_cpus(&msg->result); + le32_to_cpus(&msg->source); + le32_to_cpus(&msg->err_type); + le32_to_cpus(&msg->err_threshold); + le32_to_cpus(&msg->ce_count); + le32_to_cpus(&msg->ue_count); + le32_to_cpus(&msg->intr_num); + + switch (msg->source) { + case SOC_MEM: + for (i = 0; i < 8; i++) + le64_to_cpus(&soc_syndrome->error_address[i]); + break; + case PCIE: + le32_to_cpus(&pcie_syndrome->bad_tlp); + le32_to_cpus(&pcie_syndrome->bad_dllp); + le32_to_cpus(&pcie_syndrome->replay_rollover); + le32_to_cpus(&pcie_syndrome->replay_timeout); + le32_to_cpus(&pcie_syndrome->rx_err); + le32_to_cpus(&pcie_syndrome->internal_ce_count); + le32_to_cpus(&pcie_syndrome->fc_timeout); + le32_to_cpus(&pcie_syndrome->poison_tlp); + le32_to_cpus(&pcie_syndrome->ecrc_err); + le32_to_cpus(&pcie_syndrome->unsupported_req); + le32_to_cpus(&pcie_syndrome->completer_abort); + le32_to_cpus(&pcie_syndrome->completion_timeout); + le32_to_cpus(&pcie_syndrome->addr); + break; + case DDR: + le16_to_cpus(&ddr_syndrome->instance); + le16_to_cpus(&ddr_syndrome->err_type); + le32_to_cpus(&ddr_syndrome->count); + le32_to_cpus(&ddr_syndrome->irq_status); + le32_to_cpus(&ddr_syndrome->data_31_0[0]); + le32_to_cpus(&ddr_syndrome->data_31_0[1]); + le32_to_cpus(&ddr_syndrome->data_63_32[0]); + le32_to_cpus(&ddr_syndrome->data_63_32[1]); + le32_to_cpus(&ddr_syndrome->data_95_64[0]); + le32_to_cpus(&ddr_syndrome->data_95_64[1]); + le32_to_cpus(&ddr_syndrome->data_127_96[0]); + le32_to_cpus(&ddr_syndrome->data_127_96[1]); + le16_to_cpus(&ddr_syndrome->parity_bits); + le16_to_cpus(&ddr_syndrome->addr_msb); + le32_to_cpus(&ddr_syndrome->addr_lsb); + break; + case SYS_BUS1: + le32_to_cpus(&sysbus1_syndrome->slave); + le32_to_cpus(&sysbus1_syndrome->err_type); + for (i = 0; i < 8; i++) + 
le16_to_cpus(&sysbus1_syndrome->addr[i]); + break; + case SYS_BUS2: + le16_to_cpus(&sysbus2_syndrome->op_type); + le16_to_cpus(&sysbus2_syndrome->len); + le16_to_cpus(&sysbus2_syndrome->redirect); + le16_to_cpus(&sysbus2_syndrome->path); + le32_to_cpus(&sysbus2_syndrome->ext_id); + le32_to_cpus(&sysbus2_syndrome->lsb2); + le32_to_cpus(&sysbus2_syndrome->msb2); + le32_to_cpus(&sysbus2_syndrome->lsb3); + le32_to_cpus(&sysbus2_syndrome->msb3); + break; + case NSP_MEM: + for (i = 0; i < 8; i++) + le32_to_cpus(&nsp_syndrome->error_address[i]); + break; + case TSENS: + le32_to_cpus(&tsens_syndrome->threshold_type); + le32_to_cpus(&tsens_syndrome->temp); + break; + } +} + +static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) +{ + struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; + struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; + struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; + struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; + struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; + struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; + struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; + char *class; + char *level; + + if (msg->magic != MAGIC) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic); + return; + } + + if (!msg->ver || msg->ver > VERSION) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver); + return; + } + + if (msg->type != MSG_PUSH) { + pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n"); + return; + } + + if (msg->len != sizeof(*msg) - HDR_SZ) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len); + return; + } + + if (msg->err_type >= ERR_TYPE_MAX) { + pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type); + return; + } + + if (msg->err_type == UE) + level = KERN_ERR; + else + level = KERN_WARNING; + + switch (msg->source) { + case SOC_MEM: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + soc_syndrome->error_address[0], + soc_syndrome->error_address[1], + soc_syndrome->error_address[2], + soc_syndrome->error_address[3], + soc_syndrome->error_address[4], + soc_syndrome->error_address[5], + soc_syndrome->error_address[6], + soc_syndrome->error_address[7]); + break; + case PCIE: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold); + + switch (msg->err_type) { + case CE: + /* + * Modeled after AER prints. This continues the dev_printk() from a few + * lines up. We reduce duplication of code, but also avoid re-printing the + * PCI device info so that the end result looks uniform to the log user. 
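+	 * A CE report therefore renders as one block in the log, e.g. the
+	 * "qaic <bdf>: RAS event." header with the Class, Description and
+	 * threshold lines from the dev_printk() above, followed by the
+	 * "Syndrome:" counter lines from the printk() below.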
+ */ + printk(KERN_WARNING pr_fmt("Syndrome:\n Bad TLP count %d\n Bad DLLP count %d\n Replay Rollover count %d\n Replay Timeout count %d\n Recv Error count %d\n Internal CE count %d\n"), + pcie_syndrome->bad_tlp, + pcie_syndrome->bad_dllp, + pcie_syndrome->replay_rollover, + pcie_syndrome->replay_timeout, + pcie_syndrome->rx_err, + pcie_syndrome->internal_ce_count); + if (msg->ver > 0x1) + pr_warn(" Power break %s\n", + pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF"); + break; + case UE: + printk(KERN_ERR pr_fmt("Syndrome:\n Index %d\n Address 0x%x\n"), + pcie_syndrome->index, pcie_syndrome->addr); + break; + case UE_NF: + printk(KERN_WARNING pr_fmt("Syndrome:\n FC timeout count %d\n Poisoned TLP count %d\n ECRC error count %d\n Unsupported request count %d\n Completer abort count %d\n Completion timeout count %d\n"), + pcie_syndrome->fc_timeout, + pcie_syndrome->poison_tlp, + pcie_syndrome->ecrc_err, + pcie_syndrome->unsupported_req, + pcie_syndrome->completer_abort, + pcie_syndrome->completion_timeout); + break; + default: + break; + } + break; + case DDR: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n Instance %d\n Count %d\n Data 31_0 0x%x 0x%x\n Data 63_32 0x%x 0x%x\n Data 95_64 0x%x 0x%x\n Data 127_96 0x%x 0x%x\n Parity bits 0x%x\n Address msb 0x%x\n Address lsb 0x%x\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + ddr_syndrome->instance, + ddr_syndrome->count, + ddr_syndrome->data_31_0[1], + ddr_syndrome->data_31_0[0], + ddr_syndrome->data_63_32[1], + ddr_syndrome->data_63_32[0], + ddr_syndrome->data_95_64[1], + ddr_syndrome->data_95_64[0], + ddr_syndrome->data_127_96[1], + ddr_syndrome->data_127_96[0], + ddr_syndrome->parity_bits, + ddr_syndrome->addr_msb, + ddr_syndrome->addr_lsb); + break; + case SYS_BUS1: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n %s\n err_type %d\n address0 0x%x\n address1 0x%x\n address2 0x%x\n address3 0x%x\n address4 0x%x\n address5 0x%x\n address6 0x%x\n address7 0x%x\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + sysbus1_syndrome->instance, + sysbus1_syndrome->slave ? 
"Slave" : "Master", + sysbus1_syndrome->err_type, + sysbus1_syndrome->addr[0], + sysbus1_syndrome->addr[1], + sysbus1_syndrome->addr[2], + sysbus1_syndrome->addr[3], + sysbus1_syndrome->addr[4], + sysbus1_syndrome->addr[5], + sysbus1_syndrome->addr[6], + sysbus1_syndrome->addr[7]); + break; + case SYS_BUS2: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n valid %d\n word error %d\n non-secure %d\n opc %d\n error code %d\n transaction type %d\n address space %d\n operation type %d\n len %d\n redirect %d\n path %d\n ext_id %d\n lsb2 %d\n msb2 %d\n lsb3 %d\n msb3 %d\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + sysbus2_syndrome->instance, + sysbus2_syndrome->valid, + sysbus2_syndrome->word_error, + sysbus2_syndrome->non_secure, + sysbus2_syndrome->opc, + sysbus2_syndrome->error_code, + sysbus2_syndrome->trans_type, + sysbus2_syndrome->addr_space, + sysbus2_syndrome->op_type, + sysbus2_syndrome->len, + sysbus2_syndrome->redirect, + sysbus2_syndrome->path, + sysbus2_syndrome->ext_id, + sysbus2_syndrome->lsb2, + sysbus2_syndrome->msb2, + sysbus2_syndrome->lsb3, + sysbus2_syndrome->msb3); + break; + case NSP_MEM: + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n NSP ID %d\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n", + err_class_str[msg->err_type], + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + nsp_syndrome->nsp_id, + nsp_syndrome->error_address[0], + nsp_syndrome->error_address[1], + nsp_syndrome->error_address[2], + nsp_syndrome->error_address[3], + nsp_syndrome->error_address[4], + nsp_syndrome->error_address[5], + nsp_syndrome->error_address[6], + nsp_syndrome->error_address[7]); + break; + case TSENS: + if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) { + pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n", + tsens_syndrome->threshold_type); + break; + } + + if (msg->err_type) + class = "Fatal"; + else if (tsens_syndrome->threshold_type) + class = "Critical"; + else + class = "Warning"; + + dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n %s threshold\n %d deg C\n", + class, + err_type_str[msg->err_type], + "error from", + err_src_str[msg->source], + msg->err_threshold, + threshold_type_str[tsens_syndrome->threshold_type], + tsens_syndrome->temp); + break; + } + + /* Uncorrectable errors are fatal */ + if (msg->err_type == UE) + mhi_soc_reset(qdev->mhi_cntrl); + + switch (msg->err_type) { + case CE: + if (qdev->ce_count != UINT_MAX) + qdev->ce_count++; + break; + case UE: + if (qdev->ce_count != UINT_MAX) + qdev->ue_count++; + break; + case UE_NF: + if (qdev->ce_count != UINT_MAX) + qdev->ue_nf_count++; + break; + default: + /* not possible */ + break; + } +} + +static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + return sysfs_emit(buf, "%d\n", qdev->ce_count); +} + +static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + return sysfs_emit(buf, "%d\n", qdev->ue_count); +} + +static ssize_t ue_nonfatal_count_show(struct device *dev, struct 
device_attribute *attr, char *buf) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + return sysfs_emit(buf, "%d\n", qdev->ue_nf_count); +} + +static DEVICE_ATTR_RO(ce_count); +static DEVICE_ATTR_RO(ue_count); +static DEVICE_ATTR_RO(ue_nonfatal_count); + +static struct attribute *ras_attrs[] = { + &dev_attr_ce_count.attr, + &dev_attr_ue_count.attr, + &dev_attr_ue_nonfatal_count.attr, + NULL, +}; + +static struct attribute_group ras_group = { + .attrs = ras_attrs, +}; + +static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct ras_data *resp; + int ret; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) { + mhi_unprepare_from_transfer(mhi_dev); + return -ENOMEM; + } + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT); + if (ret) { + kfree(resp); + mhi_unprepare_from_transfer(mhi_dev); + return ret; + } + + ret = device_add_group(&qdev->pdev->dev, &ras_group); + if (ret) { + mhi_unprepare_from_transfer(mhi_dev); + pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret); + return ret; + } + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->ras_ch = mhi_dev; + + return ret; +} + +static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + qdev->ras_ch = NULL; + device_remove_group(&qdev->pdev->dev, &ras_group); + mhi_unprepare_from_transfer(mhi_dev); +} + +static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {} + +static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct ras_data *msg = mhi_result->buf_addr; + int ret; + + if (mhi_result->transaction_status) { + kfree(msg); + return; + } + + ras_msg_to_cpu(msg); + decode_ras_msg(qdev, msg); + + ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT); + if (ret) { + dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret); + kfree(msg); + } +} + +static const struct mhi_device_id qaic_ras_mhi_match_table[] = { + { .chan = "QAIC_STATUS", }, + {}, +}; + +static struct mhi_driver qaic_ras_mhi_driver = { + .id_table = qaic_ras_mhi_match_table, + .remove = qaic_ras_mhi_remove, + .probe = qaic_ras_mhi_probe, + .ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_ras", + }, +}; + +int qaic_ras_register(void) +{ + return mhi_driver_register(&qaic_ras_mhi_driver); +} + +void qaic_ras_unregister(void) +{ + mhi_driver_unregister(&qaic_ras_mhi_driver); +} diff --git a/drivers/accel/qaic/qaic_ras.h b/drivers/accel/qaic/qaic_ras.h new file mode 100644 index 000000000000..d44a4eeeb060 --- /dev/null +++ b/drivers/accel/qaic/qaic_ras.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */ + +#ifndef __QAIC_RAS_H__ +#define __QAIC_RAS_H__ + +int qaic_ras_register(void); +void qaic_ras_unregister(void); + +#endif /* __QAIC_RAS_H__ */ diff --git a/drivers/accel/qaic/qaic_ssr.c b/drivers/accel/qaic/qaic_ssr.c new file mode 100644 index 000000000000..9b662d690371 --- /dev/null +++ b/drivers/accel/qaic/qaic_ssr.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2020-2021, The Linux Foundation. 
All rights reserved. */ +/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include <asm/byteorder.h> +#include <drm/drm_file.h> +#include <drm/drm_managed.h> +#include <linux/devcoredump.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/mhi.h> +#include <linux/workqueue.h> + +#include "qaic.h" +#include "qaic_ssr.h" + +#define SSR_RESP_MSG_SZ 32 +#define SSR_MHI_BUF_SIZE SZ_64K +#define SSR_MEM_READ_DATA_SIZE ((u64)SSR_MHI_BUF_SIZE - sizeof(struct ssr_crashdump)) +#define SSR_MEM_READ_CHUNK_SIZE ((u64)SSR_MEM_READ_DATA_SIZE - sizeof(struct ssr_memory_read_rsp)) + +#define DEBUG_TRANSFER_INFO BIT(0) +#define DEBUG_TRANSFER_INFO_RSP BIT(1) +#define MEMORY_READ BIT(2) +#define MEMORY_READ_RSP BIT(3) +#define DEBUG_TRANSFER_DONE BIT(4) +#define DEBUG_TRANSFER_DONE_RSP BIT(5) +#define SSR_EVENT BIT(8) +#define SSR_EVENT_RSP BIT(9) + +#define SSR_EVENT_NACK BIT(0) +#define BEFORE_SHUTDOWN BIT(1) +#define AFTER_SHUTDOWN BIT(2) +#define BEFORE_POWER_UP BIT(3) +#define AFTER_POWER_UP BIT(4) + +struct debug_info_table { + /* Save preferences. Default is mandatory */ + u64 save_perf; + /* Base address of the debug region */ + u64 mem_base; + /* Size of debug region in bytes */ + u64 len; + /* Description */ + char desc[20]; + /* Filename of debug region */ + char filename[20]; +}; + +struct _ssr_hdr { + __le32 cmd; + __le32 len; + __le32 dbc_id; +}; + +struct ssr_hdr { + u32 cmd; + u32 len; + u32 dbc_id; +}; + +struct ssr_debug_transfer_info { + struct ssr_hdr hdr; + u32 resv; + u64 tbl_addr; + u64 tbl_len; +} __packed; + +struct ssr_debug_transfer_info_rsp { + struct _ssr_hdr hdr; + __le32 ret; +} __packed; + +struct ssr_memory_read { + struct _ssr_hdr hdr; + __le32 resv; + __le64 addr; + __le64 len; +} __packed; + +struct ssr_memory_read_rsp { + struct _ssr_hdr hdr; + __le32 resv; + u8 data[]; +} __packed; + +struct ssr_debug_transfer_done { + struct _ssr_hdr hdr; + __le32 resv; +} __packed; + +struct ssr_debug_transfer_done_rsp { + struct _ssr_hdr hdr; + __le32 ret; +} __packed; + +struct ssr_event { + struct ssr_hdr hdr; + u32 event; +} __packed; + +struct ssr_event_rsp { + struct _ssr_hdr hdr; + __le32 event; +} __packed; + +struct ssr_resp { + /* Work struct to schedule work coming on QAIC_SSR channel */ + struct work_struct work; + /* Root struct of device, used to access device resources */ + struct qaic_device *qdev; + /* Buffer used by MHI for transfer requests */ + u8 data[] __aligned(8); +}; + +/* SSR crashdump book keeping structure */ +struct ssr_dump_info { + /* DBC associated with this SSR crashdump */ + struct dma_bridge_chan *dbc; + /* + * It will be used when we complete the crashdump download and switch + * to waiting on SSR events + */ + struct ssr_resp *resp; + /* MEMORY READ request MHI buffer.*/ + struct ssr_memory_read *read_buf_req; + /* TRUE: ->read_buf_req is queued for MHI transaction. 
FALSE: Otherwise */
+ bool read_buf_req_queued;
+ /* Address of table in host */
+ void *tbl_addr;
+ /* Total size of table */
+ u64 tbl_len;
+ /* Offset of table(->tbl_addr) where the new chunk will be dumped */
+ u64 tbl_off;
+ /* Address of table in device/target */
+ u64 tbl_addr_dev;
+ /* Ptr to the entire dump */
+ void *dump_addr;
+ /* Entire crashdump size */
+ u64 dump_sz;
+ /* Offset of crashdump(->dump_addr) where the new chunk will be dumped */
+ u64 dump_off;
+ /* Points to the table entry we are currently downloading */
+ struct debug_info_table *tbl_ent;
+ /* Offset in the current table entry(->tbl_ent) for the next chunk */
+ u64 tbl_ent_off;
+};
+
+struct ssr_crashdump {
+ /*
+ * Points to a bookkeeping struct maintained by the MHI SSR device while
+ * downloading an SSR crashdump. It is NULL when no crashdump download
+ * is in progress.
+ */
+ struct ssr_dump_info *dump_info;
+ /* Work struct to schedule work coming on QAIC_SSR channel */
+ struct work_struct work;
+ /* Root struct of device, used to access device resources */
+ struct qaic_device *qdev;
+ /* Buffer used by MHI for transfer requests */
+ u8 data[];
+};
+
+#define QAIC_SSR_DUMP_V1_MAGIC 0x1234567890abcdef
+#define QAIC_SSR_DUMP_V1_VER 1
+struct dump_file_meta {
+ u64 magic;
+ u64 version;
+ u64 size; /* Total size of the entire dump */
+ u64 tbl_len; /* Length of the table in bytes */
+};
+
+/*
+ * Layout of crashdump
+ * +------------------------------------------+
+ * | Crashdump Meta structure |
+ * | type: struct dump_file_meta |
+ * +------------------------------------------+
+ * | Crashdump Table |
+ * | type: array of struct debug_info_table |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ * | Crashdump |
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ */
+
+static void free_ssr_dump_info(struct ssr_crashdump *ssr_crash)
+{
+ struct ssr_dump_info *dump_info = ssr_crash->dump_info;
+
+ ssr_crash->dump_info = NULL;
+ if (!dump_info)
+ return;
+ if (!dump_info->read_buf_req_queued)
+ kfree(dump_info->read_buf_req);
+ vfree(dump_info->tbl_addr);
+ vfree(dump_info->dump_addr);
+ kfree(dump_info);
+}
+
+void qaic_clean_up_ssr(struct qaic_device *qdev)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+
+ if (!ssr_crash)
+ return;
+
+ qaic_dbc_exit_ssr(qdev);
+ free_ssr_dump_info(ssr_crash);
+}
+
+static int alloc_dump(struct ssr_dump_info *dump_info)
+{
+ struct debug_info_table *tbl_ent = dump_info->tbl_addr;
+ struct dump_file_meta *dump_meta;
+ u64 tbl_sz_lp = 0;
+ u64 dump_size = 0;
+
+ while (tbl_sz_lp < dump_info->tbl_len) {
+ le64_to_cpus(&tbl_ent->save_perf);
+ le64_to_cpus(&tbl_ent->mem_base);
+ le64_to_cpus(&tbl_ent->len);
+
+ if (tbl_ent->len == 0)
+ return -EINVAL;
+
+ dump_size += tbl_ent->len;
+ tbl_ent++;
+ tbl_sz_lp += sizeof(*tbl_ent);
+ }
+
+ dump_info->dump_sz = dump_size + dump_info->tbl_len + sizeof(*dump_meta);
+ dump_info->dump_addr = vzalloc(dump_info->dump_sz);
+ if (!dump_info->dump_addr)
+ return -ENOMEM;
+
+ /* Copy crashdump meta and table */
+ dump_meta = dump_info->dump_addr;
+ dump_meta->magic = QAIC_SSR_DUMP_V1_MAGIC;
+ dump_meta->version = QAIC_SSR_DUMP_V1_VER;
+ dump_meta->size = dump_info->dump_sz;
+ dump_meta->tbl_len = dump_info->tbl_len;
+ memcpy(dump_info->dump_addr + sizeof(*dump_meta), dump_info->tbl_addr, dump_info->tbl_len);
+ /* Offset by crashdump meta and table (copied above) */
+ dump_info->dump_off = dump_info->tbl_len + sizeof(*dump_meta);
+
+ return 0;
+}
+
+static int
send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id)
+{
+ struct ssr_debug_transfer_done *xfer_done;
+ int ret;
+
+ xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL);
+ if (!xfer_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp, SSR_RESP_MSG_SZ, MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ xfer_done->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_DONE);
+ xfer_done->hdr.len = cpu_to_le32(sizeof(*xfer_done));
+ xfer_done->hdr.dbc_id = cpu_to_le32(dbc_id);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, xfer_done, sizeof(*xfer_done), MHI_EOT);
+ if (ret)
+ goto free_xfer_done;
+
+ return 0;
+
+free_xfer_done:
+ kfree(xfer_done);
+out:
+ return ret;
+}
+
+static int mem_read_req(struct qaic_device *qdev, u64 dest_addr, u64 dest_len)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ struct ssr_memory_read *read_buf_req;
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ dump_info = ssr_crash->dump_info;
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, ssr_crash->data, SSR_MEM_READ_DATA_SIZE,
+ MHI_EOT);
+ if (ret)
+ goto out;
+
+ read_buf_req = dump_info->read_buf_req;
+ read_buf_req->hdr.cmd = cpu_to_le32(MEMORY_READ);
+ read_buf_req->hdr.len = cpu_to_le32(sizeof(*read_buf_req));
+ read_buf_req->hdr.dbc_id = cpu_to_le32(qdev->ssr_dbc);
+ read_buf_req->addr = cpu_to_le64(dest_addr);
+ read_buf_req->len = cpu_to_le64(dest_len);
+
+ ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, read_buf_req, sizeof(*read_buf_req),
+ MHI_EOT);
+ if (!ret)
+ dump_info->read_buf_req_queued = true;
+
+out:
+ return ret;
+}
+
+static int ssr_copy_table(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ if (len > dump_info->tbl_len - dump_info->tbl_off)
+ return -EINVAL;
+
+ memcpy(dump_info->tbl_addr + dump_info->tbl_off, data, len);
+ dump_info->tbl_off += len;
+
+ /* Entire table has been downloaded, alloc dump memory */
+ if (dump_info->tbl_off == dump_info->tbl_len) {
+ dump_info->tbl_ent = dump_info->tbl_addr;
+ return alloc_dump(dump_info);
+ }
+
+ return 0;
+}
+
+static int ssr_copy_dump(struct ssr_dump_info *dump_info, void *data, u64 len)
+{
+ struct debug_info_table *tbl_ent;
+
+ tbl_ent = dump_info->tbl_ent;
+
+ if (len > tbl_ent->len - dump_info->tbl_ent_off)
+ return -EINVAL;
+
+ memcpy(dump_info->dump_addr + dump_info->dump_off, data, len);
+ dump_info->dump_off += len;
+ dump_info->tbl_ent_off += len;
+
+ /*
+ * The current segment (an entry in the table) of the crashdump is
+ * complete, move to the next one
+ */
+ if (tbl_ent->len == dump_info->tbl_ent_off) {
+ dump_info->tbl_ent++;
+ dump_info->tbl_ent_off = 0;
+ }
+
+ return 0;
+}
+
+static void ssr_dump_worker(struct work_struct *work)
+{
+ struct ssr_crashdump *ssr_crash = container_of(work, struct ssr_crashdump, work);
+ struct qaic_device *qdev = ssr_crash->qdev;
+ struct ssr_memory_read_rsp *mem_rd_resp;
+ struct debug_info_table *tbl_ent;
+ struct ssr_dump_info *dump_info;
+ u64 dest_addr, dest_len;
+ struct _ssr_hdr *_hdr;
+ struct ssr_hdr hdr;
+ u64 data_len;
+ int ret;
+
+ mem_rd_resp = (struct ssr_memory_read_rsp *)ssr_crash->data;
+ _hdr = &mem_rd_resp->hdr;
+ hdr.cmd = le32_to_cpu(_hdr->cmd);
+ hdr.len = le32_to_cpu(_hdr->len);
+ hdr.dbc_id = le32_to_cpu(_hdr->dbc_id);
+
+ if (hdr.dbc_id != qdev->ssr_dbc)
+ goto reset_device;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ goto reset_device;
+
+ if (hdr.cmd != MEMORY_READ_RSP)
+ goto free_dump_info;
+
+ if (hdr.len > SSR_MEM_READ_DATA_SIZE)
+ goto free_dump_info;
+
+ data_len = hdr.len -
sizeof(*mem_rd_resp);
+
+ if (dump_info->tbl_off < dump_info->tbl_len) /* Chunk belongs to table */
+ ret = ssr_copy_table(dump_info, mem_rd_resp->data, data_len);
+ else /* Chunk belongs to crashdump */
+ ret = ssr_copy_dump(dump_info, mem_rd_resp->data, data_len);
+
+ if (ret)
+ goto free_dump_info;
+
+ if (dump_info->tbl_off < dump_info->tbl_len) {
+ /* Continue downloading table */
+ dest_addr = dump_info->tbl_addr_dev + dump_info->tbl_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, dump_info->tbl_len - dump_info->tbl_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else if (dump_info->dump_off < dump_info->dump_sz) {
+ /* Continue downloading crashdump */
+ tbl_ent = dump_info->tbl_ent;
+ dest_addr = tbl_ent->mem_base + dump_info->tbl_ent_off;
+ dest_len = min(SSR_MEM_READ_CHUNK_SIZE, tbl_ent->len - dump_info->tbl_ent_off);
+ ret = mem_read_req(qdev, dest_addr, dest_len);
+ } else {
+ /* Crashdump download complete */
+ ret = send_xfer_done(qdev, dump_info->resp->data, hdr.dbc_id);
+ }
+
+ /* Most likely an MHI xfer has failed */
+ if (ret)
+ goto free_dump_info;
+
+ return;
+
+free_dump_info:
+ /* Free the allocated memory */
+ free_ssr_dump_info(ssr_crash);
+reset_device:
+ /*
+ * A subsystem on the device crashed and crashdump collection began, but
+ * something went wrong while collecting the crashdump. Instead of
+ * trying to handle the error, just reset the device; the best effort
+ * has already been made.
+ */
+ mhi_soc_reset(qdev->mhi_cntrl);
+}
+
+static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_dump_info *dump_info;
+ int ret;
+
+ le64_to_cpus(&debug_info->tbl_len);
+ le64_to_cpus(&debug_info->tbl_addr);
+
+ if (debug_info->tbl_len == 0 ||
+ debug_info->tbl_len % sizeof(struct debug_info_table) != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Allocate SSR crashdump bookkeeping structure */
+ dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL);
+ if (!dump_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Buffer used to send MEMORY READ request to device via MHI */
+ dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL);
+ if (!dump_info->read_buf_req) {
+ ret = -ENOMEM;
+ goto free_dump_info;
+ }
+
+ /* Crashdump meta table buffer */
+ dump_info->tbl_addr = vzalloc(debug_info->tbl_len);
+ if (!dump_info->tbl_addr) {
+ ret = -ENOMEM;
+ goto free_read_buf_req;
+ }
+
+ dump_info->tbl_addr_dev = debug_info->tbl_addr;
+ dump_info->tbl_len = debug_info->tbl_len;
+
+ return dump_info;
+
+free_read_buf_req:
+ kfree(dump_info->read_buf_req);
+free_dump_info:
+ kfree(dump_info);
+out:
+ return ERR_PTR(ret);
+}
+
+static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_info *debug_info)
+{
+ struct ssr_debug_transfer_info_rsp *debug_rsp;
+ struct ssr_crashdump *ssr_crash = NULL;
+ int ret = 0, ret2;
+
+ debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL);
+ if (!debug_rsp)
+ return -ENOMEM;
+
+ if (!qdev->ssr_mhi_buf) {
+ ret = -ENOMEM;
+ goto send_rsp;
+ }
+
+ if (dbc->state != DBC_STATE_BEFORE_POWER_UP) {
+ ret = -EINVAL;
+ goto send_rsp;
+ }
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ ssr_crash->dump_info = alloc_dump_info(qdev, debug_info);
+ if (IS_ERR(ssr_crash->dump_info)) {
+ ret = PTR_ERR(ssr_crash->dump_info);
+ ssr_crash->dump_info = NULL;
+ }
+
+send_rsp:
+ debug_rsp->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_INFO_RSP);
+ debug_rsp->hdr.len = cpu_to_le32(sizeof(*debug_rsp));
+ debug_rsp->hdr.dbc_id = cpu_to_le32(dbc->id);
+ /*
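+ * Tell the device whether the host is ready to pull the crashdump: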
+ * 0 = Return an ACK confirming the host is ready to download the crashdump
+ * 1 = Return a NACK confirming the host is not ready to download the crashdump
+ */
+ debug_rsp->ret = cpu_to_le32(ret ? 1 : 0);
+
+ ret2 = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, debug_rsp, sizeof(*debug_rsp), MHI_EOT);
+ if (ret2) {
+ if (ssr_crash)
+ free_ssr_dump_info(ssr_crash);
+ kfree(debug_rsp);
+ return ret2;
+ }
+
+ return ret;
+}
+
+static void dbg_xfer_done_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
+ struct ssr_debug_transfer_done_rsp *xfer_rsp)
+{
+ struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+ u32 status = le32_to_cpu(xfer_rsp->ret);
+ struct device *dev = &qdev->pdev->dev;
+ struct ssr_dump_info *dump_info;
+
+ if (!ssr_crash)
+ return;
+
+ dump_info = ssr_crash->dump_info;
+ if (!dump_info)
+ return;
+
+ if (status) {
+ free_ssr_dump_info(ssr_crash);
+ return;
+ }
+
+ dev_coredumpv(dev, dump_info->dump_addr, dump_info->dump_sz, GFP_KERNEL);
+ /* dev_coredumpv will free dump_info->dump_addr */
+ dump_info->dump_addr = NULL;
+ free_ssr_dump_info(ssr_crash);
+}
+
+static void ssr_worker(struct work_struct *work)
+{
+ struct ssr_resp *resp = container_of(work, struct ssr_resp, work);
+ struct ssr_hdr *hdr = (struct ssr_hdr *)resp->data;
+ struct ssr_dump_info *dump_info = NULL;
+ struct qaic_device *qdev = resp->qdev;
+ struct ssr_crashdump *ssr_crash;
+ struct ssr_event_rsp *event_rsp;
+ struct dma_bridge_chan *dbc;
+ struct ssr_event *event;
+ u32 ssr_event_ack;
+ int ret;
+
+ le32_to_cpus(&hdr->cmd);
+ le32_to_cpus(&hdr->len);
+ le32_to_cpus(&hdr->dbc_id);
+
+ if (hdr->len > SSR_RESP_MSG_SZ)
+ goto out;
+
+ if (hdr->dbc_id >= qdev->num_dbc)
+ goto out;
+
+ dbc = &qdev->dbc[hdr->dbc_id];
+
+ switch (hdr->cmd) {
+ case DEBUG_TRANSFER_INFO:
+ ret = dbg_xfer_info_rsp(qdev, dbc, (struct ssr_debug_transfer_info *)resp->data);
+ if (ret)
+ break;
+
+ ssr_crash = qdev->ssr_mhi_buf;
+ dump_info = ssr_crash->dump_info;
+ dump_info->dbc = dbc;
+ dump_info->resp = resp;
+
+ /* Start by downloading debug table */
+ ret = mem_read_req(qdev, dump_info->tbl_addr_dev,
+ min(dump_info->tbl_len, SSR_MEM_READ_CHUNK_SIZE));
+ if (ret) {
+ free_ssr_dump_info(ssr_crash);
+ break;
+ }
+
+ /*
+ * Everything has gone fine till now, which means we will be
+ * collecting the crashdump chunk by chunk. Do not queue a
+ * response buffer for SSR cmds till the crashdump is complete.
+ */
+ return;
+ case SSR_EVENT:
+ event = (struct ssr_event *)hdr;
+ le32_to_cpus(&event->event);
+ ssr_event_ack = event->event;
+ ssr_crash = qdev->ssr_mhi_buf;
+
+ switch (event->event) {
+ case BEFORE_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_SHUTDOWN);
+ qaic_dbc_enter_ssr(qdev, hdr->dbc_id);
+ break;
+ case AFTER_SHUTDOWN:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_SHUTDOWN);
+ break;
+ case BEFORE_POWER_UP:
+ set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_POWER_UP);
+ break;
+ case AFTER_POWER_UP:
+ /*
+ * If dump info is a non-NULL value it means that we
+ * received this SSR event while a crashdump download
+ * for this DBC was still in progress.
NACK + * the SSR event + */ + if (ssr_crash && ssr_crash->dump_info) { + free_ssr_dump_info(ssr_crash); + ssr_event_ack = SSR_EVENT_NACK; + break; + } + + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_POWER_UP); + break; + default: + break; + } + + event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL); + if (!event_rsp) + break; + + event_rsp->hdr.cmd = cpu_to_le32(SSR_EVENT_RSP); + event_rsp->hdr.len = cpu_to_le32(sizeof(*event_rsp)); + event_rsp->hdr.dbc_id = cpu_to_le32(hdr->dbc_id); + event_rsp->event = cpu_to_le32(ssr_event_ack); + + ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, event_rsp, sizeof(*event_rsp), + MHI_EOT); + if (ret) + kfree(event_rsp); + + if (event->event == AFTER_POWER_UP && ssr_event_ack != SSR_EVENT_NACK) { + qaic_dbc_exit_ssr(qdev); + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_IDLE); + } + + break; + case DEBUG_TRANSFER_DONE_RSP: + dbg_xfer_done_rsp(qdev, dbc, (struct ssr_debug_transfer_done_rsp *)hdr); + break; + default: + break; + } + +out: + ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT); + if (ret) + kfree(resp); +} + +static int qaic_ssr_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct ssr_resp *resp; + int ret; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + resp = kzalloc(sizeof(*resp) + SSR_RESP_MSG_SZ, GFP_KERNEL); + if (!resp) { + mhi_unprepare_from_transfer(mhi_dev); + return -ENOMEM; + } + + resp->qdev = qdev; + INIT_WORK(&resp->work, ssr_worker); + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT); + if (ret) { + kfree(resp); + mhi_unprepare_from_transfer(mhi_dev); + return ret; + } + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->ssr_ch = mhi_dev; + + return 0; +} + +static void qaic_ssr_mhi_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + mhi_unprepare_from_transfer(qdev->ssr_ch); + qdev->ssr_ch = NULL; +} + +static void qaic_ssr_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf; + struct _ssr_hdr *hdr = mhi_result->buf_addr; + struct ssr_dump_info *dump_info; + + if (mhi_result->transaction_status) { + kfree(mhi_result->buf_addr); + return; + } + + /* + * MEMORY READ is used to download crashdump. And crashdump is + * downloaded chunk by chunk in a series of MEMORY READ SSR commands. + * Hence to avoid too many kmalloc() and kfree() of the same MEMORY READ + * request buffer, we allocate only one such buffer and free it only + * once. 
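A minimal sketch of the single-buffer scheme described above; struct read_req_slot and queue_read_req() are hypothetical names used only for illustration, while mhi_queue_buf() is the real MHI API used throughout this patch:

#include <linux/mhi.h>
#include <linux/types.h>

/* Hypothetical bookkeeping for one reusable MEMORY READ request buffer. */
struct read_req_slot {
	void *buf;	/* allocated once, freed once at teardown */
	bool queued;	/* true while MHI still owns the buffer */
};

static int queue_read_req(struct mhi_device *ch, struct read_req_slot *slot, size_t len)
{
	int ret;

	if (slot->queued)	/* previous request still in flight */
		return -EBUSY;

	ret = mhi_queue_buf(ch, DMA_TO_DEVICE, slot->buf, len, MHI_EOT);
	if (!ret)
		slot->queued = true;	/* cleared by the ul_xfer_cb when the send completes */
	return ret;
}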
+	 */
+	if (le32_to_cpu(hdr->cmd) == MEMORY_READ) {
+		dump_info = ssr_crash->dump_info;
+		if (dump_info) {
+			dump_info->read_buf_req_queued = false;
+			return;
+		}
+	}
+
+	kfree(mhi_result->buf_addr);
+}
+
+static void qaic_ssr_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct ssr_resp *resp = container_of(mhi_result->buf_addr, struct ssr_resp, data);
+	struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+	struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
+	bool memory_read_rsp = false;
+
+	if (ssr_crash && ssr_crash->data == mhi_result->buf_addr)
+		memory_read_rsp = true;
+
+	if (mhi_result->transaction_status) {
+		/* Do not free the SSR crashdump buffer as it is allocated via managed APIs */
+		if (!memory_read_rsp)
+			kfree(resp);
+		return;
+	}
+
+	if (memory_read_rsp)
+		queue_work(qdev->ssr_wq, &ssr_crash->work);
+	else
+		queue_work(qdev->ssr_wq, &resp->work);
+}
+
+static const struct mhi_device_id qaic_ssr_mhi_match_table[] = {
+	{ .chan = "QAIC_SSR", },
+	{},
+};
+
+static struct mhi_driver qaic_ssr_mhi_driver = {
+	.id_table = qaic_ssr_mhi_match_table,
+	.remove = qaic_ssr_mhi_remove,
+	.probe = qaic_ssr_mhi_probe,
+	.ul_xfer_cb = qaic_ssr_mhi_ul_xfer_cb,
+	.dl_xfer_cb = qaic_ssr_mhi_dl_xfer_cb,
+	.driver = {
+		.name = "qaic_ssr",
+	},
+};
+
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm)
+{
+	struct ssr_crashdump *ssr_crash;
+
+	qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
+
+	/*
+	 * The device requests only one SSR at a time, so a single buffer for
+	 * downloading the crashdump is enough.
+	 */
+	ssr_crash = drmm_kzalloc(drm, SSR_MHI_BUF_SIZE, GFP_KERNEL);
+	if (!ssr_crash)
+		return -ENOMEM;
+
+	ssr_crash->qdev = qdev;
+	INIT_WORK(&ssr_crash->work, ssr_dump_worker);
+	qdev->ssr_mhi_buf = ssr_crash;
+
+	return 0;
+}
+
+int qaic_ssr_register(void)
+{
+	return mhi_driver_register(&qaic_ssr_mhi_driver);
+}
+
+void qaic_ssr_unregister(void)
+{
+	mhi_driver_unregister(&qaic_ssr_mhi_driver);
+}
diff --git a/drivers/accel/qaic/qaic_ssr.h b/drivers/accel/qaic/qaic_ssr.h new file mode 100644 index 000000000000..97ccff305750 --- /dev/null +++ b/drivers/accel/qaic/qaic_ssr.h @@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __QAIC_SSR_H__
+#define __QAIC_SSR_H__
+
+struct drm_device;
+struct qaic_device;
+
+int qaic_ssr_register(void);
+void qaic_ssr_unregister(void);
+void qaic_clean_up_ssr(struct qaic_device *qdev);
+int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm);
+#endif /* __QAIC_SSR_H__ */
diff --git a/drivers/accel/qaic/qaic_sysfs.c b/drivers/accel/qaic/qaic_sysfs.c new file mode 100644 index 000000000000..e0afb0ffb589 --- /dev/null +++ b/drivers/accel/qaic/qaic_sysfs.c @@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2025, The Linux Foundation. All rights reserved.
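qaic_ssr_init() above ties the crashdump bookkeeping buffer to the DRM device with drmm_kzalloc(), so no explicit free is needed. A generic sketch of that managed-allocation pattern; my_state and my_init() are illustrative names, not part of this patch:

#include <drm/drm_device.h>
#include <drm/drm_managed.h>

struct my_state {
	int counter;
};

static int my_init(struct drm_device *drm)
{
	struct my_state *s;

	/* Freed automatically when the drm_device is released; no kfree() here */
	s = drmm_kzalloc(drm, sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	return 0;
}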
*/ + +#include <drm/drm_file.h> +#include <drm/drm_managed.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/mutex.h> +#include <linux/sysfs.h> + +#include "qaic.h" + +#define NAME_LEN 14 + +struct dbc_attribute { + struct device_attribute dev_attr; + u32 dbc_id; + char name[NAME_LEN]; +}; + +static ssize_t dbc_state_show(struct device *dev, struct device_attribute *a, char *buf) +{ + struct dbc_attribute *dbc_attr = container_of(a, struct dbc_attribute, dev_attr); + struct drm_minor *minor = dev_get_drvdata(dev); + struct qaic_device *qdev; + + qdev = to_qaic_device(minor->dev); + return sysfs_emit(buf, "%d\n", qdev->dbc[dbc_attr->dbc_id].state); +} + +void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state) +{ + struct device *kdev = to_accel_kdev(qdev->qddev); + char *envp[3] = {}; + char state_str[16]; + char id_str[12]; + + envp[0] = id_str; + envp[1] = state_str; + + if (state >= DBC_STATE_MAX) + return; + if (dbc_id >= qdev->num_dbc) + return; + if (state == qdev->dbc[dbc_id].state) + return; + + scnprintf(id_str, ARRAY_SIZE(id_str), "DBC_ID=%d", dbc_id); + scnprintf(state_str, ARRAY_SIZE(state_str), "DBC_STATE=%d", state); + + qdev->dbc[dbc_id].state = state; + kobject_uevent_env(&kdev->kobj, KOBJ_CHANGE, envp); +} + +int qaic_sysfs_init(struct qaic_drm_device *qddev) +{ + struct device *kdev = to_accel_kdev(qddev); + struct drm_device *drm = to_drm(qddev); + u32 num_dbc = qddev->qdev->num_dbc; + struct dbc_attribute *dbc_attrs; + int i, ret; + + dbc_attrs = drmm_kcalloc(drm, num_dbc, sizeof(*dbc_attrs), GFP_KERNEL); + if (!dbc_attrs) + return -ENOMEM; + + for (i = 0; i < num_dbc; ++i) { + struct dbc_attribute *dbc_attr = &dbc_attrs[i]; + + sysfs_attr_init(&dbc_attr->dev_attr.attr); + dbc_attr->dbc_id = i; + scnprintf(dbc_attr->name, NAME_LEN, "dbc%d_state", i); + dbc_attr->dev_attr.attr.name = dbc_attr->name; + dbc_attr->dev_attr.attr.mode = 0444; + dbc_attr->dev_attr.show = dbc_state_show; + ret = sysfs_create_file(&kdev->kobj, &dbc_attr->dev_attr.attr); + if (ret) { + int j; + + for (j = 0; j < i; ++j) { + dbc_attr = &dbc_attrs[j]; + sysfs_remove_file(&kdev->kobj, &dbc_attr->dev_attr.attr); + } + drmm_kfree(drm, dbc_attrs); + return ret; + } + } + + qddev->sysfs_attrs = dbc_attrs; + return 0; +} + +void qaic_sysfs_remove(struct qaic_drm_device *qddev) +{ + struct dbc_attribute *dbc_attrs = qddev->sysfs_attrs; + struct device *kdev = to_accel_kdev(qddev); + u32 num_dbc = qddev->qdev->num_dbc; + int i; + + if (!dbc_attrs) + return; + + qddev->sysfs_attrs = NULL; + for (i = 0; i < num_dbc; ++i) + sysfs_remove_file(&kdev->kobj, &dbc_attrs[i].dev_attr.attr); + drmm_kfree(to_drm(qddev), dbc_attrs); +} diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c new file mode 100644 index 000000000000..8af2475f4f36 --- /dev/null +++ b/drivers/accel/qaic/qaic_timesync.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
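The dbcN_state attributes created above are plain integer files, and state changes are also broadcast as KOBJ_CHANGE uevents carrying DBC_ID= and DBC_STATE= variables. A userspace reader might poll the attribute like this sketch; the sysfs path is illustrative and depends on the accel minor number:

/* Userspace sketch: read one DBC state attribute. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/accel/accel0/dbc0_state", "r");
	int state;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &state) == 1)
		printf("dbc0 state: %d\n", state);	/* one of the DBC_STATE_* values */
	fclose(f);
	return 0;
}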
*/ + +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/mhi.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/time64.h> +#include <linux/timer.h> + +#include "qaic.h" +#include "qaic_timesync.h" + +#define QTIMER_REG_OFFSET 0xa28 +#define QAIC_TIMESYNC_SIGNATURE 0x55aa +#define QAIC_CONV_QTIMER_TO_US(qtimer) (mul_u64_u32_div(qtimer, 10, 192)) + +static unsigned int timesync_delay_ms = 1000; /* 1 sec default */ +module_param(timesync_delay_ms, uint, 0600); +MODULE_PARM_DESC(timesync_delay_ms, "Delay in ms between two consecutive timesync operations"); + +enum qts_msg_type { + QAIC_TS_CMD_TO_HOST, + QAIC_TS_SYNC_REQ, + QAIC_TS_ACK_TO_HOST, + QAIC_TS_MSG_TYPE_MAX +}; + +/** + * struct qts_hdr - Timesync message header structure. + * @signature: Unique signature to identify the timesync message. + * @reserved_1: Reserved for future use. + * @reserved_2: Reserved for future use. + * @msg_type: sub-type of the timesync message. + * @reserved_3: Reserved for future use. + */ +struct qts_hdr { + __le16 signature; + __le16 reserved_1; + u8 reserved_2; + u8 msg_type; + __le16 reserved_3; +} __packed; + +/** + * struct qts_timeval - Structure to carry time information. + * @tv_sec: Seconds part of the time. + * @tv_usec: uS (microseconds) part of the time. + */ +struct qts_timeval { + __le64 tv_sec; + __le64 tv_usec; +} __packed; + +/** + * struct qts_host_time_sync_msg_data - Structure to denote the timesync message. + * @header: Header of the timesync message. + * @data: Time information. + */ +struct qts_host_time_sync_msg_data { + struct qts_hdr header; + struct qts_timeval data; +} __packed; + +/** + * struct mqts_dev - MHI QAIC Timesync Control device. + * @qdev: Pointer to the root device struct driven by QAIC driver. + * @mhi_dev: Pointer to associated MHI device. + * @timer: Timer handle used for timesync. + * @qtimer_addr: Device QTimer register pointer. + * @buff_in_use: atomic variable to track if the sync_msg buffer is in use. + * @dev: Device pointer to qdev->pdev->dev stored for easy access. + * @sync_msg: Buffer used to send timesync message over MHI. 
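The packed structs above define a fixed little-endian wire layout. A userspace mirror with compile-time size checks, assuming C11; the *_u names are illustrative:

#include <assert.h>
#include <stdint.h>

struct qts_hdr_u {			/* mirrors struct qts_hdr */
	uint16_t signature;		/* QAIC_TIMESYNC_SIGNATURE (0x55aa) */
	uint16_t reserved_1;
	uint8_t  reserved_2;
	uint8_t  msg_type;		/* enum qts_msg_type */
	uint16_t reserved_3;
} __attribute__((packed));

struct qts_timeval_u {			/* mirrors struct qts_timeval */
	uint64_t tv_sec;
	uint64_t tv_usec;
} __attribute__((packed));

static_assert(sizeof(struct qts_hdr_u) == 8, "header is 8 bytes on the wire");
static_assert(sizeof(struct qts_timeval_u) == 16, "payload is 16 bytes on the wire");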
+ */ +struct mqts_dev { + struct qaic_device *qdev; + struct mhi_device *mhi_dev; + struct timer_list timer; + void __iomem *qtimer_addr; + atomic_t buff_in_use; + struct device *dev; + struct qts_host_time_sync_msg_data *sync_msg; +}; + +struct qts_resp_msg { + struct qts_hdr hdr; +} __packed; + +struct qts_resp { + struct qts_resp_msg data; + struct work_struct work; + struct qaic_device *qdev; +}; + +#ifdef readq +static u64 read_qtimer(const volatile void __iomem *addr) +{ + return readq(addr); +} +#else +static u64 read_qtimer(const volatile void __iomem *addr) +{ + u64 low, high; + + low = readl(addr); + high = readl(addr + sizeof(u32)); + return low | (high << 32); +} +#endif + +static void qaic_timesync_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); + + dev_dbg(mqtsdev->dev, "%s status: %d xfer_len: %zu\n", __func__, + mhi_result->transaction_status, mhi_result->bytes_xferd); + + atomic_set(&mqtsdev->buff_in_use, 0); +} + +static void qaic_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); + + dev_err(mqtsdev->dev, "%s no data expected on dl channel\n", __func__); +} + +static void qaic_timesync_timer(struct timer_list *t) +{ + struct mqts_dev *mqtsdev = timer_container_of(mqtsdev, t, timer); + struct qts_host_time_sync_msg_data *sync_msg; + u64 device_qtimer_us; + u64 device_qtimer; + u64 host_time_us; + u64 offset_us; + u64 host_sec; + int ret; + + if (atomic_read(&mqtsdev->buff_in_use)) { + dev_dbg(mqtsdev->dev, "%s buffer not free, schedule next cycle\n", __func__); + goto mod_timer; + } + atomic_set(&mqtsdev->buff_in_use, 1); + + sync_msg = mqtsdev->sync_msg; + sync_msg->header.signature = cpu_to_le16(QAIC_TIMESYNC_SIGNATURE); + sync_msg->header.msg_type = QAIC_TS_SYNC_REQ; + /* Read host UTC time and convert to uS*/ + host_time_us = div_u64(ktime_get_real_ns(), NSEC_PER_USEC); + device_qtimer = read_qtimer(mqtsdev->qtimer_addr); + device_qtimer_us = QAIC_CONV_QTIMER_TO_US(device_qtimer); + /* Offset between host UTC and device time */ + offset_us = host_time_us - device_qtimer_us; + + host_sec = div_u64(offset_us, USEC_PER_SEC); + sync_msg->data.tv_usec = cpu_to_le64(offset_us - host_sec * USEC_PER_SEC); + sync_msg->data.tv_sec = cpu_to_le64(host_sec); + ret = mhi_queue_buf(mqtsdev->mhi_dev, DMA_TO_DEVICE, sync_msg, sizeof(*sync_msg), MHI_EOT); + if (ret && (ret != -EAGAIN)) { + dev_err(mqtsdev->dev, "%s unable to queue to mhi:%d\n", __func__, ret); + return; + } else if (ret == -EAGAIN) { + atomic_set(&mqtsdev->buff_in_use, 0); + } + +mod_timer: + ret = mod_timer(t, jiffies + msecs_to_jiffies(timesync_delay_ms)); + if (ret) + dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret); +} + +void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev) +{ + struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); + + timer_delete_sync(&mqtsdev->timer); +} + +static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct mqts_dev *mqtsdev; + struct timer_list *timer; + int ret; + + mqtsdev = kzalloc(sizeof(*mqtsdev), GFP_KERNEL); + if (!mqtsdev) { + ret = -ENOMEM; + goto out; + } + + timer = &mqtsdev->timer; + mqtsdev->mhi_dev = mhi_dev; + mqtsdev->qdev = qdev; + mqtsdev->dev = &qdev->pdev->dev; + + mqtsdev->sync_msg = kzalloc(sizeof(*mqtsdev->sync_msg), GFP_KERNEL); + if 
(!mqtsdev->sync_msg) { + ret = -ENOMEM; + goto free_mqts_dev; + } + atomic_set(&mqtsdev->buff_in_use, 0); + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + goto free_sync_msg; + + /* Qtimer register pointer */ + mqtsdev->qtimer_addr = qdev->bar_mhi + QTIMER_REG_OFFSET; + timer_setup(timer, qaic_timesync_timer, 0); + timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms); + add_timer(timer); + dev_set_drvdata(&mhi_dev->dev, mqtsdev); + qdev->mqts_ch = mhi_dev; + + return 0; + +free_sync_msg: + kfree(mqtsdev->sync_msg); +free_mqts_dev: + kfree(mqtsdev); +out: + return ret; +}; + +static void qaic_timesync_remove(struct mhi_device *mhi_dev) +{ + struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); + + mqtsdev->qdev->mqts_ch = NULL; + timer_delete_sync(&mqtsdev->timer); + mhi_unprepare_from_transfer(mqtsdev->mhi_dev); + kfree(mqtsdev->sync_msg); + kfree(mqtsdev); +} + +static const struct mhi_device_id qaic_timesync_match_table[] = { + { .chan = "QAIC_TIMESYNC_PERIODIC"}, + {}, +}; + +MODULE_DEVICE_TABLE(mhi, qaic_timesync_match_table); + +static struct mhi_driver qaic_timesync_driver = { + .id_table = qaic_timesync_match_table, + .remove = qaic_timesync_remove, + .probe = qaic_timesync_probe, + .ul_xfer_cb = qaic_timesync_ul_xfer_cb, + .dl_xfer_cb = qaic_timesync_dl_xfer_cb, + .driver = { + .name = "qaic_timesync_periodic", + }, +}; + +static void qaic_boot_timesync_worker(struct work_struct *work) +{ + struct qts_resp *resp = container_of(work, struct qts_resp, work); + struct qts_host_time_sync_msg_data *req; + struct qts_resp_msg data = resp->data; + struct qaic_device *qdev = resp->qdev; + struct mhi_device *mhi_dev; + struct timespec64 ts; + int ret; + + mhi_dev = qdev->qts_ch; + /* Queue the response message beforehand to avoid race conditions */ + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, &resp->data, sizeof(resp->data), MHI_EOT); + if (ret) { + kfree(resp); + dev_warn(&mhi_dev->dev, "Failed to re-queue response buffer %d\n", ret); + return; + } + + switch (data.hdr.msg_type) { + case QAIC_TS_CMD_TO_HOST: + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + break; + + req->header = data.hdr; + req->header.msg_type = QAIC_TS_SYNC_REQ; + ktime_get_real_ts64(&ts); + req->data.tv_sec = cpu_to_le64(ts.tv_sec); + req->data.tv_usec = cpu_to_le64(div_u64(ts.tv_nsec, NSEC_PER_USEC)); + + ret = mhi_queue_buf(mhi_dev, DMA_TO_DEVICE, req, sizeof(*req), MHI_EOT); + if (ret) { + kfree(req); + dev_dbg(&mhi_dev->dev, "Failed to send request message. 
Error %d\n", ret); + } + break; + case QAIC_TS_ACK_TO_HOST: + dev_dbg(&mhi_dev->dev, "ACK received from device\n"); + break; + default: + dev_err(&mhi_dev->dev, "Invalid message type %u.\n", data.hdr.msg_type); + } +} + +static int qaic_boot_timesync_queue_resp(struct mhi_device *mhi_dev, struct qaic_device *qdev) +{ + struct qts_resp *resp; + int ret; + + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) + return -ENOMEM; + + resp->qdev = qdev; + INIT_WORK(&resp->work, qaic_boot_timesync_worker); + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, &resp->data, sizeof(resp->data), MHI_EOT); + if (ret) { + kfree(resp); + dev_warn(&mhi_dev->dev, "Failed to queue response buffer %d\n", ret); + return ret; + } + + return 0; +} + +static void qaic_boot_timesync_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + mhi_unprepare_from_transfer(qdev->qts_ch); + qdev->qts_ch = NULL; +} + +static int qaic_boot_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + int ret; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + qdev->qts_ch = mhi_dev; + dev_set_drvdata(&mhi_dev->dev, qdev); + + ret = qaic_boot_timesync_queue_resp(mhi_dev, qdev); + if (ret) { + dev_set_drvdata(&mhi_dev->dev, NULL); + qdev->qts_ch = NULL; + mhi_unprepare_from_transfer(mhi_dev); + } + + return ret; +} + +static void qaic_boot_timesync_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + kfree(mhi_result->buf_addr); +} + +static void qaic_boot_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qts_resp *resp = container_of(mhi_result->buf_addr, struct qts_resp, data); + + if (mhi_result->transaction_status || mhi_result->bytes_xferd != sizeof(resp->data)) { + kfree(resp); + return; + } + + queue_work(resp->qdev->qts_wq, &resp->work); +} + +static const struct mhi_device_id qaic_boot_timesync_match_table[] = { + { .chan = "QAIC_TIMESYNC"}, + {}, +}; + +static struct mhi_driver qaic_boot_timesync_driver = { + .id_table = qaic_boot_timesync_match_table, + .remove = qaic_boot_timesync_remove, + .probe = qaic_boot_timesync_probe, + .ul_xfer_cb = qaic_boot_timesync_ul_xfer_cb, + .dl_xfer_cb = qaic_boot_timesync_dl_xfer_cb, + .driver = { + .name = "qaic_timesync", + }, +}; + +int qaic_timesync_init(void) +{ + int ret; + + ret = mhi_driver_register(&qaic_timesync_driver); + if (ret) + return ret; + ret = mhi_driver_register(&qaic_boot_timesync_driver); + + return ret; +} + +void qaic_timesync_deinit(void) +{ + mhi_driver_unregister(&qaic_boot_timesync_driver); + mhi_driver_unregister(&qaic_timesync_driver); +} diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h new file mode 100644 index 000000000000..77b9c2b55057 --- /dev/null +++ b/drivers/accel/qaic/qaic_timesync.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
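QAIC_CONV_QTIMER_TO_US() above scales by 10/192 because the device QTimer ticks at 19.2 MHz (19.2 ticks per microsecond); the driver uses mul_u64_u32_div() so the intermediate multiply cannot overflow. A plain-C sketch of the arithmetic with a worked example:

#include <stdint.h>
#include <stdio.h>

static uint64_t qtimer_to_us(uint64_t ticks)
{
	/* Naive form; overflows for ticks above ~2^60, which the kernel helper avoids */
	return ticks * 10 / 192;
}

int main(void)
{
	/* One second of QTimer time is 19,200,000 ticks. */
	printf("%llu us\n", (unsigned long long)qtimer_to_us(19200000ULL));	/* prints 1000000 us */
	return 0;
}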
+ */ + +#ifndef __QAIC_TIMESYNC_H__ +#define __QAIC_TIMESYNC_H__ + +#include <linux/mhi.h> + +int qaic_timesync_init(void); +void qaic_timesync_deinit(void); +void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev); +#endif /* __QAIC_TIMESYNC_H__ */ diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c new file mode 100644 index 000000000000..fd3c3b2d1fd3 --- /dev/null +++ b/drivers/accel/qaic/sahara.c @@ -0,0 +1,935 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include <linux/devcoredump.h> +#include <linux/firmware.h> +#include <linux/limits.h> +#include <linux/mhi.h> +#include <linux/minmax.h> +#include <linux/mod_devicetable.h> +#include <linux/overflow.h> +#include <linux/types.h> +#include <linux/vmalloc.h> +#include <linux/workqueue.h> + +#include "sahara.h" + +#define SAHARA_HELLO_CMD 0x1 /* Min protocol version 1.0 */ +#define SAHARA_HELLO_RESP_CMD 0x2 /* Min protocol version 1.0 */ +#define SAHARA_READ_DATA_CMD 0x3 /* Min protocol version 1.0 */ +#define SAHARA_END_OF_IMAGE_CMD 0x4 /* Min protocol version 1.0 */ +#define SAHARA_DONE_CMD 0x5 /* Min protocol version 1.0 */ +#define SAHARA_DONE_RESP_CMD 0x6 /* Min protocol version 1.0 */ +#define SAHARA_RESET_CMD 0x7 /* Min protocol version 1.0 */ +#define SAHARA_RESET_RESP_CMD 0x8 /* Min protocol version 1.0 */ +#define SAHARA_MEM_DEBUG_CMD 0x9 /* Min protocol version 2.0 */ +#define SAHARA_MEM_READ_CMD 0xa /* Min protocol version 2.0 */ +#define SAHARA_CMD_READY_CMD 0xb /* Min protocol version 2.1 */ +#define SAHARA_SWITCH_MODE_CMD 0xc /* Min protocol version 2.1 */ +#define SAHARA_EXECUTE_CMD 0xd /* Min protocol version 2.1 */ +#define SAHARA_EXECUTE_RESP_CMD 0xe /* Min protocol version 2.1 */ +#define SAHARA_EXECUTE_DATA_CMD 0xf /* Min protocol version 2.1 */ +#define SAHARA_MEM_DEBUG64_CMD 0x10 /* Min protocol version 2.5 */ +#define SAHARA_MEM_READ64_CMD 0x11 /* Min protocol version 2.5 */ +#define SAHARA_READ_DATA64_CMD 0x12 /* Min protocol version 2.8 */ +#define SAHARA_RESET_STATE_CMD 0x13 /* Min protocol version 2.9 */ +#define SAHARA_WRITE_DATA_CMD 0x14 /* Min protocol version 3.0 */ + +#define SAHARA_PACKET_MAX_SIZE 0xffffU /* MHI_MAX_MTU */ +#define SAHARA_TRANSFER_MAX_SIZE 0x80000 +#define SAHARA_READ_MAX_SIZE 0xfff0U /* Avoid unaligned requests */ +#define SAHARA_NUM_TX_BUF DIV_ROUND_UP(SAHARA_TRANSFER_MAX_SIZE,\ + SAHARA_PACKET_MAX_SIZE) +#define SAHARA_IMAGE_ID_NONE U32_MAX + +#define SAHARA_VERSION 2 +#define SAHARA_SUCCESS 0 +#define SAHARA_TABLE_ENTRY_STR_LEN 20 + +#define SAHARA_MODE_IMAGE_TX_PENDING 0x0 +#define SAHARA_MODE_IMAGE_TX_COMPLETE 0x1 +#define SAHARA_MODE_MEMORY_DEBUG 0x2 +#define SAHARA_MODE_COMMAND 0x3 + +#define SAHARA_HELLO_LENGTH 0x30 +#define SAHARA_READ_DATA_LENGTH 0x14 +#define SAHARA_END_OF_IMAGE_LENGTH 0x10 +#define SAHARA_DONE_LENGTH 0x8 +#define SAHARA_RESET_LENGTH 0x8 +#define SAHARA_MEM_DEBUG64_LENGTH 0x18 +#define SAHARA_MEM_READ64_LENGTH 0x18 + +struct sahara_packet { + __le32 cmd; + __le32 length; + + union { + struct { + __le32 version; + __le32 version_compat; + __le32 max_length; + __le32 mode; + } hello; + struct { + __le32 version; + __le32 version_compat; + __le32 status; + __le32 mode; + } hello_resp; + struct { + __le32 image; + __le32 offset; + __le32 length; + } read_data; + struct { + __le32 image; + __le32 status; + } end_of_image; + struct { + __le64 table_address; + __le64 table_length; + } memory_debug64; + struct { + __le64 memory_address; + __le64 memory_length; + } 
memory_read64; + }; +}; + +struct sahara_debug_table_entry64 { + __le64 type; + __le64 address; + __le64 length; + char description[SAHARA_TABLE_ENTRY_STR_LEN]; + char filename[SAHARA_TABLE_ENTRY_STR_LEN]; +}; + +struct sahara_dump_table_entry { + u64 type; + u64 address; + u64 length; + char description[SAHARA_TABLE_ENTRY_STR_LEN]; + char filename[SAHARA_TABLE_ENTRY_STR_LEN]; +}; + +#define SAHARA_DUMP_V1_MAGIC 0x1234567890abcdef +#define SAHARA_DUMP_V1_VER 1 +struct sahara_memory_dump_meta_v1 { + u64 magic; + u64 version; + u64 dump_size; + u64 table_size; +}; + +/* + * Layout of crashdump provided to user via devcoredump + * +------------------------------------------+ + * | Crashdump Meta structure | + * | type: struct sahara_memory_dump_meta_v1 | + * +------------------------------------------+ + * | Crashdump Table | + * | type: array of struct | + * | sahara_dump_table_entry | + * | | + * | | + * +------------------------------------------+ + * | Crashdump | + * | | + * | | + * | | + * | | + * | | + * +------------------------------------------+ + * + * First is the metadata header. Userspace can use the magic number to verify + * the content type, and then check the version for the rest of the format. + * New versions should keep the magic number location/value, and version + * location, but increment the version value. + * + * For v1, the metadata lists the size of the entire dump (header + table + + * dump) and the size of the table. Then the dump image table, which describes + * the contents of the dump. Finally all the images are listed in order, with + * no deadspace in between. Userspace can use the sizes listed in the image + * table to reconstruct the individual images. + */ + +struct sahara_context { + struct sahara_packet *tx[SAHARA_NUM_TX_BUF]; + struct sahara_packet *rx; + struct work_struct fw_work; + struct work_struct dump_work; + struct work_struct read_data_work; + struct mhi_device *mhi_dev; + const char * const *image_table; + u32 table_size; + u32 active_image_id; + const struct firmware *firmware; + u64 dump_table_address; + u64 dump_table_length; + size_t rx_size; + size_t rx_size_requested; + void *mem_dump; + size_t mem_dump_sz; + struct sahara_dump_table_entry *dump_image; + u64 dump_image_offset; + void *mem_dump_freespace; + u64 dump_images_left; + u32 read_data_offset; + u32 read_data_length; + bool is_mem_dump_mode; + bool non_streaming; +}; + +static const char * const aic100_image_table[] = { + [1] = "qcom/aic100/fw1.bin", + [2] = "qcom/aic100/fw2.bin", + [4] = "qcom/aic100/fw4.bin", + [5] = "qcom/aic100/fw5.bin", + [6] = "qcom/aic100/fw6.bin", + [8] = "qcom/aic100/fw8.bin", + [9] = "qcom/aic100/fw9.bin", + [10] = "qcom/aic100/fw10.bin", +}; + +static const char * const aic200_image_table[] = { + [5] = "qcom/aic200/uefi.elf", + [12] = "qcom/aic200/aic200-nsp.bin", + [23] = "qcom/aic200/aop.mbn", + [32] = "qcom/aic200/tz.mbn", + [33] = "qcom/aic200/hypvm.mbn", + [38] = "qcom/aic200/xbl_config.elf", + [39] = "qcom/aic200/aic200_abl.elf", + [40] = "qcom/aic200/apdp.mbn", + [41] = "qcom/aic200/devcfg.mbn", + [42] = "qcom/aic200/sec.elf", + [43] = "qcom/aic200/aic200-hlos.elf", + [49] = "qcom/aic200/shrm.elf", + [50] = "qcom/aic200/cpucp.elf", + [51] = "qcom/aic200/aop_devcfg.mbn", + [54] = "qcom/aic200/qupv3fw.elf", + [57] = "qcom/aic200/cpucp_dtbs.elf", + [62] = "qcom/aic200/uefi_dtbs.elf", + [63] = "qcom/aic200/xbl_ac_config.mbn", + [64] = "qcom/aic200/tz_ac_config.mbn", + [65] = "qcom/aic200/hyp_ac_config.mbn", + [66] = "qcom/aic200/pdp.elf", + [67] = 
"qcom/aic200/pdp_cdb.elf", + [68] = "qcom/aic200/sdi.mbn", + [69] = "qcom/aic200/dcd.mbn", + [73] = "qcom/aic200/gearvm.mbn", + [74] = "qcom/aic200/sti.bin", + [76] = "qcom/aic200/tz_qti_config.mbn", + [78] = "qcom/aic200/pvs.bin", +}; + +static bool is_streaming(struct sahara_context *context) +{ + return !context->non_streaming; +} + +static int sahara_find_image(struct sahara_context *context, u32 image_id) +{ + int ret; + + if (image_id == context->active_image_id) + return 0; + + if (context->active_image_id != SAHARA_IMAGE_ID_NONE) { + dev_err(&context->mhi_dev->dev, "image id %d is not valid as %d is active\n", + image_id, context->active_image_id); + return -EINVAL; + } + + if (image_id >= context->table_size || !context->image_table[image_id]) { + dev_err(&context->mhi_dev->dev, "request for unknown image: %d\n", image_id); + return -EINVAL; + } + + /* + * This image might be optional. The device may continue without it. + * Only the device knows. Suppress error messages that could suggest an + * a problem when we were actually able to continue. + */ + ret = firmware_request_nowarn(&context->firmware, + context->image_table[image_id], + &context->mhi_dev->dev); + if (ret) { + dev_dbg(&context->mhi_dev->dev, "request for image id %d / file %s failed %d\n", + image_id, context->image_table[image_id], ret); + return ret; + } + + context->active_image_id = image_id; + + return 0; +} + +static void sahara_release_image(struct sahara_context *context) +{ + if (context->active_image_id != SAHARA_IMAGE_ID_NONE) + release_firmware(context->firmware); + context->active_image_id = SAHARA_IMAGE_ID_NONE; +} + +static void sahara_send_reset(struct sahara_context *context) +{ + int ret; + + context->is_mem_dump_mode = false; + context->read_data_offset = 0; + context->read_data_length = 0; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH); + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_RESET_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send reset response %d\n", ret); +} + +static void sahara_hello(struct sahara_context *context) +{ + int ret; + + dev_dbg(&context->mhi_dev->dev, + "HELLO cmd received. 
length:%d version:%d version_compat:%d max_length:%d mode:%d\n", + le32_to_cpu(context->rx->length), + le32_to_cpu(context->rx->hello.version), + le32_to_cpu(context->rx->hello.version_compat), + le32_to_cpu(context->rx->hello.max_length), + le32_to_cpu(context->rx->hello.mode)); + + if (le32_to_cpu(context->rx->length) != SAHARA_HELLO_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed hello packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + if (le32_to_cpu(context->rx->hello.version) != SAHARA_VERSION) { + dev_err(&context->mhi_dev->dev, "Unsupported hello packet - version %d\n", + le32_to_cpu(context->rx->hello.version)); + return; + } + + if (le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_PENDING && + le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE && + le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG) { + dev_err(&context->mhi_dev->dev, "Unsupported hello packet - mode %d\n", + le32_to_cpu(context->rx->hello.mode)); + return; + } + + context->tx[0]->cmd = cpu_to_le32(SAHARA_HELLO_RESP_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_HELLO_LENGTH); + context->tx[0]->hello_resp.version = cpu_to_le32(SAHARA_VERSION); + context->tx[0]->hello_resp.version_compat = cpu_to_le32(SAHARA_VERSION); + context->tx[0]->hello_resp.status = cpu_to_le32(SAHARA_SUCCESS); + context->tx[0]->hello_resp.mode = context->rx->hello_resp.mode; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_HELLO_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret); +} + +static int read_data_helper(struct sahara_context *context, int buf_index) +{ + enum mhi_flags mhi_flag; + u32 pkt_data_len; + int ret; + + pkt_data_len = min(context->read_data_length, SAHARA_PACKET_MAX_SIZE); + + memcpy(context->tx[buf_index], + &context->firmware->data[context->read_data_offset], + pkt_data_len); + + context->read_data_offset += pkt_data_len; + context->read_data_length -= pkt_data_len; + + if (is_streaming(context) || !context->read_data_length) + mhi_flag = MHI_EOT; + else + mhi_flag = MHI_CHAIN; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, + context->tx[buf_index], pkt_data_len, mhi_flag); + if (ret) { + dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", ret); + return ret; + } + + return 0; +} + +static void sahara_read_data(struct sahara_context *context) +{ + u32 image_id, data_offset, data_len; + int ret; + int i; + + dev_dbg(&context->mhi_dev->dev, + "READ_DATA cmd received. length:%d image:%d offset:%d data_length:%d\n", + le32_to_cpu(context->rx->length), + le32_to_cpu(context->rx->read_data.image), + le32_to_cpu(context->rx->read_data.offset), + le32_to_cpu(context->rx->read_data.length)); + + if (le32_to_cpu(context->rx->length) != SAHARA_READ_DATA_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed read_data packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + + image_id = le32_to_cpu(context->rx->read_data.image); + data_offset = le32_to_cpu(context->rx->read_data.offset); + data_len = le32_to_cpu(context->rx->read_data.length); + + ret = sahara_find_image(context, image_id); + if (ret) { + sahara_send_reset(context); + return; + } + + /* + * Image is released when the device is done with it via + * SAHARA_END_OF_IMAGE_CMD. sahara_send_reset() will either cause the + * device to retry the operation with a modification, or decide to be + * done with the image and trigger SAHARA_END_OF_IMAGE_CMD. 
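read_data_helper() above marks every packet of a non-streaming transfer with MHI_CHAIN except the last one. A condensed sketch of that flag selection; chunk_flag() is an illustrative helper, the MHI flags are the real enum values:

#include <linux/mhi.h>
#include <linux/types.h>

static enum mhi_flags chunk_flag(bool streaming, u32 bytes_left_after_this)
{
	/*
	 * Streaming (XBL) firmware treats each packet as its own transfer, so
	 * every packet ends with MHI_EOT. Non-streaming (SBL) firmware expects
	 * one chained transfer terminated by a single MHI_EOT.
	 */
	if (streaming || !bytes_left_after_this)
		return MHI_EOT;
	return MHI_CHAIN;
}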
+	 * sahara_release_image() is called from SAHARA_END_OF_IMAGE_CMD
+	 * processing and is not needed here on error.
+	 */
+
+	if (context->non_streaming && data_len > SAHARA_TRANSFER_MAX_SIZE) {
+		dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n",
+			data_len, SAHARA_TRANSFER_MAX_SIZE);
+		sahara_send_reset(context);
+		return;
+	}
+
+	if (data_offset >= context->firmware->size) {
+		dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data offset %d exceeds file size %zu\n",
+			data_offset, context->firmware->size);
+		sahara_send_reset(context);
+		return;
+	}
+
+	if (size_add(data_offset, data_len) > context->firmware->size) {
+		dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data offset %d and length %d exceeds file size %zu\n",
+			data_offset, data_len, context->firmware->size);
+		sahara_send_reset(context);
+		return;
+	}
+
+	context->read_data_offset = data_offset;
+	context->read_data_length = data_len;
+
+	if (is_streaming(context)) {
+		schedule_work(&context->read_data_work);
+		return;
+	}
+
+	for (i = 0; i < SAHARA_NUM_TX_BUF && context->read_data_length; ++i) {
+		ret = read_data_helper(context, i);
+		if (ret)
+			break;
+	}
+}
+
+static void sahara_end_of_image(struct sahara_context *context)
+{
+	int ret;
+
+	dev_dbg(&context->mhi_dev->dev,
+		"END_OF_IMAGE cmd received. length:%d image:%d status:%d\n",
+		le32_to_cpu(context->rx->length),
+		le32_to_cpu(context->rx->end_of_image.image),
+		le32_to_cpu(context->rx->end_of_image.status));
+
+	if (le32_to_cpu(context->rx->length) != SAHARA_END_OF_IMAGE_LENGTH) {
+		dev_err(&context->mhi_dev->dev, "Malformed end_of_image packet - length %d\n",
+			le32_to_cpu(context->rx->length));
+		return;
+	}
+
+	if (context->active_image_id != SAHARA_IMAGE_ID_NONE &&
+	    le32_to_cpu(context->rx->end_of_image.image) != context->active_image_id) {
+		dev_err(&context->mhi_dev->dev, "Malformed end_of_image packet - image %d is not the active image\n",
+			le32_to_cpu(context->rx->end_of_image.image));
+		return;
+	}
+
+	sahara_release_image(context);
+
+	if (le32_to_cpu(context->rx->end_of_image.status))
+		return;
+
+	context->tx[0]->cmd = cpu_to_le32(SAHARA_DONE_CMD);
+	context->tx[0]->length = cpu_to_le32(SAHARA_DONE_LENGTH);
+
+	ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+			    SAHARA_DONE_LENGTH, MHI_EOT);
+	if (ret)
+		dev_dbg(&context->mhi_dev->dev, "Unable to send done response %d\n", ret);
+}
+
+static void sahara_memory_debug64(struct sahara_context *context)
+{
+	int ret;
+
+	dev_dbg(&context->mhi_dev->dev,
+		"MEMORY DEBUG64 cmd received. length:%d table_address:%#llx table_length:%#llx\n",
+		le32_to_cpu(context->rx->length),
+		le64_to_cpu(context->rx->memory_debug64.table_address),
+		le64_to_cpu(context->rx->memory_debug64.table_length));
+
+	if (le32_to_cpu(context->rx->length) != SAHARA_MEM_DEBUG64_LENGTH) {
+		dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - length %d\n",
+			le32_to_cpu(context->rx->length));
+		return;
+	}
+
+	context->dump_table_address = le64_to_cpu(context->rx->memory_debug64.table_address);
+	context->dump_table_length = le64_to_cpu(context->rx->memory_debug64.table_length);
+
+	if (context->dump_table_length % sizeof(struct sahara_debug_table_entry64) != 0 ||
+	    !context->dump_table_length) {
+		dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - table length %lld\n",
+			context->dump_table_length);
+		return;
+	}
+
+	/*
+	 * From this point, the protocol flips.
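sahara_read_data() above validates the requested range with size_add() from <linux/overflow.h>, which saturates at SIZE_MAX instead of wrapping. A minimal sketch of the same check; range_in_file() is an illustrative helper:

#include <linux/overflow.h>
#include <linux/types.h>

static bool range_in_file(size_t off, size_t len, size_t file_size)
{
	/* size_add() returns SIZE_MAX on overflow, so the comparison cannot wrap */
	return off < file_size && size_add(off, len) <= file_size;
}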
We make memory_read requests to + * the device, and the device responds with the raw data. If the device + * has an error, it will send an End of Image command. First we need to + * request the memory dump table so that we know where all the pieces + * of the dump are that we can consume. + */ + + context->is_mem_dump_mode = true; + + /* + * Assume that the table is smaller than our MTU so that we can read it + * in one shot. The spec does not put an upper limit on the table, but + * no known device will exceed this. + */ + if (context->dump_table_length > SAHARA_PACKET_MAX_SIZE) { + dev_err(&context->mhi_dev->dev, "Memory dump table length %lld exceeds supported size. Discarding dump\n", + context->dump_table_length); + sahara_send_reset(context); + return; + } + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_table_address); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(context->dump_table_length); + + context->rx_size_requested = context->dump_table_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send read for dump table %d\n", ret); +} + +static void sahara_processing(struct work_struct *work) +{ + struct sahara_context *context = container_of(work, struct sahara_context, fw_work); + int ret; + + switch (le32_to_cpu(context->rx->cmd)) { + case SAHARA_HELLO_CMD: + sahara_hello(context); + break; + case SAHARA_READ_DATA_CMD: + sahara_read_data(context); + break; + case SAHARA_END_OF_IMAGE_CMD: + sahara_end_of_image(context); + break; + case SAHARA_DONE_RESP_CMD: + /* Intentional do nothing as we don't need to exit an app */ + break; + case SAHARA_RESET_RESP_CMD: + /* Intentional do nothing as we don't need to exit an app */ + break; + case SAHARA_MEM_DEBUG64_CMD: + sahara_memory_debug64(context); + break; + default: + dev_err(&context->mhi_dev->dev, "Unknown command %d\n", + le32_to_cpu(context->rx->cmd)); + break; + } + + ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx, + SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret); +} + +static void sahara_parse_dump_table(struct sahara_context *context) +{ + struct sahara_dump_table_entry *image_out_table; + struct sahara_debug_table_entry64 *dev_table; + struct sahara_memory_dump_meta_v1 *dump_meta; + u64 table_nents; + u64 dump_length; + u64 mul_bytes; + int ret; + u64 i; + + table_nents = context->dump_table_length / sizeof(*dev_table); + context->dump_images_left = table_nents; + dump_length = 0; + + dev_table = (struct sahara_debug_table_entry64 *)(context->rx); + for (i = 0; i < table_nents; ++i) { + /* Do not trust the device, ensure the strings are terminated */ + dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0; + dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0; + + if (check_add_overflow(dump_length, + le64_to_cpu(dev_table[i].length), + &dump_length)) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + dev_dbg(&context->mhi_dev->dev, + "Memory dump table entry %lld type: %lld address: %#llx length: %#llx description: \"%s\" filename \"%s\"\n", + i, + le64_to_cpu(dev_table[i].type), + le64_to_cpu(dev_table[i].address), + le64_to_cpu(dev_table[i].length), + dev_table[i].description, + dev_table[i].filename); + } + + if 
(check_add_overflow(dump_length, (u64)sizeof(*dump_meta), &dump_length)) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + if (check_mul_overflow((u64)sizeof(*image_out_table), table_nents, &mul_bytes)) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + if (check_add_overflow(dump_length, mul_bytes, &dump_length)) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + context->mem_dump_sz = dump_length; + context->mem_dump = vzalloc(dump_length); + if (!context->mem_dump) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + /* Populate the dump metadata and table for userspace */ + dump_meta = context->mem_dump; + dump_meta->magic = SAHARA_DUMP_V1_MAGIC; + dump_meta->version = SAHARA_DUMP_V1_VER; + dump_meta->dump_size = dump_length; + dump_meta->table_size = context->dump_table_length; + + image_out_table = context->mem_dump + sizeof(*dump_meta); + for (i = 0; i < table_nents; ++i) { + image_out_table[i].type = le64_to_cpu(dev_table[i].type); + image_out_table[i].address = le64_to_cpu(dev_table[i].address); + image_out_table[i].length = le64_to_cpu(dev_table[i].length); + strscpy(image_out_table[i].description, dev_table[i].description, + SAHARA_TABLE_ENTRY_STR_LEN); + strscpy(image_out_table[i].filename, + dev_table[i].filename, + SAHARA_TABLE_ENTRY_STR_LEN); + } + + context->mem_dump_freespace = &image_out_table[i]; + + /* Done parsing the table, switch to image dump mode */ + context->dump_table_length = 0; + + /* Request the first chunk of the first image */ + context->dump_image = &image_out_table[0]; + dump_length = min_t(u64, context->dump_image->length, SAHARA_READ_MAX_SIZE); + /* Avoid requesting EOI sized data so that we can identify errors */ + if (dump_length == SAHARA_END_OF_IMAGE_LENGTH) + dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2; + + context->dump_image_offset = dump_length; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_image->address); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length); + + context->rx_size_requested = dump_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send read for dump content %d\n", ret); +} + +static void sahara_parse_dump_image(struct sahara_context *context) +{ + u64 dump_length; + int ret; + + memcpy(context->mem_dump_freespace, context->rx, context->rx_size); + context->mem_dump_freespace += context->rx_size; + + if (context->dump_image_offset >= context->dump_image->length) { + /* Need to move to next image */ + context->dump_image++; + context->dump_images_left--; + context->dump_image_offset = 0; + + if (!context->dump_images_left) { + /* Dump done */ + dev_coredumpv(context->mhi_dev->mhi_cntrl->cntrl_dev, + context->mem_dump, + context->mem_dump_sz, + GFP_KERNEL); + context->mem_dump = NULL; + sahara_send_reset(context); + return; + } + } + + /* Get next image chunk */ + dump_length = context->dump_image->length - context->dump_image_offset; + dump_length = min_t(u64, dump_length, SAHARA_READ_MAX_SIZE); + /* Avoid requesting EOI sized data so that we can identify errors */ + if (dump_length == SAHARA_END_OF_IMAGE_LENGTH) + dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length 
= cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = + cpu_to_le64(context->dump_image->address + context->dump_image_offset); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length); + + context->dump_image_offset += dump_length; + context->rx_size_requested = dump_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, + "Unable to send read for dump content %d\n", ret); +} + +static void sahara_dump_processing(struct work_struct *work) +{ + struct sahara_context *context = container_of(work, struct sahara_context, dump_work); + int ret; + + /* + * We should get the expected raw data, but if the device has an error + * it is supposed to send EOI with an error code. + */ + if (context->rx_size != context->rx_size_requested && + context->rx_size != SAHARA_END_OF_IMAGE_LENGTH) { + dev_err(&context->mhi_dev->dev, + "Unexpected response to read_data. Expected size: %#zx got: %#zx\n", + context->rx_size_requested, + context->rx_size); + goto error; + } + + if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH && + le32_to_cpu(context->rx->cmd) == SAHARA_END_OF_IMAGE_CMD) { + dev_err(&context->mhi_dev->dev, + "Unexpected EOI response to read_data. Status: %d\n", + le32_to_cpu(context->rx->end_of_image.status)); + goto error; + } + + if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH && + le32_to_cpu(context->rx->cmd) != SAHARA_END_OF_IMAGE_CMD) { + dev_err(&context->mhi_dev->dev, + "Invalid EOI response to read_data. CMD: %d\n", + le32_to_cpu(context->rx->cmd)); + goto error; + } + + /* + * Need to know if we received the dump table, or part of a dump image. + * Since we get raw data, we cannot tell from the data itself. Instead, + * we use the stored dump_table_length, which we zero after we read and + * process the entire table. + */ + if (context->dump_table_length) + sahara_parse_dump_table(context); + else + sahara_parse_dump_image(context); + + ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx, + SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret); + + return; + +error: + vfree(context->mem_dump); + context->mem_dump = NULL; + sahara_send_reset(context); +} + +static void sahara_read_data_processing(struct work_struct *work) +{ + struct sahara_context *context = container_of(work, struct sahara_context, read_data_work); + + read_data_helper(context, 0); +} + +static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct sahara_context *context; + int ret; + int i; + + context = devm_kzalloc(&mhi_dev->dev, sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->rx = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL); + if (!context->rx) + return -ENOMEM; + + if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) { + context->image_table = aic200_image_table; + context->table_size = ARRAY_SIZE(aic200_image_table); + } else { + context->image_table = aic100_image_table; + context->table_size = ARRAY_SIZE(aic100_image_table); + context->non_streaming = true; + } + + /* + * There are two firmware implementations for READ_DATA handling. + * The older "SBL" implementation defines a Sahara transfer size, and + * expects that the response is a single transport transfer. 
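Because responses to SAHARA_MEM_READ64_CMD are raw bytes, a chunk of exactly SAHARA_END_OF_IMAGE_LENGTH would be indistinguishable from an END_OF_IMAGE error packet; the parse functions above therefore halve such requests. A condensed sketch of that sizing rule; dump_chunk_len() is an illustrative helper:

#include <linux/types.h>

static u64 dump_chunk_len(u64 remaining, u64 read_max, u64 eoi_len)
{
	u64 len = remaining < read_max ? remaining : read_max;

	/* An eoi_len-sized reply would be ambiguous with an error packet */
	if (len == eoi_len)
		len = eoi_len / 2;
	return len;
}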
If the + * FW wants to transfer a file that is larger than the transfer size, + * the FW will issue multiple READ_DATA commands. For this + * implementation, we need to allocate enough buffers to contain the + * entire Sahara transfer size. + * + * The newer "XBL" implementation does not define a maximum transfer + * size and instead expects the data to be streamed over using the + * transport level MTU. The FW will issue a single READ_DATA command + * of whatever size, and consume multiple transport level transfers + * until the expected amount of data is consumed. For this + * implementation we only need a single buffer of the transport MTU + * but we'll need to be able to use it multiple times for a single + * READ_DATA request. + * + * AIC100 is the SBL implementation and defines SAHARA_TRANSFER_MAX_SIZE + * and we need 9x SAHARA_PACKET_MAX_SIZE to cover that. We can use + * MHI_CHAIN to link multiple buffers into a single transfer but the + * remote side will not consume the buffers until it sees an EOT, thus + * we need to allocate enough buffers to put in the tx fifo to cover an + * entire READ_DATA request of the max size. + * + * AIC200 is the XBL implementation, and so a single buffer will work. + */ + for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) { + context->tx[i] = devm_kzalloc(&mhi_dev->dev, + SAHARA_PACKET_MAX_SIZE, + GFP_KERNEL); + if (!context->tx[i]) + return -ENOMEM; + if (is_streaming(context)) + break; + } + + context->mhi_dev = mhi_dev; + INIT_WORK(&context->fw_work, sahara_processing); + INIT_WORK(&context->dump_work, sahara_dump_processing); + INIT_WORK(&context->read_data_work, sahara_read_data_processing); + + context->active_image_id = SAHARA_IMAGE_ID_NONE; + dev_set_drvdata(&mhi_dev->dev, context); + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, context->rx, SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) { + mhi_unprepare_from_transfer(mhi_dev); + return ret; + } + + return 0; +} + +static void sahara_mhi_remove(struct mhi_device *mhi_dev) +{ + struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); + + cancel_work_sync(&context->fw_work); + cancel_work_sync(&context->dump_work); + vfree(context->mem_dump); + sahara_release_image(context); + mhi_unprepare_from_transfer(mhi_dev); +} + +static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); + + if (!mhi_result->transaction_status && context->read_data_length && is_streaming(context)) + schedule_work(&context->read_data_work); +} + +static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); + + if (!mhi_result->transaction_status) { + context->rx_size = mhi_result->bytes_xferd; + if (context->is_mem_dump_mode) + schedule_work(&context->dump_work); + else + schedule_work(&context->fw_work); + } + +} + +static const struct mhi_device_id sahara_mhi_match_table[] = { + { .chan = "QAIC_SAHARA", }, + {}, +}; + +static struct mhi_driver sahara_mhi_driver = { + .id_table = sahara_mhi_match_table, + .remove = sahara_mhi_remove, + .probe = sahara_mhi_probe, + .ul_xfer_cb = sahara_mhi_ul_xfer_cb, + .dl_xfer_cb = sahara_mhi_dl_xfer_cb, + .driver = { + .name = "sahara", + }, +}; + +int sahara_register(void) +{ + return mhi_driver_register(&sahara_mhi_driver); +} + +void sahara_unregister(void) +{ + mhi_driver_unregister(&sahara_mhi_driver); +} diff 
--git a/drivers/accel/qaic/sahara.h b/drivers/accel/qaic/sahara.h new file mode 100644 index 000000000000..640208acc0d1 --- /dev/null +++ b/drivers/accel/qaic/sahara.h @@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#ifndef __SAHARA_H__
+#define __SAHARA_H__
+
+int sahara_register(void);
+void sahara_unregister(void);
+#endif /* __SAHARA_H__ */
diff --git a/drivers/accel/rocket/Kconfig b/drivers/accel/rocket/Kconfig new file mode 100644 index 000000000000..16465abe0660 --- /dev/null +++ b/drivers/accel/rocket/Kconfig @@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_ROCKET
+	tristate "Rocket (support for Rockchip NPUs)"
+	depends on DRM_ACCEL
+	depends on (ARCH_ROCKCHIP && ARM64) || COMPILE_TEST
+	depends on ROCKCHIP_IOMMU || COMPILE_TEST
+	depends on MMU
+	select DRM_SCHED
+	select DRM_GEM_SHMEM_HELPER
+	help
+	  Choose this option if you have a Rockchip SoC that contains a
+	  compatible Neural Processing Unit (NPU), such as the RK3588. Called
+	  either RKNN or RKNPU by Rockchip, it accelerates inference of neural
+	  networks.
+
+	  The interface exposed to userspace is described in
+	  include/uapi/drm/rocket_accel.h and is used by the Rocket userspace
+	  driver in Mesa3D.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called rocket.
diff --git a/drivers/accel/rocket/Makefile b/drivers/accel/rocket/Makefile new file mode 100644 index 000000000000..3713dfe223d6 --- /dev/null +++ b/drivers/accel/rocket/Makefile @@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_DRM_ACCEL_ROCKET) := rocket.o
+
+rocket-y := \
+	rocket_core.o \
+	rocket_device.o \
+	rocket_drv.o \
+	rocket_gem.o \
+	rocket_job.o
diff --git a/drivers/accel/rocket/rocket_core.c b/drivers/accel/rocket/rocket_core.c new file mode 100644 index 000000000000..abe7719c1db4 --- /dev/null +++ b/drivers/accel/rocket/rocket_core.c @@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include "rocket_core.h"
+#include "rocket_job.h"
+
+int rocket_core_init(struct rocket_core *core)
+{
+	struct device *dev = core->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	u32 version;
+	int err = 0;
+
+	core->resets[0].id = "srst_a";
+	core->resets[1].id = "srst_h";
+	err = devm_reset_control_bulk_get_exclusive(&pdev->dev, ARRAY_SIZE(core->resets),
+						    core->resets);
+	if (err)
+		return dev_err_probe(dev, err, "failed to get resets for core %d\n", core->index);
+
+	err = devm_clk_bulk_get(dev, ARRAY_SIZE(core->clks), core->clks);
+	if (err)
+		return dev_err_probe(dev, err, "failed to get clocks for core %d\n", core->index);
+
+	core->pc_iomem = devm_platform_ioremap_resource_byname(pdev, "pc");
+	if (IS_ERR(core->pc_iomem)) {
+		dev_err(dev, "couldn't find PC registers %ld\n", PTR_ERR(core->pc_iomem));
+		return PTR_ERR(core->pc_iomem);
+	}
+
+	core->cna_iomem = devm_platform_ioremap_resource_byname(pdev, "cna");
+	if (IS_ERR(core->cna_iomem)) {
+		dev_err(dev, "couldn't find CNA registers %ld\n", PTR_ERR(core->cna_iomem));
+		return PTR_ERR(core->cna_iomem);
+	}
+
+	core->core_iomem =
devm_platform_ioremap_resource_byname(pdev, "core"); + if (IS_ERR(core->core_iomem)) { + dev_err(dev, "couldn't find CORE registers %ld\n", PTR_ERR(core->core_iomem)); + return PTR_ERR(core->core_iomem); + } + + dma_set_max_seg_size(dev, UINT_MAX); + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (err) + return err; + + core->iommu_group = iommu_group_get(dev); + + err = rocket_job_init(core); + if (err) + return err; + + pm_runtime_use_autosuspend(dev); + + /* + * As this NPU will be most often used as part of a media pipeline that + * ends presenting in a display, choose 50 ms (~3 frames at 60Hz) as an + * autosuspend delay as that will keep the device powered up while the + * pipeline is running. + */ + pm_runtime_set_autosuspend_delay(dev, 50); + + pm_runtime_enable(dev); + + err = pm_runtime_resume_and_get(dev); + if (err) { + rocket_job_fini(core); + return err; + } + + version = rocket_pc_readl(core, VERSION); + version += rocket_pc_readl(core, VERSION_NUM) & 0xffff; + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + dev_info(dev, "Rockchip NPU core %d version: %d\n", core->index, version); + + return 0; +} + +void rocket_core_fini(struct rocket_core *core) +{ + pm_runtime_dont_use_autosuspend(core->dev); + pm_runtime_disable(core->dev); + iommu_group_put(core->iommu_group); + core->iommu_group = NULL; + rocket_job_fini(core); +} + +void rocket_core_reset(struct rocket_core *core) +{ + reset_control_bulk_assert(ARRAY_SIZE(core->resets), core->resets); + + udelay(10); + + reset_control_bulk_deassert(ARRAY_SIZE(core->resets), core->resets); +} diff --git a/drivers/accel/rocket/rocket_core.h b/drivers/accel/rocket/rocket_core.h new file mode 100644 index 000000000000..f6d7382854ca --- /dev/null +++ b/drivers/accel/rocket/rocket_core.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#ifndef __ROCKET_CORE_H__ +#define __ROCKET_CORE_H__ + +#include <drm/gpu_scheduler.h> +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/mutex_types.h> +#include <linux/reset.h> + +#include "rocket_registers.h" + +#define rocket_pc_readl(core, reg) \ + readl((core)->pc_iomem + (REG_PC_##reg)) +#define rocket_pc_writel(core, reg, value) \ + writel(value, (core)->pc_iomem + (REG_PC_##reg)) + +#define rocket_cna_readl(core, reg) \ + readl((core)->cna_iomem + (REG_CNA_##reg) - REG_CNA_S_STATUS) +#define rocket_cna_writel(core, reg, value) \ + writel(value, (core)->cna_iomem + (REG_CNA_##reg) - REG_CNA_S_STATUS) + +#define rocket_core_readl(core, reg) \ + readl((core)->core_iomem + (REG_CORE_##reg) - REG_CORE_S_STATUS) +#define rocket_core_writel(core, reg, value) \ + writel(value, (core)->core_iomem + (REG_CORE_##reg) - REG_CORE_S_STATUS) + +struct rocket_core { + struct device *dev; + struct rocket_device *rdev; + unsigned int index; + + int irq; + void __iomem *pc_iomem; + void __iomem *cna_iomem; + void __iomem *core_iomem; + struct clk_bulk_data clks[4]; + struct reset_control_bulk_data resets[2]; + + struct iommu_group *iommu_group; + + struct mutex job_lock; + struct rocket_job *in_flight_job; + + spinlock_t fence_lock; + + struct { + struct workqueue_struct *wq; + struct work_struct work; + atomic_t pending; + } reset; + + struct drm_gpu_scheduler sched; + u64 fence_context; + u64 emit_seqno; +}; + +int rocket_core_init(struct rocket_core *core); +void rocket_core_fini(struct rocket_core *core); +void rocket_core_reset(struct rocket_core *core); + +#endif diff --git 
a/drivers/accel/rocket/rocket_device.c b/drivers/accel/rocket/rocket_device.c new file mode 100644 index 000000000000..46e6ee1e72c5 --- /dev/null +++ b/drivers/accel/rocket/rocket_device.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#include <drm/drm_drv.h> +#include <linux/array_size.h> +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/platform_device.h> +#include <linux/of.h> + +#include "rocket_device.h" + +struct rocket_device *rocket_device_init(struct platform_device *pdev, + const struct drm_driver *rocket_drm_driver) +{ + struct device *dev = &pdev->dev; + struct device_node *core_node; + struct rocket_device *rdev; + struct drm_device *ddev; + unsigned int num_cores = 0; + int err; + + rdev = devm_drm_dev_alloc(dev, rocket_drm_driver, struct rocket_device, ddev); + if (IS_ERR(rdev)) + return rdev; + + ddev = &rdev->ddev; + dev_set_drvdata(dev, rdev); + + for_each_compatible_node(core_node, NULL, "rockchip,rk3588-rknn-core") + if (of_device_is_available(core_node)) + num_cores++; + + rdev->cores = devm_kcalloc(dev, num_cores, sizeof(*rdev->cores), GFP_KERNEL); + if (!rdev->cores) + return ERR_PTR(-ENOMEM); + + dma_set_max_seg_size(dev, UINT_MAX); + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (err) + return ERR_PTR(err); + + err = devm_mutex_init(dev, &rdev->sched_lock); + if (err) + return ERR_PTR(-ENOMEM); + + err = drm_dev_register(ddev, 0); + if (err) + return ERR_PTR(err); + + return rdev; +} + +void rocket_device_fini(struct rocket_device *rdev) +{ + WARN_ON(rdev->num_cores > 0); + + drm_dev_unregister(&rdev->ddev); +} diff --git a/drivers/accel/rocket/rocket_device.h b/drivers/accel/rocket/rocket_device.h new file mode 100644 index 000000000000..ce662abc01d3 --- /dev/null +++ b/drivers/accel/rocket/rocket_device.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#ifndef __ROCKET_DEVICE_H__ +#define __ROCKET_DEVICE_H__ + +#include <drm/drm_device.h> +#include <linux/clk.h> +#include <linux/container_of.h> +#include <linux/iommu.h> +#include <linux/platform_device.h> + +#include "rocket_core.h" + +struct rocket_device { + struct drm_device ddev; + + struct mutex sched_lock; + + struct rocket_core *cores; + unsigned int num_cores; +}; + +struct rocket_device *rocket_device_init(struct platform_device *pdev, + const struct drm_driver *rocket_drm_driver); +void rocket_device_fini(struct rocket_device *rdev); +#define to_rocket_device(drm_dev) \ + ((struct rocket_device *)(container_of((drm_dev), struct rocket_device, ddev))) + +#endif /* __ROCKET_DEVICE_H__ */ diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c new file mode 100644 index 000000000000..5c0b63f0a8f0 --- /dev/null +++ b/drivers/accel/rocket/rocket_drv.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> +#include <drm/drm_gem.h> +#include <drm/drm_ioctl.h> +#include <drm/rocket_accel.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/iommu.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include "rocket_drv.h" +#include "rocket_gem.h" +#include "rocket_job.h" + +/* + * Facade device, used to expose a single DRM device to userspace, that + * schedules jobs to any RKNN cores in the system. 
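+ * + * Each RKNN core is a separate platform device. The first core to probe + * allocates this shared device and registers the DRM node; later probes + * just add their core, so userspace always sees a single accel device.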
+ */ +static struct platform_device *drm_dev; +static struct rocket_device *rdev; + +static void +rocket_iommu_domain_destroy(struct kref *kref) +{ + struct rocket_iommu_domain *domain = container_of(kref, struct rocket_iommu_domain, kref); + + iommu_domain_free(domain->domain); + domain->domain = NULL; + kfree(domain); +} + +static struct rocket_iommu_domain* +rocket_iommu_domain_create(struct device *dev) +{ + struct rocket_iommu_domain *domain = kmalloc(sizeof(*domain), GFP_KERNEL); + void *err; + + if (!domain) + return ERR_PTR(-ENOMEM); + + domain->domain = iommu_paging_domain_alloc(dev); + if (IS_ERR(domain->domain)) { + err = ERR_CAST(domain->domain); + kfree(domain); + return err; + } + kref_init(&domain->kref); + + return domain; +} + +struct rocket_iommu_domain * +rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv) +{ + kref_get(&rocket_priv->domain->kref); + return rocket_priv->domain; +} + +void +rocket_iommu_domain_put(struct rocket_iommu_domain *domain) +{ + kref_put(&domain->kref, rocket_iommu_domain_destroy); +} + +static int +rocket_open(struct drm_device *dev, struct drm_file *file) +{ + struct rocket_device *rdev = to_rocket_device(dev); + struct rocket_file_priv *rocket_priv; + u64 start, end; + int ret; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + rocket_priv = kzalloc(sizeof(*rocket_priv), GFP_KERNEL); + if (!rocket_priv) { + ret = -ENOMEM; + goto err_put_mod; + } + + rocket_priv->rdev = rdev; + rocket_priv->domain = rocket_iommu_domain_create(rdev->cores[0].dev); + if (IS_ERR(rocket_priv->domain)) { + ret = PTR_ERR(rocket_priv->domain); + goto err_free; + } + + file->driver_priv = rocket_priv; + + start = rocket_priv->domain->domain->geometry.aperture_start; + end = rocket_priv->domain->domain->geometry.aperture_end; + drm_mm_init(&rocket_priv->mm, start, end - start + 1); + mutex_init(&rocket_priv->mm_lock); + + ret = rocket_job_open(rocket_priv); + if (ret) + goto err_mm_takedown; + + return 0; + +err_mm_takedown: + mutex_destroy(&rocket_priv->mm_lock); + drm_mm_takedown(&rocket_priv->mm); + rocket_iommu_domain_put(rocket_priv->domain); +err_free: + kfree(rocket_priv); +err_put_mod: + module_put(THIS_MODULE); + return ret; +} + +static void +rocket_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct rocket_file_priv *rocket_priv = file->driver_priv; + + rocket_job_close(rocket_priv); + mutex_destroy(&rocket_priv->mm_lock); + drm_mm_takedown(&rocket_priv->mm); + rocket_iommu_domain_put(rocket_priv->domain); + kfree(rocket_priv); + module_put(THIS_MODULE); +} + +static const struct drm_ioctl_desc rocket_drm_driver_ioctls[] = { +#define ROCKET_IOCTL(n, func) \ + DRM_IOCTL_DEF_DRV(ROCKET_##n, rocket_ioctl_##func, 0) + + ROCKET_IOCTL(CREATE_BO, create_bo), + ROCKET_IOCTL(SUBMIT, submit), + ROCKET_IOCTL(PREP_BO, prep_bo), + ROCKET_IOCTL(FINI_BO, fini_bo), +}; + +DEFINE_DRM_ACCEL_FOPS(rocket_accel_driver_fops); + +/* + * Rocket driver version: + * - 1.0 - initial interface + */ +static const struct drm_driver rocket_drm_driver = { + .driver_features = DRIVER_COMPUTE_ACCEL | DRIVER_GEM, + .open = rocket_open, + .postclose = rocket_postclose, + .gem_create_object = rocket_gem_create_object, + .ioctls = rocket_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(rocket_drm_driver_ioctls), + .fops = &rocket_accel_driver_fops, + .name = "rocket", + .desc = "rocket DRM", +}; + +static int rocket_probe(struct platform_device *pdev) +{ + if (rdev == NULL) { + /* First core probing, initialize DRM device. 
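It is + * allocated against the shared "rknn" platform device created in + * rocket_register() rather than against this core's device, so it is only + * torn down once the last core has been removed.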
*/ + rdev = rocket_device_init(drm_dev, &rocket_drm_driver); + if (IS_ERR(rdev)) { + dev_err(&pdev->dev, "failed to initialize rocket device\n"); + return PTR_ERR(rdev); + } + } + + unsigned int core = rdev->num_cores; + + dev_set_drvdata(&pdev->dev, rdev); + + rdev->cores[core].rdev = rdev; + rdev->cores[core].dev = &pdev->dev; + rdev->cores[core].index = core; + + rdev->num_cores++; + + return rocket_core_init(&rdev->cores[core]); +} + +static void rocket_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + for (unsigned int core = 0; core < rdev->num_cores; core++) { + if (rdev->cores[core].dev == dev) { + rocket_core_fini(&rdev->cores[core]); + rdev->num_cores--; + break; + } + } + + if (rdev->num_cores == 0) { + /* Last core removed, deinitialize DRM device. */ + rocket_device_fini(rdev); + rdev = NULL; + } +} + +static const struct of_device_id dt_match[] = { + { .compatible = "rockchip,rk3588-rknn-core" }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static int find_core_for_dev(struct device *dev) +{ + struct rocket_device *rdev = dev_get_drvdata(dev); + + for (unsigned int core = 0; core < rdev->num_cores; core++) { + if (dev == rdev->cores[core].dev) + return core; + } + + return -1; +} + +static int rocket_device_runtime_resume(struct device *dev) +{ + struct rocket_device *rdev = dev_get_drvdata(dev); + int core = find_core_for_dev(dev); + int err = 0; + + if (core < 0) + return -ENODEV; + + err = clk_bulk_prepare_enable(ARRAY_SIZE(rdev->cores[core].clks), rdev->cores[core].clks); + if (err) { + dev_err(dev, "failed to enable (%d) clocks for core %d\n", err, core); + return err; + } + + return 0; +} + +static int rocket_device_runtime_suspend(struct device *dev) +{ + struct rocket_device *rdev = dev_get_drvdata(dev); + int core = find_core_for_dev(dev); + + if (core < 0) + return -ENODEV; + + if (!rocket_job_is_idle(&rdev->cores[core])) + return -EBUSY; + + clk_bulk_disable_unprepare(ARRAY_SIZE(rdev->cores[core].clks), rdev->cores[core].clks); + + return 0; +} + +EXPORT_GPL_DEV_PM_OPS(rocket_pm_ops) = { + RUNTIME_PM_OPS(rocket_device_runtime_suspend, rocket_device_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) +}; + +static struct platform_driver rocket_driver = { + .probe = rocket_probe, + .remove = rocket_remove, + .driver = { + .name = "rocket", + .pm = pm_ptr(&rocket_pm_ops), + .of_match_table = dt_match, + }, +}; + +static int __init rocket_register(void) +{ + drm_dev = platform_device_register_simple("rknn", -1, NULL, 0); + if (IS_ERR(drm_dev)) + return PTR_ERR(drm_dev); + + return platform_driver_register(&rocket_driver); +} + +static void __exit rocket_unregister(void) +{ + platform_driver_unregister(&rocket_driver); + + platform_device_unregister(drm_dev); +} + +module_init(rocket_register); +module_exit(rocket_unregister); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DRM driver for the Rockchip NPU IP"); +MODULE_AUTHOR("Tomeu Vizoso"); diff --git a/drivers/accel/rocket/rocket_drv.h b/drivers/accel/rocket/rocket_drv.h new file mode 100644 index 000000000000..2c673bb99ccc --- /dev/null +++ b/drivers/accel/rocket/rocket_drv.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#ifndef __ROCKET_DRV_H__ +#define __ROCKET_DRV_H__ + +#include <drm/drm_mm.h> +#include <drm/gpu_scheduler.h> + +#include "rocket_device.h" + +extern const struct dev_pm_ops rocket_pm_ops; + +struct rocket_iommu_domain { + struct iommu_domain 
*domain; + struct kref kref; +}; + +struct rocket_file_priv { + struct rocket_device *rdev; + + struct rocket_iommu_domain *domain; + struct drm_mm mm; + struct mutex mm_lock; + + struct drm_sched_entity sched_entity; +}; + +struct rocket_iommu_domain *rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv); +void rocket_iommu_domain_put(struct rocket_iommu_domain *domain); + +#endif diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c new file mode 100644 index 000000000000..624c4ecf5a34 --- /dev/null +++ b/drivers/accel/rocket/rocket_gem.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <drm/drm_utils.h> +#include <drm/rocket_accel.h> +#include <linux/dma-mapping.h> +#include <linux/iommu.h> + +#include "rocket_drv.h" +#include "rocket_gem.h" + +static void rocket_gem_bo_free(struct drm_gem_object *obj) +{ + struct rocket_gem_object *bo = to_rocket_bo(obj); + struct rocket_file_priv *rocket_priv = bo->driver_priv; + size_t unmapped; + + drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1); + + unmapped = iommu_unmap(bo->domain->domain, bo->mm.start, bo->size); + drm_WARN_ON(obj->dev, unmapped != bo->size); + + mutex_lock(&rocket_priv->mm_lock); + drm_mm_remove_node(&bo->mm); + mutex_unlock(&rocket_priv->mm_lock); + + rocket_iommu_domain_put(bo->domain); + bo->domain = NULL; + + drm_gem_shmem_free(&bo->base); +} + +static const struct drm_gem_object_funcs rocket_gem_funcs = { + .free = rocket_gem_bo_free, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = drm_gem_shmem_object_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, +}; + +struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t size) +{ + struct rocket_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + obj->base.base.funcs = &rocket_gem_funcs; + + return &obj->base.base; +} + +int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct rocket_file_priv *rocket_priv = file->driver_priv; + struct drm_rocket_create_bo *args = data; + struct drm_gem_shmem_object *shmem_obj; + struct rocket_gem_object *rkt_obj; + struct drm_gem_object *gem_obj; + struct sg_table *sgt; + int ret; + + shmem_obj = drm_gem_shmem_create(dev, args->size); + if (IS_ERR(shmem_obj)) + return PTR_ERR(shmem_obj); + + gem_obj = &shmem_obj->base; + rkt_obj = to_rocket_bo(gem_obj); + + rkt_obj->driver_priv = rocket_priv; + rkt_obj->domain = rocket_iommu_domain_get(rocket_priv); + rkt_obj->size = args->size; + rkt_obj->offset = 0; + + ret = drm_gem_handle_create(file, gem_obj, &args->handle); + drm_gem_object_put(gem_obj); + if (ret) + goto err; + + sgt = drm_gem_shmem_get_pages_sgt(shmem_obj); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto err; + } + + mutex_lock(&rocket_priv->mm_lock); + ret = drm_mm_insert_node_generic(&rocket_priv->mm, &rkt_obj->mm, + rkt_obj->size, PAGE_SIZE, + 0, 0); + mutex_unlock(&rocket_priv->mm_lock); + if (ret) + goto err; + + ret = iommu_map_sgtable(rocket_priv->domain->domain, + rkt_obj->mm.start, + shmem_obj->sgt, + IOMMU_READ | IOMMU_WRITE); + if (ret < 0 || ret < args->size) { + drm_err(dev, "failed to map buffer: size=%d request_size=%u\n", + ret,
args->size); + ret = -ENOMEM; + goto err_remove_node; + } + + /* iommu_map_sgtable might have aligned the size */ + rkt_obj->size = ret; + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + args->dma_address = rkt_obj->mm.start; + + return 0; + +err_remove_node: + mutex_lock(&rocket_priv->mm_lock); + drm_mm_remove_node(&rkt_obj->mm); + mutex_unlock(&rocket_priv->mm_lock); + +err: + drm_gem_shmem_object_free(gem_obj); + + return ret; +} + +int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_rocket_prep_bo *args = data; + unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + struct drm_gem_object *gem_obj; + struct drm_gem_shmem_object *shmem_obj; + long ret = 0; + + if (args->reserved != 0) { + drm_dbg(dev, "Reserved field in drm_rocket_prep_bo struct should be 0.\n"); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file, args->handle); + if (!gem_obj) + return -ENOENT; + + ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_WRITE, true, timeout); + if (!ret) + ret = timeout ? -ETIMEDOUT : -EBUSY; + + shmem_obj = &to_rocket_bo(gem_obj)->base; + + dma_sync_sgtable_for_cpu(dev->dev, shmem_obj->sgt, DMA_BIDIRECTIONAL); + + drm_gem_object_put(gem_obj); + + return ret; +} + +int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_rocket_fini_bo *args = data; + struct drm_gem_shmem_object *shmem_obj; + struct rocket_gem_object *rkt_obj; + struct drm_gem_object *gem_obj; + + if (args->reserved != 0) { + drm_dbg(dev, "Reserved field in drm_rocket_fini_bo struct should be 0.\n"); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file, args->handle); + if (!gem_obj) + return -ENOENT; + + rkt_obj = to_rocket_bo(gem_obj); + shmem_obj = &rkt_obj->base; + + dma_sync_sgtable_for_device(dev->dev, shmem_obj->sgt, DMA_BIDIRECTIONAL); + + drm_gem_object_put(gem_obj); + + return 0; +} diff --git a/drivers/accel/rocket/rocket_gem.h b/drivers/accel/rocket/rocket_gem.h new file mode 100644 index 000000000000..240430334509 --- /dev/null +++ b/drivers/accel/rocket/rocket_gem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#ifndef __ROCKET_GEM_H__ +#define __ROCKET_GEM_H__ + +#include <drm/drm_gem_shmem_helper.h> + +struct rocket_gem_object { + struct drm_gem_shmem_object base; + + struct rocket_file_priv *driver_priv; + + struct rocket_iommu_domain *domain; + struct drm_mm_node mm; + size_t size; + u32 offset; +}; + +struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t size); + +int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file); + +int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file); + +int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file); + +static inline +struct rocket_gem_object *to_rocket_bo(struct drm_gem_object *obj) +{ + return container_of(to_drm_gem_shmem_obj(obj), struct rocket_gem_object, base); +} + +#endif diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c new file mode 100644 index 000000000000..acd606160dc9 --- /dev/null +++ b/drivers/accel/rocket/rocket_job.c @@ -0,0 +1,637 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2019 Collabora ltd. 
*/ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#include <drm/drm_print.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/rocket_accel.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include "rocket_core.h" +#include "rocket_device.h" +#include "rocket_drv.h" +#include "rocket_job.h" +#include "rocket_registers.h" + +#define JOB_TIMEOUT_MS 500 + +static struct rocket_job * +to_rocket_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct rocket_job, base); +} + +static const char *rocket_fence_get_driver_name(struct dma_fence *fence) +{ + return "rocket"; +} + +static const char *rocket_fence_get_timeline_name(struct dma_fence *fence) +{ + return "rockchip-npu"; +} + +static const struct dma_fence_ops rocket_fence_ops = { + .get_driver_name = rocket_fence_get_driver_name, + .get_timeline_name = rocket_fence_get_timeline_name, +}; + +static struct dma_fence *rocket_fence_create(struct rocket_core *core) +{ + struct dma_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock, + core->fence_context, ++core->emit_seqno); + + return fence; +} + +static int +rocket_copy_tasks(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_rocket_job *job, + struct rocket_job *rjob) +{ + int ret = 0; + + if (job->task_struct_size < sizeof(struct drm_rocket_task)) + return -EINVAL; + + rjob->task_count = job->task_count; + + if (!rjob->task_count) + return 0; + + rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL); + if (!rjob->tasks) { + drm_dbg(dev, "Failed to allocate task array\n"); + return -ENOMEM; + } + + for (int i = 0; i < rjob->task_count; i++) { + struct drm_rocket_task task = {0}; + + if (copy_from_user(&task, + u64_to_user_ptr(job->tasks) + i * job->task_struct_size, + sizeof(task))) { + drm_dbg(dev, "Failed to copy incoming tasks\n"); + ret = -EFAULT; + goto fail; + } + + if (task.regcmd_count == 0) { + drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n"); + ret = -EINVAL; + goto fail; + } + + rjob->tasks[i].regcmd = task.regcmd; + rjob->tasks[i].regcmd_count = task.regcmd_count; + } + + return 0; + +fail: + kvfree(rjob->tasks); + return ret; +} + +static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job) +{ + struct rocket_task *task; + unsigned int extra_bit; + + /* Don't queue the job if a reset is in progress */ + if (atomic_read(&core->reset.pending)) + return; + + /* GO ! 
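-- program one task: point the PC unit at the task's register + * command buffer, unmask the DPU interrupts that signal its completion, + * then enable operation to start fetching.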
*/ + + task = &job->tasks[job->next_task_idx]; + job->next_task_idx++; + + rocket_pc_writel(core, BASE_ADDRESS, 0x1); + + /* From rknpu, in the TRM this bit is marked as reserved */ + extra_bit = 0x10000000 * core->index; + rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) | + CNA_S_POINTER_EXECUTER_PP_EN(1) | + CNA_S_POINTER_POINTER_PP_MODE(1) | + extra_bit); + + rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) | + CORE_S_POINTER_EXECUTER_PP_EN(1) | + CORE_S_POINTER_POINTER_PP_MODE(1) | + extra_bit); + + rocket_pc_writel(core, BASE_ADDRESS, task->regcmd); + rocket_pc_writel(core, REGISTER_AMOUNTS, + PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1)); + + rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1); + rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1); + + rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) | + PC_TASK_CON_TASK_COUNT_CLEAR(1) | + PC_TASK_CON_TASK_NUMBER(1) | + PC_TASK_CON_TASK_PP_EN(1)); + + rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0)); + + rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1)); + + dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d\n", task->regcmd, core->index); +} + +static int rocket_acquire_object_fences(struct drm_gem_object **bos, + int bo_count, + struct drm_sched_job *job, + bool is_write) +{ + int i, ret; + + for (i = 0; i < bo_count; i++) { + ret = dma_resv_reserve_fences(bos[i]->resv, 1); + if (ret) + return ret; + + ret = drm_sched_job_add_implicit_dependencies(job, bos[i], + is_write); + if (ret) + return ret; + } + + return 0; +} + +static void rocket_attach_object_fences(struct drm_gem_object **bos, + int bo_count, + struct dma_fence *fence) +{ + int i; + + for (i = 0; i < bo_count; i++) + dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); +} + +static int rocket_job_push(struct rocket_job *job) +{ + struct rocket_device *rdev = job->rdev; + struct drm_gem_object **bos; + struct ww_acquire_ctx acquire_ctx; + int ret = 0; + + bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *), + GFP_KERNEL); + if (!bos) + return -ENOMEM; + + memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *)); + memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *)); + + ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx); + if (ret) + goto err; + + scoped_guard(mutex, &rdev->sched_lock) { + drm_sched_job_arm(&job->base); + + job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished); + + ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false); + if (ret) + goto err_unlock; + + ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true); + if (ret) + goto err_unlock; + + kref_get(&job->refcount); /* put by scheduler job completion */ + + drm_sched_entity_push_job(&job->base); + } + + rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence); + +err_unlock: + drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx); +err: + kvfree(bos); + + return ret; +} + +static void rocket_job_cleanup(struct kref *ref) +{ + struct rocket_job *job = container_of(ref, struct rocket_job, + refcount); + unsigned int i; + + rocket_iommu_domain_put(job->domain); + + dma_fence_put(job->done_fence); + dma_fence_put(job->inference_done_fence); + + if (job->in_bos) { + for (i = 0; i <
job->in_bo_count; i++) + drm_gem_object_put(job->in_bos[i]); + + kvfree(job->in_bos); + } + + if (job->out_bos) { + for (i = 0; i < job->out_bo_count; i++) + drm_gem_object_put(job->out_bos[i]); + + kvfree(job->out_bos); + } + + kvfree(job->tasks); + + kfree(job); +} + +static void rocket_job_put(struct rocket_job *job) +{ + kref_put(&job->refcount, rocket_job_cleanup); +} + +static void rocket_job_free(struct drm_sched_job *sched_job) +{ + struct rocket_job *job = to_rocket_job(sched_job); + + drm_sched_job_cleanup(sched_job); + + rocket_job_put(job); +} + +static struct rocket_core *sched_to_core(struct rocket_device *rdev, + struct drm_gpu_scheduler *sched) +{ + unsigned int core; + + for (core = 0; core < rdev->num_cores; core++) { + if (&rdev->cores[core].sched == sched) + return &rdev->cores[core]; + } + + return NULL; +} + +static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job) +{ + struct rocket_job *job = to_rocket_job(sched_job); + struct rocket_device *rdev = job->rdev; + struct rocket_core *core = sched_to_core(rdev, sched_job->sched); + struct dma_fence *fence = NULL; + int ret; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + /* + * Nothing to execute: can happen if the job has finished while + * we were resetting the NPU. + */ + if (job->next_task_idx == job->task_count) + return NULL; + + fence = rocket_fence_create(core); + if (IS_ERR(fence)) + return fence; + + if (job->done_fence) + dma_fence_put(job->done_fence); + job->done_fence = dma_fence_get(fence); + + ret = pm_runtime_get_sync(core->dev); + if (ret < 0) + return fence; + + ret = iommu_attach_group(job->domain->domain, core->iommu_group); + if (ret < 0) + return fence; + + scoped_guard(mutex, &core->job_lock) { + core->in_flight_job = job; + rocket_job_hw_submit(core, job); + } + + return fence; +} + +static void rocket_job_handle_irq(struct rocket_core *core) +{ + pm_runtime_mark_last_busy(core->dev); + + rocket_pc_writel(core, OPERATION_ENABLE, 0x0); + rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff); + + scoped_guard(mutex, &core->job_lock) + if (core->in_flight_job) { + if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) { + rocket_job_hw_submit(core, core->in_flight_job); + return; + } + + iommu_detach_group(NULL, iommu_group_get(core->dev)); + dma_fence_signal(core->in_flight_job->done_fence); + pm_runtime_put_autosuspend(core->dev); + core->in_flight_job = NULL; + } +} + +static void +rocket_reset(struct rocket_core *core, struct drm_sched_job *bad) +{ + if (!atomic_read(&core->reset.pending)) + return; + + drm_sched_stop(&core->sched, bad); + + /* + * Remaining interrupts have been handled, but we might still have + * stuck jobs. Let's make sure the PM counters stay balanced by + * manually calling pm_runtime_put_noidle(). + */ + scoped_guard(mutex, &core->job_lock) { + if (core->in_flight_job) + pm_runtime_put_noidle(core->dev); + + iommu_detach_group(NULL, core->iommu_group); + + core->in_flight_job = NULL; + } + + /* Proceed with reset now. */ + rocket_core_reset(core); + + /* NPU has been reset, we can clear the reset pending bit. 
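Clear it + * before restarting the scheduler, since rocket_job_hw_submit() refuses + * to queue new work while it is set.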
*/ + atomic_set(&core->reset.pending, 0); + + /* Restart the scheduler */ + drm_sched_start(&core->sched, 0); +} + +static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job) +{ + struct rocket_job *job = to_rocket_job(sched_job); + struct rocket_device *rdev = job->rdev; + struct rocket_core *core = sched_to_core(rdev, sched_job->sched); + + dev_err(core->dev, "NPU job timed out\n"); + + atomic_set(&core->reset.pending, 1); + rocket_reset(core, sched_job); + + return DRM_GPU_SCHED_STAT_RESET; +} + +static void rocket_reset_work(struct work_struct *work) +{ + struct rocket_core *core; + + core = container_of(work, struct rocket_core, reset.work); + rocket_reset(core, NULL); +} + +static const struct drm_sched_backend_ops rocket_sched_ops = { + .run_job = rocket_job_run, + .timedout_job = rocket_job_timedout, + .free_job = rocket_job_free +}; + +static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data) +{ + struct rocket_core *core = data; + + rocket_job_handle_irq(core); + + return IRQ_HANDLED; +} + +static irqreturn_t rocket_job_irq_handler(int irq, void *data) +{ + struct rocket_core *core = data; + u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS); + + WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR); + WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR); + + if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 || + raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1)) + return IRQ_NONE; + + rocket_pc_writel(core, INTERRUPT_MASK, 0x0); + + return IRQ_WAKE_THREAD; +} + +int rocket_job_init(struct rocket_core *core) +{ + struct drm_sched_init_args args = { + .ops = &rocket_sched_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .credit_limit = 1, + .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), + .name = dev_name(core->dev), + .dev = core->dev, + }; + int ret; + + INIT_WORK(&core->reset.work, rocket_reset_work); + spin_lock_init(&core->fence_lock); + mutex_init(&core->job_lock); + + core->irq = platform_get_irq(to_platform_device(core->dev), 0); + if (core->irq < 0) + return core->irq; + + ret = devm_request_threaded_irq(core->dev, core->irq, + rocket_job_irq_handler, + rocket_job_irq_handler_thread, + IRQF_SHARED, dev_name(core->dev), + core); + if (ret) { + dev_err(core->dev, "failed to request job irq\n"); + return ret; + } + + core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index); + if (!core->reset.wq) + return -ENOMEM; + + core->fence_context = dma_fence_context_alloc(1); + + args.timeout_wq = core->reset.wq; + ret = drm_sched_init(&core->sched, &args); + if (ret) { + dev_err(core->dev, "Failed to create scheduler: %d\n", ret); + goto err_sched; + } + + return 0; + +err_sched: + destroy_workqueue(core->reset.wq); + return ret; +} + +void rocket_job_fini(struct rocket_core *core) +{ + drm_sched_fini(&core->sched); + + cancel_work_sync(&core->reset.work); + destroy_workqueue(core->reset.wq); +} + +int rocket_job_open(struct rocket_file_priv *rocket_priv) +{ + struct rocket_device *rdev = rocket_priv->rdev; + struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores, + sizeof(*scheds), + GFP_KERNEL); + unsigned int core; + int ret; + + if (!scheds) + return -ENOMEM; + + for (core = 0; core < rdev->num_cores; core++) + scheds[core] = &rdev->cores[core].sched; + + ret = drm_sched_entity_init(&rocket_priv->sched_entity, + DRM_SCHED_PRIORITY_NORMAL, + scheds, + rdev->num_cores, NULL); + if (WARN_ON(ret)) + return ret; + + return 0; +} + +void rocket_job_close(struct rocket_file_priv *rocket_priv) +{ + struct drm_sched_entity *entity = &rocket_priv->sched_entity; + + kfree(entity->sched_list); + drm_sched_entity_destroy(entity); +} + +int rocket_job_is_idle(struct rocket_core *core) +{ + /* If there are any jobs in this HW queue, we're not idle */ + if (atomic_read(&core->sched.credit_count)) + return false; + + return true; +} + +static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file, + struct drm_rocket_job *job) +{ + struct rocket_device *rdev = to_rocket_device(dev); + struct rocket_file_priv *file_priv = file->driver_priv; + struct rocket_job *rjob = NULL; + int ret = 0; + + if (job->task_count == 0) + return -EINVAL; + + rjob = kzalloc(sizeof(*rjob), GFP_KERNEL); + if (!rjob) + return -ENOMEM; + + kref_init(&rjob->refcount); + + rjob->rdev = rdev; + + ret = drm_sched_job_init(&rjob->base, + &file_priv->sched_entity, + 1, NULL, file->client_id); + if (ret) + goto out_put_job; + + ret = rocket_copy_tasks(dev, file, job, rjob); + if (ret) + goto out_cleanup_job; + + ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles), + job->in_bo_handle_count, &rjob->in_bos); + if (ret) + goto out_cleanup_job; + + rjob->in_bo_count = job->in_bo_handle_count; + + ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles), + job->out_bo_handle_count, &rjob->out_bos); + if (ret) + goto out_cleanup_job; + + rjob->out_bo_count = job->out_bo_handle_count; + + rjob->domain = rocket_iommu_domain_get(file_priv); + + ret = rocket_job_push(rjob); + if (ret) + goto out_cleanup_job; + +out_cleanup_job: + if (ret) + drm_sched_job_cleanup(&rjob->base); +out_put_job: + rocket_job_put(rjob); + + return ret; +} + +int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_rocket_submit *args = data; + struct drm_rocket_job *jobs; + int ret = 0; + unsigned int i = 0; + + if (args->job_count == 0) + return 0; + + if (args->job_struct_size < sizeof(struct drm_rocket_job)) { + drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n"); + return -EINVAL; + } + + if (args->reserved != 0) { + drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n"); + return -EINVAL; + } + + jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); + if (!jobs) { + drm_dbg(dev, "Failed to allocate incoming job array\n"); + return -ENOMEM; + } + + for (i = 0; i < args->job_count; i++) { + if (copy_from_user(&jobs[i], + u64_to_user_ptr(args->jobs) + i * args->job_struct_size, + sizeof(*jobs))) { + ret = -EFAULT; + drm_dbg(dev, "Failed to copy incoming job array\n"); + goto exit; + } + } + + for (i = 0; i < args->job_count; i++) { + ret = rocket_ioctl_submit_job(dev, file, &jobs[i]); + if (ret) + goto exit; + } + +exit: + kvfree(jobs); + + return ret; +} diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h new file mode 100644 index 000000000000..4ae00feec3b9 --- /dev/null +++ b/drivers/accel/rocket/rocket_job.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ + +#ifndef __ROCKET_JOB_H__ +#define __ROCKET_JOB_H__ + +#include <drm/drm_drv.h> +#include <drm/gpu_scheduler.h> + +#include "rocket_core.h" +#include "rocket_drv.h" + +struct rocket_task { + u64 regcmd; + u32 regcmd_count; +}; + +struct rocket_job { + struct drm_sched_job base; + + struct rocket_device *rdev; + + struct drm_gem_object **in_bos; + struct drm_gem_object **out_bos; + + u32 in_bo_count; + u32 out_bo_count; + + struct rocket_task *tasks; + u32 task_count; + u32 next_task_idx; + + /* Fence to be signaled by drm-sched once it's done with the job */ + struct dma_fence *inference_done_fence; + + /* Fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + struct rocket_iommu_domain *domain; + + struct kref refcount; +}; + +int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file); + +int rocket_job_init(struct rocket_core *core); +void rocket_job_fini(struct rocket_core *core); +int rocket_job_open(struct rocket_file_priv *rocket_priv); +void rocket_job_close(struct rocket_file_priv *rocket_priv); +int rocket_job_is_idle(struct rocket_core *core); + +#endif diff --git a/drivers/accel/rocket/rocket_registers.h b/drivers/accel/rocket/rocket_registers.h new file mode 100644 index 000000000000..9aef614c3470 --- /dev/null +++ b/drivers/accel/rocket/rocket_registers.h @@ -0,0 +1,4404 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ + +#ifndef __ROCKET_REGISTERS_XML__ +#define __ROCKET_REGISTERS_XML__ + +/* Autogenerated file, DO NOT EDIT manually! +This file was generated by the rules-ng-ng gen_header.py tool in this git repository: +http://gitlab.freedesktop.org/mesa/mesa/ +git clone https://gitlab.freedesktop.org/mesa/mesa.git + +The rules-ng-ng source files this header was generated from are: + +- /home/tomeu/src/mesa/src/gallium/drivers/rocket/registers.xml ( 60076 bytes, from Wed Jun 12 10:02:25 2024) + +Copyright (C) 2024-2025 by the following authors: +- Tomeu Vizoso <tomeu@tomeuvizoso.net> +*/ + +#define REG_PC_VERSION 0x00000000 +#define PC_VERSION_VERSION__MASK 0xffffffff +#define PC_VERSION_VERSION__SHIFT 0 +static inline uint32_t PC_VERSION_VERSION(uint32_t val) +{ + return ((val) << PC_VERSION_VERSION__SHIFT) & PC_VERSION_VERSION__MASK; +} + +#define REG_PC_VERSION_NUM 0x00000004 +#define PC_VERSION_NUM_VERSION_NUM__MASK 0xffffffff +#define PC_VERSION_NUM_VERSION_NUM__SHIFT 0 +static inline uint32_t PC_VERSION_NUM_VERSION_NUM(uint32_t val) +{ + return ((val) << PC_VERSION_NUM_VERSION_NUM__SHIFT) & PC_VERSION_NUM_VERSION_NUM__MASK; +} + +#define REG_PC_OPERATION_ENABLE 0x00000008 +#define PC_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define PC_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t PC_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << PC_OPERATION_ENABLE_RESERVED_0__SHIFT) & PC_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define PC_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define PC_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t PC_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << PC_OPERATION_ENABLE_OP_EN__SHIFT) & PC_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_PC_BASE_ADDRESS 0x00000010 +#define PC_BASE_ADDRESS_PC_SOURCE_ADDR__MASK 0xfffffff0 +#define PC_BASE_ADDRESS_PC_SOURCE_ADDR__SHIFT 4 +static inline uint32_t PC_BASE_ADDRESS_PC_SOURCE_ADDR(uint32_t val) +{ + return ((val) << PC_BASE_ADDRESS_PC_SOURCE_ADDR__SHIFT) & PC_BASE_ADDRESS_PC_SOURCE_ADDR__MASK; +} +#define PC_BASE_ADDRESS_RESERVED_0__MASK 0x0000000e +#define PC_BASE_ADDRESS_RESERVED_0__SHIFT 1 +static inline uint32_t PC_BASE_ADDRESS_RESERVED_0(uint32_t val) +{ + return ((val) << PC_BASE_ADDRESS_RESERVED_0__SHIFT) & PC_BASE_ADDRESS_RESERVED_0__MASK; +} +#define PC_BASE_ADDRESS_PC_SEL__MASK 0x00000001 +#define PC_BASE_ADDRESS_PC_SEL__SHIFT 0 +static inline uint32_t PC_BASE_ADDRESS_PC_SEL(uint32_t val) +{ + return ((val) << PC_BASE_ADDRESS_PC_SEL__SHIFT) & PC_BASE_ADDRESS_PC_SEL__MASK; +} + +#define REG_PC_REGISTER_AMOUNTS 0x00000014 +#define
PC_REGISTER_AMOUNTS_RESERVED_0__MASK 0xffff0000 +#define PC_REGISTER_AMOUNTS_RESERVED_0__SHIFT 16 +static inline uint32_t PC_REGISTER_AMOUNTS_RESERVED_0(uint32_t val) +{ + return ((val) << PC_REGISTER_AMOUNTS_RESERVED_0__SHIFT) & PC_REGISTER_AMOUNTS_RESERVED_0__MASK; +} +#define PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__MASK 0x0000ffff +#define PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__SHIFT 0 +static inline uint32_t PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT(uint32_t val) +{ + return ((val) << PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__SHIFT) & PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT__MASK; +} + +#define REG_PC_INTERRUPT_MASK 0x00000020 +#define PC_INTERRUPT_MASK_RESERVED_0__MASK 0xffffc000 +#define PC_INTERRUPT_MASK_RESERVED_0__SHIFT 14 +static inline uint32_t PC_INTERRUPT_MASK_RESERVED_0(uint32_t val) +{ + return ((val) << PC_INTERRUPT_MASK_RESERVED_0__SHIFT) & PC_INTERRUPT_MASK_RESERVED_0__MASK; +} +#define PC_INTERRUPT_MASK_DMA_WRITE_ERROR 0x00002000 +#define PC_INTERRUPT_MASK_DMA_READ_ERROR 0x00001000 +#define PC_INTERRUPT_MASK_PPU_1 0x00000800 +#define PC_INTERRUPT_MASK_PPU_0 0x00000400 +#define PC_INTERRUPT_MASK_DPU_1 0x00000200 +#define PC_INTERRUPT_MASK_DPU_0 0x00000100 +#define PC_INTERRUPT_MASK_CORE_1 0x00000080 +#define PC_INTERRUPT_MASK_CORE_0 0x00000040 +#define PC_INTERRUPT_MASK_CNA_CSC_1 0x00000020 +#define PC_INTERRUPT_MASK_CNA_CSC_0 0x00000010 +#define PC_INTERRUPT_MASK_CNA_WEIGHT_1 0x00000008 +#define PC_INTERRUPT_MASK_CNA_WEIGHT_0 0x00000004 +#define PC_INTERRUPT_MASK_CNA_FEATURE_1 0x00000002 +#define PC_INTERRUPT_MASK_CNA_FEATURE_0 0x00000001 + +#define REG_PC_INTERRUPT_CLEAR 0x00000024 +#define PC_INTERRUPT_CLEAR_RESERVED_0__MASK 0xffffc000 +#define PC_INTERRUPT_CLEAR_RESERVED_0__SHIFT 14 +static inline uint32_t PC_INTERRUPT_CLEAR_RESERVED_0(uint32_t val) +{ + return ((val) << PC_INTERRUPT_CLEAR_RESERVED_0__SHIFT) & PC_INTERRUPT_CLEAR_RESERVED_0__MASK; +} +#define PC_INTERRUPT_CLEAR_DMA_WRITE_ERROR 0x00002000 +#define PC_INTERRUPT_CLEAR_DMA_READ_ERROR 0x00001000 +#define PC_INTERRUPT_CLEAR_PPU_1 0x00000800 +#define PC_INTERRUPT_CLEAR_PPU_0 0x00000400 +#define PC_INTERRUPT_CLEAR_DPU_1 0x00000200 +#define PC_INTERRUPT_CLEAR_DPU_0 0x00000100 +#define PC_INTERRUPT_CLEAR_CORE_1 0x00000080 +#define PC_INTERRUPT_CLEAR_CORE_0 0x00000040 +#define PC_INTERRUPT_CLEAR_CNA_CSC_1 0x00000020 +#define PC_INTERRUPT_CLEAR_CNA_CSC_0 0x00000010 +#define PC_INTERRUPT_CLEAR_CNA_WEIGHT_1 0x00000008 +#define PC_INTERRUPT_CLEAR_CNA_WEIGHT_0 0x00000004 +#define PC_INTERRUPT_CLEAR_CNA_FEATURE_1 0x00000002 +#define PC_INTERRUPT_CLEAR_CNA_FEATURE_0 0x00000001 + +#define REG_PC_INTERRUPT_STATUS 0x00000028 +#define PC_INTERRUPT_STATUS_RESERVED_0__MASK 0xffffc000 +#define PC_INTERRUPT_STATUS_RESERVED_0__SHIFT 14 +static inline uint32_t PC_INTERRUPT_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << PC_INTERRUPT_STATUS_RESERVED_0__SHIFT) & PC_INTERRUPT_STATUS_RESERVED_0__MASK; +} +#define PC_INTERRUPT_STATUS_DMA_WRITE_ERROR 0x00002000 +#define PC_INTERRUPT_STATUS_DMA_READ_ERROR 0x00001000 +#define PC_INTERRUPT_STATUS_PPU_1 0x00000800 +#define PC_INTERRUPT_STATUS_PPU_0 0x00000400 +#define PC_INTERRUPT_STATUS_DPU_1 0x00000200 +#define PC_INTERRUPT_STATUS_DPU_0 0x00000100 +#define PC_INTERRUPT_STATUS_CORE_1 0x00000080 +#define PC_INTERRUPT_STATUS_CORE_0 0x00000040 +#define PC_INTERRUPT_STATUS_CNA_CSC_1 0x00000020 +#define PC_INTERRUPT_STATUS_CNA_CSC_0 0x00000010 +#define PC_INTERRUPT_STATUS_CNA_WEIGHT_1 0x00000008 +#define PC_INTERRUPT_STATUS_CNA_WEIGHT_0 0x00000004 +#define PC_INTERRUPT_STATUS_CNA_FEATURE_1 0x00000002 +#define 
PC_INTERRUPT_STATUS_CNA_FEATURE_0 0x00000001 + +#define REG_PC_INTERRUPT_RAW_STATUS 0x0000002c +#define PC_INTERRUPT_RAW_STATUS_RESERVED_0__MASK 0xffffc000 +#define PC_INTERRUPT_RAW_STATUS_RESERVED_0__SHIFT 14 +static inline uint32_t PC_INTERRUPT_RAW_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << PC_INTERRUPT_RAW_STATUS_RESERVED_0__SHIFT) & PC_INTERRUPT_RAW_STATUS_RESERVED_0__MASK; +} +#define PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR 0x00002000 +#define PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR 0x00001000 +#define PC_INTERRUPT_RAW_STATUS_PPU_1 0x00000800 +#define PC_INTERRUPT_RAW_STATUS_PPU_0 0x00000400 +#define PC_INTERRUPT_RAW_STATUS_DPU_1 0x00000200 +#define PC_INTERRUPT_RAW_STATUS_DPU_0 0x00000100 +#define PC_INTERRUPT_RAW_STATUS_CORE_1 0x00000080 +#define PC_INTERRUPT_RAW_STATUS_CORE_0 0x00000040 +#define PC_INTERRUPT_RAW_STATUS_CNA_CSC_1 0x00000020 +#define PC_INTERRUPT_RAW_STATUS_CNA_CSC_0 0x00000010 +#define PC_INTERRUPT_RAW_STATUS_CNA_WEIGHT_1 0x00000008 +#define PC_INTERRUPT_RAW_STATUS_CNA_WEIGHT_0 0x00000004 +#define PC_INTERRUPT_RAW_STATUS_CNA_FEATURE_1 0x00000002 +#define PC_INTERRUPT_RAW_STATUS_CNA_FEATURE_0 0x00000001 + +#define REG_PC_TASK_CON 0x00000030 +#define PC_TASK_CON_RESERVED_0__MASK 0xffffc000 +#define PC_TASK_CON_RESERVED_0__SHIFT 14 +static inline uint32_t PC_TASK_CON_RESERVED_0(uint32_t val) +{ + return ((val) << PC_TASK_CON_RESERVED_0__SHIFT) & PC_TASK_CON_RESERVED_0__MASK; +} +#define PC_TASK_CON_TASK_COUNT_CLEAR__MASK 0x00002000 +#define PC_TASK_CON_TASK_COUNT_CLEAR__SHIFT 13 +static inline uint32_t PC_TASK_CON_TASK_COUNT_CLEAR(uint32_t val) +{ + return ((val) << PC_TASK_CON_TASK_COUNT_CLEAR__SHIFT) & PC_TASK_CON_TASK_COUNT_CLEAR__MASK; +} +#define PC_TASK_CON_TASK_PP_EN__MASK 0x00001000 +#define PC_TASK_CON_TASK_PP_EN__SHIFT 12 +static inline uint32_t PC_TASK_CON_TASK_PP_EN(uint32_t val) +{ + return ((val) << PC_TASK_CON_TASK_PP_EN__SHIFT) & PC_TASK_CON_TASK_PP_EN__MASK; +} +#define PC_TASK_CON_TASK_NUMBER__MASK 0x00000fff +#define PC_TASK_CON_TASK_NUMBER__SHIFT 0 +static inline uint32_t PC_TASK_CON_TASK_NUMBER(uint32_t val) +{ + return ((val) << PC_TASK_CON_TASK_NUMBER__SHIFT) & PC_TASK_CON_TASK_NUMBER__MASK; +} + +#define REG_PC_TASK_DMA_BASE_ADDR 0x00000034 +#define PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__MASK 0xfffffff0 +#define PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__SHIFT 4 +static inline uint32_t PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(uint32_t val) +{ + return ((val) << PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__SHIFT) & PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR__MASK; +} +#define PC_TASK_DMA_BASE_ADDR_RESERVED_0__MASK 0x0000000f +#define PC_TASK_DMA_BASE_ADDR_RESERVED_0__SHIFT 0 +static inline uint32_t PC_TASK_DMA_BASE_ADDR_RESERVED_0(uint32_t val) +{ + return ((val) << PC_TASK_DMA_BASE_ADDR_RESERVED_0__SHIFT) & PC_TASK_DMA_BASE_ADDR_RESERVED_0__MASK; +} + +#define REG_PC_TASK_STATUS 0x0000003c +#define PC_TASK_STATUS_RESERVED_0__MASK 0xf0000000 +#define PC_TASK_STATUS_RESERVED_0__SHIFT 28 +static inline uint32_t PC_TASK_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << PC_TASK_STATUS_RESERVED_0__SHIFT) & PC_TASK_STATUS_RESERVED_0__MASK; +} +#define PC_TASK_STATUS_TASK_STATUS__MASK 0x0fffffff +#define PC_TASK_STATUS_TASK_STATUS__SHIFT 0 +static inline uint32_t PC_TASK_STATUS_TASK_STATUS(uint32_t val) +{ + return ((val) << PC_TASK_STATUS_TASK_STATUS__SHIFT) & PC_TASK_STATUS_TASK_STATUS__MASK; +} + +#define REG_CNA_S_STATUS 0x00001000 +#define CNA_S_STATUS_RESERVED_0__MASK 0xfffc0000 +#define CNA_S_STATUS_RESERVED_0__SHIFT 18 +static inline uint32_t 
CNA_S_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_S_STATUS_RESERVED_0__SHIFT) & CNA_S_STATUS_RESERVED_0__MASK; +} +#define CNA_S_STATUS_STATUS_1__MASK 0x00030000 +#define CNA_S_STATUS_STATUS_1__SHIFT 16 +static inline uint32_t CNA_S_STATUS_STATUS_1(uint32_t val) +{ + return ((val) << CNA_S_STATUS_STATUS_1__SHIFT) & CNA_S_STATUS_STATUS_1__MASK; +} +#define CNA_S_STATUS_RESERVED_1__MASK 0x0000fffc +#define CNA_S_STATUS_RESERVED_1__SHIFT 2 +static inline uint32_t CNA_S_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_S_STATUS_RESERVED_1__SHIFT) & CNA_S_STATUS_RESERVED_1__MASK; +} +#define CNA_S_STATUS_STATUS_0__MASK 0x00000003 +#define CNA_S_STATUS_STATUS_0__SHIFT 0 +static inline uint32_t CNA_S_STATUS_STATUS_0(uint32_t val) +{ + return ((val) << CNA_S_STATUS_STATUS_0__SHIFT) & CNA_S_STATUS_STATUS_0__MASK; +} + +#define REG_CNA_S_POINTER 0x00001004 +#define CNA_S_POINTER_RESERVED_0__MASK 0xfffe0000 +#define CNA_S_POINTER_RESERVED_0__SHIFT 17 +static inline uint32_t CNA_S_POINTER_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_S_POINTER_RESERVED_0__SHIFT) & CNA_S_POINTER_RESERVED_0__MASK; +} +#define CNA_S_POINTER_EXECUTER__MASK 0x00010000 +#define CNA_S_POINTER_EXECUTER__SHIFT 16 +static inline uint32_t CNA_S_POINTER_EXECUTER(uint32_t val) +{ + return ((val) << CNA_S_POINTER_EXECUTER__SHIFT) & CNA_S_POINTER_EXECUTER__MASK; +} +#define CNA_S_POINTER_RESERVED_1__MASK 0x0000ffc0 +#define CNA_S_POINTER_RESERVED_1__SHIFT 6 +static inline uint32_t CNA_S_POINTER_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_S_POINTER_RESERVED_1__SHIFT) & CNA_S_POINTER_RESERVED_1__MASK; +} +#define CNA_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020 +#define CNA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5 +static inline uint32_t CNA_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val) +{ + return ((val) << CNA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & CNA_S_POINTER_EXECUTER_PP_CLEAR__MASK; +} +#define CNA_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010 +#define CNA_S_POINTER_POINTER_PP_CLEAR__SHIFT 4 +static inline uint32_t CNA_S_POINTER_POINTER_PP_CLEAR(uint32_t val) +{ + return ((val) << CNA_S_POINTER_POINTER_PP_CLEAR__SHIFT) & CNA_S_POINTER_POINTER_PP_CLEAR__MASK; +} +#define CNA_S_POINTER_POINTER_PP_MODE__MASK 0x00000008 +#define CNA_S_POINTER_POINTER_PP_MODE__SHIFT 3 +static inline uint32_t CNA_S_POINTER_POINTER_PP_MODE(uint32_t val) +{ + return ((val) << CNA_S_POINTER_POINTER_PP_MODE__SHIFT) & CNA_S_POINTER_POINTER_PP_MODE__MASK; +} +#define CNA_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004 +#define CNA_S_POINTER_EXECUTER_PP_EN__SHIFT 2 +static inline uint32_t CNA_S_POINTER_EXECUTER_PP_EN(uint32_t val) +{ + return ((val) << CNA_S_POINTER_EXECUTER_PP_EN__SHIFT) & CNA_S_POINTER_EXECUTER_PP_EN__MASK; +} +#define CNA_S_POINTER_POINTER_PP_EN__MASK 0x00000002 +#define CNA_S_POINTER_POINTER_PP_EN__SHIFT 1 +static inline uint32_t CNA_S_POINTER_POINTER_PP_EN(uint32_t val) +{ + return ((val) << CNA_S_POINTER_POINTER_PP_EN__SHIFT) & CNA_S_POINTER_POINTER_PP_EN__MASK; +} +#define CNA_S_POINTER_POINTER__MASK 0x00000001 +#define CNA_S_POINTER_POINTER__SHIFT 0 +static inline uint32_t CNA_S_POINTER_POINTER(uint32_t val) +{ + return ((val) << CNA_S_POINTER_POINTER__SHIFT) & CNA_S_POINTER_POINTER__MASK; +} + +#define REG_CNA_OPERATION_ENABLE 0x00001008 +#define CNA_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define CNA_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t CNA_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_OPERATION_ENABLE_RESERVED_0__SHIFT) & CNA_OPERATION_ENABLE_RESERVED_0__MASK; +} 
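+/* + * Example (mirrors rocket_job_hw_submit() in rocket_job.c): each field helper + * masks and shifts a value into place, so a full register value is built by + * OR-ing helpers together before a single write: + * + * rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) | + * CNA_S_POINTER_EXECUTER_PP_EN(1) | + * CNA_S_POINTER_POINTER_PP_MODE(1)); + */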
+#define CNA_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define CNA_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t CNA_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << CNA_OPERATION_ENABLE_OP_EN__SHIFT) & CNA_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_CNA_CONV_CON1 0x0000100c +#define CNA_CONV_CON1_RESERVED_0__MASK 0x80000000 +#define CNA_CONV_CON1_RESERVED_0__SHIFT 31 +static inline uint32_t CNA_CONV_CON1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_RESERVED_0__SHIFT) & CNA_CONV_CON1_RESERVED_0__MASK; +} +#define CNA_CONV_CON1_NONALIGN_DMA__MASK 0x40000000 +#define CNA_CONV_CON1_NONALIGN_DMA__SHIFT 30 +static inline uint32_t CNA_CONV_CON1_NONALIGN_DMA(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_NONALIGN_DMA__SHIFT) & CNA_CONV_CON1_NONALIGN_DMA__MASK; +} +#define CNA_CONV_CON1_GROUP_LINE_OFF__MASK 0x20000000 +#define CNA_CONV_CON1_GROUP_LINE_OFF__SHIFT 29 +static inline uint32_t CNA_CONV_CON1_GROUP_LINE_OFF(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_GROUP_LINE_OFF__SHIFT) & CNA_CONV_CON1_GROUP_LINE_OFF__MASK; +} +#define CNA_CONV_CON1_RESERVED_1__MASK 0x1ffe0000 +#define CNA_CONV_CON1_RESERVED_1__SHIFT 17 +static inline uint32_t CNA_CONV_CON1_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_RESERVED_1__SHIFT) & CNA_CONV_CON1_RESERVED_1__MASK; +} +#define CNA_CONV_CON1_DECONV__MASK 0x00010000 +#define CNA_CONV_CON1_DECONV__SHIFT 16 +static inline uint32_t CNA_CONV_CON1_DECONV(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_DECONV__SHIFT) & CNA_CONV_CON1_DECONV__MASK; +} +#define CNA_CONV_CON1_ARGB_IN__MASK 0x0000f000 +#define CNA_CONV_CON1_ARGB_IN__SHIFT 12 +static inline uint32_t CNA_CONV_CON1_ARGB_IN(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_ARGB_IN__SHIFT) & CNA_CONV_CON1_ARGB_IN__MASK; +} +#define CNA_CONV_CON1_RESERVED_2__MASK 0x00000c00 +#define CNA_CONV_CON1_RESERVED_2__SHIFT 10 +static inline uint32_t CNA_CONV_CON1_RESERVED_2(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_RESERVED_2__SHIFT) & CNA_CONV_CON1_RESERVED_2__MASK; +} +#define CNA_CONV_CON1_PROC_PRECISION__MASK 0x00000380 +#define CNA_CONV_CON1_PROC_PRECISION__SHIFT 7 +static inline uint32_t CNA_CONV_CON1_PROC_PRECISION(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_PROC_PRECISION__SHIFT) & CNA_CONV_CON1_PROC_PRECISION__MASK; +} +#define CNA_CONV_CON1_IN_PRECISION__MASK 0x00000070 +#define CNA_CONV_CON1_IN_PRECISION__SHIFT 4 +static inline uint32_t CNA_CONV_CON1_IN_PRECISION(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_IN_PRECISION__SHIFT) & CNA_CONV_CON1_IN_PRECISION__MASK; +} +#define CNA_CONV_CON1_CONV_MODE__MASK 0x0000000f +#define CNA_CONV_CON1_CONV_MODE__SHIFT 0 +static inline uint32_t CNA_CONV_CON1_CONV_MODE(uint32_t val) +{ + return ((val) << CNA_CONV_CON1_CONV_MODE__SHIFT) & CNA_CONV_CON1_CONV_MODE__MASK; +} + +#define REG_CNA_CONV_CON2 0x00001010 +#define CNA_CONV_CON2_RESERVED_0__MASK 0xff000000 +#define CNA_CONV_CON2_RESERVED_0__SHIFT 24 +static inline uint32_t CNA_CONV_CON2_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_RESERVED_0__SHIFT) & CNA_CONV_CON2_RESERVED_0__MASK; +} +#define CNA_CONV_CON2_KERNEL_GROUP__MASK 0x00ff0000 +#define CNA_CONV_CON2_KERNEL_GROUP__SHIFT 16 +static inline uint32_t CNA_CONV_CON2_KERNEL_GROUP(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_KERNEL_GROUP__SHIFT) & CNA_CONV_CON2_KERNEL_GROUP__MASK; +} +#define CNA_CONV_CON2_RESERVED_1__MASK 0x0000c000 +#define CNA_CONV_CON2_RESERVED_1__SHIFT 14 +static inline uint32_t CNA_CONV_CON2_RESERVED_1(uint32_t val) +{ + return ((val) << 
CNA_CONV_CON2_RESERVED_1__SHIFT) & CNA_CONV_CON2_RESERVED_1__MASK; +} +#define CNA_CONV_CON2_FEATURE_GRAINS__MASK 0x00003ff0 +#define CNA_CONV_CON2_FEATURE_GRAINS__SHIFT 4 +static inline uint32_t CNA_CONV_CON2_FEATURE_GRAINS(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_FEATURE_GRAINS__SHIFT) & CNA_CONV_CON2_FEATURE_GRAINS__MASK; +} +#define CNA_CONV_CON2_RESERVED_2__MASK 0x00000008 +#define CNA_CONV_CON2_RESERVED_2__SHIFT 3 +static inline uint32_t CNA_CONV_CON2_RESERVED_2(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_RESERVED_2__SHIFT) & CNA_CONV_CON2_RESERVED_2__MASK; +} +#define CNA_CONV_CON2_CSC_WO_EN__MASK 0x00000004 +#define CNA_CONV_CON2_CSC_WO_EN__SHIFT 2 +static inline uint32_t CNA_CONV_CON2_CSC_WO_EN(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_CSC_WO_EN__SHIFT) & CNA_CONV_CON2_CSC_WO_EN__MASK; +} +#define CNA_CONV_CON2_CSC_DO_EN__MASK 0x00000002 +#define CNA_CONV_CON2_CSC_DO_EN__SHIFT 1 +static inline uint32_t CNA_CONV_CON2_CSC_DO_EN(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_CSC_DO_EN__SHIFT) & CNA_CONV_CON2_CSC_DO_EN__MASK; +} +#define CNA_CONV_CON2_CMD_FIFO_SRST__MASK 0x00000001 +#define CNA_CONV_CON2_CMD_FIFO_SRST__SHIFT 0 +static inline uint32_t CNA_CONV_CON2_CMD_FIFO_SRST(uint32_t val) +{ + return ((val) << CNA_CONV_CON2_CMD_FIFO_SRST__SHIFT) & CNA_CONV_CON2_CMD_FIFO_SRST__MASK; +} + +#define REG_CNA_CONV_CON3 0x00001014 +#define CNA_CONV_CON3_RESERVED_0__MASK 0x80000000 +#define CNA_CONV_CON3_RESERVED_0__SHIFT 31 +static inline uint32_t CNA_CONV_CON3_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_RESERVED_0__SHIFT) & CNA_CONV_CON3_RESERVED_0__MASK; +} +#define CNA_CONV_CON3_NN_MODE__MASK 0x70000000 +#define CNA_CONV_CON3_NN_MODE__SHIFT 28 +static inline uint32_t CNA_CONV_CON3_NN_MODE(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_NN_MODE__SHIFT) & CNA_CONV_CON3_NN_MODE__MASK; +} +#define CNA_CONV_CON3_RESERVED_1__MASK 0x0c000000 +#define CNA_CONV_CON3_RESERVED_1__SHIFT 26 +static inline uint32_t CNA_CONV_CON3_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_RESERVED_1__SHIFT) & CNA_CONV_CON3_RESERVED_1__MASK; +} +#define CNA_CONV_CON3_ATROUS_Y_DILATION__MASK 0x03e00000 +#define CNA_CONV_CON3_ATROUS_Y_DILATION__SHIFT 21 +static inline uint32_t CNA_CONV_CON3_ATROUS_Y_DILATION(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_ATROUS_Y_DILATION__SHIFT) & CNA_CONV_CON3_ATROUS_Y_DILATION__MASK; +} +#define CNA_CONV_CON3_ATROUS_X_DILATION__MASK 0x001f0000 +#define CNA_CONV_CON3_ATROUS_X_DILATION__SHIFT 16 +static inline uint32_t CNA_CONV_CON3_ATROUS_X_DILATION(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_ATROUS_X_DILATION__SHIFT) & CNA_CONV_CON3_ATROUS_X_DILATION__MASK; +} +#define CNA_CONV_CON3_RESERVED_2__MASK 0x0000c000 +#define CNA_CONV_CON3_RESERVED_2__SHIFT 14 +static inline uint32_t CNA_CONV_CON3_RESERVED_2(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_RESERVED_2__SHIFT) & CNA_CONV_CON3_RESERVED_2__MASK; +} +#define CNA_CONV_CON3_DECONV_Y_STRIDE__MASK 0x00003800 +#define CNA_CONV_CON3_DECONV_Y_STRIDE__SHIFT 11 +static inline uint32_t CNA_CONV_CON3_DECONV_Y_STRIDE(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_DECONV_Y_STRIDE__SHIFT) & CNA_CONV_CON3_DECONV_Y_STRIDE__MASK; +} +#define CNA_CONV_CON3_DECONV_X_STRIDE__MASK 0x00000700 +#define CNA_CONV_CON3_DECONV_X_STRIDE__SHIFT 8 +static inline uint32_t CNA_CONV_CON3_DECONV_X_STRIDE(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_DECONV_X_STRIDE__SHIFT) & CNA_CONV_CON3_DECONV_X_STRIDE__MASK; +} +#define CNA_CONV_CON3_RESERVED_3__MASK 0x000000c0 +#define 
CNA_CONV_CON3_RESERVED_3__SHIFT 6 +static inline uint32_t CNA_CONV_CON3_RESERVED_3(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_RESERVED_3__SHIFT) & CNA_CONV_CON3_RESERVED_3__MASK; +} +#define CNA_CONV_CON3_CONV_Y_STRIDE__MASK 0x00000038 +#define CNA_CONV_CON3_CONV_Y_STRIDE__SHIFT 3 +static inline uint32_t CNA_CONV_CON3_CONV_Y_STRIDE(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_CONV_Y_STRIDE__SHIFT) & CNA_CONV_CON3_CONV_Y_STRIDE__MASK; +} +#define CNA_CONV_CON3_CONV_X_STRIDE__MASK 0x00000007 +#define CNA_CONV_CON3_CONV_X_STRIDE__SHIFT 0 +static inline uint32_t CNA_CONV_CON3_CONV_X_STRIDE(uint32_t val) +{ + return ((val) << CNA_CONV_CON3_CONV_X_STRIDE__SHIFT) & CNA_CONV_CON3_CONV_X_STRIDE__MASK; +} + +#define REG_CNA_DATA_SIZE0 0x00001020 +#define CNA_DATA_SIZE0_RESERVED_0__MASK 0xf8000000 +#define CNA_DATA_SIZE0_RESERVED_0__SHIFT 27 +static inline uint32_t CNA_DATA_SIZE0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE0_RESERVED_0__SHIFT) & CNA_DATA_SIZE0_RESERVED_0__MASK; +} +#define CNA_DATA_SIZE0_DATAIN_WIDTH__MASK 0x07ff0000 +#define CNA_DATA_SIZE0_DATAIN_WIDTH__SHIFT 16 +static inline uint32_t CNA_DATA_SIZE0_DATAIN_WIDTH(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE0_DATAIN_WIDTH__SHIFT) & CNA_DATA_SIZE0_DATAIN_WIDTH__MASK; +} +#define CNA_DATA_SIZE0_RESERVED_1__MASK 0x0000f800 +#define CNA_DATA_SIZE0_RESERVED_1__SHIFT 11 +static inline uint32_t CNA_DATA_SIZE0_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE0_RESERVED_1__SHIFT) & CNA_DATA_SIZE0_RESERVED_1__MASK; +} +#define CNA_DATA_SIZE0_DATAIN_HEIGHT__MASK 0x000007ff +#define CNA_DATA_SIZE0_DATAIN_HEIGHT__SHIFT 0 +static inline uint32_t CNA_DATA_SIZE0_DATAIN_HEIGHT(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE0_DATAIN_HEIGHT__SHIFT) & CNA_DATA_SIZE0_DATAIN_HEIGHT__MASK; +} + +#define REG_CNA_DATA_SIZE1 0x00001024 +#define CNA_DATA_SIZE1_RESERVED_0__MASK 0xc0000000 +#define CNA_DATA_SIZE1_RESERVED_0__SHIFT 30 +static inline uint32_t CNA_DATA_SIZE1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE1_RESERVED_0__SHIFT) & CNA_DATA_SIZE1_RESERVED_0__MASK; +} +#define CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__MASK 0x3fff0000 +#define CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__SHIFT 16 +static inline uint32_t CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__SHIFT) & CNA_DATA_SIZE1_DATAIN_CHANNEL_REAL__MASK; +} +#define CNA_DATA_SIZE1_DATAIN_CHANNEL__MASK 0x0000ffff +#define CNA_DATA_SIZE1_DATAIN_CHANNEL__SHIFT 0 +static inline uint32_t CNA_DATA_SIZE1_DATAIN_CHANNEL(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE1_DATAIN_CHANNEL__SHIFT) & CNA_DATA_SIZE1_DATAIN_CHANNEL__MASK; +} + +#define REG_CNA_DATA_SIZE2 0x00001028 +#define CNA_DATA_SIZE2_RESERVED_0__MASK 0xfffff800 +#define CNA_DATA_SIZE2_RESERVED_0__SHIFT 11 +static inline uint32_t CNA_DATA_SIZE2_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE2_RESERVED_0__SHIFT) & CNA_DATA_SIZE2_RESERVED_0__MASK; +} +#define CNA_DATA_SIZE2_DATAOUT_WIDTH__MASK 0x000007ff +#define CNA_DATA_SIZE2_DATAOUT_WIDTH__SHIFT 0 +static inline uint32_t CNA_DATA_SIZE2_DATAOUT_WIDTH(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE2_DATAOUT_WIDTH__SHIFT) & CNA_DATA_SIZE2_DATAOUT_WIDTH__MASK; +} + +#define REG_CNA_DATA_SIZE3 0x0000102c +#define CNA_DATA_SIZE3_RESERVED_0__MASK 0xff000000 +#define CNA_DATA_SIZE3_RESERVED_0__SHIFT 24 +static inline uint32_t CNA_DATA_SIZE3_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE3_RESERVED_0__SHIFT) & CNA_DATA_SIZE3_RESERVED_0__MASK; +} +#define 
CNA_DATA_SIZE3_SURF_MODE__MASK 0x00c00000 +#define CNA_DATA_SIZE3_SURF_MODE__SHIFT 22 +static inline uint32_t CNA_DATA_SIZE3_SURF_MODE(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE3_SURF_MODE__SHIFT) & CNA_DATA_SIZE3_SURF_MODE__MASK; +} +#define CNA_DATA_SIZE3_DATAOUT_ATOMICS__MASK 0x003fffff +#define CNA_DATA_SIZE3_DATAOUT_ATOMICS__SHIFT 0 +static inline uint32_t CNA_DATA_SIZE3_DATAOUT_ATOMICS(uint32_t val) +{ + return ((val) << CNA_DATA_SIZE3_DATAOUT_ATOMICS__SHIFT) & CNA_DATA_SIZE3_DATAOUT_ATOMICS__MASK; +} + +#define REG_CNA_WEIGHT_SIZE0 0x00001030 +#define CNA_WEIGHT_SIZE0_WEIGHT_BYTES__MASK 0xffffffff +#define CNA_WEIGHT_SIZE0_WEIGHT_BYTES__SHIFT 0 +static inline uint32_t CNA_WEIGHT_SIZE0_WEIGHT_BYTES(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE0_WEIGHT_BYTES__SHIFT) & CNA_WEIGHT_SIZE0_WEIGHT_BYTES__MASK; +} + +#define REG_CNA_WEIGHT_SIZE1 0x00001034 +#define CNA_WEIGHT_SIZE1_RESERVED_0__MASK 0xfff80000 +#define CNA_WEIGHT_SIZE1_RESERVED_0__SHIFT 19 +static inline uint32_t CNA_WEIGHT_SIZE1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE1_RESERVED_0__SHIFT) & CNA_WEIGHT_SIZE1_RESERVED_0__MASK; +} +#define CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__MASK 0x0007ffff +#define CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__SHIFT 0 +static inline uint32_t CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__SHIFT) & CNA_WEIGHT_SIZE1_WEIGHT_BYTES_PER_KERNEL__MASK; +} + +#define REG_CNA_WEIGHT_SIZE2 0x00001038 +#define CNA_WEIGHT_SIZE2_RESERVED_0__MASK 0xe0000000 +#define CNA_WEIGHT_SIZE2_RESERVED_0__SHIFT 29 +static inline uint32_t CNA_WEIGHT_SIZE2_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE2_RESERVED_0__SHIFT) & CNA_WEIGHT_SIZE2_RESERVED_0__MASK; +} +#define CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__MASK 0x1f000000 +#define CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__SHIFT 24 +static inline uint32_t CNA_WEIGHT_SIZE2_WEIGHT_WIDTH(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__SHIFT) & CNA_WEIGHT_SIZE2_WEIGHT_WIDTH__MASK; +} +#define CNA_WEIGHT_SIZE2_RESERVED_1__MASK 0x00e00000 +#define CNA_WEIGHT_SIZE2_RESERVED_1__SHIFT 21 +static inline uint32_t CNA_WEIGHT_SIZE2_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE2_RESERVED_1__SHIFT) & CNA_WEIGHT_SIZE2_RESERVED_1__MASK; +} +#define CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__MASK 0x001f0000 +#define CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__SHIFT 16 +static inline uint32_t CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__SHIFT) & CNA_WEIGHT_SIZE2_WEIGHT_HEIGHT__MASK; +} +#define CNA_WEIGHT_SIZE2_RESERVED_2__MASK 0x0000c000 +#define CNA_WEIGHT_SIZE2_RESERVED_2__SHIFT 14 +static inline uint32_t CNA_WEIGHT_SIZE2_RESERVED_2(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE2_RESERVED_2__SHIFT) & CNA_WEIGHT_SIZE2_RESERVED_2__MASK; +} +#define CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__MASK 0x00003fff +#define CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__SHIFT 0 +static inline uint32_t CNA_WEIGHT_SIZE2_WEIGHT_KERNELS(uint32_t val) +{ + return ((val) << CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__SHIFT) & CNA_WEIGHT_SIZE2_WEIGHT_KERNELS__MASK; +} + +#define REG_CNA_CBUF_CON0 0x00001040 +#define CNA_CBUF_CON0_RESERVED_0__MASK 0xffffc000 +#define CNA_CBUF_CON0_RESERVED_0__SHIFT 14 +static inline uint32_t CNA_CBUF_CON0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_RESERVED_0__SHIFT) & CNA_CBUF_CON0_RESERVED_0__MASK; +} +#define CNA_CBUF_CON0_WEIGHT_REUSE__MASK 0x00002000 +#define CNA_CBUF_CON0_WEIGHT_REUSE__SHIFT 13 +static inline 
uint32_t CNA_CBUF_CON0_WEIGHT_REUSE(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_WEIGHT_REUSE__SHIFT) & CNA_CBUF_CON0_WEIGHT_REUSE__MASK; +} +#define CNA_CBUF_CON0_DATA_REUSE__MASK 0x00001000 +#define CNA_CBUF_CON0_DATA_REUSE__SHIFT 12 +static inline uint32_t CNA_CBUF_CON0_DATA_REUSE(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_DATA_REUSE__SHIFT) & CNA_CBUF_CON0_DATA_REUSE__MASK; +} +#define CNA_CBUF_CON0_RESERVED_1__MASK 0x00000800 +#define CNA_CBUF_CON0_RESERVED_1__SHIFT 11 +static inline uint32_t CNA_CBUF_CON0_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_RESERVED_1__SHIFT) & CNA_CBUF_CON0_RESERVED_1__MASK; +} +#define CNA_CBUF_CON0_FC_DATA_BANK__MASK 0x00000700 +#define CNA_CBUF_CON0_FC_DATA_BANK__SHIFT 8 +static inline uint32_t CNA_CBUF_CON0_FC_DATA_BANK(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_FC_DATA_BANK__SHIFT) & CNA_CBUF_CON0_FC_DATA_BANK__MASK; +} +#define CNA_CBUF_CON0_WEIGHT_BANK__MASK 0x000000f0 +#define CNA_CBUF_CON0_WEIGHT_BANK__SHIFT 4 +static inline uint32_t CNA_CBUF_CON0_WEIGHT_BANK(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_WEIGHT_BANK__SHIFT) & CNA_CBUF_CON0_WEIGHT_BANK__MASK; +} +#define CNA_CBUF_CON0_DATA_BANK__MASK 0x0000000f +#define CNA_CBUF_CON0_DATA_BANK__SHIFT 0 +static inline uint32_t CNA_CBUF_CON0_DATA_BANK(uint32_t val) +{ + return ((val) << CNA_CBUF_CON0_DATA_BANK__SHIFT) & CNA_CBUF_CON0_DATA_BANK__MASK; +} + +#define REG_CNA_CBUF_CON1 0x00001044 +#define CNA_CBUF_CON1_RESERVED_0__MASK 0xffffc000 +#define CNA_CBUF_CON1_RESERVED_0__SHIFT 14 +static inline uint32_t CNA_CBUF_CON1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CBUF_CON1_RESERVED_0__SHIFT) & CNA_CBUF_CON1_RESERVED_0__MASK; +} +#define CNA_CBUF_CON1_DATA_ENTRIES__MASK 0x00003fff +#define CNA_CBUF_CON1_DATA_ENTRIES__SHIFT 0 +static inline uint32_t CNA_CBUF_CON1_DATA_ENTRIES(uint32_t val) +{ + return ((val) << CNA_CBUF_CON1_DATA_ENTRIES__SHIFT) & CNA_CBUF_CON1_DATA_ENTRIES__MASK; +} + +#define REG_CNA_CVT_CON0 0x0000104c +#define CNA_CVT_CON0_RESERVED_0__MASK 0xf0000000 +#define CNA_CVT_CON0_RESERVED_0__SHIFT 28 +static inline uint32_t CNA_CVT_CON0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_RESERVED_0__SHIFT) & CNA_CVT_CON0_RESERVED_0__MASK; +} +#define CNA_CVT_CON0_CVT_TRUNCATE_3__MASK 0x0fc00000 +#define CNA_CVT_CON0_CVT_TRUNCATE_3__SHIFT 22 +static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_3(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_3__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_3__MASK; +} +#define CNA_CVT_CON0_CVT_TRUNCATE_2__MASK 0x003f0000 +#define CNA_CVT_CON0_CVT_TRUNCATE_2__SHIFT 16 +static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_2(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_2__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_2__MASK; +} +#define CNA_CVT_CON0_CVT_TRUNCATE_1__MASK 0x0000fc00 +#define CNA_CVT_CON0_CVT_TRUNCATE_1__SHIFT 10 +static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_1(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_1__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_1__MASK; +} +#define CNA_CVT_CON0_CVT_TRUNCATE_0__MASK 0x000003f0 +#define CNA_CVT_CON0_CVT_TRUNCATE_0__SHIFT 4 +static inline uint32_t CNA_CVT_CON0_CVT_TRUNCATE_0(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_CVT_TRUNCATE_0__SHIFT) & CNA_CVT_CON0_CVT_TRUNCATE_0__MASK; +} +#define CNA_CVT_CON0_DATA_SIGN__MASK 0x00000008 +#define CNA_CVT_CON0_DATA_SIGN__SHIFT 3 +static inline uint32_t CNA_CVT_CON0_DATA_SIGN(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_DATA_SIGN__SHIFT) & CNA_CVT_CON0_DATA_SIGN__MASK; +} +#define 
CNA_CVT_CON0_ROUND_TYPE__MASK 0x00000004 +#define CNA_CVT_CON0_ROUND_TYPE__SHIFT 2 +static inline uint32_t CNA_CVT_CON0_ROUND_TYPE(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_ROUND_TYPE__SHIFT) & CNA_CVT_CON0_ROUND_TYPE__MASK; +} +#define CNA_CVT_CON0_CVT_TYPE__MASK 0x00000002 +#define CNA_CVT_CON0_CVT_TYPE__SHIFT 1 +static inline uint32_t CNA_CVT_CON0_CVT_TYPE(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_CVT_TYPE__SHIFT) & CNA_CVT_CON0_CVT_TYPE__MASK; +} +#define CNA_CVT_CON0_CVT_BYPASS__MASK 0x00000001 +#define CNA_CVT_CON0_CVT_BYPASS__SHIFT 0 +static inline uint32_t CNA_CVT_CON0_CVT_BYPASS(uint32_t val) +{ + return ((val) << CNA_CVT_CON0_CVT_BYPASS__SHIFT) & CNA_CVT_CON0_CVT_BYPASS__MASK; +} + +#define REG_CNA_CVT_CON1 0x00001050 +#define CNA_CVT_CON1_CVT_SCALE0__MASK 0xffff0000 +#define CNA_CVT_CON1_CVT_SCALE0__SHIFT 16 +static inline uint32_t CNA_CVT_CON1_CVT_SCALE0(uint32_t val) +{ + return ((val) << CNA_CVT_CON1_CVT_SCALE0__SHIFT) & CNA_CVT_CON1_CVT_SCALE0__MASK; +} +#define CNA_CVT_CON1_CVT_OFFSET0__MASK 0x0000ffff +#define CNA_CVT_CON1_CVT_OFFSET0__SHIFT 0 +static inline uint32_t CNA_CVT_CON1_CVT_OFFSET0(uint32_t val) +{ + return ((val) << CNA_CVT_CON1_CVT_OFFSET0__SHIFT) & CNA_CVT_CON1_CVT_OFFSET0__MASK; +} + +#define REG_CNA_CVT_CON2 0x00001054 +#define CNA_CVT_CON2_CVT_SCALE1__MASK 0xffff0000 +#define CNA_CVT_CON2_CVT_SCALE1__SHIFT 16 +static inline uint32_t CNA_CVT_CON2_CVT_SCALE1(uint32_t val) +{ + return ((val) << CNA_CVT_CON2_CVT_SCALE1__SHIFT) & CNA_CVT_CON2_CVT_SCALE1__MASK; +} +#define CNA_CVT_CON2_CVT_OFFSET1__MASK 0x0000ffff +#define CNA_CVT_CON2_CVT_OFFSET1__SHIFT 0 +static inline uint32_t CNA_CVT_CON2_CVT_OFFSET1(uint32_t val) +{ + return ((val) << CNA_CVT_CON2_CVT_OFFSET1__SHIFT) & CNA_CVT_CON2_CVT_OFFSET1__MASK; +} + +#define REG_CNA_CVT_CON3 0x00001058 +#define CNA_CVT_CON3_CVT_SCALE2__MASK 0xffff0000 +#define CNA_CVT_CON3_CVT_SCALE2__SHIFT 16 +static inline uint32_t CNA_CVT_CON3_CVT_SCALE2(uint32_t val) +{ + return ((val) << CNA_CVT_CON3_CVT_SCALE2__SHIFT) & CNA_CVT_CON3_CVT_SCALE2__MASK; +} +#define CNA_CVT_CON3_CVT_OFFSET2__MASK 0x0000ffff +#define CNA_CVT_CON3_CVT_OFFSET2__SHIFT 0 +static inline uint32_t CNA_CVT_CON3_CVT_OFFSET2(uint32_t val) +{ + return ((val) << CNA_CVT_CON3_CVT_OFFSET2__SHIFT) & CNA_CVT_CON3_CVT_OFFSET2__MASK; +} + +#define REG_CNA_CVT_CON4 0x0000105c +#define CNA_CVT_CON4_CVT_SCALE3__MASK 0xffff0000 +#define CNA_CVT_CON4_CVT_SCALE3__SHIFT 16 +static inline uint32_t CNA_CVT_CON4_CVT_SCALE3(uint32_t val) +{ + return ((val) << CNA_CVT_CON4_CVT_SCALE3__SHIFT) & CNA_CVT_CON4_CVT_SCALE3__MASK; +} +#define CNA_CVT_CON4_CVT_OFFSET3__MASK 0x0000ffff +#define CNA_CVT_CON4_CVT_OFFSET3__SHIFT 0 +static inline uint32_t CNA_CVT_CON4_CVT_OFFSET3(uint32_t val) +{ + return ((val) << CNA_CVT_CON4_CVT_OFFSET3__SHIFT) & CNA_CVT_CON4_CVT_OFFSET3__MASK; +} + +#define REG_CNA_FC_CON0 0x00001060 +#define CNA_FC_CON0_FC_SKIP_DATA__MASK 0xffff0000 +#define CNA_FC_CON0_FC_SKIP_DATA__SHIFT 16 +static inline uint32_t CNA_FC_CON0_FC_SKIP_DATA(uint32_t val) +{ + return ((val) << CNA_FC_CON0_FC_SKIP_DATA__SHIFT) & CNA_FC_CON0_FC_SKIP_DATA__MASK; +} +#define CNA_FC_CON0_RESERVED_0__MASK 0x0000fffe +#define CNA_FC_CON0_RESERVED_0__SHIFT 1 +static inline uint32_t CNA_FC_CON0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_FC_CON0_RESERVED_0__SHIFT) & CNA_FC_CON0_RESERVED_0__MASK; +} +#define CNA_FC_CON0_FC_SKIP_EN__MASK 0x00000001 +#define CNA_FC_CON0_FC_SKIP_EN__SHIFT 0 +static inline uint32_t CNA_FC_CON0_FC_SKIP_EN(uint32_t val) +{ + return ((val) << 
CNA_FC_CON0_FC_SKIP_EN__SHIFT) & CNA_FC_CON0_FC_SKIP_EN__MASK; +} + +#define REG_CNA_FC_CON1 0x00001064 +#define CNA_FC_CON1_RESERVED_0__MASK 0xfffe0000 +#define CNA_FC_CON1_RESERVED_0__SHIFT 17 +static inline uint32_t CNA_FC_CON1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_FC_CON1_RESERVED_0__SHIFT) & CNA_FC_CON1_RESERVED_0__MASK; +} +#define CNA_FC_CON1_DATA_OFFSET__MASK 0x0001ffff +#define CNA_FC_CON1_DATA_OFFSET__SHIFT 0 +static inline uint32_t CNA_FC_CON1_DATA_OFFSET(uint32_t val) +{ + return ((val) << CNA_FC_CON1_DATA_OFFSET__SHIFT) & CNA_FC_CON1_DATA_OFFSET__MASK; +} + +#define REG_CNA_PAD_CON0 0x00001068 +#define CNA_PAD_CON0_RESERVED_0__MASK 0xffffff00 +#define CNA_PAD_CON0_RESERVED_0__SHIFT 8 +static inline uint32_t CNA_PAD_CON0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_PAD_CON0_RESERVED_0__SHIFT) & CNA_PAD_CON0_RESERVED_0__MASK; +} +#define CNA_PAD_CON0_PAD_LEFT__MASK 0x000000f0 +#define CNA_PAD_CON0_PAD_LEFT__SHIFT 4 +static inline uint32_t CNA_PAD_CON0_PAD_LEFT(uint32_t val) +{ + return ((val) << CNA_PAD_CON0_PAD_LEFT__SHIFT) & CNA_PAD_CON0_PAD_LEFT__MASK; +} +#define CNA_PAD_CON0_PAD_TOP__MASK 0x0000000f +#define CNA_PAD_CON0_PAD_TOP__SHIFT 0 +static inline uint32_t CNA_PAD_CON0_PAD_TOP(uint32_t val) +{ + return ((val) << CNA_PAD_CON0_PAD_TOP__SHIFT) & CNA_PAD_CON0_PAD_TOP__MASK; +} + +#define REG_CNA_FEATURE_DATA_ADDR 0x00001070 +#define CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__MASK 0xffffffff +#define CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__SHIFT 0 +static inline uint32_t CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR(uint32_t val) +{ + return ((val) << CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__SHIFT) & CNA_FEATURE_DATA_ADDR_FEATURE_BASE_ADDR__MASK; +} + +#define REG_CNA_FC_CON2 0x00001074 +#define CNA_FC_CON2_RESERVED_0__MASK 0xfffe0000 +#define CNA_FC_CON2_RESERVED_0__SHIFT 17 +static inline uint32_t CNA_FC_CON2_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_FC_CON2_RESERVED_0__SHIFT) & CNA_FC_CON2_RESERVED_0__MASK; +} +#define CNA_FC_CON2_WEIGHT_OFFSET__MASK 0x0001ffff +#define CNA_FC_CON2_WEIGHT_OFFSET__SHIFT 0 +static inline uint32_t CNA_FC_CON2_WEIGHT_OFFSET(uint32_t val) +{ + return ((val) << CNA_FC_CON2_WEIGHT_OFFSET__SHIFT) & CNA_FC_CON2_WEIGHT_OFFSET__MASK; +} + +#define REG_CNA_DMA_CON0 0x00001078 +#define CNA_DMA_CON0_OV4K_BYPASS__MASK 0x80000000 +#define CNA_DMA_CON0_OV4K_BYPASS__SHIFT 31 +static inline uint32_t CNA_DMA_CON0_OV4K_BYPASS(uint32_t val) +{ + return ((val) << CNA_DMA_CON0_OV4K_BYPASS__SHIFT) & CNA_DMA_CON0_OV4K_BYPASS__MASK; +} +#define CNA_DMA_CON0_RESERVED_0__MASK 0x7ff00000 +#define CNA_DMA_CON0_RESERVED_0__SHIFT 20 +static inline uint32_t CNA_DMA_CON0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DMA_CON0_RESERVED_0__SHIFT) & CNA_DMA_CON0_RESERVED_0__MASK; +} +#define CNA_DMA_CON0_WEIGHT_BURST_LEN__MASK 0x000f0000 +#define CNA_DMA_CON0_WEIGHT_BURST_LEN__SHIFT 16 +static inline uint32_t CNA_DMA_CON0_WEIGHT_BURST_LEN(uint32_t val) +{ + return ((val) << CNA_DMA_CON0_WEIGHT_BURST_LEN__SHIFT) & CNA_DMA_CON0_WEIGHT_BURST_LEN__MASK; +} +#define CNA_DMA_CON0_RESERVED_1__MASK 0x0000fff0 +#define CNA_DMA_CON0_RESERVED_1__SHIFT 4 +static inline uint32_t CNA_DMA_CON0_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_DMA_CON0_RESERVED_1__SHIFT) & CNA_DMA_CON0_RESERVED_1__MASK; +} +#define CNA_DMA_CON0_DATA_BURST_LEN__MASK 0x0000000f +#define CNA_DMA_CON0_DATA_BURST_LEN__SHIFT 0 +static inline uint32_t CNA_DMA_CON0_DATA_BURST_LEN(uint32_t val) +{ + return ((val) << CNA_DMA_CON0_DATA_BURST_LEN__SHIFT) & CNA_DMA_CON0_DATA_BURST_LEN__MASK; +} 
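Every generated helper above follows the same pattern: shift the field value into position, then mask it to the field width, so several fields can be OR-ed together into one register word without spilling into their neighbours. As a minimal usage sketch (not part of this patch; the function name, base pointer, and burst-length value 15 are illustrative assumptions), a caller might program CNA_DMA_CON0 like this:

#include <linux/io.h>
#include <linux/types.h>

/* Illustrative sketch only: pack the CNA DMA burst lengths into a single
 * register word with the generated field helpers, then write it out.
 * Reserved fields are simply left zero. The base pointer and the value
 * 15 for both burst-length fields are assumptions for this example.
 */
static void example_cna_dma_setup(void __iomem *base)
{
	u32 val;

	val = CNA_DMA_CON0_OV4K_BYPASS(0) |
	      CNA_DMA_CON0_WEIGHT_BURST_LEN(15) |
	      CNA_DMA_CON0_DATA_BURST_LEN(15);

	writel(val, base + REG_CNA_DMA_CON0);
}

Because each helper applies its field mask, an out-of-range value is silently truncated to the field width rather than corrupting adjacent bits.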
+ +#define REG_CNA_DMA_CON1 0x0000107c +#define CNA_DMA_CON1_RESERVED_0__MASK 0xf0000000 +#define CNA_DMA_CON1_RESERVED_0__SHIFT 28 +static inline uint32_t CNA_DMA_CON1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DMA_CON1_RESERVED_0__SHIFT) & CNA_DMA_CON1_RESERVED_0__MASK; +} +#define CNA_DMA_CON1_LINE_STRIDE__MASK 0x0fffffff +#define CNA_DMA_CON1_LINE_STRIDE__SHIFT 0 +static inline uint32_t CNA_DMA_CON1_LINE_STRIDE(uint32_t val) +{ + return ((val) << CNA_DMA_CON1_LINE_STRIDE__SHIFT) & CNA_DMA_CON1_LINE_STRIDE__MASK; +} + +#define REG_CNA_DMA_CON2 0x00001080 +#define CNA_DMA_CON2_RESERVED_0__MASK 0xf0000000 +#define CNA_DMA_CON2_RESERVED_0__SHIFT 28 +static inline uint32_t CNA_DMA_CON2_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DMA_CON2_RESERVED_0__SHIFT) & CNA_DMA_CON2_RESERVED_0__MASK; +} +#define CNA_DMA_CON2_SURF_STRIDE__MASK 0x0fffffff +#define CNA_DMA_CON2_SURF_STRIDE__SHIFT 0 +static inline uint32_t CNA_DMA_CON2_SURF_STRIDE(uint32_t val) +{ + return ((val) << CNA_DMA_CON2_SURF_STRIDE__SHIFT) & CNA_DMA_CON2_SURF_STRIDE__MASK; +} + +#define REG_CNA_FC_DATA_SIZE0 0x00001084 +#define CNA_FC_DATA_SIZE0_RESERVED_0__MASK 0xc0000000 +#define CNA_FC_DATA_SIZE0_RESERVED_0__SHIFT 30 +static inline uint32_t CNA_FC_DATA_SIZE0_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_FC_DATA_SIZE0_RESERVED_0__SHIFT) & CNA_FC_DATA_SIZE0_RESERVED_0__MASK; +} +#define CNA_FC_DATA_SIZE0_DMA_WIDTH__MASK 0x3fff0000 +#define CNA_FC_DATA_SIZE0_DMA_WIDTH__SHIFT 16 +static inline uint32_t CNA_FC_DATA_SIZE0_DMA_WIDTH(uint32_t val) +{ + return ((val) << CNA_FC_DATA_SIZE0_DMA_WIDTH__SHIFT) & CNA_FC_DATA_SIZE0_DMA_WIDTH__MASK; +} +#define CNA_FC_DATA_SIZE0_RESERVED_1__MASK 0x0000f800 +#define CNA_FC_DATA_SIZE0_RESERVED_1__SHIFT 11 +static inline uint32_t CNA_FC_DATA_SIZE0_RESERVED_1(uint32_t val) +{ + return ((val) << CNA_FC_DATA_SIZE0_RESERVED_1__SHIFT) & CNA_FC_DATA_SIZE0_RESERVED_1__MASK; +} +#define CNA_FC_DATA_SIZE0_DMA_HEIGHT__MASK 0x000007ff +#define CNA_FC_DATA_SIZE0_DMA_HEIGHT__SHIFT 0 +static inline uint32_t CNA_FC_DATA_SIZE0_DMA_HEIGHT(uint32_t val) +{ + return ((val) << CNA_FC_DATA_SIZE0_DMA_HEIGHT__SHIFT) & CNA_FC_DATA_SIZE0_DMA_HEIGHT__MASK; +} + +#define REG_CNA_FC_DATA_SIZE1 0x00001088 +#define CNA_FC_DATA_SIZE1_RESERVED_0__MASK 0xffff0000 +#define CNA_FC_DATA_SIZE1_RESERVED_0__SHIFT 16 +static inline uint32_t CNA_FC_DATA_SIZE1_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_FC_DATA_SIZE1_RESERVED_0__SHIFT) & CNA_FC_DATA_SIZE1_RESERVED_0__MASK; +} +#define CNA_FC_DATA_SIZE1_DMA_CHANNEL__MASK 0x0000ffff +#define CNA_FC_DATA_SIZE1_DMA_CHANNEL__SHIFT 0 +static inline uint32_t CNA_FC_DATA_SIZE1_DMA_CHANNEL(uint32_t val) +{ + return ((val) << CNA_FC_DATA_SIZE1_DMA_CHANNEL__SHIFT) & CNA_FC_DATA_SIZE1_DMA_CHANNEL__MASK; +} + +#define REG_CNA_CLK_GATE 0x00001090 +#define CNA_CLK_GATE_RESERVED_0__MASK 0xffffffe0 +#define CNA_CLK_GATE_RESERVED_0__SHIFT 5 +static inline uint32_t CNA_CLK_GATE_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_CLK_GATE_RESERVED_0__SHIFT) & CNA_CLK_GATE_RESERVED_0__MASK; +} +#define CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__MASK 0x00000010 +#define CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__SHIFT 4 +static inline uint32_t CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE(uint32_t val) +{ + return ((val) << CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CBUF_CS_DISABLE_CLKGATE__MASK; +} +#define CNA_CLK_GATE_RESERVED_1__MASK 0x00000008 +#define CNA_CLK_GATE_RESERVED_1__SHIFT 3 +static inline uint32_t CNA_CLK_GATE_RESERVED_1(uint32_t val) +{ + return ((val) << 
CNA_CLK_GATE_RESERVED_1__SHIFT) & CNA_CLK_GATE_RESERVED_1__MASK; +} +#define CNA_CLK_GATE_CSC_DISABLE_CLKGATE__MASK 0x00000004 +#define CNA_CLK_GATE_CSC_DISABLE_CLKGATE__SHIFT 2 +static inline uint32_t CNA_CLK_GATE_CSC_DISABLE_CLKGATE(uint32_t val) +{ + return ((val) << CNA_CLK_GATE_CSC_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CSC_DISABLE_CLKGATE__MASK; +} +#define CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__MASK 0x00000002 +#define CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__SHIFT 1 +static inline uint32_t CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE(uint32_t val) +{ + return ((val) << CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CNA_WEIGHT_DISABLE_CLKGATE__MASK; +} +#define CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__MASK 0x00000001 +#define CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__SHIFT 0 +static inline uint32_t CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE(uint32_t val) +{ + return ((val) << CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__SHIFT) & CNA_CLK_GATE_CNA_FEATURE_DISABLE_CLKGATE__MASK; +} + +#define REG_CNA_DCOMP_CTRL 0x00001100 +#define CNA_DCOMP_CTRL_RESERVED_0__MASK 0xfffffff0 +#define CNA_DCOMP_CTRL_RESERVED_0__SHIFT 4 +static inline uint32_t CNA_DCOMP_CTRL_RESERVED_0(uint32_t val) +{ + return ((val) << CNA_DCOMP_CTRL_RESERVED_0__SHIFT) & CNA_DCOMP_CTRL_RESERVED_0__MASK; +} +#define CNA_DCOMP_CTRL_WT_DEC_BYPASS__MASK 0x00000008 +#define CNA_DCOMP_CTRL_WT_DEC_BYPASS__SHIFT 3 +static inline uint32_t CNA_DCOMP_CTRL_WT_DEC_BYPASS(uint32_t val) +{ + return ((val) << CNA_DCOMP_CTRL_WT_DEC_BYPASS__SHIFT) & CNA_DCOMP_CTRL_WT_DEC_BYPASS__MASK; +} +#define CNA_DCOMP_CTRL_DECOMP_CONTROL__MASK 0x00000007 +#define CNA_DCOMP_CTRL_DECOMP_CONTROL__SHIFT 0 +static inline uint32_t CNA_DCOMP_CTRL_DECOMP_CONTROL(uint32_t val) +{ + return ((val) << CNA_DCOMP_CTRL_DECOMP_CONTROL__SHIFT) & CNA_DCOMP_CTRL_DECOMP_CONTROL__MASK; +} + +#define REG_CNA_DCOMP_REGNUM 0x00001104 +#define CNA_DCOMP_REGNUM_DCOMP_REGNUM__MASK 0xffffffff +#define CNA_DCOMP_REGNUM_DCOMP_REGNUM__SHIFT 0 +static inline uint32_t CNA_DCOMP_REGNUM_DCOMP_REGNUM(uint32_t val) +{ + return ((val) << CNA_DCOMP_REGNUM_DCOMP_REGNUM__SHIFT) & CNA_DCOMP_REGNUM_DCOMP_REGNUM__MASK; +} + +#define REG_CNA_DCOMP_ADDR0 0x00001110 +#define CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__MASK 0xffffffff +#define CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__SHIFT 0 +static inline uint32_t CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0(uint32_t val) +{ + return ((val) << CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__SHIFT) & CNA_DCOMP_ADDR0_DECOMPRESS_ADDR0__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT0 0x00001140 +#define CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__SHIFT) & CNA_DCOMP_AMOUNT0_DCOMP_AMOUNT0__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT1 0x00001144 +#define CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__SHIFT) & CNA_DCOMP_AMOUNT1_DCOMP_AMOUNT1__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT2 0x00001148 +#define CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__SHIFT) & CNA_DCOMP_AMOUNT2_DCOMP_AMOUNT2__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT3 0x0000114c +#define 
CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__SHIFT) & CNA_DCOMP_AMOUNT3_DCOMP_AMOUNT3__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT4 0x00001150 +#define CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__SHIFT) & CNA_DCOMP_AMOUNT4_DCOMP_AMOUNT4__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT5 0x00001154 +#define CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__SHIFT) & CNA_DCOMP_AMOUNT5_DCOMP_AMOUNT5__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT6 0x00001158 +#define CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__SHIFT) & CNA_DCOMP_AMOUNT6_DCOMP_AMOUNT6__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT7 0x0000115c +#define CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__SHIFT) & CNA_DCOMP_AMOUNT7_DCOMP_AMOUNT7__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT8 0x00001160 +#define CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__SHIFT) & CNA_DCOMP_AMOUNT8_DCOMP_AMOUNT8__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT9 0x00001164 +#define CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__SHIFT) & CNA_DCOMP_AMOUNT9_DCOMP_AMOUNT9__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT10 0x00001168 +#define CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__SHIFT) & CNA_DCOMP_AMOUNT10_DCOMP_AMOUNT10__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT11 0x0000116c +#define CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__SHIFT) & CNA_DCOMP_AMOUNT11_DCOMP_AMOUNT11__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT12 0x00001170 +#define CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__SHIFT) & CNA_DCOMP_AMOUNT12_DCOMP_AMOUNT12__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT13 0x00001174 +#define CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__SHIFT) & 
CNA_DCOMP_AMOUNT13_DCOMP_AMOUNT13__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT14 0x00001178 +#define CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__SHIFT) & CNA_DCOMP_AMOUNT14_DCOMP_AMOUNT14__MASK; +} + +#define REG_CNA_DCOMP_AMOUNT15 0x0000117c +#define CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__MASK 0xffffffff +#define CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__SHIFT 0 +static inline uint32_t CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15(uint32_t val) +{ + return ((val) << CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__SHIFT) & CNA_DCOMP_AMOUNT15_DCOMP_AMOUNT15__MASK; +} + +#define REG_CNA_CVT_CON5 0x00001180 +#define CNA_CVT_CON5_PER_CHANNEL_CVT_EN__MASK 0xffffffff +#define CNA_CVT_CON5_PER_CHANNEL_CVT_EN__SHIFT 0 +static inline uint32_t CNA_CVT_CON5_PER_CHANNEL_CVT_EN(uint32_t val) +{ + return ((val) << CNA_CVT_CON5_PER_CHANNEL_CVT_EN__SHIFT) & CNA_CVT_CON5_PER_CHANNEL_CVT_EN__MASK; +} + +#define REG_CNA_PAD_CON1 0x00001184 +#define CNA_PAD_CON1_PAD_VALUE__MASK 0xffffffff +#define CNA_PAD_CON1_PAD_VALUE__SHIFT 0 +static inline uint32_t CNA_PAD_CON1_PAD_VALUE(uint32_t val) +{ + return ((val) << CNA_PAD_CON1_PAD_VALUE__SHIFT) & CNA_PAD_CON1_PAD_VALUE__MASK; +} + +#define REG_CORE_S_STATUS 0x00003000 +#define CORE_S_STATUS_RESERVED_0__MASK 0xfffc0000 +#define CORE_S_STATUS_RESERVED_0__SHIFT 18 +static inline uint32_t CORE_S_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_S_STATUS_RESERVED_0__SHIFT) & CORE_S_STATUS_RESERVED_0__MASK; +} +#define CORE_S_STATUS_STATUS_1__MASK 0x00030000 +#define CORE_S_STATUS_STATUS_1__SHIFT 16 +static inline uint32_t CORE_S_STATUS_STATUS_1(uint32_t val) +{ + return ((val) << CORE_S_STATUS_STATUS_1__SHIFT) & CORE_S_STATUS_STATUS_1__MASK; +} +#define CORE_S_STATUS_RESERVED_1__MASK 0x0000fffc +#define CORE_S_STATUS_RESERVED_1__SHIFT 2 +static inline uint32_t CORE_S_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << CORE_S_STATUS_RESERVED_1__SHIFT) & CORE_S_STATUS_RESERVED_1__MASK; +} +#define CORE_S_STATUS_STATUS_0__MASK 0x00000003 +#define CORE_S_STATUS_STATUS_0__SHIFT 0 +static inline uint32_t CORE_S_STATUS_STATUS_0(uint32_t val) +{ + return ((val) << CORE_S_STATUS_STATUS_0__SHIFT) & CORE_S_STATUS_STATUS_0__MASK; +} + +#define REG_CORE_S_POINTER 0x00003004 +#define CORE_S_POINTER_RESERVED_0__MASK 0xfffe0000 +#define CORE_S_POINTER_RESERVED_0__SHIFT 17 +static inline uint32_t CORE_S_POINTER_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_S_POINTER_RESERVED_0__SHIFT) & CORE_S_POINTER_RESERVED_0__MASK; +} +#define CORE_S_POINTER_EXECUTER__MASK 0x00010000 +#define CORE_S_POINTER_EXECUTER__SHIFT 16 +static inline uint32_t CORE_S_POINTER_EXECUTER(uint32_t val) +{ + return ((val) << CORE_S_POINTER_EXECUTER__SHIFT) & CORE_S_POINTER_EXECUTER__MASK; +} +#define CORE_S_POINTER_RESERVED_1__MASK 0x0000ffc0 +#define CORE_S_POINTER_RESERVED_1__SHIFT 6 +static inline uint32_t CORE_S_POINTER_RESERVED_1(uint32_t val) +{ + return ((val) << CORE_S_POINTER_RESERVED_1__SHIFT) & CORE_S_POINTER_RESERVED_1__MASK; +} +#define CORE_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020 +#define CORE_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5 +static inline uint32_t CORE_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val) +{ + return ((val) << CORE_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & CORE_S_POINTER_EXECUTER_PP_CLEAR__MASK; +} +#define CORE_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010 +#define CORE_S_POINTER_POINTER_PP_CLEAR__SHIFT 4 +static inline 
uint32_t CORE_S_POINTER_POINTER_PP_CLEAR(uint32_t val) +{ + return ((val) << CORE_S_POINTER_POINTER_PP_CLEAR__SHIFT) & CORE_S_POINTER_POINTER_PP_CLEAR__MASK; +} +#define CORE_S_POINTER_POINTER_PP_MODE__MASK 0x00000008 +#define CORE_S_POINTER_POINTER_PP_MODE__SHIFT 3 +static inline uint32_t CORE_S_POINTER_POINTER_PP_MODE(uint32_t val) +{ + return ((val) << CORE_S_POINTER_POINTER_PP_MODE__SHIFT) & CORE_S_POINTER_POINTER_PP_MODE__MASK; +} +#define CORE_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004 +#define CORE_S_POINTER_EXECUTER_PP_EN__SHIFT 2 +static inline uint32_t CORE_S_POINTER_EXECUTER_PP_EN(uint32_t val) +{ + return ((val) << CORE_S_POINTER_EXECUTER_PP_EN__SHIFT) & CORE_S_POINTER_EXECUTER_PP_EN__MASK; +} +#define CORE_S_POINTER_POINTER_PP_EN__MASK 0x00000002 +#define CORE_S_POINTER_POINTER_PP_EN__SHIFT 1 +static inline uint32_t CORE_S_POINTER_POINTER_PP_EN(uint32_t val) +{ + return ((val) << CORE_S_POINTER_POINTER_PP_EN__SHIFT) & CORE_S_POINTER_POINTER_PP_EN__MASK; +} +#define CORE_S_POINTER_POINTER__MASK 0x00000001 +#define CORE_S_POINTER_POINTER__SHIFT 0 +static inline uint32_t CORE_S_POINTER_POINTER(uint32_t val) +{ + return ((val) << CORE_S_POINTER_POINTER__SHIFT) & CORE_S_POINTER_POINTER__MASK; +} + +#define REG_CORE_OPERATION_ENABLE 0x00003008 +#define CORE_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define CORE_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t CORE_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_OPERATION_ENABLE_RESERVED_0__SHIFT) & CORE_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define CORE_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define CORE_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t CORE_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << CORE_OPERATION_ENABLE_OP_EN__SHIFT) & CORE_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_CORE_MAC_GATING 0x0000300c +#define CORE_MAC_GATING_RESERVED_0__MASK 0xf8000000 +#define CORE_MAC_GATING_RESERVED_0__SHIFT 27 +static inline uint32_t CORE_MAC_GATING_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_MAC_GATING_RESERVED_0__SHIFT) & CORE_MAC_GATING_RESERVED_0__MASK; +} +#define CORE_MAC_GATING_SLCG_OP_EN__MASK 0x07ffffff +#define CORE_MAC_GATING_SLCG_OP_EN__SHIFT 0 +static inline uint32_t CORE_MAC_GATING_SLCG_OP_EN(uint32_t val) +{ + return ((val) << CORE_MAC_GATING_SLCG_OP_EN__SHIFT) & CORE_MAC_GATING_SLCG_OP_EN__MASK; +} + +#define REG_CORE_MISC_CFG 0x00003010 +#define CORE_MISC_CFG_RESERVED_0__MASK 0xfff00000 +#define CORE_MISC_CFG_RESERVED_0__SHIFT 20 +static inline uint32_t CORE_MISC_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_RESERVED_0__SHIFT) & CORE_MISC_CFG_RESERVED_0__MASK; +} +#define CORE_MISC_CFG_SOFT_GATING__MASK 0x000fc000 +#define CORE_MISC_CFG_SOFT_GATING__SHIFT 14 +static inline uint32_t CORE_MISC_CFG_SOFT_GATING(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_SOFT_GATING__SHIFT) & CORE_MISC_CFG_SOFT_GATING__MASK; +} +#define CORE_MISC_CFG_RESERVED_1__MASK 0x00003800 +#define CORE_MISC_CFG_RESERVED_1__SHIFT 11 +static inline uint32_t CORE_MISC_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_RESERVED_1__SHIFT) & CORE_MISC_CFG_RESERVED_1__MASK; +} +#define CORE_MISC_CFG_PROC_PRECISION__MASK 0x00000700 +#define CORE_MISC_CFG_PROC_PRECISION__SHIFT 8 +static inline uint32_t CORE_MISC_CFG_PROC_PRECISION(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_PROC_PRECISION__SHIFT) & CORE_MISC_CFG_PROC_PRECISION__MASK; +} +#define CORE_MISC_CFG_RESERVED_2__MASK 0x000000fc +#define CORE_MISC_CFG_RESERVED_2__SHIFT 2 +static 
inline uint32_t CORE_MISC_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_RESERVED_2__SHIFT) & CORE_MISC_CFG_RESERVED_2__MASK; +} +#define CORE_MISC_CFG_DW_EN__MASK 0x00000002 +#define CORE_MISC_CFG_DW_EN__SHIFT 1 +static inline uint32_t CORE_MISC_CFG_DW_EN(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_DW_EN__SHIFT) & CORE_MISC_CFG_DW_EN__MASK; +} +#define CORE_MISC_CFG_QD_EN__MASK 0x00000001 +#define CORE_MISC_CFG_QD_EN__SHIFT 0 +static inline uint32_t CORE_MISC_CFG_QD_EN(uint32_t val) +{ + return ((val) << CORE_MISC_CFG_QD_EN__SHIFT) & CORE_MISC_CFG_QD_EN__MASK; +} + +#define REG_CORE_DATAOUT_SIZE_0 0x00003014 +#define CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__MASK 0xffff0000 +#define CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__SHIFT 16 +static inline uint32_t CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT(uint32_t val) +{ + return ((val) << CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__SHIFT) & CORE_DATAOUT_SIZE_0_DATAOUT_HEIGHT__MASK; +} +#define CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__MASK 0x0000ffff +#define CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__SHIFT 0 +static inline uint32_t CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH(uint32_t val) +{ + return ((val) << CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__SHIFT) & CORE_DATAOUT_SIZE_0_DATAOUT_WIDTH__MASK; +} + +#define REG_CORE_DATAOUT_SIZE_1 0x00003018 +#define CORE_DATAOUT_SIZE_1_RESERVED_0__MASK 0xffff0000 +#define CORE_DATAOUT_SIZE_1_RESERVED_0__SHIFT 16 +static inline uint32_t CORE_DATAOUT_SIZE_1_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_DATAOUT_SIZE_1_RESERVED_0__SHIFT) & CORE_DATAOUT_SIZE_1_RESERVED_0__MASK; +} +#define CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__MASK 0x0000ffff +#define CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__SHIFT 0 +static inline uint32_t CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL(uint32_t val) +{ + return ((val) << CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__SHIFT) & CORE_DATAOUT_SIZE_1_DATAOUT_CHANNEL__MASK; +} + +#define REG_CORE_CLIP_TRUNCATE 0x0000301c +#define CORE_CLIP_TRUNCATE_RESERVED_0__MASK 0xffffff80 +#define CORE_CLIP_TRUNCATE_RESERVED_0__SHIFT 7 +static inline uint32_t CORE_CLIP_TRUNCATE_RESERVED_0(uint32_t val) +{ + return ((val) << CORE_CLIP_TRUNCATE_RESERVED_0__SHIFT) & CORE_CLIP_TRUNCATE_RESERVED_0__MASK; +} +#define CORE_CLIP_TRUNCATE_ROUND_TYPE__MASK 0x00000040 +#define CORE_CLIP_TRUNCATE_ROUND_TYPE__SHIFT 6 +static inline uint32_t CORE_CLIP_TRUNCATE_ROUND_TYPE(uint32_t val) +{ + return ((val) << CORE_CLIP_TRUNCATE_ROUND_TYPE__SHIFT) & CORE_CLIP_TRUNCATE_ROUND_TYPE__MASK; +} +#define CORE_CLIP_TRUNCATE_RESERVED_1__MASK 0x00000020 +#define CORE_CLIP_TRUNCATE_RESERVED_1__SHIFT 5 +static inline uint32_t CORE_CLIP_TRUNCATE_RESERVED_1(uint32_t val) +{ + return ((val) << CORE_CLIP_TRUNCATE_RESERVED_1__SHIFT) & CORE_CLIP_TRUNCATE_RESERVED_1__MASK; +} +#define CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__MASK 0x0000001f +#define CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__SHIFT 0 +static inline uint32_t CORE_CLIP_TRUNCATE_CLIP_TRUNCATE(uint32_t val) +{ + return ((val) << CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__SHIFT) & CORE_CLIP_TRUNCATE_CLIP_TRUNCATE__MASK; +} + +#define REG_DPU_S_STATUS 0x00004000 +#define DPU_S_STATUS_RESERVED_0__MASK 0xfffc0000 +#define DPU_S_STATUS_RESERVED_0__SHIFT 18 +static inline uint32_t DPU_S_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_S_STATUS_RESERVED_0__SHIFT) & DPU_S_STATUS_RESERVED_0__MASK; +} +#define DPU_S_STATUS_STATUS_1__MASK 0x00030000 +#define DPU_S_STATUS_STATUS_1__SHIFT 16 +static inline uint32_t DPU_S_STATUS_STATUS_1(uint32_t val) +{ + return ((val) << DPU_S_STATUS_STATUS_1__SHIFT) & DPU_S_STATUS_STATUS_1__MASK; +} +#define 
DPU_S_STATUS_RESERVED_1__MASK 0x0000fffc +#define DPU_S_STATUS_RESERVED_1__SHIFT 2 +static inline uint32_t DPU_S_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_S_STATUS_RESERVED_1__SHIFT) & DPU_S_STATUS_RESERVED_1__MASK; +} +#define DPU_S_STATUS_STATUS_0__MASK 0x00000003 +#define DPU_S_STATUS_STATUS_0__SHIFT 0 +static inline uint32_t DPU_S_STATUS_STATUS_0(uint32_t val) +{ + return ((val) << DPU_S_STATUS_STATUS_0__SHIFT) & DPU_S_STATUS_STATUS_0__MASK; +} + +#define REG_DPU_S_POINTER 0x00004004 +#define DPU_S_POINTER_RESERVED_0__MASK 0xfffe0000 +#define DPU_S_POINTER_RESERVED_0__SHIFT 17 +static inline uint32_t DPU_S_POINTER_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_S_POINTER_RESERVED_0__SHIFT) & DPU_S_POINTER_RESERVED_0__MASK; +} +#define DPU_S_POINTER_EXECUTER__MASK 0x00010000 +#define DPU_S_POINTER_EXECUTER__SHIFT 16 +static inline uint32_t DPU_S_POINTER_EXECUTER(uint32_t val) +{ + return ((val) << DPU_S_POINTER_EXECUTER__SHIFT) & DPU_S_POINTER_EXECUTER__MASK; +} +#define DPU_S_POINTER_RESERVED_1__MASK 0x0000ffc0 +#define DPU_S_POINTER_RESERVED_1__SHIFT 6 +static inline uint32_t DPU_S_POINTER_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_S_POINTER_RESERVED_1__SHIFT) & DPU_S_POINTER_RESERVED_1__MASK; +} +#define DPU_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020 +#define DPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5 +static inline uint32_t DPU_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val) +{ + return ((val) << DPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & DPU_S_POINTER_EXECUTER_PP_CLEAR__MASK; +} +#define DPU_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010 +#define DPU_S_POINTER_POINTER_PP_CLEAR__SHIFT 4 +static inline uint32_t DPU_S_POINTER_POINTER_PP_CLEAR(uint32_t val) +{ + return ((val) << DPU_S_POINTER_POINTER_PP_CLEAR__SHIFT) & DPU_S_POINTER_POINTER_PP_CLEAR__MASK; +} +#define DPU_S_POINTER_POINTER_PP_MODE__MASK 0x00000008 +#define DPU_S_POINTER_POINTER_PP_MODE__SHIFT 3 +static inline uint32_t DPU_S_POINTER_POINTER_PP_MODE(uint32_t val) +{ + return ((val) << DPU_S_POINTER_POINTER_PP_MODE__SHIFT) & DPU_S_POINTER_POINTER_PP_MODE__MASK; +} +#define DPU_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004 +#define DPU_S_POINTER_EXECUTER_PP_EN__SHIFT 2 +static inline uint32_t DPU_S_POINTER_EXECUTER_PP_EN(uint32_t val) +{ + return ((val) << DPU_S_POINTER_EXECUTER_PP_EN__SHIFT) & DPU_S_POINTER_EXECUTER_PP_EN__MASK; +} +#define DPU_S_POINTER_POINTER_PP_EN__MASK 0x00000002 +#define DPU_S_POINTER_POINTER_PP_EN__SHIFT 1 +static inline uint32_t DPU_S_POINTER_POINTER_PP_EN(uint32_t val) +{ + return ((val) << DPU_S_POINTER_POINTER_PP_EN__SHIFT) & DPU_S_POINTER_POINTER_PP_EN__MASK; +} +#define DPU_S_POINTER_POINTER__MASK 0x00000001 +#define DPU_S_POINTER_POINTER__SHIFT 0 +static inline uint32_t DPU_S_POINTER_POINTER(uint32_t val) +{ + return ((val) << DPU_S_POINTER_POINTER__SHIFT) & DPU_S_POINTER_POINTER__MASK; +} + +#define REG_DPU_OPERATION_ENABLE 0x00004008 +#define DPU_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define DPU_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t DPU_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_OPERATION_ENABLE_RESERVED_0__SHIFT) & DPU_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define DPU_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define DPU_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t DPU_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << DPU_OPERATION_ENABLE_OP_EN__SHIFT) & DPU_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_DPU_FEATURE_MODE_CFG 0x0000400c +#define DPU_FEATURE_MODE_CFG_COMB_USE__MASK 0x80000000 +#define 
DPU_FEATURE_MODE_CFG_COMB_USE__SHIFT 31 +static inline uint32_t DPU_FEATURE_MODE_CFG_COMB_USE(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_COMB_USE__SHIFT) & DPU_FEATURE_MODE_CFG_COMB_USE__MASK; +} +#define DPU_FEATURE_MODE_CFG_TP_EN__MASK 0x40000000 +#define DPU_FEATURE_MODE_CFG_TP_EN__SHIFT 30 +static inline uint32_t DPU_FEATURE_MODE_CFG_TP_EN(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_TP_EN__SHIFT) & DPU_FEATURE_MODE_CFG_TP_EN__MASK; +} +#define DPU_FEATURE_MODE_CFG_RGP_TYPE__MASK 0x3c000000 +#define DPU_FEATURE_MODE_CFG_RGP_TYPE__SHIFT 26 +static inline uint32_t DPU_FEATURE_MODE_CFG_RGP_TYPE(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_RGP_TYPE__SHIFT) & DPU_FEATURE_MODE_CFG_RGP_TYPE__MASK; +} +#define DPU_FEATURE_MODE_CFG_NONALIGN__MASK 0x02000000 +#define DPU_FEATURE_MODE_CFG_NONALIGN__SHIFT 25 +static inline uint32_t DPU_FEATURE_MODE_CFG_NONALIGN(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_NONALIGN__SHIFT) & DPU_FEATURE_MODE_CFG_NONALIGN__MASK; +} +#define DPU_FEATURE_MODE_CFG_SURF_LEN__MASK 0x01fffe00 +#define DPU_FEATURE_MODE_CFG_SURF_LEN__SHIFT 9 +static inline uint32_t DPU_FEATURE_MODE_CFG_SURF_LEN(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_SURF_LEN__SHIFT) & DPU_FEATURE_MODE_CFG_SURF_LEN__MASK; +} +#define DPU_FEATURE_MODE_CFG_BURST_LEN__MASK 0x000001e0 +#define DPU_FEATURE_MODE_CFG_BURST_LEN__SHIFT 5 +static inline uint32_t DPU_FEATURE_MODE_CFG_BURST_LEN(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_BURST_LEN__SHIFT) & DPU_FEATURE_MODE_CFG_BURST_LEN__MASK; +} +#define DPU_FEATURE_MODE_CFG_CONV_MODE__MASK 0x00000018 +#define DPU_FEATURE_MODE_CFG_CONV_MODE__SHIFT 3 +static inline uint32_t DPU_FEATURE_MODE_CFG_CONV_MODE(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_CONV_MODE__SHIFT) & DPU_FEATURE_MODE_CFG_CONV_MODE__MASK; +} +#define DPU_FEATURE_MODE_CFG_OUTPUT_MODE__MASK 0x00000006 +#define DPU_FEATURE_MODE_CFG_OUTPUT_MODE__SHIFT 1 +static inline uint32_t DPU_FEATURE_MODE_CFG_OUTPUT_MODE(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_OUTPUT_MODE__SHIFT) & DPU_FEATURE_MODE_CFG_OUTPUT_MODE__MASK; +} +#define DPU_FEATURE_MODE_CFG_FLYING_MODE__MASK 0x00000001 +#define DPU_FEATURE_MODE_CFG_FLYING_MODE__SHIFT 0 +static inline uint32_t DPU_FEATURE_MODE_CFG_FLYING_MODE(uint32_t val) +{ + return ((val) << DPU_FEATURE_MODE_CFG_FLYING_MODE__SHIFT) & DPU_FEATURE_MODE_CFG_FLYING_MODE__MASK; +} + +#define REG_DPU_DATA_FORMAT 0x00004010 +#define DPU_DATA_FORMAT_OUT_PRECISION__MASK 0xe0000000 +#define DPU_DATA_FORMAT_OUT_PRECISION__SHIFT 29 +static inline uint32_t DPU_DATA_FORMAT_OUT_PRECISION(uint32_t val) +{ + return ((val) << DPU_DATA_FORMAT_OUT_PRECISION__SHIFT) & DPU_DATA_FORMAT_OUT_PRECISION__MASK; +} +#define DPU_DATA_FORMAT_IN_PRECISION__MASK 0x1c000000 +#define DPU_DATA_FORMAT_IN_PRECISION__SHIFT 26 +static inline uint32_t DPU_DATA_FORMAT_IN_PRECISION(uint32_t val) +{ + return ((val) << DPU_DATA_FORMAT_IN_PRECISION__SHIFT) & DPU_DATA_FORMAT_IN_PRECISION__MASK; +} +#define DPU_DATA_FORMAT_EW_TRUNCATE_NEG__MASK 0x03ff0000 +#define DPU_DATA_FORMAT_EW_TRUNCATE_NEG__SHIFT 16 +static inline uint32_t DPU_DATA_FORMAT_EW_TRUNCATE_NEG(uint32_t val) +{ + return ((val) << DPU_DATA_FORMAT_EW_TRUNCATE_NEG__SHIFT) & DPU_DATA_FORMAT_EW_TRUNCATE_NEG__MASK; +} +#define DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__MASK 0x0000fc00 +#define DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__SHIFT 10 +static inline uint32_t DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG(uint32_t val) +{ + return ((val) << 
DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__SHIFT) & DPU_DATA_FORMAT_BN_MUL_SHIFT_VALUE_NEG__MASK; +} +#define DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__MASK 0x000003f0 +#define DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__SHIFT 4 +static inline uint32_t DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG(uint32_t val) +{ + return ((val) << DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__SHIFT) & DPU_DATA_FORMAT_BS_MUL_SHIFT_VALUE_NEG__MASK; +} +#define DPU_DATA_FORMAT_MC_SURF_OUT__MASK 0x00000008 +#define DPU_DATA_FORMAT_MC_SURF_OUT__SHIFT 3 +static inline uint32_t DPU_DATA_FORMAT_MC_SURF_OUT(uint32_t val) +{ + return ((val) << DPU_DATA_FORMAT_MC_SURF_OUT__SHIFT) & DPU_DATA_FORMAT_MC_SURF_OUT__MASK; +} +#define DPU_DATA_FORMAT_PROC_PRECISION__MASK 0x00000007 +#define DPU_DATA_FORMAT_PROC_PRECISION__SHIFT 0 +static inline uint32_t DPU_DATA_FORMAT_PROC_PRECISION(uint32_t val) +{ + return ((val) << DPU_DATA_FORMAT_PROC_PRECISION__SHIFT) & DPU_DATA_FORMAT_PROC_PRECISION__MASK; +} + +#define REG_DPU_OFFSET_PEND 0x00004014 +#define DPU_OFFSET_PEND_RESERVED_0__MASK 0xffff0000 +#define DPU_OFFSET_PEND_RESERVED_0__SHIFT 16 +static inline uint32_t DPU_OFFSET_PEND_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_OFFSET_PEND_RESERVED_0__SHIFT) & DPU_OFFSET_PEND_RESERVED_0__MASK; +} +#define DPU_OFFSET_PEND_OFFSET_PEND__MASK 0x0000ffff +#define DPU_OFFSET_PEND_OFFSET_PEND__SHIFT 0 +static inline uint32_t DPU_OFFSET_PEND_OFFSET_PEND(uint32_t val) +{ + return ((val) << DPU_OFFSET_PEND_OFFSET_PEND__SHIFT) & DPU_OFFSET_PEND_OFFSET_PEND__MASK; +} + +#define REG_DPU_DST_BASE_ADDR 0x00004020 +#define DPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK 0xffffffff +#define DPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT 0 +static inline uint32_t DPU_DST_BASE_ADDR_DST_BASE_ADDR(uint32_t val) +{ + return ((val) << DPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT) & DPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK; +} + +#define REG_DPU_DST_SURF_STRIDE 0x00004024 +#define DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK 0xfffffff0 +#define DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT 4 +static inline uint32_t DPU_DST_SURF_STRIDE_DST_SURF_STRIDE(uint32_t val) +{ + return ((val) << DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT) & DPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK; +} +#define DPU_DST_SURF_STRIDE_RESERVED_0__MASK 0x0000000f +#define DPU_DST_SURF_STRIDE_RESERVED_0__SHIFT 0 +static inline uint32_t DPU_DST_SURF_STRIDE_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_DST_SURF_STRIDE_RESERVED_0__SHIFT) & DPU_DST_SURF_STRIDE_RESERVED_0__MASK; +} + +#define REG_DPU_DATA_CUBE_WIDTH 0x00004030 +#define DPU_DATA_CUBE_WIDTH_RESERVED_0__MASK 0xffffe000 +#define DPU_DATA_CUBE_WIDTH_RESERVED_0__SHIFT 13 +static inline uint32_t DPU_DATA_CUBE_WIDTH_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_WIDTH_RESERVED_0__SHIFT) & DPU_DATA_CUBE_WIDTH_RESERVED_0__MASK; +} +#define DPU_DATA_CUBE_WIDTH_WIDTH__MASK 0x00001fff +#define DPU_DATA_CUBE_WIDTH_WIDTH__SHIFT 0 +static inline uint32_t DPU_DATA_CUBE_WIDTH_WIDTH(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_WIDTH_WIDTH__SHIFT) & DPU_DATA_CUBE_WIDTH_WIDTH__MASK; +} + +#define REG_DPU_DATA_CUBE_HEIGHT 0x00004034 +#define DPU_DATA_CUBE_HEIGHT_RESERVED_0__MASK 0xfe000000 +#define DPU_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT 25 +static inline uint32_t DPU_DATA_CUBE_HEIGHT_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT) & DPU_DATA_CUBE_HEIGHT_RESERVED_0__MASK; +} +#define DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__MASK 0x01c00000 +#define DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__SHIFT 22 +static inline uint32_t 
DPU_DATA_CUBE_HEIGHT_MINMAX_CTL(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__SHIFT) & DPU_DATA_CUBE_HEIGHT_MINMAX_CTL__MASK; +} +#define DPU_DATA_CUBE_HEIGHT_RESERVED_1__MASK 0x003fe000 +#define DPU_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT 13 +static inline uint32_t DPU_DATA_CUBE_HEIGHT_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT) & DPU_DATA_CUBE_HEIGHT_RESERVED_1__MASK; +} +#define DPU_DATA_CUBE_HEIGHT_HEIGHT__MASK 0x00001fff +#define DPU_DATA_CUBE_HEIGHT_HEIGHT__SHIFT 0 +static inline uint32_t DPU_DATA_CUBE_HEIGHT_HEIGHT(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_HEIGHT_HEIGHT__SHIFT) & DPU_DATA_CUBE_HEIGHT_HEIGHT__MASK; +} + +#define REG_DPU_DATA_CUBE_NOTCH_ADDR 0x00004038 +#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__MASK 0xe0000000 +#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__SHIFT 29 +static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_0__MASK; +} +#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__MASK 0x1fff0000 +#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__SHIFT 16 +static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_1__MASK; +} +#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__MASK 0x0000e000 +#define DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__SHIFT 13 +static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_RESERVED_1__MASK; +} +#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__MASK 0x00001fff +#define DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__SHIFT 0 +static inline uint32_t DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__SHIFT) & DPU_DATA_CUBE_NOTCH_ADDR_NOTCH_ADDR_0__MASK; +} + +#define REG_DPU_DATA_CUBE_CHANNEL 0x0000403c +#define DPU_DATA_CUBE_CHANNEL_RESERVED_0__MASK 0xe0000000 +#define DPU_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT 29 +static inline uint32_t DPU_DATA_CUBE_CHANNEL_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT) & DPU_DATA_CUBE_CHANNEL_RESERVED_0__MASK; +} +#define DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__MASK 0x1fff0000 +#define DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__SHIFT 16 +static inline uint32_t DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__SHIFT) & DPU_DATA_CUBE_CHANNEL_ORIG_CHANNEL__MASK; +} +#define DPU_DATA_CUBE_CHANNEL_RESERVED_1__MASK 0x0000e000 +#define DPU_DATA_CUBE_CHANNEL_RESERVED_1__SHIFT 13 +static inline uint32_t DPU_DATA_CUBE_CHANNEL_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_CHANNEL_RESERVED_1__SHIFT) & DPU_DATA_CUBE_CHANNEL_RESERVED_1__MASK; +} +#define DPU_DATA_CUBE_CHANNEL_CHANNEL__MASK 0x00001fff +#define DPU_DATA_CUBE_CHANNEL_CHANNEL__SHIFT 0 +static inline uint32_t DPU_DATA_CUBE_CHANNEL_CHANNEL(uint32_t val) +{ + return ((val) << DPU_DATA_CUBE_CHANNEL_CHANNEL__SHIFT) & DPU_DATA_CUBE_CHANNEL_CHANNEL__MASK; +} + +#define REG_DPU_BS_CFG 0x00004040 +#define DPU_BS_CFG_RESERVED_0__MASK 0xfff00000 +#define DPU_BS_CFG_RESERVED_0__SHIFT 20 +static inline uint32_t DPU_BS_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_BS_CFG_RESERVED_0__SHIFT) & DPU_BS_CFG_RESERVED_0__MASK; +} +#define DPU_BS_CFG_BS_ALU_ALGO__MASK 0x000f0000 +#define DPU_BS_CFG_BS_ALU_ALGO__SHIFT 
16 +static inline uint32_t DPU_BS_CFG_BS_ALU_ALGO(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_ALU_ALGO__SHIFT) & DPU_BS_CFG_BS_ALU_ALGO__MASK; +} +#define DPU_BS_CFG_RESERVED_1__MASK 0x0000fe00 +#define DPU_BS_CFG_RESERVED_1__SHIFT 9 +static inline uint32_t DPU_BS_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_BS_CFG_RESERVED_1__SHIFT) & DPU_BS_CFG_RESERVED_1__MASK; +} +#define DPU_BS_CFG_BS_ALU_SRC__MASK 0x00000100 +#define DPU_BS_CFG_BS_ALU_SRC__SHIFT 8 +static inline uint32_t DPU_BS_CFG_BS_ALU_SRC(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_ALU_SRC__SHIFT) & DPU_BS_CFG_BS_ALU_SRC__MASK; +} +#define DPU_BS_CFG_BS_RELUX_EN__MASK 0x00000080 +#define DPU_BS_CFG_BS_RELUX_EN__SHIFT 7 +static inline uint32_t DPU_BS_CFG_BS_RELUX_EN(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_RELUX_EN__SHIFT) & DPU_BS_CFG_BS_RELUX_EN__MASK; +} +#define DPU_BS_CFG_BS_RELU_BYPASS__MASK 0x00000040 +#define DPU_BS_CFG_BS_RELU_BYPASS__SHIFT 6 +static inline uint32_t DPU_BS_CFG_BS_RELU_BYPASS(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_RELU_BYPASS__SHIFT) & DPU_BS_CFG_BS_RELU_BYPASS__MASK; +} +#define DPU_BS_CFG_BS_MUL_PRELU__MASK 0x00000020 +#define DPU_BS_CFG_BS_MUL_PRELU__SHIFT 5 +static inline uint32_t DPU_BS_CFG_BS_MUL_PRELU(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_MUL_PRELU__SHIFT) & DPU_BS_CFG_BS_MUL_PRELU__MASK; +} +#define DPU_BS_CFG_BS_MUL_BYPASS__MASK 0x00000010 +#define DPU_BS_CFG_BS_MUL_BYPASS__SHIFT 4 +static inline uint32_t DPU_BS_CFG_BS_MUL_BYPASS(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_MUL_BYPASS__SHIFT) & DPU_BS_CFG_BS_MUL_BYPASS__MASK; +} +#define DPU_BS_CFG_RESERVED_2__MASK 0x0000000c +#define DPU_BS_CFG_RESERVED_2__SHIFT 2 +static inline uint32_t DPU_BS_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << DPU_BS_CFG_RESERVED_2__SHIFT) & DPU_BS_CFG_RESERVED_2__MASK; +} +#define DPU_BS_CFG_BS_ALU_BYPASS__MASK 0x00000002 +#define DPU_BS_CFG_BS_ALU_BYPASS__SHIFT 1 +static inline uint32_t DPU_BS_CFG_BS_ALU_BYPASS(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_ALU_BYPASS__SHIFT) & DPU_BS_CFG_BS_ALU_BYPASS__MASK; +} +#define DPU_BS_CFG_BS_BYPASS__MASK 0x00000001 +#define DPU_BS_CFG_BS_BYPASS__SHIFT 0 +static inline uint32_t DPU_BS_CFG_BS_BYPASS(uint32_t val) +{ + return ((val) << DPU_BS_CFG_BS_BYPASS__SHIFT) & DPU_BS_CFG_BS_BYPASS__MASK; +} + +#define REG_DPU_BS_ALU_CFG 0x00004044 +#define DPU_BS_ALU_CFG_BS_ALU_OPERAND__MASK 0xffffffff +#define DPU_BS_ALU_CFG_BS_ALU_OPERAND__SHIFT 0 +static inline uint32_t DPU_BS_ALU_CFG_BS_ALU_OPERAND(uint32_t val) +{ + return ((val) << DPU_BS_ALU_CFG_BS_ALU_OPERAND__SHIFT) & DPU_BS_ALU_CFG_BS_ALU_OPERAND__MASK; +} + +#define REG_DPU_BS_MUL_CFG 0x00004048 +#define DPU_BS_MUL_CFG_BS_MUL_OPERAND__MASK 0xffff0000 +#define DPU_BS_MUL_CFG_BS_MUL_OPERAND__SHIFT 16 +static inline uint32_t DPU_BS_MUL_CFG_BS_MUL_OPERAND(uint32_t val) +{ + return ((val) << DPU_BS_MUL_CFG_BS_MUL_OPERAND__SHIFT) & DPU_BS_MUL_CFG_BS_MUL_OPERAND__MASK; +} +#define DPU_BS_MUL_CFG_RESERVED_0__MASK 0x0000c000 +#define DPU_BS_MUL_CFG_RESERVED_0__SHIFT 14 +static inline uint32_t DPU_BS_MUL_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_BS_MUL_CFG_RESERVED_0__SHIFT) & DPU_BS_MUL_CFG_RESERVED_0__MASK; +} +#define DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__MASK 0x00003f00 +#define DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__SHIFT 8 +static inline uint32_t DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE(uint32_t val) +{ + return ((val) << DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__SHIFT) & DPU_BS_MUL_CFG_BS_MUL_SHIFT_VALUE__MASK; +} +#define DPU_BS_MUL_CFG_RESERVED_1__MASK 0x000000fc 
+#define DPU_BS_MUL_CFG_RESERVED_1__SHIFT 2 +static inline uint32_t DPU_BS_MUL_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_BS_MUL_CFG_RESERVED_1__SHIFT) & DPU_BS_MUL_CFG_RESERVED_1__MASK; +} +#define DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__MASK 0x00000002 +#define DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__SHIFT 1 +static inline uint32_t DPU_BS_MUL_CFG_BS_TRUNCATE_SRC(uint32_t val) +{ + return ((val) << DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__SHIFT) & DPU_BS_MUL_CFG_BS_TRUNCATE_SRC__MASK; +} +#define DPU_BS_MUL_CFG_BS_MUL_SRC__MASK 0x00000001 +#define DPU_BS_MUL_CFG_BS_MUL_SRC__SHIFT 0 +static inline uint32_t DPU_BS_MUL_CFG_BS_MUL_SRC(uint32_t val) +{ + return ((val) << DPU_BS_MUL_CFG_BS_MUL_SRC__SHIFT) & DPU_BS_MUL_CFG_BS_MUL_SRC__MASK; +} + +#define REG_DPU_BS_RELUX_CMP_VALUE 0x0000404c +#define DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__MASK 0xffffffff +#define DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__SHIFT 0 +static inline uint32_t DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT(uint32_t val) +{ + return ((val) << DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__SHIFT) & DPU_BS_RELUX_CMP_VALUE_BS_RELUX_CMP_DAT__MASK; +} + +#define REG_DPU_BS_OW_CFG 0x00004050 +#define DPU_BS_OW_CFG_RGP_CNTER__MASK 0xf0000000 +#define DPU_BS_OW_CFG_RGP_CNTER__SHIFT 28 +static inline uint32_t DPU_BS_OW_CFG_RGP_CNTER(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_RGP_CNTER__SHIFT) & DPU_BS_OW_CFG_RGP_CNTER__MASK; +} +#define DPU_BS_OW_CFG_TP_ORG_EN__MASK 0x08000000 +#define DPU_BS_OW_CFG_TP_ORG_EN__SHIFT 27 +static inline uint32_t DPU_BS_OW_CFG_TP_ORG_EN(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_TP_ORG_EN__SHIFT) & DPU_BS_OW_CFG_TP_ORG_EN__MASK; +} +#define DPU_BS_OW_CFG_RESERVED_0__MASK 0x07fff800 +#define DPU_BS_OW_CFG_RESERVED_0__SHIFT 11 +static inline uint32_t DPU_BS_OW_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_RESERVED_0__SHIFT) & DPU_BS_OW_CFG_RESERVED_0__MASK; +} +#define DPU_BS_OW_CFG_SIZE_E_2__MASK 0x00000700 +#define DPU_BS_OW_CFG_SIZE_E_2__SHIFT 8 +static inline uint32_t DPU_BS_OW_CFG_SIZE_E_2(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_SIZE_E_2__SHIFT) & DPU_BS_OW_CFG_SIZE_E_2__MASK; +} +#define DPU_BS_OW_CFG_SIZE_E_1__MASK 0x000000e0 +#define DPU_BS_OW_CFG_SIZE_E_1__SHIFT 5 +static inline uint32_t DPU_BS_OW_CFG_SIZE_E_1(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_SIZE_E_1__SHIFT) & DPU_BS_OW_CFG_SIZE_E_1__MASK; +} +#define DPU_BS_OW_CFG_SIZE_E_0__MASK 0x0000001c +#define DPU_BS_OW_CFG_SIZE_E_0__SHIFT 2 +static inline uint32_t DPU_BS_OW_CFG_SIZE_E_0(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_SIZE_E_0__SHIFT) & DPU_BS_OW_CFG_SIZE_E_0__MASK; +} +#define DPU_BS_OW_CFG_OD_BYPASS__MASK 0x00000002 +#define DPU_BS_OW_CFG_OD_BYPASS__SHIFT 1 +static inline uint32_t DPU_BS_OW_CFG_OD_BYPASS(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_OD_BYPASS__SHIFT) & DPU_BS_OW_CFG_OD_BYPASS__MASK; +} +#define DPU_BS_OW_CFG_OW_SRC__MASK 0x00000001 +#define DPU_BS_OW_CFG_OW_SRC__SHIFT 0 +static inline uint32_t DPU_BS_OW_CFG_OW_SRC(uint32_t val) +{ + return ((val) << DPU_BS_OW_CFG_OW_SRC__SHIFT) & DPU_BS_OW_CFG_OW_SRC__MASK; +} + +#define REG_DPU_BS_OW_OP 0x00004054 +#define DPU_BS_OW_OP_RESERVED_0__MASK 0xffff0000 +#define DPU_BS_OW_OP_RESERVED_0__SHIFT 16 +static inline uint32_t DPU_BS_OW_OP_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_BS_OW_OP_RESERVED_0__SHIFT) & DPU_BS_OW_OP_RESERVED_0__MASK; +} +#define DPU_BS_OW_OP_OW_OP__MASK 0x0000ffff +#define DPU_BS_OW_OP_OW_OP__SHIFT 0 +static inline uint32_t DPU_BS_OW_OP_OW_OP(uint32_t val) +{ + return ((val) << DPU_BS_OW_OP_OW_OP__SHIFT) & 
DPU_BS_OW_OP_OW_OP__MASK; +} + +#define REG_DPU_WDMA_SIZE_0 0x00004058 +#define DPU_WDMA_SIZE_0_RESERVED_0__MASK 0xf0000000 +#define DPU_WDMA_SIZE_0_RESERVED_0__SHIFT 28 +static inline uint32_t DPU_WDMA_SIZE_0_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_0_RESERVED_0__SHIFT) & DPU_WDMA_SIZE_0_RESERVED_0__MASK; +} +#define DPU_WDMA_SIZE_0_TP_PRECISION__MASK 0x08000000 +#define DPU_WDMA_SIZE_0_TP_PRECISION__SHIFT 27 +static inline uint32_t DPU_WDMA_SIZE_0_TP_PRECISION(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_0_TP_PRECISION__SHIFT) & DPU_WDMA_SIZE_0_TP_PRECISION__MASK; +} +#define DPU_WDMA_SIZE_0_SIZE_C_WDMA__MASK 0x07ff0000 +#define DPU_WDMA_SIZE_0_SIZE_C_WDMA__SHIFT 16 +static inline uint32_t DPU_WDMA_SIZE_0_SIZE_C_WDMA(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_0_SIZE_C_WDMA__SHIFT) & DPU_WDMA_SIZE_0_SIZE_C_WDMA__MASK; +} +#define DPU_WDMA_SIZE_0_RESERVED_1__MASK 0x0000e000 +#define DPU_WDMA_SIZE_0_RESERVED_1__SHIFT 13 +static inline uint32_t DPU_WDMA_SIZE_0_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_0_RESERVED_1__SHIFT) & DPU_WDMA_SIZE_0_RESERVED_1__MASK; +} +#define DPU_WDMA_SIZE_0_CHANNEL_WDMA__MASK 0x00001fff +#define DPU_WDMA_SIZE_0_CHANNEL_WDMA__SHIFT 0 +static inline uint32_t DPU_WDMA_SIZE_0_CHANNEL_WDMA(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_0_CHANNEL_WDMA__SHIFT) & DPU_WDMA_SIZE_0_CHANNEL_WDMA__MASK; +} + +#define REG_DPU_WDMA_SIZE_1 0x0000405c +#define DPU_WDMA_SIZE_1_RESERVED_0__MASK 0xe0000000 +#define DPU_WDMA_SIZE_1_RESERVED_0__SHIFT 29 +static inline uint32_t DPU_WDMA_SIZE_1_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_1_RESERVED_0__SHIFT) & DPU_WDMA_SIZE_1_RESERVED_0__MASK; +} +#define DPU_WDMA_SIZE_1_HEIGHT_WDMA__MASK 0x1fff0000 +#define DPU_WDMA_SIZE_1_HEIGHT_WDMA__SHIFT 16 +static inline uint32_t DPU_WDMA_SIZE_1_HEIGHT_WDMA(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_1_HEIGHT_WDMA__SHIFT) & DPU_WDMA_SIZE_1_HEIGHT_WDMA__MASK; +} +#define DPU_WDMA_SIZE_1_RESERVED_1__MASK 0x0000e000 +#define DPU_WDMA_SIZE_1_RESERVED_1__SHIFT 13 +static inline uint32_t DPU_WDMA_SIZE_1_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_1_RESERVED_1__SHIFT) & DPU_WDMA_SIZE_1_RESERVED_1__MASK; +} +#define DPU_WDMA_SIZE_1_WIDTH_WDMA__MASK 0x00001fff +#define DPU_WDMA_SIZE_1_WIDTH_WDMA__SHIFT 0 +static inline uint32_t DPU_WDMA_SIZE_1_WIDTH_WDMA(uint32_t val) +{ + return ((val) << DPU_WDMA_SIZE_1_WIDTH_WDMA__SHIFT) & DPU_WDMA_SIZE_1_WIDTH_WDMA__MASK; +} + +#define REG_DPU_BN_CFG 0x00004060 +#define DPU_BN_CFG_RESERVED_0__MASK 0xfff00000 +#define DPU_BN_CFG_RESERVED_0__SHIFT 20 +static inline uint32_t DPU_BN_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_BN_CFG_RESERVED_0__SHIFT) & DPU_BN_CFG_RESERVED_0__MASK; +} +#define DPU_BN_CFG_BN_ALU_ALGO__MASK 0x000f0000 +#define DPU_BN_CFG_BN_ALU_ALGO__SHIFT 16 +static inline uint32_t DPU_BN_CFG_BN_ALU_ALGO(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_ALU_ALGO__SHIFT) & DPU_BN_CFG_BN_ALU_ALGO__MASK; +} +#define DPU_BN_CFG_RESERVED_1__MASK 0x0000fe00 +#define DPU_BN_CFG_RESERVED_1__SHIFT 9 +static inline uint32_t DPU_BN_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_BN_CFG_RESERVED_1__SHIFT) & DPU_BN_CFG_RESERVED_1__MASK; +} +#define DPU_BN_CFG_BN_ALU_SRC__MASK 0x00000100 +#define DPU_BN_CFG_BN_ALU_SRC__SHIFT 8 +static inline uint32_t DPU_BN_CFG_BN_ALU_SRC(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_ALU_SRC__SHIFT) & DPU_BN_CFG_BN_ALU_SRC__MASK; +} +#define DPU_BN_CFG_BN_RELUX_EN__MASK 0x00000080 +#define DPU_BN_CFG_BN_RELUX_EN__SHIFT 7 
+static inline uint32_t DPU_BN_CFG_BN_RELUX_EN(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_RELUX_EN__SHIFT) & DPU_BN_CFG_BN_RELUX_EN__MASK; +} +#define DPU_BN_CFG_BN_RELU_BYPASS__MASK 0x00000040 +#define DPU_BN_CFG_BN_RELU_BYPASS__SHIFT 6 +static inline uint32_t DPU_BN_CFG_BN_RELU_BYPASS(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_RELU_BYPASS__SHIFT) & DPU_BN_CFG_BN_RELU_BYPASS__MASK; +} +#define DPU_BN_CFG_BN_MUL_PRELU__MASK 0x00000020 +#define DPU_BN_CFG_BN_MUL_PRELU__SHIFT 5 +static inline uint32_t DPU_BN_CFG_BN_MUL_PRELU(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_MUL_PRELU__SHIFT) & DPU_BN_CFG_BN_MUL_PRELU__MASK; +} +#define DPU_BN_CFG_BN_MUL_BYPASS__MASK 0x00000010 +#define DPU_BN_CFG_BN_MUL_BYPASS__SHIFT 4 +static inline uint32_t DPU_BN_CFG_BN_MUL_BYPASS(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_MUL_BYPASS__SHIFT) & DPU_BN_CFG_BN_MUL_BYPASS__MASK; +} +#define DPU_BN_CFG_RESERVED_2__MASK 0x0000000c +#define DPU_BN_CFG_RESERVED_2__SHIFT 2 +static inline uint32_t DPU_BN_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << DPU_BN_CFG_RESERVED_2__SHIFT) & DPU_BN_CFG_RESERVED_2__MASK; +} +#define DPU_BN_CFG_BN_ALU_BYPASS__MASK 0x00000002 +#define DPU_BN_CFG_BN_ALU_BYPASS__SHIFT 1 +static inline uint32_t DPU_BN_CFG_BN_ALU_BYPASS(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_ALU_BYPASS__SHIFT) & DPU_BN_CFG_BN_ALU_BYPASS__MASK; +} +#define DPU_BN_CFG_BN_BYPASS__MASK 0x00000001 +#define DPU_BN_CFG_BN_BYPASS__SHIFT 0 +static inline uint32_t DPU_BN_CFG_BN_BYPASS(uint32_t val) +{ + return ((val) << DPU_BN_CFG_BN_BYPASS__SHIFT) & DPU_BN_CFG_BN_BYPASS__MASK; +} + +#define REG_DPU_BN_ALU_CFG 0x00004064 +#define DPU_BN_ALU_CFG_BN_ALU_OPERAND__MASK 0xffffffff +#define DPU_BN_ALU_CFG_BN_ALU_OPERAND__SHIFT 0 +static inline uint32_t DPU_BN_ALU_CFG_BN_ALU_OPERAND(uint32_t val) +{ + return ((val) << DPU_BN_ALU_CFG_BN_ALU_OPERAND__SHIFT) & DPU_BN_ALU_CFG_BN_ALU_OPERAND__MASK; +} + +#define REG_DPU_BN_MUL_CFG 0x00004068 +#define DPU_BN_MUL_CFG_BN_MUL_OPERAND__MASK 0xffff0000 +#define DPU_BN_MUL_CFG_BN_MUL_OPERAND__SHIFT 16 +static inline uint32_t DPU_BN_MUL_CFG_BN_MUL_OPERAND(uint32_t val) +{ + return ((val) << DPU_BN_MUL_CFG_BN_MUL_OPERAND__SHIFT) & DPU_BN_MUL_CFG_BN_MUL_OPERAND__MASK; +} +#define DPU_BN_MUL_CFG_RESERVED_0__MASK 0x0000c000 +#define DPU_BN_MUL_CFG_RESERVED_0__SHIFT 14 +static inline uint32_t DPU_BN_MUL_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_BN_MUL_CFG_RESERVED_0__SHIFT) & DPU_BN_MUL_CFG_RESERVED_0__MASK; +} +#define DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__MASK 0x00003f00 +#define DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__SHIFT 8 +static inline uint32_t DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE(uint32_t val) +{ + return ((val) << DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__SHIFT) & DPU_BN_MUL_CFG_BN_MUL_SHIFT_VALUE__MASK; +} +#define DPU_BN_MUL_CFG_RESERVED_1__MASK 0x000000fc +#define DPU_BN_MUL_CFG_RESERVED_1__SHIFT 2 +static inline uint32_t DPU_BN_MUL_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_BN_MUL_CFG_RESERVED_1__SHIFT) & DPU_BN_MUL_CFG_RESERVED_1__MASK; +} +#define DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__MASK 0x00000002 +#define DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__SHIFT 1 +static inline uint32_t DPU_BN_MUL_CFG_BN_TRUNCATE_SRC(uint32_t val) +{ + return ((val) << DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__SHIFT) & DPU_BN_MUL_CFG_BN_TRUNCATE_SRC__MASK; +} +#define DPU_BN_MUL_CFG_BN_MUL_SRC__MASK 0x00000001 +#define DPU_BN_MUL_CFG_BN_MUL_SRC__SHIFT 0 +static inline uint32_t DPU_BN_MUL_CFG_BN_MUL_SRC(uint32_t val) +{ + return ((val) << DPU_BN_MUL_CFG_BN_MUL_SRC__SHIFT) & 
DPU_BN_MUL_CFG_BN_MUL_SRC__MASK; +} + +#define REG_DPU_BN_RELUX_CMP_VALUE 0x0000406c +#define DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__MASK 0xffffffff +#define DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__SHIFT 0 +static inline uint32_t DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT(uint32_t val) +{ + return ((val) << DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__SHIFT) & DPU_BN_RELUX_CMP_VALUE_BN_RELUX_CMP_DAT__MASK; +} + +#define REG_DPU_EW_CFG 0x00004070 +#define DPU_EW_CFG_EW_CVT_TYPE__MASK 0x80000000 +#define DPU_EW_CFG_EW_CVT_TYPE__SHIFT 31 +static inline uint32_t DPU_EW_CFG_EW_CVT_TYPE(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_CVT_TYPE__SHIFT) & DPU_EW_CFG_EW_CVT_TYPE__MASK; +} +#define DPU_EW_CFG_EW_CVT_ROUND__MASK 0x40000000 +#define DPU_EW_CFG_EW_CVT_ROUND__SHIFT 30 +static inline uint32_t DPU_EW_CFG_EW_CVT_ROUND(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_CVT_ROUND__SHIFT) & DPU_EW_CFG_EW_CVT_ROUND__MASK; +} +#define DPU_EW_CFG_EW_DATA_MODE__MASK 0x30000000 +#define DPU_EW_CFG_EW_DATA_MODE__SHIFT 28 +static inline uint32_t DPU_EW_CFG_EW_DATA_MODE(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_DATA_MODE__SHIFT) & DPU_EW_CFG_EW_DATA_MODE__MASK; +} +#define DPU_EW_CFG_RESERVED_0__MASK 0x0f000000 +#define DPU_EW_CFG_RESERVED_0__SHIFT 24 +static inline uint32_t DPU_EW_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_EW_CFG_RESERVED_0__SHIFT) & DPU_EW_CFG_RESERVED_0__MASK; +} +#define DPU_EW_CFG_EDATA_SIZE__MASK 0x00c00000 +#define DPU_EW_CFG_EDATA_SIZE__SHIFT 22 +static inline uint32_t DPU_EW_CFG_EDATA_SIZE(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EDATA_SIZE__SHIFT) & DPU_EW_CFG_EDATA_SIZE__MASK; +} +#define DPU_EW_CFG_EW_EQUAL_EN__MASK 0x00200000 +#define DPU_EW_CFG_EW_EQUAL_EN__SHIFT 21 +static inline uint32_t DPU_EW_CFG_EW_EQUAL_EN(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_EQUAL_EN__SHIFT) & DPU_EW_CFG_EW_EQUAL_EN__MASK; +} +#define DPU_EW_CFG_EW_BINARY_EN__MASK 0x00100000 +#define DPU_EW_CFG_EW_BINARY_EN__SHIFT 20 +static inline uint32_t DPU_EW_CFG_EW_BINARY_EN(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_BINARY_EN__SHIFT) & DPU_EW_CFG_EW_BINARY_EN__MASK; +} +#define DPU_EW_CFG_EW_ALU_ALGO__MASK 0x000f0000 +#define DPU_EW_CFG_EW_ALU_ALGO__SHIFT 16 +static inline uint32_t DPU_EW_CFG_EW_ALU_ALGO(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_ALU_ALGO__SHIFT) & DPU_EW_CFG_EW_ALU_ALGO__MASK; +} +#define DPU_EW_CFG_RESERVED_1__MASK 0x0000f800 +#define DPU_EW_CFG_RESERVED_1__SHIFT 11 +static inline uint32_t DPU_EW_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_EW_CFG_RESERVED_1__SHIFT) & DPU_EW_CFG_RESERVED_1__MASK; +} +#define DPU_EW_CFG_EW_RELUX_EN__MASK 0x00000400 +#define DPU_EW_CFG_EW_RELUX_EN__SHIFT 10 +static inline uint32_t DPU_EW_CFG_EW_RELUX_EN(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_RELUX_EN__SHIFT) & DPU_EW_CFG_EW_RELUX_EN__MASK; +} +#define DPU_EW_CFG_EW_RELU_BYPASS__MASK 0x00000200 +#define DPU_EW_CFG_EW_RELU_BYPASS__SHIFT 9 +static inline uint32_t DPU_EW_CFG_EW_RELU_BYPASS(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_RELU_BYPASS__SHIFT) & DPU_EW_CFG_EW_RELU_BYPASS__MASK; +} +#define DPU_EW_CFG_EW_OP_CVT_BYPASS__MASK 0x00000100 +#define DPU_EW_CFG_EW_OP_CVT_BYPASS__SHIFT 8 +static inline uint32_t DPU_EW_CFG_EW_OP_CVT_BYPASS(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_OP_CVT_BYPASS__SHIFT) & DPU_EW_CFG_EW_OP_CVT_BYPASS__MASK; +} +#define DPU_EW_CFG_EW_LUT_BYPASS__MASK 0x00000080 +#define DPU_EW_CFG_EW_LUT_BYPASS__SHIFT 7 +static inline uint32_t DPU_EW_CFG_EW_LUT_BYPASS(uint32_t val) +{ + return ((val) << 
DPU_EW_CFG_EW_LUT_BYPASS__SHIFT) & DPU_EW_CFG_EW_LUT_BYPASS__MASK; +} +#define DPU_EW_CFG_EW_OP_SRC__MASK 0x00000040 +#define DPU_EW_CFG_EW_OP_SRC__SHIFT 6 +static inline uint32_t DPU_EW_CFG_EW_OP_SRC(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_OP_SRC__SHIFT) & DPU_EW_CFG_EW_OP_SRC__MASK; +} +#define DPU_EW_CFG_EW_MUL_PRELU__MASK 0x00000020 +#define DPU_EW_CFG_EW_MUL_PRELU__SHIFT 5 +static inline uint32_t DPU_EW_CFG_EW_MUL_PRELU(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_MUL_PRELU__SHIFT) & DPU_EW_CFG_EW_MUL_PRELU__MASK; +} +#define DPU_EW_CFG_RESERVED_2__MASK 0x00000018 +#define DPU_EW_CFG_RESERVED_2__SHIFT 3 +static inline uint32_t DPU_EW_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << DPU_EW_CFG_RESERVED_2__SHIFT) & DPU_EW_CFG_RESERVED_2__MASK; +} +#define DPU_EW_CFG_EW_OP_TYPE__MASK 0x00000004 +#define DPU_EW_CFG_EW_OP_TYPE__SHIFT 2 +static inline uint32_t DPU_EW_CFG_EW_OP_TYPE(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_OP_TYPE__SHIFT) & DPU_EW_CFG_EW_OP_TYPE__MASK; +} +#define DPU_EW_CFG_EW_OP_BYPASS__MASK 0x00000002 +#define DPU_EW_CFG_EW_OP_BYPASS__SHIFT 1 +static inline uint32_t DPU_EW_CFG_EW_OP_BYPASS(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_OP_BYPASS__SHIFT) & DPU_EW_CFG_EW_OP_BYPASS__MASK; +} +#define DPU_EW_CFG_EW_BYPASS__MASK 0x00000001 +#define DPU_EW_CFG_EW_BYPASS__SHIFT 0 +static inline uint32_t DPU_EW_CFG_EW_BYPASS(uint32_t val) +{ + return ((val) << DPU_EW_CFG_EW_BYPASS__SHIFT) & DPU_EW_CFG_EW_BYPASS__MASK; +} + +#define REG_DPU_EW_CVT_OFFSET_VALUE 0x00004074 +#define DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__MASK 0xffffffff +#define DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__SHIFT 0 +static inline uint32_t DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET(uint32_t val) +{ + return ((val) << DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__SHIFT) & DPU_EW_CVT_OFFSET_VALUE_EW_OP_CVT_OFFSET__MASK; +} + +#define REG_DPU_EW_CVT_SCALE_VALUE 0x00004078 +#define DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__MASK 0xffc00000 +#define DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__SHIFT 22 +static inline uint32_t DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE(uint32_t val) +{ + return ((val) << DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__SHIFT) & DPU_EW_CVT_SCALE_VALUE_EW_TRUNCATE__MASK; +} +#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__MASK 0x003f0000 +#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__SHIFT 16 +static inline uint32_t DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT(uint32_t val) +{ + return ((val) << DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__SHIFT) & DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SHIFT__MASK; +} +#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__MASK 0x0000ffff +#define DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__SHIFT 0 +static inline uint32_t DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE(uint32_t val) +{ + return ((val) << DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__SHIFT) & DPU_EW_CVT_SCALE_VALUE_EW_OP_CVT_SCALE__MASK; +} + +#define REG_DPU_EW_RELUX_CMP_VALUE 0x0000407c +#define DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__MASK 0xffffffff +#define DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__SHIFT 0 +static inline uint32_t DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT(uint32_t val) +{ + return ((val) << DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__SHIFT) & DPU_EW_RELUX_CMP_VALUE_EW_RELUX_CMP_DAT__MASK; +} + +#define REG_DPU_OUT_CVT_OFFSET 0x00004080 +#define DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__MASK 0xffffffff +#define DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__SHIFT 0 +static inline uint32_t DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__SHIFT) & 
DPU_OUT_CVT_OFFSET_OUT_CVT_OFFSET__MASK; +} + +#define REG_DPU_OUT_CVT_SCALE 0x00004084 +#define DPU_OUT_CVT_SCALE_RESERVED_0__MASK 0xfffe0000 +#define DPU_OUT_CVT_SCALE_RESERVED_0__SHIFT 17 +static inline uint32_t DPU_OUT_CVT_SCALE_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SCALE_RESERVED_0__SHIFT) & DPU_OUT_CVT_SCALE_RESERVED_0__MASK; +} +#define DPU_OUT_CVT_SCALE_FP32TOFP16_EN__MASK 0x00010000 +#define DPU_OUT_CVT_SCALE_FP32TOFP16_EN__SHIFT 16 +static inline uint32_t DPU_OUT_CVT_SCALE_FP32TOFP16_EN(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SCALE_FP32TOFP16_EN__SHIFT) & DPU_OUT_CVT_SCALE_FP32TOFP16_EN__MASK; +} +#define DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__MASK 0x0000ffff +#define DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__SHIFT 0 +static inline uint32_t DPU_OUT_CVT_SCALE_OUT_CVT_SCALE(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__SHIFT) & DPU_OUT_CVT_SCALE_OUT_CVT_SCALE__MASK; +} + +#define REG_DPU_OUT_CVT_SHIFT 0x00004088 +#define DPU_OUT_CVT_SHIFT_CVT_TYPE__MASK 0x80000000 +#define DPU_OUT_CVT_SHIFT_CVT_TYPE__SHIFT 31 +static inline uint32_t DPU_OUT_CVT_SHIFT_CVT_TYPE(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SHIFT_CVT_TYPE__SHIFT) & DPU_OUT_CVT_SHIFT_CVT_TYPE__MASK; +} +#define DPU_OUT_CVT_SHIFT_CVT_ROUND__MASK 0x40000000 +#define DPU_OUT_CVT_SHIFT_CVT_ROUND__SHIFT 30 +static inline uint32_t DPU_OUT_CVT_SHIFT_CVT_ROUND(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SHIFT_CVT_ROUND__SHIFT) & DPU_OUT_CVT_SHIFT_CVT_ROUND__MASK; +} +#define DPU_OUT_CVT_SHIFT_RESERVED_0__MASK 0x3ff00000 +#define DPU_OUT_CVT_SHIFT_RESERVED_0__SHIFT 20 +static inline uint32_t DPU_OUT_CVT_SHIFT_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SHIFT_RESERVED_0__SHIFT) & DPU_OUT_CVT_SHIFT_RESERVED_0__MASK; +} +#define DPU_OUT_CVT_SHIFT_MINUS_EXP__MASK 0x000ff000 +#define DPU_OUT_CVT_SHIFT_MINUS_EXP__SHIFT 12 +static inline uint32_t DPU_OUT_CVT_SHIFT_MINUS_EXP(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SHIFT_MINUS_EXP__SHIFT) & DPU_OUT_CVT_SHIFT_MINUS_EXP__MASK; +} +#define DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__MASK 0x00000fff +#define DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__SHIFT 0 +static inline uint32_t DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT(uint32_t val) +{ + return ((val) << DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__SHIFT) & DPU_OUT_CVT_SHIFT_OUT_CVT_SHIFT__MASK; +} + +#define REG_DPU_EW_OP_VALUE_0 0x00004090 +#define DPU_EW_OP_VALUE_0_EW_OPERAND_0__MASK 0xffffffff +#define DPU_EW_OP_VALUE_0_EW_OPERAND_0__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_0_EW_OPERAND_0(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_0_EW_OPERAND_0__SHIFT) & DPU_EW_OP_VALUE_0_EW_OPERAND_0__MASK; +} + +#define REG_DPU_EW_OP_VALUE_1 0x00004094 +#define DPU_EW_OP_VALUE_1_EW_OPERAND_1__MASK 0xffffffff +#define DPU_EW_OP_VALUE_1_EW_OPERAND_1__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_1_EW_OPERAND_1(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_1_EW_OPERAND_1__SHIFT) & DPU_EW_OP_VALUE_1_EW_OPERAND_1__MASK; +} + +#define REG_DPU_EW_OP_VALUE_2 0x00004098 +#define DPU_EW_OP_VALUE_2_EW_OPERAND_2__MASK 0xffffffff +#define DPU_EW_OP_VALUE_2_EW_OPERAND_2__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_2_EW_OPERAND_2(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_2_EW_OPERAND_2__SHIFT) & DPU_EW_OP_VALUE_2_EW_OPERAND_2__MASK; +} + +#define REG_DPU_EW_OP_VALUE_3 0x0000409c +#define DPU_EW_OP_VALUE_3_EW_OPERAND_3__MASK 0xffffffff +#define DPU_EW_OP_VALUE_3_EW_OPERAND_3__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_3_EW_OPERAND_3(uint32_t val) +{ + return ((val) << 
DPU_EW_OP_VALUE_3_EW_OPERAND_3__SHIFT) & DPU_EW_OP_VALUE_3_EW_OPERAND_3__MASK; +} + +#define REG_DPU_EW_OP_VALUE_4 0x000040a0 +#define DPU_EW_OP_VALUE_4_EW_OPERAND_4__MASK 0xffffffff +#define DPU_EW_OP_VALUE_4_EW_OPERAND_4__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_4_EW_OPERAND_4(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_4_EW_OPERAND_4__SHIFT) & DPU_EW_OP_VALUE_4_EW_OPERAND_4__MASK; +} + +#define REG_DPU_EW_OP_VALUE_5 0x000040a4 +#define DPU_EW_OP_VALUE_5_EW_OPERAND_5__MASK 0xffffffff +#define DPU_EW_OP_VALUE_5_EW_OPERAND_5__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_5_EW_OPERAND_5(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_5_EW_OPERAND_5__SHIFT) & DPU_EW_OP_VALUE_5_EW_OPERAND_5__MASK; +} + +#define REG_DPU_EW_OP_VALUE_6 0x000040a8 +#define DPU_EW_OP_VALUE_6_EW_OPERAND_6__MASK 0xffffffff +#define DPU_EW_OP_VALUE_6_EW_OPERAND_6__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_6_EW_OPERAND_6(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_6_EW_OPERAND_6__SHIFT) & DPU_EW_OP_VALUE_6_EW_OPERAND_6__MASK; +} + +#define REG_DPU_EW_OP_VALUE_7 0x000040ac +#define DPU_EW_OP_VALUE_7_EW_OPERAND_7__MASK 0xffffffff +#define DPU_EW_OP_VALUE_7_EW_OPERAND_7__SHIFT 0 +static inline uint32_t DPU_EW_OP_VALUE_7_EW_OPERAND_7(uint32_t val) +{ + return ((val) << DPU_EW_OP_VALUE_7_EW_OPERAND_7__SHIFT) & DPU_EW_OP_VALUE_7_EW_OPERAND_7__MASK; +} + +#define REG_DPU_SURFACE_ADD 0x000040c0 +#define DPU_SURFACE_ADD_SURF_ADD__MASK 0xfffffff0 +#define DPU_SURFACE_ADD_SURF_ADD__SHIFT 4 +static inline uint32_t DPU_SURFACE_ADD_SURF_ADD(uint32_t val) +{ + return ((val) << DPU_SURFACE_ADD_SURF_ADD__SHIFT) & DPU_SURFACE_ADD_SURF_ADD__MASK; +} +#define DPU_SURFACE_ADD_RESERVED_0__MASK 0x0000000f +#define DPU_SURFACE_ADD_RESERVED_0__SHIFT 0 +static inline uint32_t DPU_SURFACE_ADD_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_SURFACE_ADD_RESERVED_0__SHIFT) & DPU_SURFACE_ADD_RESERVED_0__MASK; +} + +#define REG_DPU_LUT_ACCESS_CFG 0x00004100 +#define DPU_LUT_ACCESS_CFG_RESERVED_0__MASK 0xfffc0000 +#define DPU_LUT_ACCESS_CFG_RESERVED_0__SHIFT 18 +static inline uint32_t DPU_LUT_ACCESS_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_CFG_RESERVED_0__SHIFT) & DPU_LUT_ACCESS_CFG_RESERVED_0__MASK; +} +#define DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__MASK 0x00020000 +#define DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__SHIFT 17 +static inline uint32_t DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__SHIFT) & DPU_LUT_ACCESS_CFG_LUT_ACCESS_TYPE__MASK; +} +#define DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__MASK 0x00010000 +#define DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__SHIFT 16 +static inline uint32_t DPU_LUT_ACCESS_CFG_LUT_TABLE_ID(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__SHIFT) & DPU_LUT_ACCESS_CFG_LUT_TABLE_ID__MASK; +} +#define DPU_LUT_ACCESS_CFG_RESERVED_1__MASK 0x0000fc00 +#define DPU_LUT_ACCESS_CFG_RESERVED_1__SHIFT 10 +static inline uint32_t DPU_LUT_ACCESS_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_CFG_RESERVED_1__SHIFT) & DPU_LUT_ACCESS_CFG_RESERVED_1__MASK; +} +#define DPU_LUT_ACCESS_CFG_LUT_ADDR__MASK 0x000003ff +#define DPU_LUT_ACCESS_CFG_LUT_ADDR__SHIFT 0 +static inline uint32_t DPU_LUT_ACCESS_CFG_LUT_ADDR(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_CFG_LUT_ADDR__SHIFT) & DPU_LUT_ACCESS_CFG_LUT_ADDR__MASK; +} + +#define REG_DPU_LUT_ACCESS_DATA 0x00004104 +#define DPU_LUT_ACCESS_DATA_RESERVED_0__MASK 0xffff0000 +#define DPU_LUT_ACCESS_DATA_RESERVED_0__SHIFT 16 +static inline uint32_t 
DPU_LUT_ACCESS_DATA_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_DATA_RESERVED_0__SHIFT) & DPU_LUT_ACCESS_DATA_RESERVED_0__MASK; +} +#define DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__MASK 0x0000ffff +#define DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__SHIFT 0 +static inline uint32_t DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA(uint32_t val) +{ + return ((val) << DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__SHIFT) & DPU_LUT_ACCESS_DATA_LUT_ACCESS_DATA__MASK; +} + +#define REG_DPU_LUT_CFG 0x00004108 +#define DPU_LUT_CFG_RESERVED_0__MASK 0xffffff00 +#define DPU_LUT_CFG_RESERVED_0__SHIFT 8 +static inline uint32_t DPU_LUT_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_RESERVED_0__SHIFT) & DPU_LUT_CFG_RESERVED_0__MASK; +} +#define DPU_LUT_CFG_LUT_CAL_SEL__MASK 0x00000080 +#define DPU_LUT_CFG_LUT_CAL_SEL__SHIFT 7 +static inline uint32_t DPU_LUT_CFG_LUT_CAL_SEL(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_CAL_SEL__SHIFT) & DPU_LUT_CFG_LUT_CAL_SEL__MASK; +} +#define DPU_LUT_CFG_LUT_HYBRID_PRIORITY__MASK 0x00000040 +#define DPU_LUT_CFG_LUT_HYBRID_PRIORITY__SHIFT 6 +static inline uint32_t DPU_LUT_CFG_LUT_HYBRID_PRIORITY(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_HYBRID_PRIORITY__SHIFT) & DPU_LUT_CFG_LUT_HYBRID_PRIORITY__MASK; +} +#define DPU_LUT_CFG_LUT_OFLOW_PRIORITY__MASK 0x00000020 +#define DPU_LUT_CFG_LUT_OFLOW_PRIORITY__SHIFT 5 +static inline uint32_t DPU_LUT_CFG_LUT_OFLOW_PRIORITY(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_OFLOW_PRIORITY__SHIFT) & DPU_LUT_CFG_LUT_OFLOW_PRIORITY__MASK; +} +#define DPU_LUT_CFG_LUT_UFLOW_PRIORITY__MASK 0x00000010 +#define DPU_LUT_CFG_LUT_UFLOW_PRIORITY__SHIFT 4 +static inline uint32_t DPU_LUT_CFG_LUT_UFLOW_PRIORITY(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_UFLOW_PRIORITY__SHIFT) & DPU_LUT_CFG_LUT_UFLOW_PRIORITY__MASK; +} +#define DPU_LUT_CFG_LUT_LO_LE_MUX__MASK 0x0000000c +#define DPU_LUT_CFG_LUT_LO_LE_MUX__SHIFT 2 +static inline uint32_t DPU_LUT_CFG_LUT_LO_LE_MUX(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_LO_LE_MUX__SHIFT) & DPU_LUT_CFG_LUT_LO_LE_MUX__MASK; +} +#define DPU_LUT_CFG_LUT_EXPAND_EN__MASK 0x00000002 +#define DPU_LUT_CFG_LUT_EXPAND_EN__SHIFT 1 +static inline uint32_t DPU_LUT_CFG_LUT_EXPAND_EN(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_EXPAND_EN__SHIFT) & DPU_LUT_CFG_LUT_EXPAND_EN__MASK; +} +#define DPU_LUT_CFG_LUT_ROAD_SEL__MASK 0x00000001 +#define DPU_LUT_CFG_LUT_ROAD_SEL__SHIFT 0 +static inline uint32_t DPU_LUT_CFG_LUT_ROAD_SEL(uint32_t val) +{ + return ((val) << DPU_LUT_CFG_LUT_ROAD_SEL__SHIFT) & DPU_LUT_CFG_LUT_ROAD_SEL__MASK; +} + +#define REG_DPU_LUT_INFO 0x0000410c +#define DPU_LUT_INFO_RESERVED_0__MASK 0xff000000 +#define DPU_LUT_INFO_RESERVED_0__SHIFT 24 +static inline uint32_t DPU_LUT_INFO_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_LUT_INFO_RESERVED_0__SHIFT) & DPU_LUT_INFO_RESERVED_0__MASK; +} +#define DPU_LUT_INFO_LUT_LO_INDEX_SELECT__MASK 0x00ff0000 +#define DPU_LUT_INFO_LUT_LO_INDEX_SELECT__SHIFT 16 +static inline uint32_t DPU_LUT_INFO_LUT_LO_INDEX_SELECT(uint32_t val) +{ + return ((val) << DPU_LUT_INFO_LUT_LO_INDEX_SELECT__SHIFT) & DPU_LUT_INFO_LUT_LO_INDEX_SELECT__MASK; +} +#define DPU_LUT_INFO_LUT_LE_INDEX_SELECT__MASK 0x0000ff00 +#define DPU_LUT_INFO_LUT_LE_INDEX_SELECT__SHIFT 8 +static inline uint32_t DPU_LUT_INFO_LUT_LE_INDEX_SELECT(uint32_t val) +{ + return ((val) << DPU_LUT_INFO_LUT_LE_INDEX_SELECT__SHIFT) & DPU_LUT_INFO_LUT_LE_INDEX_SELECT__MASK; +} +#define DPU_LUT_INFO_RESERVED_1__MASK 0x000000ff +#define DPU_LUT_INFO_RESERVED_1__SHIFT 0 +static inline uint32_t 
DPU_LUT_INFO_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_LUT_INFO_RESERVED_1__SHIFT) & DPU_LUT_INFO_RESERVED_1__MASK; +} + +#define REG_DPU_LUT_LE_START 0x00004110 +#define DPU_LUT_LE_START_LUT_LE_START__MASK 0xffffffff +#define DPU_LUT_LE_START_LUT_LE_START__SHIFT 0 +static inline uint32_t DPU_LUT_LE_START_LUT_LE_START(uint32_t val) +{ + return ((val) << DPU_LUT_LE_START_LUT_LE_START__SHIFT) & DPU_LUT_LE_START_LUT_LE_START__MASK; +} + +#define REG_DPU_LUT_LE_END 0x00004114 +#define DPU_LUT_LE_END_LUT_LE_END__MASK 0xffffffff +#define DPU_LUT_LE_END_LUT_LE_END__SHIFT 0 +static inline uint32_t DPU_LUT_LE_END_LUT_LE_END(uint32_t val) +{ + return ((val) << DPU_LUT_LE_END_LUT_LE_END__SHIFT) & DPU_LUT_LE_END_LUT_LE_END__MASK; +} + +#define REG_DPU_LUT_LO_START 0x00004118 +#define DPU_LUT_LO_START_LUT_LO_START__MASK 0xffffffff +#define DPU_LUT_LO_START_LUT_LO_START__SHIFT 0 +static inline uint32_t DPU_LUT_LO_START_LUT_LO_START(uint32_t val) +{ + return ((val) << DPU_LUT_LO_START_LUT_LO_START__SHIFT) & DPU_LUT_LO_START_LUT_LO_START__MASK; +} + +#define REG_DPU_LUT_LO_END 0x0000411c +#define DPU_LUT_LO_END_LUT_LO_END__MASK 0xffffffff +#define DPU_LUT_LO_END_LUT_LO_END__SHIFT 0 +static inline uint32_t DPU_LUT_LO_END_LUT_LO_END(uint32_t val) +{ + return ((val) << DPU_LUT_LO_END_LUT_LO_END__SHIFT) & DPU_LUT_LO_END_LUT_LO_END__MASK; +} + +#define REG_DPU_LUT_LE_SLOPE_SCALE 0x00004120 +#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__MASK 0xffff0000 +#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__SHIFT 16 +static inline uint32_t DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE(uint32_t val) +{ + return ((val) << DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__SHIFT) & DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_OFLOW_SCALE__MASK; +} +#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__MASK 0x0000ffff +#define DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__SHIFT 0 +static inline uint32_t DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE(uint32_t val) +{ + return ((val) << DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__SHIFT) & DPU_LUT_LE_SLOPE_SCALE_LUT_LE_SLOPE_UFLOW_SCALE__MASK; +} + +#define REG_DPU_LUT_LE_SLOPE_SHIFT 0x00004124 +#define DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__MASK 0xfffffc00 +#define DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__SHIFT 10 +static inline uint32_t DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__SHIFT) & DPU_LUT_LE_SLOPE_SHIFT_RESERVED_0__MASK; +} +#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__MASK 0x000003e0 +#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__SHIFT 5 +static inline uint32_t DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT(uint32_t val) +{ + return ((val) << DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__SHIFT) & DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_OFLOW_SHIFT__MASK; +} +#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__MASK 0x0000001f +#define DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__SHIFT 0 +static inline uint32_t DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT(uint32_t val) +{ + return ((val) << DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__SHIFT) & DPU_LUT_LE_SLOPE_SHIFT_LUT_LE_SLOPE_UFLOW_SHIFT__MASK; +} + +#define REG_DPU_LUT_LO_SLOPE_SCALE 0x00004128 +#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__MASK 0xffff0000 +#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__SHIFT 16 +static inline uint32_t DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE(uint32_t val) +{ + return ((val) << 
DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__SHIFT) & DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_OFLOW_SCALE__MASK; +} +#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__MASK 0x0000ffff +#define DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__SHIFT 0 +static inline uint32_t DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE(uint32_t val) +{ + return ((val) << DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__SHIFT) & DPU_LUT_LO_SLOPE_SCALE_LUT_LO_SLOPE_UFLOW_SCALE__MASK; +} + +#define REG_DPU_LUT_LO_SLOPE_SHIFT 0x0000412c +#define DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__MASK 0xfffffc00 +#define DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__SHIFT 10 +static inline uint32_t DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__SHIFT) & DPU_LUT_LO_SLOPE_SHIFT_RESERVED_0__MASK; +} +#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__MASK 0x000003e0 +#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__SHIFT 5 +static inline uint32_t DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT(uint32_t val) +{ + return ((val) << DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__SHIFT) & DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_OFLOW_SHIFT__MASK; +} +#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__MASK 0x0000001f +#define DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__SHIFT 0 +static inline uint32_t DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT(uint32_t val) +{ + return ((val) << DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__SHIFT) & DPU_LUT_LO_SLOPE_SHIFT_LUT_LO_SLOPE_UFLOW_SHIFT__MASK; +} + +#define REG_DPU_RDMA_RDMA_S_STATUS 0x00005000 +#define DPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK 0xfffc0000 +#define DPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT 18 +static inline uint32_t DPU_RDMA_RDMA_S_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK 0x00030000 +#define DPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT 16 +static inline uint32_t DPU_RDMA_RDMA_S_STATUS_STATUS_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT) & DPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK; +} +#define DPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK 0x0000fffc +#define DPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT 2 +static inline uint32_t DPU_RDMA_RDMA_S_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK; +} +#define DPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK 0x00000003 +#define DPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_S_STATUS_STATUS_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT) & DPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK; +} + +#define REG_DPU_RDMA_RDMA_S_POINTER 0x00005004 +#define DPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK 0xfffe0000 +#define DPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT 17 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK 0x00010000 +#define DPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT 16 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_EXECUTER(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT) & DPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK 0x0000ffc0 +#define DPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT 6 +static inline uint32_t 
DPU_RDMA_RDMA_S_POINTER_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020 +#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010 +#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK 0x00000008 +#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT 3 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004 +#define DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT 2 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT) & DPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK 0x00000002 +#define DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT 1 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK; +} +#define DPU_RDMA_RDMA_S_POINTER_POINTER__MASK 0x00000001 +#define DPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_S_POINTER_POINTER(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT) & DPU_RDMA_RDMA_S_POINTER_POINTER__MASK; +} + +#define REG_DPU_RDMA_RDMA_OPERATION_ENABLE 0x00005008 +#define DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT) & DPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_DPU_RDMA_RDMA_DATA_CUBE_WIDTH 0x0000500c +#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__MASK 0xffffe000 +#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__SHIFT 13 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_WIDTH_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__MASK 0x00001fff +#define DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_WIDTH_WIDTH__MASK; +} + +#define REG_DPU_RDMA_RDMA_DATA_CUBE_HEIGHT 0x00005010 +#define 
DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__MASK 0xe0000000 +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT 29 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__MASK 0x1fff0000 +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__SHIFT 16 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_EW_LINE_NOTCH_ADDR__MASK; +} +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__MASK 0x0000e000 +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT 13 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_RESERVED_1__MASK; +} +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__MASK 0x00001fff +#define DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_HEIGHT_HEIGHT__MASK; +} + +#define REG_DPU_RDMA_RDMA_DATA_CUBE_CHANNEL 0x00005014 +#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__MASK 0xffffe000 +#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT 13 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__MASK 0x00001fff +#define DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__SHIFT) & DPU_RDMA_RDMA_DATA_CUBE_CHANNEL_CHANNEL__MASK; +} + +#define REG_DPU_RDMA_RDMA_SRC_BASE_ADDR 0x00005018 +#define DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK 0xffffffff +#define DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK; +} + +#define REG_DPU_RDMA_RDMA_BRDMA_CFG 0x0000501c +#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__MASK 0xffffffe0 +#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__SHIFT 5 +static inline uint32_t DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__MASK 0x0000001e +#define DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__SHIFT 1 +static inline uint32_t DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__SHIFT) & DPU_RDMA_RDMA_BRDMA_CFG_BRDMA_DATA_USE__MASK; +} +#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__MASK 0x00000001 +#define DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_BRDMA_CFG_RESERVED_1__MASK; +} + +#define REG_DPU_RDMA_RDMA_BS_BASE_ADDR 0x00005020 +#define DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__MASK 0xffffffff +#define 
DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_BS_BASE_ADDR_BS_BASE_ADDR__MASK; +} + +#define REG_DPU_RDMA_RDMA_NRDMA_CFG 0x00005028 +#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__MASK 0xffffffe0 +#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__SHIFT 5 +static inline uint32_t DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__MASK 0x0000001e +#define DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__SHIFT 1 +static inline uint32_t DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__SHIFT) & DPU_RDMA_RDMA_NRDMA_CFG_NRDMA_DATA_USE__MASK; +} +#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__MASK 0x00000001 +#define DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_NRDMA_CFG_RESERVED_1__MASK; +} + +#define REG_DPU_RDMA_RDMA_BN_BASE_ADDR 0x0000502c +#define DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__MASK 0xffffffff +#define DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_BN_BASE_ADDR_BN_BASE_ADDR__MASK; +} + +#define REG_DPU_RDMA_RDMA_ERDMA_CFG 0x00005034 +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__MASK 0xc0000000 +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__SHIFT 30 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_MODE__MASK; +} +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__MASK 0x20000000 +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__SHIFT 29 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_SURF_MODE__MASK; +} +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__MASK 0x10000000 +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__SHIFT 28 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_NONALIGN__MASK; +} +#define DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__MASK 0x0ffffff0 +#define DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__MASK 0x0000000c +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__SHIFT 2 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DATA_SIZE__MASK; +} +#define DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__MASK 0x00000002 +#define DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__SHIFT 1 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_OV4K_BYPASS__MASK; +} +#define 
DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__MASK 0x00000001 +#define DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__SHIFT) & DPU_RDMA_RDMA_ERDMA_CFG_ERDMA_DISABLE__MASK; +} + +#define REG_DPU_RDMA_RDMA_EW_BASE_ADDR 0x00005038 +#define DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__MASK 0xffffffff +#define DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__SHIFT) & DPU_RDMA_RDMA_EW_BASE_ADDR_EW_BASE_ADDR__MASK; +} + +#define REG_DPU_RDMA_RDMA_EW_SURF_STRIDE 0x00005040 +#define DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__MASK 0xfffffff0 +#define DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__SHIFT) & DPU_RDMA_RDMA_EW_SURF_STRIDE_EW_SURF_STRIDE__MASK; +} +#define DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__MASK 0x0000000f +#define DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_EW_SURF_STRIDE_RESERVED_0__MASK; +} + +#define REG_DPU_RDMA_RDMA_FEATURE_MODE_CFG 0x00005044 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__MASK 0xfffc0000 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__SHIFT 18 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__MASK 0x00038000 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__SHIFT 15 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_IN_PRECISION__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__MASK 0x00007800 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__SHIFT 11 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_BURST_LEN__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__MASK 0x00000700 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__SHIFT 8 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_COMB_USE__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__MASK 0x000000e0 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__SHIFT 5 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_PROC_PRECISION__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__MASK 0x00000010 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_DISABLE__MASK; +} +#define 
DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__MASK 0x00000008 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__SHIFT 3 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_MRDMA_FP16TOFP32_EN__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__MASK 0x00000006 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__SHIFT 1 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_CONV_MODE__MASK; +} +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__MASK 0x00000001 +#define DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__SHIFT) & DPU_RDMA_RDMA_FEATURE_MODE_CFG_FLYING_MODE__MASK; +} + +#define REG_DPU_RDMA_RDMA_SRC_DMA_CFG 0x00005048 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__MASK 0xfff80000 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__SHIFT 19 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_LINE_NOTCH_ADDR__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__MASK 0x0007c000 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__SHIFT 14 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__MASK 0x00002000 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__SHIFT 13 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_POOLING_METHOD__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__MASK 0x00001000 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__SHIFT 12 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_UNPOOLING_EN__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__MASK 0x00000e00 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__SHIFT 9 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_HEIGHT__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__MASK 0x000001c0 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__SHIFT 6 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_STRIDE_WIDTH__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__MASK 0x00000038 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__SHIFT 3 +static inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_HEIGHT__MASK; +} +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__MASK 0x00000007 +#define DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__SHIFT 0 +static 
inline uint32_t DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__SHIFT) & DPU_RDMA_RDMA_SRC_DMA_CFG_KERNEL_WIDTH__MASK; +} + +#define REG_DPU_RDMA_RDMA_SURF_NOTCH 0x0000504c +#define DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__MASK 0xfffffff0 +#define DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__SHIFT) & DPU_RDMA_RDMA_SURF_NOTCH_SURF_NOTCH_ADDR__MASK; +} +#define DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__MASK 0x0000000f +#define DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_SURF_NOTCH_RESERVED_0__MASK; +} + +#define REG_DPU_RDMA_RDMA_PAD_CFG 0x00005064 +#define DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__MASK 0xffff0000 +#define DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__SHIFT 16 +static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_PAD_VALUE__MASK; +} +#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__MASK 0x0000ff80 +#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__SHIFT 7 +static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_RESERVED_0__MASK; +} +#define DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__MASK 0x00000070 +#define DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_PAD_TOP(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_PAD_TOP__MASK; +} +#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__MASK 0x00000008 +#define DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__SHIFT 3 +static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_RESERVED_1__MASK; +} +#define DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__MASK 0x00000007 +#define DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__SHIFT) & DPU_RDMA_RDMA_PAD_CFG_PAD_LEFT__MASK; +} + +#define REG_DPU_RDMA_RDMA_WEIGHT 0x00005068 +#define DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__MASK 0xff000000 +#define DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__SHIFT 24 +static inline uint32_t DPU_RDMA_RDMA_WEIGHT_E_WEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_E_WEIGHT__MASK; +} +#define DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__MASK 0x00ff0000 +#define DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__SHIFT 16 +static inline uint32_t DPU_RDMA_RDMA_WEIGHT_N_WEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_N_WEIGHT__MASK; +} +#define DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__MASK 0x0000ff00 +#define DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__SHIFT 8 +static inline uint32_t DPU_RDMA_RDMA_WEIGHT_B_WEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_B_WEIGHT__MASK; +} +#define DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__MASK 0x000000ff +#define DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_WEIGHT_M_WEIGHT(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__SHIFT) & DPU_RDMA_RDMA_WEIGHT_M_WEIGHT__MASK; +} + +#define REG_DPU_RDMA_RDMA_EW_SURF_NOTCH 0x0000506c +#define 
DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__MASK 0xfffffff0 +#define DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__SHIFT 4 +static inline uint32_t DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__SHIFT) & DPU_RDMA_RDMA_EW_SURF_NOTCH_EW_SURF_NOTCH__MASK; +} +#define DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__MASK 0x0000000f +#define DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__SHIFT 0 +static inline uint32_t DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0(uint32_t val) +{ + return ((val) << DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__SHIFT) & DPU_RDMA_RDMA_EW_SURF_NOTCH_RESERVED_0__MASK; +} + +#define REG_PPU_S_STATUS 0x00006000 +#define PPU_S_STATUS_RESERVED_0__MASK 0xfffc0000 +#define PPU_S_STATUS_RESERVED_0__SHIFT 18 +static inline uint32_t PPU_S_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_S_STATUS_RESERVED_0__SHIFT) & PPU_S_STATUS_RESERVED_0__MASK; +} +#define PPU_S_STATUS_STATUS_1__MASK 0x00030000 +#define PPU_S_STATUS_STATUS_1__SHIFT 16 +static inline uint32_t PPU_S_STATUS_STATUS_1(uint32_t val) +{ + return ((val) << PPU_S_STATUS_STATUS_1__SHIFT) & PPU_S_STATUS_STATUS_1__MASK; +} +#define PPU_S_STATUS_RESERVED_1__MASK 0x0000fffc +#define PPU_S_STATUS_RESERVED_1__SHIFT 2 +static inline uint32_t PPU_S_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_S_STATUS_RESERVED_1__SHIFT) & PPU_S_STATUS_RESERVED_1__MASK; +} +#define PPU_S_STATUS_STATUS_0__MASK 0x00000003 +#define PPU_S_STATUS_STATUS_0__SHIFT 0 +static inline uint32_t PPU_S_STATUS_STATUS_0(uint32_t val) +{ + return ((val) << PPU_S_STATUS_STATUS_0__SHIFT) & PPU_S_STATUS_STATUS_0__MASK; +} + +#define REG_PPU_S_POINTER 0x00006004 +#define PPU_S_POINTER_RESERVED_0__MASK 0xfffe0000 +#define PPU_S_POINTER_RESERVED_0__SHIFT 17 +static inline uint32_t PPU_S_POINTER_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_S_POINTER_RESERVED_0__SHIFT) & PPU_S_POINTER_RESERVED_0__MASK; +} +#define PPU_S_POINTER_EXECUTER__MASK 0x00010000 +#define PPU_S_POINTER_EXECUTER__SHIFT 16 +static inline uint32_t PPU_S_POINTER_EXECUTER(uint32_t val) +{ + return ((val) << PPU_S_POINTER_EXECUTER__SHIFT) & PPU_S_POINTER_EXECUTER__MASK; +} +#define PPU_S_POINTER_RESERVED_1__MASK 0x0000ffc0 +#define PPU_S_POINTER_RESERVED_1__SHIFT 6 +static inline uint32_t PPU_S_POINTER_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_S_POINTER_RESERVED_1__SHIFT) & PPU_S_POINTER_RESERVED_1__MASK; +} +#define PPU_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020 +#define PPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5 +static inline uint32_t PPU_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val) +{ + return ((val) << PPU_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & PPU_S_POINTER_EXECUTER_PP_CLEAR__MASK; +} +#define PPU_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010 +#define PPU_S_POINTER_POINTER_PP_CLEAR__SHIFT 4 +static inline uint32_t PPU_S_POINTER_POINTER_PP_CLEAR(uint32_t val) +{ + return ((val) << PPU_S_POINTER_POINTER_PP_CLEAR__SHIFT) & PPU_S_POINTER_POINTER_PP_CLEAR__MASK; +} +#define PPU_S_POINTER_POINTER_PP_MODE__MASK 0x00000008 +#define PPU_S_POINTER_POINTER_PP_MODE__SHIFT 3 +static inline uint32_t PPU_S_POINTER_POINTER_PP_MODE(uint32_t val) +{ + return ((val) << PPU_S_POINTER_POINTER_PP_MODE__SHIFT) & PPU_S_POINTER_POINTER_PP_MODE__MASK; +} +#define PPU_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004 +#define PPU_S_POINTER_EXECUTER_PP_EN__SHIFT 2 +static inline uint32_t PPU_S_POINTER_EXECUTER_PP_EN(uint32_t val) +{ + return ((val) << PPU_S_POINTER_EXECUTER_PP_EN__SHIFT) & PPU_S_POINTER_EXECUTER_PP_EN__MASK; +} +#define 
PPU_S_POINTER_POINTER_PP_EN__MASK 0x00000002 +#define PPU_S_POINTER_POINTER_PP_EN__SHIFT 1 +static inline uint32_t PPU_S_POINTER_POINTER_PP_EN(uint32_t val) +{ + return ((val) << PPU_S_POINTER_POINTER_PP_EN__SHIFT) & PPU_S_POINTER_POINTER_PP_EN__MASK; +} +#define PPU_S_POINTER_POINTER__MASK 0x00000001 +#define PPU_S_POINTER_POINTER__SHIFT 0 +static inline uint32_t PPU_S_POINTER_POINTER(uint32_t val) +{ + return ((val) << PPU_S_POINTER_POINTER__SHIFT) & PPU_S_POINTER_POINTER__MASK; +} + +#define REG_PPU_OPERATION_ENABLE 0x00006008 +#define PPU_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define PPU_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t PPU_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_OPERATION_ENABLE_RESERVED_0__SHIFT) & PPU_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define PPU_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define PPU_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t PPU_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << PPU_OPERATION_ENABLE_OP_EN__SHIFT) & PPU_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_PPU_DATA_CUBE_IN_WIDTH 0x0000600c +#define PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__MASK 0xffffe000 +#define PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_DATA_CUBE_IN_WIDTH_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__SHIFT) & PPU_DATA_CUBE_IN_WIDTH_RESERVED_0__MASK; +} +#define PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK 0x00001fff +#define PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT 0 +static inline uint32_t PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT) & PPU_DATA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK; +} + +#define REG_PPU_DATA_CUBE_IN_HEIGHT 0x00006010 +#define PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__MASK 0xffffe000 +#define PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT) & PPU_DATA_CUBE_IN_HEIGHT_RESERVED_0__MASK; +} +#define PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK 0x00001fff +#define PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT 0 +static inline uint32_t PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT) & PPU_DATA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK; +} + +#define REG_PPU_DATA_CUBE_IN_CHANNEL 0x00006014 +#define PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__MASK 0xffffe000 +#define PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT) & PPU_DATA_CUBE_IN_CHANNEL_RESERVED_0__MASK; +} +#define PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK 0x00001fff +#define PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT 0 +static inline uint32_t PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT) & PPU_DATA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK; +} + +#define REG_PPU_DATA_CUBE_OUT_WIDTH 0x00006018 +#define PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__MASK 0xffffe000 +#define PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__SHIFT) & PPU_DATA_CUBE_OUT_WIDTH_RESERVED_0__MASK; +} +#define PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__MASK 0x00001fff +#define 
PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__SHIFT 0 +static inline uint32_t PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__SHIFT) & PPU_DATA_CUBE_OUT_WIDTH_CUBE_OUT_WIDTH__MASK; +} + +#define REG_PPU_DATA_CUBE_OUT_HEIGHT 0x0000601c +#define PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__MASK 0xffffe000 +#define PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__SHIFT) & PPU_DATA_CUBE_OUT_HEIGHT_RESERVED_0__MASK; +} +#define PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__MASK 0x00001fff +#define PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__SHIFT 0 +static inline uint32_t PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__SHIFT) & PPU_DATA_CUBE_OUT_HEIGHT_CUBE_OUT_HEIGHT__MASK; +} + +#define REG_PPU_DATA_CUBE_OUT_CHANNEL 0x00006020 +#define PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__MASK 0xffffe000 +#define PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__SHIFT) & PPU_DATA_CUBE_OUT_CHANNEL_RESERVED_0__MASK; +} +#define PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__MASK 0x00001fff +#define PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__SHIFT 0 +static inline uint32_t PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL(uint32_t val) +{ + return ((val) << PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__SHIFT) & PPU_DATA_CUBE_OUT_CHANNEL_CUBE_OUT_CHANNEL__MASK; +} + +#define REG_PPU_OPERATION_MODE_CFG 0x00006024 +#define PPU_OPERATION_MODE_CFG_RESERVED_0__MASK 0x80000000 +#define PPU_OPERATION_MODE_CFG_RESERVED_0__SHIFT 31 +static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_0__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_0__MASK; +} +#define PPU_OPERATION_MODE_CFG_INDEX_EN__MASK 0x40000000 +#define PPU_OPERATION_MODE_CFG_INDEX_EN__SHIFT 30 +static inline uint32_t PPU_OPERATION_MODE_CFG_INDEX_EN(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_INDEX_EN__SHIFT) & PPU_OPERATION_MODE_CFG_INDEX_EN__MASK; +} +#define PPU_OPERATION_MODE_CFG_RESERVED_1__MASK 0x20000000 +#define PPU_OPERATION_MODE_CFG_RESERVED_1__SHIFT 29 +static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_1__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_1__MASK; +} +#define PPU_OPERATION_MODE_CFG_NOTCH_ADDR__MASK 0x1fff0000 +#define PPU_OPERATION_MODE_CFG_NOTCH_ADDR__SHIFT 16 +static inline uint32_t PPU_OPERATION_MODE_CFG_NOTCH_ADDR(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_NOTCH_ADDR__SHIFT) & PPU_OPERATION_MODE_CFG_NOTCH_ADDR__MASK; +} +#define PPU_OPERATION_MODE_CFG_RESERVED_2__MASK 0x0000ff00 +#define PPU_OPERATION_MODE_CFG_RESERVED_2__SHIFT 8 +static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_2__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_2__MASK; +} +#define PPU_OPERATION_MODE_CFG_USE_CNT__MASK 0x000000e0 +#define PPU_OPERATION_MODE_CFG_USE_CNT__SHIFT 5 +static inline uint32_t PPU_OPERATION_MODE_CFG_USE_CNT(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_USE_CNT__SHIFT) & PPU_OPERATION_MODE_CFG_USE_CNT__MASK; +} +#define PPU_OPERATION_MODE_CFG_FLYING_MODE__MASK 0x00000010 +#define PPU_OPERATION_MODE_CFG_FLYING_MODE__SHIFT 4 +static inline 
uint32_t PPU_OPERATION_MODE_CFG_FLYING_MODE(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_FLYING_MODE__SHIFT) & PPU_OPERATION_MODE_CFG_FLYING_MODE__MASK; +} +#define PPU_OPERATION_MODE_CFG_RESERVED_3__MASK 0x0000000c +#define PPU_OPERATION_MODE_CFG_RESERVED_3__SHIFT 2 +static inline uint32_t PPU_OPERATION_MODE_CFG_RESERVED_3(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_RESERVED_3__SHIFT) & PPU_OPERATION_MODE_CFG_RESERVED_3__MASK; +} +#define PPU_OPERATION_MODE_CFG_POOLING_METHOD__MASK 0x00000003 +#define PPU_OPERATION_MODE_CFG_POOLING_METHOD__SHIFT 0 +static inline uint32_t PPU_OPERATION_MODE_CFG_POOLING_METHOD(uint32_t val) +{ + return ((val) << PPU_OPERATION_MODE_CFG_POOLING_METHOD__SHIFT) & PPU_OPERATION_MODE_CFG_POOLING_METHOD__MASK; +} + +#define REG_PPU_POOLING_KERNEL_CFG 0x00006034 +#define PPU_POOLING_KERNEL_CFG_RESERVED_0__MASK 0xff000000 +#define PPU_POOLING_KERNEL_CFG_RESERVED_0__SHIFT 24 +static inline uint32_t PPU_POOLING_KERNEL_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_RESERVED_0__SHIFT) & PPU_POOLING_KERNEL_CFG_RESERVED_0__MASK; +} +#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__MASK 0x00f00000 +#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__SHIFT 20 +static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT__MASK; +} +#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__MASK 0x000f0000 +#define PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__SHIFT 16 +static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH__MASK; +} +#define PPU_POOLING_KERNEL_CFG_RESERVED_1__MASK 0x0000f000 +#define PPU_POOLING_KERNEL_CFG_RESERVED_1__SHIFT 12 +static inline uint32_t PPU_POOLING_KERNEL_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_RESERVED_1__SHIFT) & PPU_POOLING_KERNEL_CFG_RESERVED_1__MASK; +} +#define PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__MASK 0x00000f00 +#define PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__SHIFT 8 +static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT__MASK; +} +#define PPU_POOLING_KERNEL_CFG_RESERVED_2__MASK 0x000000f0 +#define PPU_POOLING_KERNEL_CFG_RESERVED_2__SHIFT 4 +static inline uint32_t PPU_POOLING_KERNEL_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_RESERVED_2__SHIFT) & PPU_POOLING_KERNEL_CFG_RESERVED_2__MASK; +} +#define PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__MASK 0x0000000f +#define PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__SHIFT 0 +static inline uint32_t PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH(uint32_t val) +{ + return ((val) << PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__SHIFT) & PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH__MASK; +} + +#define REG_PPU_RECIP_KERNEL_WIDTH 0x00006038 +#define PPU_RECIP_KERNEL_WIDTH_RESERVED_0__MASK 0xfffe0000 +#define PPU_RECIP_KERNEL_WIDTH_RESERVED_0__SHIFT 17 +static inline uint32_t PPU_RECIP_KERNEL_WIDTH_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RECIP_KERNEL_WIDTH_RESERVED_0__SHIFT) & PPU_RECIP_KERNEL_WIDTH_RESERVED_0__MASK; +} +#define PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__MASK 0x0001ffff +#define PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__SHIFT 0 +static inline uint32_t 
PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH(uint32_t val) +{ + return ((val) << PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__SHIFT) & PPU_RECIP_KERNEL_WIDTH_RECIP_KERNEL_WIDTH__MASK; +} + +#define REG_PPU_RECIP_KERNEL_HEIGHT 0x0000603c +#define PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__MASK 0xfffe0000 +#define PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__SHIFT 17 +static inline uint32_t PPU_RECIP_KERNEL_HEIGHT_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__SHIFT) & PPU_RECIP_KERNEL_HEIGHT_RESERVED_0__MASK; +} +#define PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__MASK 0x0001ffff +#define PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__SHIFT 0 +static inline uint32_t PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT(uint32_t val) +{ + return ((val) << PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__SHIFT) & PPU_RECIP_KERNEL_HEIGHT_RECIP_KERNEL_HEIGHT__MASK; +} + +#define REG_PPU_POOLING_PADDING_CFG 0x00006040 +#define PPU_POOLING_PADDING_CFG_RESERVED_0__MASK 0xffff8000 +#define PPU_POOLING_PADDING_CFG_RESERVED_0__SHIFT 15 +static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_0__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_0__MASK; +} +#define PPU_POOLING_PADDING_CFG_PAD_BOTTOM__MASK 0x00007000 +#define PPU_POOLING_PADDING_CFG_PAD_BOTTOM__SHIFT 12 +static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_BOTTOM(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_PAD_BOTTOM__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_BOTTOM__MASK; +} +#define PPU_POOLING_PADDING_CFG_RESERVED_1__MASK 0x00000800 +#define PPU_POOLING_PADDING_CFG_RESERVED_1__SHIFT 11 +static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_1__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_1__MASK; +} +#define PPU_POOLING_PADDING_CFG_PAD_RIGHT__MASK 0x00000700 +#define PPU_POOLING_PADDING_CFG_PAD_RIGHT__SHIFT 8 +static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_RIGHT(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_PAD_RIGHT__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_RIGHT__MASK; +} +#define PPU_POOLING_PADDING_CFG_RESERVED_2__MASK 0x00000080 +#define PPU_POOLING_PADDING_CFG_RESERVED_2__SHIFT 7 +static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_2(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_2__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_2__MASK; +} +#define PPU_POOLING_PADDING_CFG_PAD_TOP__MASK 0x00000070 +#define PPU_POOLING_PADDING_CFG_PAD_TOP__SHIFT 4 +static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_TOP(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_PAD_TOP__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_TOP__MASK; +} +#define PPU_POOLING_PADDING_CFG_RESERVED_3__MASK 0x00000008 +#define PPU_POOLING_PADDING_CFG_RESERVED_3__SHIFT 3 +static inline uint32_t PPU_POOLING_PADDING_CFG_RESERVED_3(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_RESERVED_3__SHIFT) & PPU_POOLING_PADDING_CFG_RESERVED_3__MASK; +} +#define PPU_POOLING_PADDING_CFG_PAD_LEFT__MASK 0x00000007 +#define PPU_POOLING_PADDING_CFG_PAD_LEFT__SHIFT 0 +static inline uint32_t PPU_POOLING_PADDING_CFG_PAD_LEFT(uint32_t val) +{ + return ((val) << PPU_POOLING_PADDING_CFG_PAD_LEFT__SHIFT) & PPU_POOLING_PADDING_CFG_PAD_LEFT__MASK; +} + +#define REG_PPU_PADDING_VALUE_1_CFG 0x00006044 +#define PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__MASK 0xffffffff +#define PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__SHIFT 0 +static inline uint32_t PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0(uint32_t val) +{ + return ((val) << 
PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__SHIFT) & PPU_PADDING_VALUE_1_CFG_PAD_VALUE_0__MASK; +} + +#define REG_PPU_PADDING_VALUE_2_CFG 0x00006048 +#define PPU_PADDING_VALUE_2_CFG_RESERVED_0__MASK 0xfffffff8 +#define PPU_PADDING_VALUE_2_CFG_RESERVED_0__SHIFT 3 +static inline uint32_t PPU_PADDING_VALUE_2_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_PADDING_VALUE_2_CFG_RESERVED_0__SHIFT) & PPU_PADDING_VALUE_2_CFG_RESERVED_0__MASK; +} +#define PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__MASK 0x00000007 +#define PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__SHIFT 0 +static inline uint32_t PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1(uint32_t val) +{ + return ((val) << PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__SHIFT) & PPU_PADDING_VALUE_2_CFG_PAD_VALUE_1__MASK; +} + +#define REG_PPU_DST_BASE_ADDR 0x00006070 +#define PPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK 0xfffffff0 +#define PPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT 4 +static inline uint32_t PPU_DST_BASE_ADDR_DST_BASE_ADDR(uint32_t val) +{ + return ((val) << PPU_DST_BASE_ADDR_DST_BASE_ADDR__SHIFT) & PPU_DST_BASE_ADDR_DST_BASE_ADDR__MASK; +} +#define PPU_DST_BASE_ADDR_RESERVED_0__MASK 0x0000000f +#define PPU_DST_BASE_ADDR_RESERVED_0__SHIFT 0 +static inline uint32_t PPU_DST_BASE_ADDR_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DST_BASE_ADDR_RESERVED_0__SHIFT) & PPU_DST_BASE_ADDR_RESERVED_0__MASK; +} + +#define REG_PPU_DST_SURF_STRIDE 0x0000607c +#define PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK 0xfffffff0 +#define PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT 4 +static inline uint32_t PPU_DST_SURF_STRIDE_DST_SURF_STRIDE(uint32_t val) +{ + return ((val) << PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__SHIFT) & PPU_DST_SURF_STRIDE_DST_SURF_STRIDE__MASK; +} +#define PPU_DST_SURF_STRIDE_RESERVED_0__MASK 0x0000000f +#define PPU_DST_SURF_STRIDE_RESERVED_0__SHIFT 0 +static inline uint32_t PPU_DST_SURF_STRIDE_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_DST_SURF_STRIDE_RESERVED_0__SHIFT) & PPU_DST_SURF_STRIDE_RESERVED_0__MASK; +} + +#define REG_PPU_DATA_FORMAT 0x00006084 +#define PPU_DATA_FORMAT_INDEX_ADD__MASK 0xfffffff0 +#define PPU_DATA_FORMAT_INDEX_ADD__SHIFT 4 +static inline uint32_t PPU_DATA_FORMAT_INDEX_ADD(uint32_t val) +{ + return ((val) << PPU_DATA_FORMAT_INDEX_ADD__SHIFT) & PPU_DATA_FORMAT_INDEX_ADD__MASK; +} +#define PPU_DATA_FORMAT_DPU_FLYIN__MASK 0x00000008 +#define PPU_DATA_FORMAT_DPU_FLYIN__SHIFT 3 +static inline uint32_t PPU_DATA_FORMAT_DPU_FLYIN(uint32_t val) +{ + return ((val) << PPU_DATA_FORMAT_DPU_FLYIN__SHIFT) & PPU_DATA_FORMAT_DPU_FLYIN__MASK; +} +#define PPU_DATA_FORMAT_PROC_PRECISION__MASK 0x00000007 +#define PPU_DATA_FORMAT_PROC_PRECISION__SHIFT 0 +static inline uint32_t PPU_DATA_FORMAT_PROC_PRECISION(uint32_t val) +{ + return ((val) << PPU_DATA_FORMAT_PROC_PRECISION__SHIFT) & PPU_DATA_FORMAT_PROC_PRECISION__MASK; +} + +#define REG_PPU_MISC_CTRL 0x000060dc +#define PPU_MISC_CTRL_SURF_LEN__MASK 0xffff0000 +#define PPU_MISC_CTRL_SURF_LEN__SHIFT 16 +static inline uint32_t PPU_MISC_CTRL_SURF_LEN(uint32_t val) +{ + return ((val) << PPU_MISC_CTRL_SURF_LEN__SHIFT) & PPU_MISC_CTRL_SURF_LEN__MASK; +} +#define PPU_MISC_CTRL_RESERVED_0__MASK 0x0000fe00 +#define PPU_MISC_CTRL_RESERVED_0__SHIFT 9 +static inline uint32_t PPU_MISC_CTRL_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_MISC_CTRL_RESERVED_0__SHIFT) & PPU_MISC_CTRL_RESERVED_0__MASK; +} +#define PPU_MISC_CTRL_MC_SURF_OUT__MASK 0x00000100 +#define PPU_MISC_CTRL_MC_SURF_OUT__SHIFT 8 +static inline uint32_t PPU_MISC_CTRL_MC_SURF_OUT(uint32_t val) +{ + return ((val) << PPU_MISC_CTRL_MC_SURF_OUT__SHIFT) & 
PPU_MISC_CTRL_MC_SURF_OUT__MASK; +} +#define PPU_MISC_CTRL_NONALIGN__MASK 0x00000080 +#define PPU_MISC_CTRL_NONALIGN__SHIFT 7 +static inline uint32_t PPU_MISC_CTRL_NONALIGN(uint32_t val) +{ + return ((val) << PPU_MISC_CTRL_NONALIGN__SHIFT) & PPU_MISC_CTRL_NONALIGN__MASK; +} +#define PPU_MISC_CTRL_RESERVED_1__MASK 0x00000070 +#define PPU_MISC_CTRL_RESERVED_1__SHIFT 4 +static inline uint32_t PPU_MISC_CTRL_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_MISC_CTRL_RESERVED_1__SHIFT) & PPU_MISC_CTRL_RESERVED_1__MASK; +} +#define PPU_MISC_CTRL_BURST_LEN__MASK 0x0000000f +#define PPU_MISC_CTRL_BURST_LEN__SHIFT 0 +static inline uint32_t PPU_MISC_CTRL_BURST_LEN(uint32_t val) +{ + return ((val) << PPU_MISC_CTRL_BURST_LEN__SHIFT) & PPU_MISC_CTRL_BURST_LEN__MASK; +} + +#define REG_PPU_RDMA_RDMA_S_STATUS 0x00007000 +#define PPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK 0xfffc0000 +#define PPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT 18 +static inline uint32_t PPU_RDMA_RDMA_S_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_STATUS_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_S_STATUS_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK 0x00030000 +#define PPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT 16 +static inline uint32_t PPU_RDMA_RDMA_S_STATUS_STATUS_1(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_STATUS_STATUS_1__SHIFT) & PPU_RDMA_RDMA_S_STATUS_STATUS_1__MASK; +} +#define PPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK 0x0000fffc +#define PPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT 2 +static inline uint32_t PPU_RDMA_RDMA_S_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_STATUS_RESERVED_1__SHIFT) & PPU_RDMA_RDMA_S_STATUS_RESERVED_1__MASK; +} +#define PPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK 0x00000003 +#define PPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_S_STATUS_STATUS_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_STATUS_STATUS_0__SHIFT) & PPU_RDMA_RDMA_S_STATUS_STATUS_0__MASK; +} + +#define REG_PPU_RDMA_RDMA_S_POINTER 0x00007004 +#define PPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK 0xfffe0000 +#define PPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT 17 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_S_POINTER_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK 0x00010000 +#define PPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT 16 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_EXECUTER(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_EXECUTER__SHIFT) & PPU_RDMA_RDMA_S_POINTER_EXECUTER__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK 0x0000ffc0 +#define PPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT 6 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_RESERVED_1(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_RESERVED_1__SHIFT) & PPU_RDMA_RDMA_S_POINTER_RESERVED_1__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK 0x00000020 +#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT 5 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__SHIFT) & PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_CLEAR__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK 0x00000010 +#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT 4 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__SHIFT) & 
PPU_RDMA_RDMA_S_POINTER_POINTER_PP_CLEAR__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK 0x00000008 +#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT 3 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER_PP_MODE__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK 0x00000004 +#define PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT 2 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__SHIFT) & PPU_RDMA_RDMA_S_POINTER_EXECUTER_PP_EN__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK 0x00000002 +#define PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT 1 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER_PP_EN__MASK; +} +#define PPU_RDMA_RDMA_S_POINTER_POINTER__MASK 0x00000001 +#define PPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_S_POINTER_POINTER(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_S_POINTER_POINTER__SHIFT) & PPU_RDMA_RDMA_S_POINTER_POINTER__MASK; +} + +#define REG_PPU_RDMA_RDMA_OPERATION_ENABLE 0x00007008 +#define PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK 0xfffffffe +#define PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT 1 +static inline uint32_t PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK 0x00000001 +#define PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__SHIFT) & PPU_RDMA_RDMA_OPERATION_ENABLE_OP_EN__MASK; +} + +#define REG_PPU_RDMA_RDMA_CUBE_IN_WIDTH 0x0000700c +#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__MASK 0xffffe000 +#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_WIDTH_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK 0x00001fff +#define PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_WIDTH_CUBE_IN_WIDTH__MASK; +} + +#define REG_PPU_RDMA_RDMA_CUBE_IN_HEIGHT 0x00007010 +#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__MASK 0xffffe000 +#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_HEIGHT_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK 0x00001fff +#define PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_HEIGHT_CUBE_IN_HEIGHT__MASK; +} + +#define REG_PPU_RDMA_RDMA_CUBE_IN_CHANNEL 0x00007014 +#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__MASK 0xffffe000 
+#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT 13 +static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_CHANNEL_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK 0x00001fff +#define PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__SHIFT) & PPU_RDMA_RDMA_CUBE_IN_CHANNEL_CUBE_IN_CHANNEL__MASK; +} + +#define REG_PPU_RDMA_RDMA_SRC_BASE_ADDR 0x0000701c +#define PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK 0xffffffff +#define PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__SHIFT) & PPU_RDMA_RDMA_SRC_BASE_ADDR_SRC_BASE_ADDR__MASK; +} + +#define REG_PPU_RDMA_RDMA_SRC_LINE_STRIDE 0x00007024 +#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__MASK 0xfffffff0 +#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__SHIFT 4 +static inline uint32_t PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__SHIFT) & PPU_RDMA_RDMA_SRC_LINE_STRIDE_SRC_LINE_STRIDE__MASK; +} +#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__MASK 0x0000000f +#define PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_SRC_LINE_STRIDE_RESERVED_0__MASK; +} + +#define REG_PPU_RDMA_RDMA_SRC_SURF_STRIDE 0x00007028 +#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__MASK 0xfffffff0 +#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__SHIFT 4 +static inline uint32_t PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__SHIFT) & PPU_RDMA_RDMA_SRC_SURF_STRIDE_SRC_SURF_STRIDE__MASK; +} +#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__MASK 0x0000000f +#define PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_SRC_SURF_STRIDE_RESERVED_0__MASK; +} + +#define REG_PPU_RDMA_RDMA_DATA_FORMAT 0x00007030 +#define PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__MASK 0xfffffffc +#define PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__SHIFT 2 +static inline uint32_t PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__SHIFT) & PPU_RDMA_RDMA_DATA_FORMAT_RESERVED_0__MASK; +} +#define PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__MASK 0x00000003 +#define PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__SHIFT 0 +static inline uint32_t PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION(uint32_t val) +{ + return ((val) << PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__SHIFT) & PPU_RDMA_RDMA_DATA_FORMAT_IN_PRECISION__MASK; +} + +#define REG_DDMA_CFG_OUTSTANDING 0x00008000 +#define DDMA_CFG_OUTSTANDING_RESERVED_0__MASK 0xffff0000 +#define DDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT 16 +static inline uint32_t DDMA_CFG_OUTSTANDING_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT) & DDMA_CFG_OUTSTANDING_RESERVED_0__MASK; +} +#define DDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK 0x0000ff00 
+#define DDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT 8 +static inline uint32_t DDMA_CFG_OUTSTANDING_WR_OS_CNT(uint32_t val) +{ + return ((val) << DDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT) & DDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK; +} +#define DDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK 0x000000ff +#define DDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT 0 +static inline uint32_t DDMA_CFG_OUTSTANDING_RD_OS_CNT(uint32_t val) +{ + return ((val) << DDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT) & DDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK; +} + +#define REG_DDMA_RD_WEIGHT_0 0x00008004 +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK 0xff000000 +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT 24 +static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP(uint32_t val) +{ + return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK; +} +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK 0x00ff0000 +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT 16 +static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU(uint32_t val) +{ + return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK; +} +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK 0x0000ff00 +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT 8 +static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL(uint32_t val) +{ + return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK; +} +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK 0x000000ff +#define DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT 0 +static inline uint32_t DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE(uint32_t val) +{ + return ((val) << DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT) & DDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK; +} + +#define REG_DDMA_WR_WEIGHT_0 0x00008008 +#define DDMA_WR_WEIGHT_0_RESERVED_0__MASK 0xffff0000 +#define DDMA_WR_WEIGHT_0_RESERVED_0__SHIFT 16 +static inline uint32_t DDMA_WR_WEIGHT_0_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_WR_WEIGHT_0_RESERVED_0__SHIFT) & DDMA_WR_WEIGHT_0_RESERVED_0__MASK; +} +#define DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK 0x0000ff00 +#define DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT 8 +static inline uint32_t DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP(uint32_t val) +{ + return ((val) << DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT) & DDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK; +} +#define DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK 0x000000ff +#define DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT 0 +static inline uint32_t DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU(uint32_t val) +{ + return ((val) << DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT) & DDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK; +} + +#define REG_DDMA_CFG_ID_ERROR 0x0000800c +#define DDMA_CFG_ID_ERROR_RESERVED_0__MASK 0xfffffc00 +#define DDMA_CFG_ID_ERROR_RESERVED_0__SHIFT 10 +static inline uint32_t DDMA_CFG_ID_ERROR_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_ID_ERROR_RESERVED_0__SHIFT) & DDMA_CFG_ID_ERROR_RESERVED_0__MASK; +} +#define DDMA_CFG_ID_ERROR_WR_RESP_ID__MASK 0x000003c0 +#define DDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT 6 +static inline uint32_t DDMA_CFG_ID_ERROR_WR_RESP_ID(uint32_t val) +{ + return ((val) << DDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT) & DDMA_CFG_ID_ERROR_WR_RESP_ID__MASK; +} +#define DDMA_CFG_ID_ERROR_RESERVED_1__MASK 0x00000020 +#define DDMA_CFG_ID_ERROR_RESERVED_1__SHIFT 5 +static inline uint32_t DDMA_CFG_ID_ERROR_RESERVED_1(uint32_t val) +{ + return ((val) << DDMA_CFG_ID_ERROR_RESERVED_1__SHIFT) & DDMA_CFG_ID_ERROR_RESERVED_1__MASK; +} +#define DDMA_CFG_ID_ERROR_RD_RESP_ID__MASK 0x0000001f +#define DDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT 0 +static inline uint32_t 
DDMA_CFG_ID_ERROR_RD_RESP_ID(uint32_t val) +{ + return ((val) << DDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT) & DDMA_CFG_ID_ERROR_RD_RESP_ID__MASK; +} + +#define REG_DDMA_RD_WEIGHT_1 0x00008010 +#define DDMA_RD_WEIGHT_1_RESERVED_0__MASK 0xffffff00 +#define DDMA_RD_WEIGHT_1_RESERVED_0__SHIFT 8 +static inline uint32_t DDMA_RD_WEIGHT_1_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_RD_WEIGHT_1_RESERVED_0__SHIFT) & DDMA_RD_WEIGHT_1_RESERVED_0__MASK; +} +#define DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK 0x000000ff +#define DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT 0 +static inline uint32_t DDMA_RD_WEIGHT_1_RD_WEIGHT_PC(uint32_t val) +{ + return ((val) << DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT) & DDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK; +} + +#define REG_DDMA_CFG_DMA_FIFO_CLR 0x00008014 +#define DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK 0xfffffffe +#define DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT 1 +static inline uint32_t DDMA_CFG_DMA_FIFO_CLR_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT) & DDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK; +} +#define DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK 0x00000001 +#define DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT 0 +static inline uint32_t DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT) & DDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK; +} + +#define REG_DDMA_CFG_DMA_ARB 0x00008018 +#define DDMA_CFG_DMA_ARB_RESERVED_0__MASK 0xfffffc00 +#define DDMA_CFG_DMA_ARB_RESERVED_0__SHIFT 10 +static inline uint32_t DDMA_CFG_DMA_ARB_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_RESERVED_0__SHIFT) & DDMA_CFG_DMA_ARB_RESERVED_0__MASK; +} +#define DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK 0x00000200 +#define DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT 9 +static inline uint32_t DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT) & DDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK; +} +#define DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK 0x00000100 +#define DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT 8 +static inline uint32_t DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT) & DDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK; +} +#define DDMA_CFG_DMA_ARB_RESERVED_1__MASK 0x00000080 +#define DDMA_CFG_DMA_ARB_RESERVED_1__SHIFT 7 +static inline uint32_t DDMA_CFG_DMA_ARB_RESERVED_1(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_RESERVED_1__SHIFT) & DDMA_CFG_DMA_ARB_RESERVED_1__MASK; +} +#define DDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK 0x00000070 +#define DDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT 4 +static inline uint32_t DDMA_CFG_DMA_ARB_WR_FIX_ARB(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT) & DDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK; +} +#define DDMA_CFG_DMA_ARB_RESERVED_2__MASK 0x00000008 +#define DDMA_CFG_DMA_ARB_RESERVED_2__SHIFT 3 +static inline uint32_t DDMA_CFG_DMA_ARB_RESERVED_2(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_RESERVED_2__SHIFT) & DDMA_CFG_DMA_ARB_RESERVED_2__MASK; +} +#define DDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK 0x00000007 +#define DDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT 0 +static inline uint32_t DDMA_CFG_DMA_ARB_RD_FIX_ARB(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT) & DDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK; +} + +#define REG_DDMA_CFG_DMA_RD_QOS 0x00008020 +#define DDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK 0xfffffc00 +#define DDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT 10 +static inline uint32_t DDMA_CFG_DMA_RD_QOS_RESERVED_0(uint32_t val) +{ + return ((val) << 
DDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT) & DDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK; +} +#define DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK 0x00000300 +#define DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT 8 +static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_PC_QOS(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK; +} +#define DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK 0x000000c0 +#define DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT 6 +static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK; +} +#define DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK 0x00000030 +#define DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT 4 +static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK; +} +#define DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK 0x0000000c +#define DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT 2 +static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK; +} +#define DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK 0x00000003 +#define DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT 0 +static inline uint32_t DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT) & DDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK; +} + +#define REG_DDMA_CFG_DMA_RD_CFG 0x00008024 +#define DDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK 0xffffe000 +#define DDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT 13 +static inline uint32_t DDMA_CFG_DMA_RD_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT) & DDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK; +} +#define DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK 0x00001000 +#define DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT 12 +static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARLOCK(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK; +} +#define DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK 0x00000f00 +#define DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT 8 +static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARCACHE(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK; +} +#define DDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK 0x000000e0 +#define DDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT 5 +static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARPROT(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK; +} +#define DDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK 0x00000018 +#define DDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT 3 +static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARBURST(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK; +} +#define DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK 0x00000007 +#define DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT 0 +static inline uint32_t DDMA_CFG_DMA_RD_CFG_RD_ARSIZE(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT) & DDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK; +} + +#define REG_DDMA_CFG_DMA_WR_CFG 0x00008028 +#define DDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK 0xffffe000 +#define DDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT 13 +static inline uint32_t DDMA_CFG_DMA_WR_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT) & DDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK; +} +#define 
DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK 0x00001000 +#define DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT 12 +static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWLOCK(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK; +} +#define DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK 0x00000f00 +#define DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT 8 +static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWCACHE(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK; +} +#define DDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK 0x000000e0 +#define DDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT 5 +static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWPROT(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK; +} +#define DDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK 0x00000018 +#define DDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT 3 +static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWBURST(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK; +} +#define DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK 0x00000007 +#define DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT 0 +static inline uint32_t DDMA_CFG_DMA_WR_CFG_WR_AWSIZE(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT) & DDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK; +} + +#define REG_DDMA_CFG_DMA_WSTRB 0x0000802c +#define DDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK 0xffffffff +#define DDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT 0 +static inline uint32_t DDMA_CFG_DMA_WSTRB_WR_WSTRB(uint32_t val) +{ + return ((val) << DDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT) & DDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK; +} + +#define REG_DDMA_CFG_STATUS 0x00008030 +#define DDMA_CFG_STATUS_RESERVED_0__MASK 0xfffffe00 +#define DDMA_CFG_STATUS_RESERVED_0__SHIFT 9 +static inline uint32_t DDMA_CFG_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << DDMA_CFG_STATUS_RESERVED_0__SHIFT) & DDMA_CFG_STATUS_RESERVED_0__MASK; +} +#define DDMA_CFG_STATUS_IDEL__MASK 0x00000100 +#define DDMA_CFG_STATUS_IDEL__SHIFT 8 +static inline uint32_t DDMA_CFG_STATUS_IDEL(uint32_t val) +{ + return ((val) << DDMA_CFG_STATUS_IDEL__SHIFT) & DDMA_CFG_STATUS_IDEL__MASK; +} +#define DDMA_CFG_STATUS_RESERVED_1__MASK 0x000000ff +#define DDMA_CFG_STATUS_RESERVED_1__SHIFT 0 +static inline uint32_t DDMA_CFG_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << DDMA_CFG_STATUS_RESERVED_1__SHIFT) & DDMA_CFG_STATUS_RESERVED_1__MASK; +} + +#define REG_SDMA_CFG_OUTSTANDING 0x00009000 +#define SDMA_CFG_OUTSTANDING_RESERVED_0__MASK 0xffff0000 +#define SDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT 16 +static inline uint32_t SDMA_CFG_OUTSTANDING_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_OUTSTANDING_RESERVED_0__SHIFT) & SDMA_CFG_OUTSTANDING_RESERVED_0__MASK; +} +#define SDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK 0x0000ff00 +#define SDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT 8 +static inline uint32_t SDMA_CFG_OUTSTANDING_WR_OS_CNT(uint32_t val) +{ + return ((val) << SDMA_CFG_OUTSTANDING_WR_OS_CNT__SHIFT) & SDMA_CFG_OUTSTANDING_WR_OS_CNT__MASK; +} +#define SDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK 0x000000ff +#define SDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT 0 +static inline uint32_t SDMA_CFG_OUTSTANDING_RD_OS_CNT(uint32_t val) +{ + return ((val) << SDMA_CFG_OUTSTANDING_RD_OS_CNT__SHIFT) & SDMA_CFG_OUTSTANDING_RD_OS_CNT__MASK; +} + +#define REG_SDMA_RD_WEIGHT_0 0x00009004 +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK 0xff000000 +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT 24 +static inline uint32_t 
SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP(uint32_t val) +{ + return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_PDP__MASK; +} +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK 0x00ff0000 +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT 16 +static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU(uint32_t val) +{ + return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_DPU__MASK; +} +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK 0x0000ff00 +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT 8 +static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL(uint32_t val) +{ + return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_KERNEL__MASK; +} +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK 0x000000ff +#define SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT 0 +static inline uint32_t SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE(uint32_t val) +{ + return ((val) << SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__SHIFT) & SDMA_RD_WEIGHT_0_RD_WEIGHT_FEATURE__MASK; +} + +#define REG_SDMA_WR_WEIGHT_0 0x00009008 +#define SDMA_WR_WEIGHT_0_RESERVED_0__MASK 0xffff0000 +#define SDMA_WR_WEIGHT_0_RESERVED_0__SHIFT 16 +static inline uint32_t SDMA_WR_WEIGHT_0_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_WR_WEIGHT_0_RESERVED_0__SHIFT) & SDMA_WR_WEIGHT_0_RESERVED_0__MASK; +} +#define SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK 0x0000ff00 +#define SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT 8 +static inline uint32_t SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP(uint32_t val) +{ + return ((val) << SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__SHIFT) & SDMA_WR_WEIGHT_0_WR_WEIGHT_PDP__MASK; +} +#define SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK 0x000000ff +#define SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT 0 +static inline uint32_t SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU(uint32_t val) +{ + return ((val) << SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__SHIFT) & SDMA_WR_WEIGHT_0_WR_WEIGHT_DPU__MASK; +} + +#define REG_SDMA_CFG_ID_ERROR 0x0000900c +#define SDMA_CFG_ID_ERROR_RESERVED_0__MASK 0xfffffc00 +#define SDMA_CFG_ID_ERROR_RESERVED_0__SHIFT 10 +static inline uint32_t SDMA_CFG_ID_ERROR_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_ID_ERROR_RESERVED_0__SHIFT) & SDMA_CFG_ID_ERROR_RESERVED_0__MASK; +} +#define SDMA_CFG_ID_ERROR_WR_RESP_ID__MASK 0x000003c0 +#define SDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT 6 +static inline uint32_t SDMA_CFG_ID_ERROR_WR_RESP_ID(uint32_t val) +{ + return ((val) << SDMA_CFG_ID_ERROR_WR_RESP_ID__SHIFT) & SDMA_CFG_ID_ERROR_WR_RESP_ID__MASK; +} +#define SDMA_CFG_ID_ERROR_RESERVED_1__MASK 0x00000020 +#define SDMA_CFG_ID_ERROR_RESERVED_1__SHIFT 5 +static inline uint32_t SDMA_CFG_ID_ERROR_RESERVED_1(uint32_t val) +{ + return ((val) << SDMA_CFG_ID_ERROR_RESERVED_1__SHIFT) & SDMA_CFG_ID_ERROR_RESERVED_1__MASK; +} +#define SDMA_CFG_ID_ERROR_RD_RESP_ID__MASK 0x0000001f +#define SDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT 0 +static inline uint32_t SDMA_CFG_ID_ERROR_RD_RESP_ID(uint32_t val) +{ + return ((val) << SDMA_CFG_ID_ERROR_RD_RESP_ID__SHIFT) & SDMA_CFG_ID_ERROR_RD_RESP_ID__MASK; +} + +#define REG_SDMA_RD_WEIGHT_1 0x00009010 +#define SDMA_RD_WEIGHT_1_RESERVED_0__MASK 0xffffff00 +#define SDMA_RD_WEIGHT_1_RESERVED_0__SHIFT 8 +static inline uint32_t SDMA_RD_WEIGHT_1_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_RD_WEIGHT_1_RESERVED_0__SHIFT) & SDMA_RD_WEIGHT_1_RESERVED_0__MASK; +} +#define SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK 0x000000ff +#define SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT 0 +static inline uint32_t SDMA_RD_WEIGHT_1_RD_WEIGHT_PC(uint32_t val) +{ + return ((val) << SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__SHIFT) & 
SDMA_RD_WEIGHT_1_RD_WEIGHT_PC__MASK; +} + +#define REG_SDMA_CFG_DMA_FIFO_CLR 0x00009014 +#define SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK 0xfffffffe +#define SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT 1 +static inline uint32_t SDMA_CFG_DMA_FIFO_CLR_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__SHIFT) & SDMA_CFG_DMA_FIFO_CLR_RESERVED_0__MASK; +} +#define SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK 0x00000001 +#define SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT 0 +static inline uint32_t SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__SHIFT) & SDMA_CFG_DMA_FIFO_CLR_DMA_FIFO_CLR__MASK; +} + +#define REG_SDMA_CFG_DMA_ARB 0x00009018 +#define SDMA_CFG_DMA_ARB_RESERVED_0__MASK 0xfffffc00 +#define SDMA_CFG_DMA_ARB_RESERVED_0__SHIFT 10 +static inline uint32_t SDMA_CFG_DMA_ARB_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_RESERVED_0__SHIFT) & SDMA_CFG_DMA_ARB_RESERVED_0__MASK; +} +#define SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK 0x00000200 +#define SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT 9 +static inline uint32_t SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__SHIFT) & SDMA_CFG_DMA_ARB_WR_ARBIT_MODEL__MASK; +} +#define SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK 0x00000100 +#define SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT 8 +static inline uint32_t SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__SHIFT) & SDMA_CFG_DMA_ARB_RD_ARBIT_MODEL__MASK; +} +#define SDMA_CFG_DMA_ARB_RESERVED_1__MASK 0x00000080 +#define SDMA_CFG_DMA_ARB_RESERVED_1__SHIFT 7 +static inline uint32_t SDMA_CFG_DMA_ARB_RESERVED_1(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_RESERVED_1__SHIFT) & SDMA_CFG_DMA_ARB_RESERVED_1__MASK; +} +#define SDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK 0x00000070 +#define SDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT 4 +static inline uint32_t SDMA_CFG_DMA_ARB_WR_FIX_ARB(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_WR_FIX_ARB__SHIFT) & SDMA_CFG_DMA_ARB_WR_FIX_ARB__MASK; +} +#define SDMA_CFG_DMA_ARB_RESERVED_2__MASK 0x00000008 +#define SDMA_CFG_DMA_ARB_RESERVED_2__SHIFT 3 +static inline uint32_t SDMA_CFG_DMA_ARB_RESERVED_2(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_RESERVED_2__SHIFT) & SDMA_CFG_DMA_ARB_RESERVED_2__MASK; +} +#define SDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK 0x00000007 +#define SDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT 0 +static inline uint32_t SDMA_CFG_DMA_ARB_RD_FIX_ARB(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_ARB_RD_FIX_ARB__SHIFT) & SDMA_CFG_DMA_ARB_RD_FIX_ARB__MASK; +} + +#define REG_SDMA_CFG_DMA_RD_QOS 0x00009020 +#define SDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK 0xfffffc00 +#define SDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT 10 +static inline uint32_t SDMA_CFG_DMA_RD_QOS_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_QOS_RESERVED_0__SHIFT) & SDMA_CFG_DMA_RD_QOS_RESERVED_0__MASK; +} +#define SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK 0x00000300 +#define SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT 8 +static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_PC_QOS(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_PC_QOS__MASK; +} +#define SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK 0x000000c0 +#define SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT 6 +static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_PPU_QOS__MASK; +} +#define SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK 0x00000030 +#define 
SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT 4 +static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_DPU_QOS__MASK; +} +#define SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK 0x0000000c +#define SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT 2 +static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_KERNEL_QOS__MASK; +} +#define SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK 0x00000003 +#define SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT 0 +static inline uint32_t SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__SHIFT) & SDMA_CFG_DMA_RD_QOS_RD_FEATURE_QOS__MASK; +} + +#define REG_SDMA_CFG_DMA_RD_CFG 0x00009024 +#define SDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK 0xffffe000 +#define SDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT 13 +static inline uint32_t SDMA_CFG_DMA_RD_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_CFG_RESERVED_0__SHIFT) & SDMA_CFG_DMA_RD_CFG_RESERVED_0__MASK; +} +#define SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK 0x00001000 +#define SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT 12 +static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARLOCK(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARLOCK__MASK; +} +#define SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK 0x00000f00 +#define SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT 8 +static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARCACHE(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARCACHE__MASK; +} +#define SDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK 0x000000e0 +#define SDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT 5 +static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARPROT(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARPROT__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARPROT__MASK; +} +#define SDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK 0x00000018 +#define SDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT 3 +static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARBURST(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARBURST__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARBURST__MASK; +} +#define SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK 0x00000007 +#define SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT 0 +static inline uint32_t SDMA_CFG_DMA_RD_CFG_RD_ARSIZE(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__SHIFT) & SDMA_CFG_DMA_RD_CFG_RD_ARSIZE__MASK; +} + +#define REG_SDMA_CFG_DMA_WR_CFG 0x00009028 +#define SDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK 0xffffe000 +#define SDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT 13 +static inline uint32_t SDMA_CFG_DMA_WR_CFG_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WR_CFG_RESERVED_0__SHIFT) & SDMA_CFG_DMA_WR_CFG_RESERVED_0__MASK; +} +#define SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK 0x00001000 +#define SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT 12 +static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWLOCK(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWLOCK__MASK; +} +#define SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK 0x00000f00 +#define SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT 8 +static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWCACHE(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWCACHE__MASK; +} +#define SDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK 0x000000e0 +#define SDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT 5 +static inline uint32_t 
SDMA_CFG_DMA_WR_CFG_WR_AWPROT(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWPROT__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWPROT__MASK; +} +#define SDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK 0x00000018 +#define SDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT 3 +static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWBURST(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWBURST__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWBURST__MASK; +} +#define SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK 0x00000007 +#define SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT 0 +static inline uint32_t SDMA_CFG_DMA_WR_CFG_WR_AWSIZE(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__SHIFT) & SDMA_CFG_DMA_WR_CFG_WR_AWSIZE__MASK; +} + +#define REG_SDMA_CFG_DMA_WSTRB 0x0000902c +#define SDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK 0xffffffff +#define SDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT 0 +static inline uint32_t SDMA_CFG_DMA_WSTRB_WR_WSTRB(uint32_t val) +{ + return ((val) << SDMA_CFG_DMA_WSTRB_WR_WSTRB__SHIFT) & SDMA_CFG_DMA_WSTRB_WR_WSTRB__MASK; +} + +#define REG_SDMA_CFG_STATUS 0x00009030 +#define SDMA_CFG_STATUS_RESERVED_0__MASK 0xfffffe00 +#define SDMA_CFG_STATUS_RESERVED_0__SHIFT 9 +static inline uint32_t SDMA_CFG_STATUS_RESERVED_0(uint32_t val) +{ + return ((val) << SDMA_CFG_STATUS_RESERVED_0__SHIFT) & SDMA_CFG_STATUS_RESERVED_0__MASK; +} +#define SDMA_CFG_STATUS_IDEL__MASK 0x00000100 +#define SDMA_CFG_STATUS_IDEL__SHIFT 8 +static inline uint32_t SDMA_CFG_STATUS_IDEL(uint32_t val) +{ + return ((val) << SDMA_CFG_STATUS_IDEL__SHIFT) & SDMA_CFG_STATUS_IDEL__MASK; +} +#define SDMA_CFG_STATUS_RESERVED_1__MASK 0x000000ff +#define SDMA_CFG_STATUS_RESERVED_1__SHIFT 0 +static inline uint32_t SDMA_CFG_STATUS_RESERVED_1(uint32_t val) +{ + return ((val) << SDMA_CFG_STATUS_RESERVED_1__SHIFT) & SDMA_CFG_STATUS_RESERVED_1__MASK; +} + +#define REG_GLOBAL_OPERATION_ENABLE 0x0000f008 +#define GLOBAL_OPERATION_ENABLE_RESERVED_0__MASK 0xffffff80 +#define GLOBAL_OPERATION_ENABLE_RESERVED_0__SHIFT 7 +static inline uint32_t GLOBAL_OPERATION_ENABLE_RESERVED_0(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_RESERVED_0__SHIFT) & GLOBAL_OPERATION_ENABLE_RESERVED_0__MASK; +} +#define GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__MASK 0x00000040 +#define GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__SHIFT 6 +static inline uint32_t GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_PPU_RDMA_OP_EN__MASK; +} +#define GLOBAL_OPERATION_ENABLE_PPU_OP_EN__MASK 0x00000020 +#define GLOBAL_OPERATION_ENABLE_PPU_OP_EN__SHIFT 5 +static inline uint32_t GLOBAL_OPERATION_ENABLE_PPU_OP_EN(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_PPU_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_PPU_OP_EN__MASK; +} +#define GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__MASK 0x00000010 +#define GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__SHIFT 4 +static inline uint32_t GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_DPU_RDMA_OP_EN__MASK; +} +#define GLOBAL_OPERATION_ENABLE_DPU_OP_EN__MASK 0x00000008 +#define GLOBAL_OPERATION_ENABLE_DPU_OP_EN__SHIFT 3 +static inline uint32_t GLOBAL_OPERATION_ENABLE_DPU_OP_EN(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_DPU_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_DPU_OP_EN__MASK; +} +#define GLOBAL_OPERATION_ENABLE_CORE_OP_EN__MASK 0x00000004 +#define GLOBAL_OPERATION_ENABLE_CORE_OP_EN__SHIFT 2 +static inline uint32_t GLOBAL_OPERATION_ENABLE_CORE_OP_EN(uint32_t 
val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_CORE_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_CORE_OP_EN__MASK; +} +#define GLOBAL_OPERATION_ENABLE_RESERVED_1__MASK 0x00000002 +#define GLOBAL_OPERATION_ENABLE_RESERVED_1__SHIFT 1 +static inline uint32_t GLOBAL_OPERATION_ENABLE_RESERVED_1(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_RESERVED_1__SHIFT) & GLOBAL_OPERATION_ENABLE_RESERVED_1__MASK; +} +#define GLOBAL_OPERATION_ENABLE_CNA_OP_EN__MASK 0x00000001 +#define GLOBAL_OPERATION_ENABLE_CNA_OP_EN__SHIFT 0 +static inline uint32_t GLOBAL_OPERATION_ENABLE_CNA_OP_EN(uint32_t val) +{ + return ((val) << GLOBAL_OPERATION_ENABLE_CNA_OP_EN__SHIFT) & GLOBAL_OPERATION_ENABLE_CNA_OP_EN__MASK; +} + +#endif /* __ROCKET_REGISTERS_XML__ */
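For orientation, the pattern in the generated header above is uniform: each register gets a REG_* byte offset, and each field gets a *__MASK/*__SHIFT pair plus an inline packer that shifts a value into the field and masks off any overflow, so fields can simply be OR'ed together before a single MMIO write. A minimal sketch follows; rocket_ppu_pool_2x2() and the iomem base are hypothetical illustrations, not part of this patch — only the REG_* offsets and field helpers are taken from the header itself.

#include <linux/io.h>
#include <linux/types.h>

#include "rocket_registers.h"

/* Hypothetical sketch: program a 2x2 pooling window with stride 2.
 * Each generated helper shifts its argument into place and masks it
 * to the field width, so the fields compose with a plain OR before
 * one write to the register offset.
 */
static void rocket_ppu_pool_2x2(void __iomem *base)
{
	u32 cfg;

	cfg = PPU_POOLING_KERNEL_CFG_KERNEL_WIDTH(2) |
	      PPU_POOLING_KERNEL_CFG_KERNEL_HEIGHT(2) |
	      PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_WIDTH(2) |
	      PPU_POOLING_KERNEL_CFG_KERNEL_STRIDE_HEIGHT(2);
	writel(cfg, base + REG_PPU_POOLING_KERNEL_CFG);

	/* Kick the post-processing unit; OP_EN is bit 0 of the enable reg. */
	writel(PPU_OPERATION_ENABLE_OP_EN(1), base + REG_PPU_OPERATION_ENABLE);
}

Reading a field back is the inverse operation: mask the register value with *__MASK and shift right by *__SHIFT, e.g. (readl(base + REG_DDMA_CFG_STATUS) & DDMA_CFG_STATUS_IDEL__MASK) >> DDMA_CFG_STATUS_IDEL__SHIFT to poll the DMA idle bit (note that "IDEL" preserves the vendor's spelling from the register XML).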
