Diffstat (limited to 'drivers/accel/ivpu')
45 files changed, 5415 insertions, 3292 deletions
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 682c53245286..9e055b5ce03d 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -8,6 +8,7 @@ config DRM_ACCEL_IVPU
 	select FW_LOADER
 	select DRM_GEM_SHMEM_HELPER
 	select GENERIC_ALLOCATOR
+	select WANT_DEV_COREDUMP
 	help
 	  Choose this option if you have a system with an 14th generation
 	  Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
@@ -15,3 +16,12 @@ config DRM_ACCEL_IVPU
 	  and Deep Learning applications.
 
 	  If "M" is selected, the module will be called intel_vpu.
+
+config DRM_ACCEL_IVPU_DEBUG
+	bool "Intel NPU debug mode"
+	depends on DRM_ACCEL_IVPU
+	help
+	  Choose this option to enable additional
+	  debug features for the Intel NPU driver:
+	  - Always print debug messages regardless of dyndbg config,
+	  - Enable unsafe module params.
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index 95ff7ad16338..1029e0bab061 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -1,20 +1,29 @@
 # SPDX-License-Identifier: GPL-2.0-only
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2024 Intel Corporation
 
 intel_vpu-y := \
 	ivpu_drv.o \
 	ivpu_fw.o \
 	ivpu_fw_log.o \
 	ivpu_gem.o \
-	ivpu_hw_37xx.o \
-	ivpu_hw_40xx.o \
+	ivpu_hw.o \
+	ivpu_hw_btrs.o \
+	ivpu_hw_ip.o \
 	ivpu_ipc.o \
 	ivpu_job.o \
 	ivpu_jsm_msg.o \
 	ivpu_mmu.o \
 	ivpu_mmu_context.o \
-	ivpu_pm.o
+	ivpu_ms.o \
+	ivpu_pm.o \
+	ivpu_sysfs.o \
+	ivpu_trace_points.o
 
 intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
+intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o
 
 obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
+
+subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG
+
+CFLAGS_ivpu_trace_points.o = -I$(src)
diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c
new file mode 100644
index 000000000000..16ad0c30818c
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_coredump.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#include <linux/devcoredump.h>
+#include <linux/firmware.h>
+
+#include "ivpu_coredump.h"
+#include "ivpu_fw.h"
+#include "ivpu_gem.h"
+#include "vpu_boot_api.h"
+
+#define CRASH_DUMP_HEADER "Intel NPU crash dump"
+#define CRASH_DUMP_HEADERS_SIZE SZ_4K
+
+void ivpu_dev_coredump(struct ivpu_device *vdev)
+{
+	struct drm_print_iterator pi = {};
+	struct drm_printer p;
+	size_t coredump_size;
+	char *coredump;
+
+	coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE +
+			ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb);
+	coredump = vmalloc(coredump_size);
+	if (!coredump)
+		return;
+
+	pi.data = coredump;
+	pi.remain = coredump_size;
+	p = drm_coredump_printer(&pi);
+
+	drm_printf(&p, "%s\n", CRASH_DUMP_HEADER);
+	drm_printf(&p, "FW version: %s\n", vdev->fw->version);
+	ivpu_fw_log_print(vdev, false, &p);
+
+	dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL);
+}
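The new ivpu_coredump.c follows the usual devcoredump pattern: text is rendered into a vmalloc'ed buffer through a drm_coredump_printer, and dev_coredumpv() then takes ownership of the buffer, freeing it with vfree() after userspace reads the dump from /sys/class/devcoredump/devcd<N>/data or the dump times out. A minimal sketch of the same pattern for a hypothetical driver; all names below are illustrative, not ivpu code:

#include <linux/devcoredump.h>
#include <linux/sizes.h>
#include <linux/vmalloc.h>

#include <drm/drm_print.h>

static void example_coredump(struct device *dev, const char *fw_version)
{
	struct drm_print_iterator pi = {};
	struct drm_printer p;
	char *buf;

	buf = vmalloc(SZ_4K);
	if (!buf)
		return;

	pi.data = buf;
	pi.remain = SZ_4K;
	p = drm_coredump_printer(&pi); /* each drm_printf() advances pi.offset */

	drm_printf(&p, "FW version: %s\n", fw_version);

	/* dev_coredumpv() takes ownership of buf and vfree()s it later;
	 * pi.offset is the number of bytes actually produced. */
	dev_coredumpv(dev, buf, pi.offset, GFP_KERNEL);
}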
diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h
new file mode 100644
index 000000000000..8efb09d02441
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_coredump.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_COREDUMP_H__
+#define __IVPU_COREDUMP_H__
+
+#include <drm/drm_print.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_fw_log.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+void ivpu_dev_coredump(struct ivpu_device *vdev);
+#else
+static inline void ivpu_dev_coredump(struct ivpu_device *vdev)
+{
+	struct drm_printer p = drm_info_printer(vdev->drm.dev);
+
+	ivpu_fw_log_print(vdev, false, &p);
+}
+#endif
+
+#endif /* __IVPU_COREDUMP_H__ */
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 7cb962e21453..8180b95ed69d 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
+#include <linux/debugfs.h>
+
 #include <drm/drm_debugfs.h>
 #include <drm/drm_file.h>
 #include <drm/drm_print.h>
@@ -43,6 +45,14 @@ static int fw_name_show(struct seq_file *s, void *v)
 	return 0;
 }
 
+static int fw_version_show(struct seq_file *s, void *v)
+{
+	struct ivpu_device *vdev = seq_to_ivpu(s);
+
+	seq_printf(s, "%s\n", vdev->fw->version);
+	return 0;
+}
+
 static int fw_trace_capability_show(struct seq_file *s, void *v)
 {
 	struct ivpu_device *vdev = seq_to_ivpu(s);
@@ -106,41 +116,66 @@ static int reset_pending_show(struct seq_file *s, void *v)
 	return 0;
 }
 
+static int firewall_irq_counter_show(struct seq_file *s, void *v)
+{
+	struct ivpu_device *vdev = seq_to_ivpu(s);
+
+	seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter));
+	return 0;
+}
+
 static const struct drm_debugfs_info vdev_debugfs_list[] = {
 	{"bo_list", bo_list_show, 0},
 	{"fw_name", fw_name_show, 0},
+	{"fw_version", fw_version_show, 0},
 	{"fw_trace_capability", fw_trace_capability_show, 0},
 	{"fw_trace_config", fw_trace_config_show, 0},
 	{"last_bootmode", last_bootmode_show, 0},
 	{"reset_counter", reset_counter_show, 0},
 	{"reset_pending", reset_pending_show, 0},
+	{"firewall_irq_counter", firewall_irq_counter_show, 0},
 };
 
+static int dvfs_mode_get(void *data, u64 *dvfs_mode)
+{
+	struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+	*dvfs_mode = vdev->fw->dvfs_mode;
+	return 0;
+}
+
+static int dvfs_mode_set(void *data, u64 dvfs_mode)
+{
+	struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+	vdev->fw->dvfs_mode = (u32)dvfs_mode;
+	return pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n");
+
 static ssize_t
-dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
 {
 	struct ivpu_device *vdev = file->private_data;
-	struct ivpu_fw_info *fw = vdev->fw;
-	u32 dvfs_mode;
+	char buffer[VPU_DYNDBG_CMD_MAX_LEN] = {};
 	int ret;
 
-	ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
-	if (ret < 0)
-		return ret;
-
-	fw->dvfs_mode = dvfs_mode;
+	if (size >= VPU_DYNDBG_CMD_MAX_LEN)
+		return -EINVAL;
 
-	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
-	if (ret)
+	ret = strncpy_from_user(buffer, user_buf, size);
+	if (ret < 0)
 		return ret;
 
+	ivpu_jsm_dyndbg_control(vdev, buffer, size);
 	return size;
 }
 
-static const struct file_operations dvfs_mode_fops = {
+static const struct file_operations fw_dyndbg_fops = {
 	.owner = THIS_MODULE,
 	.open = simple_open,
-	.write = dvfs_mode_fops_write,
+	.write = fw_dyndbg_fops_write,
 };
 
 static int fw_log_show(struct seq_file *s, void *v)
@@ -166,7 +201,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l
 	if (!size)
 		return -EINVAL;
 
-	ivpu_fw_log_clear(vdev);
+	ivpu_fw_log_mark_read(vdev);
 	return size;
 }
 
@@ -287,22 +322,6 @@ static const struct file_operations fw_trace_level_fops = {
 };
 
 static ssize_t
-ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
-{
-	struct ivpu_device *vdev = file->private_data;
-
-	if (!size)
-		return -EINVAL;
-
-	if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
-		return -ENODEV;
-	if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
-		return -ENODEV;
-
-	return size;
-}
-
-static ssize_t
 ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
 {
 	struct ivpu_device *vdev = file->private_data;
@@ -327,11 +346,56 @@ static const struct file_operations ivpu_force_recovery_fops = {
 	.write = ivpu_force_recovery_fn,
 };
 
-static const struct file_operations ivpu_reset_engine_fops = {
-	.owner = THIS_MODULE,
-	.open = simple_open,
-	.write = ivpu_reset_engine_fn,
-};
+static int ivpu_reset_engine_fn(void *data, u64 val)
+{
+	struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+	return ivpu_jsm_reset_engine(vdev, (u32)val);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, ivpu_reset_engine_fn, "0x%02llx\n");
+
+static int ivpu_resume_engine_fn(void *data, u64 val)
+{
+	struct ivpu_device *vdev = (struct ivpu_device *)data;
+
+	return ivpu_jsm_hws_resume_engine(vdev, (u32)val);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n");
+
+static int dct_active_get(void *data, u64 *active_percent)
+{
+	struct ivpu_device *vdev = data;
+
+	*active_percent = vdev->pm->dct_active_percent;
+
+	return 0;
+}
+
+static int dct_active_set(void *data, u64 active_percent)
+{
+	struct ivpu_device *vdev = data;
+	int ret;
+
+	if (active_percent > 100)
+		return -EINVAL;
+
+	ret = ivpu_rpm_get(vdev);
+	if (ret)
+		return ret;
+
+	if (active_percent)
+		ret = ivpu_pm_dct_enable(vdev, active_percent);
+	else
+		ret = ivpu_pm_dct_disable(vdev);
+
+	ivpu_rpm_put(vdev);
+
+	return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_dct_fops, dct_active_get, dct_active_set, "%llu\n");
 
 void ivpu_debugfs_init(struct ivpu_device *vdev)
 {
@@ -342,9 +406,11 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
 	debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
 			    &ivpu_force_recovery_fops);
 
-	debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+	debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev,
 			    &dvfs_mode_fops);
 
+	debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev,
+			    &fw_dyndbg_fops);
 	debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
 			    &fw_log_fops);
 	debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
@@ -356,8 +422,12 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
 
 	debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
 			    &ivpu_reset_engine_fops);
+	debugfs_create_file("resume_engine", 0200, debugfs_root, vdev,
+			    &ivpu_resume_engine_fops);
 
-	if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
+	if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX) {
 		debugfs_create_file("fw_profiling_freq_drive", 0200,
 				    debugfs_root, vdev, &fw_profiling_freq_fops);
+		debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops);
+	}
 }
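The dvfs_mode, reset_engine, resume_engine and dct files are converted above to DEFINE_DEBUGFS_ATTRIBUTE(), which builds a simple-attribute fops from a u64 get/set pair plus a printf format, replacing hand-written write handlers. A generic sketch of the pattern with illustrative names; note that passing NULL for the getter, as reset_engine does, yields a write-only attribute:

#include <linux/debugfs.h>

static u64 example_value;

static int example_get(void *data, u64 *val)
{
	*val = example_value;
	return 0;
}

static int example_set(void *data, u64 val)
{
	example_value = val;
	return 0;
}

/* Generates example_fops; reads print "%llu\n", writes parse a u64 */
DEFINE_DEBUGFS_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");

static void example_debugfs_init(struct dentry *root)
{
	debugfs_create_file("example", 0644, root, NULL, &example_fops);
}

DEFINE_DEBUGFS_ATTRIBUTE() also permits registering via debugfs_create_file_unsafe(), which skips the file-removal protection proxy; plain debugfs_create_file(), as used here, works as well.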
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 4b0640226986..38cf1c342c72 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,12 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
 */
 
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
+#include <generated/utsrelease.h>
 
 #include <drm/drm_accel.h>
 #include <drm/drm_file.h>
@@ -14,7 +15,7 @@
 #include <drm/drm_ioctl.h>
 #include <drm/drm_prime.h>
 
-#include "vpu_boot_api.h"
+#include "ivpu_coredump.h"
 #include "ivpu_debugfs.h"
 #include "ivpu_drv.h"
 #include "ivpu_fw.h"
@@ -26,11 +27,13 @@
 #include "ivpu_jsm_msg.h"
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
+#include "ivpu_ms.h"
 #include "ivpu_pm.h"
+#include "ivpu_sysfs.h"
+#include "vpu_boot_api.h"
 
 #ifndef DRIVER_VERSION_STR
-#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \
-			   __stringify(DRM_IVPU_DRIVER_MINOR) "."
+#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE
 #endif
 
 static struct lock_class_key submitted_jobs_xa_lock_class_key;
@@ -40,21 +43,31 @@ module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
 MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
 
 int ivpu_test_mode;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
 module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
 MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
+#endif
 
 u8 ivpu_pll_min_ratio;
 module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
-MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency");
+MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set NPU frequency");
 
 u8 ivpu_pll_max_ratio = U8_MAX;
 module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
-MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency");
+MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
+
+int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO;
+module_param_named(sched_mode, ivpu_sched_mode, int, 0444);
+MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler");
 
 bool ivpu_disable_mmu_cont_pages;
-module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0644);
+module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444);
 MODULE_PARM_DESC(disable_mmu_cont_pages, "Disable MMU contiguous pages optimization");
 
+bool ivpu_force_snoop;
+module_param_named(force_snoop, ivpu_force_snoop, bool, 0444);
+MODULE_PARM_DESC(force_snoop, "Force snooping for NPU host memory access");
+
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
@@ -74,9 +87,8 @@ static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *fi
 	ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id);
 
 	ivpu_cmdq_release_all_locked(file_priv);
-	ivpu_jsm_context_release(vdev, file_priv->ctx.id);
 	ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx);
-	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+	ivpu_mmu_context_fini(vdev, &file_priv->ctx);
 	file_priv->bound = false;
 	drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id));
 }
@@ -94,9 +106,12 @@ static void file_priv_release(struct kref *ref)
 	pm_runtime_get_sync(vdev->drm.dev);
 	mutex_lock(&vdev->context_list_lock);
 	file_priv_unbind(vdev, file_priv);
+	drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa));
+	xa_destroy(&file_priv->cmdq_xa);
 	mutex_unlock(&vdev->context_list_lock);
 	pm_runtime_put_autosuspend(vdev->drm.dev);
 
+	mutex_destroy(&file_priv->ms_lock);
 	mutex_destroy(&file_priv->lock);
 	kfree(file_priv);
 }
@@ -106,8 +121,6 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
 	struct ivpu_file_priv *file_priv = *link;
 	struct ivpu_device *vdev = file_priv->vdev;
 
-	drm_WARN_ON(&vdev->drm, !file_priv);
-
 	ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n",
 		 file_priv->ctx.id, kref_read(&file_priv->ref));
 
@@ -119,7 +132,7 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
 {
 	switch (args->index) {
 	case DRM_IVPU_CAP_METRIC_STREAMER:
-		args->value = 0;
+		args->value = 1;
 		break;
 	case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
 		args->value = 1;
@@ -131,22 +144,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
 	return 0;
 }
 
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
-	int ret;
-
-	ret = ivpu_rpm_get_if_active(vdev);
-	if (ret < 0)
-		return ret;
-
-	*clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
-	if (ret)
-		ivpu_rpm_put(vdev);
-
-	return 0;
-}
-
 static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +167,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 		args->value = vdev->platform;
 		break;
 	case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
-		ret = ivpu_get_core_clock_rate(vdev, &args->value);
+		args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
 		break;
 	case DRM_IVPU_PARAM_NUM_CONTEXTS:
 		args->value = ivpu_get_context_count(vdev);
@@ -244,10 +241,13 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
 		goto err_dev_exit;
 	}
 
+	INIT_LIST_HEAD(&file_priv->ms_instance_list);
+
 	file_priv->vdev = vdev;
 	file_priv->bound = true;
 	kref_init(&file_priv->ref);
 	mutex_init(&file_priv->lock);
+	mutex_init(&file_priv->ms_lock);
 
 	mutex_lock(&vdev->context_list_lock);
 
@@ -258,9 +258,14 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
 		goto err_unlock;
 	}
 
-	ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
-	if (ret)
-		goto err_xa_erase;
+	ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id);
+
+	file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
+	file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK;
+
+	xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1);
+	file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID;
+	file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID;
 
 	mutex_unlock(&vdev->context_list_lock);
 	drm_dev_exit(idx);
@@ -272,10 +277,9 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
 
 	return 0;
 
-err_xa_erase:
-	xa_erase_irq(&vdev->context_xa, ctx_id);
 err_unlock:
 	mutex_unlock(&vdev->context_list_lock);
+	mutex_destroy(&file_priv->ms_lock);
 	mutex_destroy(&file_priv->lock);
 	kfree(file_priv);
 err_dev_exit:
@@ -291,6 +295,7 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file)
 	ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n",
 		 file_priv->ctx.id, current->comm, task_pid_nr(current));
 
+	ivpu_ms_cleanup(file_priv);
 	ivpu_file_priv_put(&file_priv);
 }
 
@@ -301,6 +306,10 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_START, ivpu_ms_start_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0),
 };
 
 static int ivpu_wait_for_ready(struct ivpu_device *vdev)
@@ -317,7 +326,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 	timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
 
 	while (1) {
-		ivpu_ipc_irq_handler(vdev, NULL);
+		ivpu_ipc_irq_handler(vdev);
 		ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
 		if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
 			break;
@@ -328,13 +337,28 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 	ivpu_ipc_consumer_del(vdev, &cons);
 
 	if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) {
-		ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n",
+		ivpu_err(vdev, "Invalid NPU ready message: 0x%x\n",
 			 ipc_hdr.data_addr);
 		return -EIO;
 	}
 
 	if (!ret)
-		ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
+		ivpu_dbg(vdev, PM, "NPU ready message received successfully\n");
+
+	return ret;
+}
+
+static int ivpu_hw_sched_init(struct ivpu_device *vdev)
+{
+	int ret = 0;
+
+	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+		ret = ivpu_jsm_hws_setup_priority_bands(vdev);
+		if (ret) {
+			ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret);
+			return ret;
+		}
+	}
 
 	return ret;
 }
@@ -362,17 +386,35 @@ int ivpu_boot(struct ivpu_device *vdev)
 	ret = ivpu_wait_for_ready(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
-		ivpu_hw_diagnose_failure(vdev);
-		ivpu_mmu_evtq_dump(vdev);
-		ivpu_fw_log_dump(vdev);
-		return ret;
+		goto err_diagnose_failure;
 	}
 
 	ivpu_hw_irq_clear(vdev);
 	enable_irq(vdev->irq);
 	ivpu_hw_irq_enable(vdev);
 	ivpu_ipc_enable(vdev);
+
+	if (ivpu_fw_is_cold_boot(vdev)) {
+		ret = ivpu_pm_dct_init(vdev);
+		if (ret)
+			goto err_disable_ipc;
+
+		ret = ivpu_hw_sched_init(vdev);
+		if (ret)
+			goto err_disable_ipc;
+	}
+
 	return 0;
+
+err_disable_ipc:
+	ivpu_ipc_disable(vdev);
+	ivpu_hw_irq_disable(vdev);
+	disable_irq(vdev->irq);
+err_diagnose_failure:
+	ivpu_hw_diagnose_failure(vdev);
+	ivpu_mmu_evtq_dump(vdev);
+	ivpu_dev_coredump(vdev);
+	return ret;
 }
 
 void ivpu_prepare_for_reset(struct ivpu_device *vdev)
@@ -387,12 +429,15 @@ int ivpu_shutdown(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ivpu_prepare_for_reset(vdev);
+	/* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+	pci_save_state(to_pci_dev(vdev->drm.dev));
 
 	ret = ivpu_hw_power_down(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
 
+	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
 	return ret;
 }
 
@@ -416,16 +461,56 @@ static const struct drm_driver driver = {
 
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
-	.date = DRIVER_DATE,
-	.major = DRM_IVPU_DRIVER_MAJOR,
-	.minor = DRM_IVPU_DRIVER_MINOR,
+
+	.major = 1,
 };
 
+static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
+{
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+
+	mutex_lock(&vdev->context_list_lock);
+
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+		if (!file_priv->has_mmu_faults || file_priv->aborted)
+			continue;
+
+		mutex_lock(&file_priv->lock);
+		ivpu_context_abort_locked(file_priv);
+		file_priv->aborted = true;
+		mutex_unlock(&file_priv->lock);
+	}
+
+	mutex_unlock(&vdev->context_list_lock);
+}
+
 static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
 {
 	struct ivpu_device *vdev = arg;
+	u8 irq_src;
 
-	return ivpu_ipc_irq_thread_handler(vdev);
+	if (kfifo_is_empty(&vdev->hw->irq.fifo))
+		return IRQ_NONE;
+
+	while (kfifo_get(&vdev->hw->irq.fifo, &irq_src)) {
+		switch (irq_src) {
+		case IVPU_HW_IRQ_SRC_IPC:
+			ivpu_ipc_irq_thread_handler(vdev);
+			break;
+		case IVPU_HW_IRQ_SRC_MMU_EVTQ:
+			ivpu_context_abort_invalid(vdev);
+			break;
+		case IVPU_HW_IRQ_SRC_DCT:
+			ivpu_pm_dct_irq_thread_handler(vdev);
+			break;
+		default:
+			ivpu_err_ratelimited(vdev, "Unknown IRQ source: %u\n", irq_src);
+			break;
+		}
+	}
+
+	return IRQ_HANDLED;
 }
 
 static int ivpu_irq_init(struct ivpu_device *vdev)
@@ -439,9 +524,11 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
 		return ret;
 	}
 
+	ivpu_irq_handlers_init(vdev);
+
 	vdev->irq = pci_irq_vector(pdev, 0);
 
-	ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
+	ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq_handler,
 					ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
@@ -518,23 +605,24 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	if (!vdev->pm)
 		return -ENOMEM;
 
-	if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX) {
-		vdev->hw->ops = &ivpu_hw_40xx_ops;
+	if (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX)
 		vdev->hw->dma_bits = 48;
-	} else {
-		vdev->hw->ops = &ivpu_hw_37xx_ops;
+	else
 		vdev->hw->dma_bits = 38;
-	}
 
 	vdev->platform = IVPU_PLATFORM_INVALID;
 	vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
 	vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
 	atomic64_set(&vdev->unique_id_counter, 0);
-	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
+	xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
 	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
 	INIT_LIST_HEAD(&vdev->bo_list);
 
+	vdev->db_limit.min = IVPU_MIN_DB;
+	vdev->db_limit.max = IVPU_MAX_DB;
+
 	ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
 	if (ret)
 		goto err_xa_destroy;
@@ -552,18 +640,16 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 		goto err_xa_destroy;
 
 	/* Init basic HW info based on buttress registers which are accessible before power up */
-	ret = ivpu_hw_info_init(vdev);
+	ret = ivpu_hw_init(vdev);
 	if (ret)
 		goto err_xa_destroy;
 
 	/* Power up early so the rest of init code can access VPU registers */
 	ret = ivpu_hw_power_up(vdev);
 	if (ret)
-		goto err_power_down;
+		goto err_shutdown;
 
-	ret = ivpu_mmu_global_context_init(vdev);
-	if (ret)
-		goto err_power_down;
+	ivpu_mmu_global_context_init(vdev);
 
 	ret = ivpu_mmu_init(vdev);
 	if (ret)
@@ -600,11 +686,10 @@ err_mmu_rctx_fini:
 	ivpu_mmu_reserved_context_fini(vdev);
 err_mmu_gctx_fini:
 	ivpu_mmu_global_context_fini(vdev);
-err_power_down:
-	ivpu_hw_power_down(vdev);
-	if (IVPU_WA(d3hot_after_power_off))
-		pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+	ivpu_shutdown(vdev);
 err_xa_destroy:
+	xa_destroy(&vdev->db_xa);
 	xa_destroy(&vdev->submitted_jobs_xa);
 	xa_destroy(&vdev->context_xa);
 	return ret;
@@ -625,14 +710,14 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
 
 static void ivpu_dev_fini(struct ivpu_device *vdev)
 {
+	ivpu_jobs_abort_all(vdev);
+	ivpu_pm_cancel_recovery(vdev);
 	ivpu_pm_disable(vdev);
+	ivpu_prepare_for_reset(vdev);
 	ivpu_shutdown(vdev);
-	if (IVPU_WA(d3hot_after_power_off))
-		pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
 
-	ivpu_jobs_abort_all(vdev);
+	ivpu_ms_cleanup_all(vdev);
 	ivpu_job_done_consumer_fini(vdev);
-	ivpu_pm_cancel_recovery(vdev);
 	ivpu_bo_unbind_all_user_contexts(vdev);
 
 	ivpu_ipc_fini(vdev);
@@ -640,6 +725,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
 	ivpu_mmu_reserved_context_fini(vdev);
 	ivpu_mmu_global_context_fini(vdev);
 
+	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->db_xa));
+	xa_destroy(&vdev->db_xa);
 	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
 	xa_destroy(&vdev->submitted_jobs_xa);
 	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa));
@@ -650,6 +737,7 @@ static struct pci_device_id ivpu_pci_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
@@ -670,6 +758,7 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return ret;
 
 	ivpu_debugfs_init(vdev);
+	ivpu_sysfs_init(vdev);
 
 	ret = drm_dev_register(&vdev->drm, 0);
 	if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 069ace4adb2d..3fdff3f6cffd 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
 */
 
 #ifndef __IVPU_DRV_H__
@@ -21,14 +21,21 @@
 #define DRIVER_NAME "intel_vpu"
 #define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
-#define DRIVER_DATE "20230117"
 
-#define PCI_DEVICE_ID_MTL 0x7d1d
-#define PCI_DEVICE_ID_ARL 0xad1d
-#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_MTL 0x7d1d
+#define PCI_DEVICE_ID_ARL 0xad1d
+#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_PTL_P 0xb03e
 
-#define IVPU_HW_37XX 37
-#define IVPU_HW_40XX 40
+#define IVPU_HW_IP_37XX 37
+#define IVPU_HW_IP_40XX 40
+#define IVPU_HW_IP_50XX 50
+#define IVPU_HW_IP_60XX 60
+
+#define IVPU_HW_IP_REV_LNL_B0 4
+
+#define IVPU_HW_BTRS_MTL 1
+#define IVPU_HW_BTRS_LNL 2
 
 #define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
 /* SSID 1 is used by the VPU to represent reserved context */
@@ -36,13 +43,25 @@
 #define IVPU_USER_CONTEXT_MIN_SSID 2
 #define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
 
-#define IVPU_NUM_ENGINES 2
+#define IVPU_MIN_DB 1
+#define IVPU_MAX_DB 255
+
+#define IVPU_JOB_ID_JOB_MASK		GENMASK(7, 0)
+#define IVPU_JOB_ID_CONTEXT_MASK	GENMASK(31, 8)
+
+#define IVPU_NUM_PRIORITIES 4
+#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES)
+
+#define IVPU_CMDQ_MIN_ID 1
+#define IVPU_CMDQ_MAX_ID 255
 
 #define IVPU_PLATFORM_SILICON 0
 #define IVPU_PLATFORM_SIMICS 2
 #define IVPU_PLATFORM_FPGA 3
 #define IVPU_PLATFORM_INVALID 8
 
+#define IVPU_SCHED_MODE_AUTO -1
+
 #define IVPU_DBG_REG BIT(0)
 #define IVPU_DBG_IRQ BIT(1)
 #define IVPU_DBG_MMU BIT(2)
@@ -87,10 +106,10 @@ struct ivpu_wa_table {
 	bool punit_disabled;
 	bool clear_runtime_mem;
-	bool d3hot_after_power_off;
 	bool interrupt_clear_with_0;
 	bool disable_clock_relinquish;
 	bool disable_d0i3_msg;
+	bool wp0_during_power_up;
 };
 
 struct ivpu_hw_info;
@@ -119,6 +138,10 @@ struct ivpu_device {
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;
 
+	struct xarray db_xa;
+	struct xa_limit db_limit;
+	u32 db_next;
+
 	struct mutex bo_list_lock; /* Protects bo_list */
 	struct list_head bo_list;
 
@@ -127,13 +150,16 @@ struct ivpu_device {
 
 	atomic64_t unique_id_counter;
 
+	ktime_t busy_start_ts;
+	ktime_t busy_time;
+
 	struct {
 		int boot;
 		int jsm;
 		int tdr;
-		int reschedule_suspend;
 		int autosuspend;
 		int d0i3_entry_msg;
+		int state_dump_msg;
 	} timeout;
 };
 
@@ -145,22 +171,35 @@ struct ivpu_file_priv {
 	struct kref ref;
 	struct ivpu_device *vdev;
 	struct mutex lock; /* Protects cmdq */
-	struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES];
+	struct xarray cmdq_xa;
 	struct ivpu_mmu_context ctx;
+	struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */
+	struct list_head ms_instance_list;
+	struct ivpu_bo *ms_info_bo;
+	struct xa_limit job_limit;
+	u32 job_id_next;
+	struct xa_limit cmdq_limit;
+	u32 cmdq_id_next;
 	bool has_mmu_faults;
 	bool bound;
+	bool aborted;
 };
 
 extern int ivpu_dbg_mask;
 extern u8 ivpu_pll_min_ratio;
 extern u8 ivpu_pll_max_ratio;
+extern int ivpu_sched_mode;
 extern bool ivpu_disable_mmu_cont_pages;
+extern bool ivpu_force_snoop;
 
 #define IVPU_TEST_MODE_FW_TEST            BIT(0)
 #define IVPU_TEST_MODE_NULL_HW            BIT(1)
 #define IVPU_TEST_MODE_NULL_SUBMISSION    BIT(2)
 #define IVPU_TEST_MODE_D0I3_MSG_DISABLE   BIT(4)
 #define IVPU_TEST_MODE_D0I3_MSG_ENABLE    BIT(5)
+#define IVPU_TEST_MODE_MIP_DISABLE        BIT(6)
+#define IVPU_TEST_MODE_DISABLE_TIMEOUTS   BIT(8)
+#define IVPU_TEST_MODE_TURBO              BIT(9)
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
@@ -180,16 +219,35 @@ static inline u16 ivpu_device_id(struct ivpu_device *vdev)
 	return to_pci_dev(vdev->drm.dev)->device;
 }
 
-static inline int ivpu_hw_gen(struct ivpu_device *vdev)
+static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
 {
 	switch (ivpu_device_id(vdev)) {
 	case PCI_DEVICE_ID_MTL:
 	case PCI_DEVICE_ID_ARL:
-		return IVPU_HW_37XX;
+		return IVPU_HW_IP_37XX;
 	case PCI_DEVICE_ID_LNL:
-		return IVPU_HW_40XX;
+		return IVPU_HW_IP_40XX;
+	case PCI_DEVICE_ID_PTL_P:
+		return IVPU_HW_IP_50XX;
 	default:
-		ivpu_err(vdev, "Unknown VPU device\n");
+		dump_stack();
+		ivpu_err(vdev, "Unknown NPU IP generation\n");
+		return 0;
+	}
+}
+
+static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
+{
+	switch (ivpu_device_id(vdev)) {
+	case PCI_DEVICE_ID_MTL:
+	case PCI_DEVICE_ID_ARL:
+		return IVPU_HW_BTRS_MTL;
+	case PCI_DEVICE_ID_LNL:
+	case PCI_DEVICE_ID_PTL_P:
+		return IVPU_HW_BTRS_LNL;
+	default:
+		dump_stack();
+		ivpu_err(vdev, "Unknown buttress generation\n");
 		return 0;
 	}
 }
@@ -227,4 +285,9 @@ static inline bool ivpu_is_fpga(struct ivpu_device *vdev)
 	return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA;
 }
 
+static inline bool ivpu_is_force_snoop_enabled(struct ivpu_device *vdev)
+{
+	return ivpu_force_snoop;
+}
+
 #endif /* __IVPU_DRV_H__ */
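The new IVPU_JOB_ID_* masks split a 32-bit job ID into a per-context job number (bits 7:0) and a context field (bits 31:8); the job_limit window initialized in ivpu_open() confines xa_alloc() to the ID range owned by one context. A worked sketch of the encoding, assuming the masks above:

#include <linux/bitfield.h>
#include <linux/bits.h>

#define EXAMPLE_JOB_ID_JOB_MASK		GENMASK(7, 0)
#define EXAMPLE_JOB_ID_CONTEXT_MASK	GENMASK(31, 8)

/* ctx 3 -> min = FIELD_PREP(CONTEXT_MASK, 2) = 0x200,
 *          max = 0x200 | 0xff = 0x2ff,
 * so context 3 owns job IDs 0x200..0x2ff (256 in-flight jobs). */
static u32 example_job_id_min(u32 ctx_id)
{
	return FIELD_PREP(EXAMPLE_JOB_ID_CONTEXT_MASK, ctx_id - 1);
}

/* Recover the owning context from a completed job ID */
static u32 example_job_id_to_ctx(u32 job_id)
{
	return FIELD_GET(EXAMPLE_JOB_ID_CONTEXT_MASK, job_id) + 1;
}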
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 5fa8bd4603d5..6037ec0b3096 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
 */
 
 #include <linux/firmware.h>
@@ -25,7 +25,6 @@
 #define FW_SHAVE_NN_MAX_SIZE SZ_2M
 #define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START)
 #define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
-#define FW_VERSION_HEADER_SIZE SZ_4K
 #define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
 
 #define WATCHDOG_MSS_REDIRECT 32
@@ -44,22 +43,31 @@
 #define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
 	ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
 
+#define IVPU_FOCUS_PRESENT_TIMER_MS 1000
+
 static char *ivpu_firmware;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
 module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
-MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
+MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/..");
+#endif
 
-/* TODO: Remove mtl_vpu.bin from names after transition to generation based FW names */
 static struct {
 	int gen;
 	const char *name;
 } fw_names[] = {
-	{ IVPU_HW_37XX, "vpu_37xx.bin" },
-	{ IVPU_HW_37XX, "mtl_vpu.bin" },
-	{ IVPU_HW_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
-	{ IVPU_HW_40XX, "vpu_40xx.bin" },
-	{ IVPU_HW_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
+	{ IVPU_HW_IP_37XX, "vpu_37xx.bin" },
+	{ IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
+	{ IVPU_HW_IP_40XX, "vpu_40xx.bin" },
+	{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
+	{ IVPU_HW_IP_50XX, "vpu_50xx.bin" },
+	{ IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
 };
 
+/* Production fw_names from the table above */
+MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin");
+
 static int ivpu_fw_request(struct ivpu_device *vdev)
 {
 	int ret = -ENOENT;
@@ -73,7 +81,7 @@ static int ivpu_fw_request(struct ivpu_device *vdev)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fw_names); i++) {
-		if (fw_names[i].gen != ivpu_hw_gen(vdev))
+		if (fw_names[i].gen != ivpu_hw_ip_gen(vdev))
 			continue;
 
 		ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i].name, vdev->drm.dev);
@@ -123,6 +131,23 @@ ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_hea
 	return false;
 }
 
+static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range_size)
+{
+	if (addr < range_start || addr + size > range_start + range_size)
+		return false;
+
+	return true;
+}
+
+static u32
+ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+	if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO)
+		return ivpu_sched_mode;
+
+	return VPU_SCHEDULING_MODE_OS;
+}
+
 static int ivpu_fw_parse(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
@@ -179,8 +204,10 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n",
 		 fw_hdr->header_version, fw_hdr->image_format);
 
-	ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
-		  (const char *)fw_hdr + VPU_FW_HEADER_SIZE);
+	if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE))
+		ivpu_warn(vdev, "Missing firmware version\n");
+
+	ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version);
 
 	if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
 		return -EINVAL;
@@ -196,16 +223,35 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	fw->cold_boot_entry_point = fw_hdr->entry_point;
 	fw->entry_point = fw->cold_boot_entry_point;
 
-	fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL);
+	fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL);
 	fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
 	fw->trace_hw_component_mask = -1;
 
 	fw->dvfs_mode = 0;
 
+	fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
+	fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+	fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+	ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
+
+	if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
+								 fw_hdr->ro_section_size,
+								 fw_hdr->image_load_address,
+								 fw_hdr->image_size)) {
+		ivpu_err(vdev, "Invalid read-only section: start address 0x%llx, size %u\n",
+			 fw_hdr->ro_section_start_address, fw_hdr->ro_section_size);
+		return -EINVAL;
+	}
+
+	fw->read_only_addr = fw_hdr->ro_section_start_address;
+	fw->read_only_size = fw_hdr->ro_section_size;
+
 	ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
 		 fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
 	ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
 		 fw->runtime_addr, image_load_addr, fw->entry_point);
+	ivpu_dbg(vdev, FW_BOOT, "Read-only section: address 0x%llx, size %u\n",
+		 fw->read_only_addr, fw->read_only_size);
 
 	return 0;
 }
@@ -243,13 +289,14 @@ static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
 		return -EINVAL;
 	}
 
-	ivpu_hw_init_range(&vdev->hw->ranges.global, start, size);
+	ivpu_hw_range_init(&vdev->hw->ranges.global, start, size);
 	return 0;
 }
 
 static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
+	struct ivpu_addr_range fw_range;
 	int log_verb_size;
 	int ret;
 
@@ -257,37 +304,48 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 	if (ret)
 		return ret;
 
-	fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
+	fw_range.start = fw->runtime_addr;
+	fw_range.end = fw->runtime_addr + fw->runtime_size;
+	fw->mem = ivpu_bo_create(vdev, &vdev->gctx, &fw_range, fw->runtime_size,
+				 DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
 	if (!fw->mem) {
-		ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
+		ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n");
 		return -ENOMEM;
 	}
 
-	fw->mem_log_crit = ivpu_bo_alloc_internal(vdev, 0, IVPU_FW_CRITICAL_BUFFER_SIZE,
-						  DRM_IVPU_BO_CACHED);
+	ret = ivpu_mmu_context_set_pages_ro(vdev, &vdev->gctx, fw->read_only_addr,
+					    fw->read_only_size);
+	if (ret) {
+		ivpu_err(vdev, "Failed to set firmware image read-only\n");
+		goto err_free_fw_mem;
+	}
+
+	fw->mem_log_crit = ivpu_bo_create_global(vdev, IVPU_FW_CRITICAL_BUFFER_SIZE,
+						 DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
 	if (!fw->mem_log_crit) {
-		ivpu_err(vdev, "Failed to allocate critical log buffer\n");
+		ivpu_err(vdev, "Failed to create critical log buffer\n");
 		ret = -ENOMEM;
 		goto err_free_fw_mem;
 	}
 
-	if (ivpu_log_level <= IVPU_FW_LOG_INFO)
+	if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO)
 		log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE;
 	else
 		log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE;
 
-	fw->mem_log_verb = ivpu_bo_alloc_internal(vdev, 0, log_verb_size, DRM_IVPU_BO_CACHED);
+	fw->mem_log_verb = ivpu_bo_create_global(vdev, log_verb_size,
+						 DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
 	if (!fw->mem_log_verb) {
-		ivpu_err(vdev, "Failed to allocate verbose log buffer\n");
+		ivpu_err(vdev, "Failed to create verbose log buffer\n");
 		ret = -ENOMEM;
 		goto err_free_log_crit;
 	}
 
 	if (fw->shave_nn_size) {
-		fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
-							  fw->shave_nn_size, DRM_IVPU_BO_WC);
+		fw->mem_shave_nn = ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.shave,
+						  fw->shave_nn_size, DRM_IVPU_BO_WC);
 		if (!fw->mem_shave_nn) {
-			ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
+			ivpu_err(vdev, "Failed to create shavenn buffer\n");
 			ret = -ENOMEM;
 			goto err_free_log_verb;
 		}
@@ -296,11 +354,11 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 	return 0;
 
 err_free_log_verb:
-	ivpu_bo_free_internal(fw->mem_log_verb);
+	ivpu_bo_free(fw->mem_log_verb);
err_free_log_crit:
-	ivpu_bo_free_internal(fw->mem_log_crit);
+	ivpu_bo_free(fw->mem_log_crit);
err_free_fw_mem:
-	ivpu_bo_free_internal(fw->mem);
+	ivpu_bo_free(fw->mem);
 	return ret;
 }
 
@@ -309,13 +367,13 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
 	struct ivpu_fw_info *fw = vdev->fw;
 
 	if (fw->mem_shave_nn) {
-		ivpu_bo_free_internal(fw->mem_shave_nn);
+		ivpu_bo_free(fw->mem_shave_nn);
 		fw->mem_shave_nn = NULL;
 	}
 
-	ivpu_bo_free_internal(fw->mem_log_verb);
-	ivpu_bo_free_internal(fw->mem_log_crit);
-	ivpu_bo_free_internal(fw->mem);
+	ivpu_bo_free(fw->mem_log_verb);
+	ivpu_bo_free(fw->mem_log_crit);
+	ivpu_bo_free(fw->mem);
 
 	fw->mem_log_verb = NULL;
 	fw->mem_log_crit = NULL;
@@ -461,6 +519,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->punit_telemetry_sram_size);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
 		 boot_params->vpu_telemetry_enable);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_scheduling_mode = 0x%x\n",
+		 boot_params->vpu_scheduling_mode);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
 		 boot_params->dvfs_mode);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
@@ -469,6 +529,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->d0i3_residency_time_us);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
 		 boot_params->d0i3_entry_vpu_ts);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
+		 boot_params->system_time_us);
 }
 
 void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -480,11 +542,14 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 		boot_params->d0i3_residency_time_us =
 			ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
 		boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
+		boot_params->system_time_us = ktime_to_us(ktime_get_real());
 
 		ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
 			 boot_params->d0i3_residency_time_us);
 		ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
 			 boot_params->d0i3_entry_vpu_ts);
+		ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
+			 boot_params->system_time_us);
 
 		boot_params->save_restore_ret_address = 0;
 		vdev->pm->is_warmboot = true;
@@ -496,7 +561,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
 	boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
-	boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
+	boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
 
 	/*
 	 * This param is a debug firmware feature.  It switches default clock
@@ -519,8 +584,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
 	boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
 
-	boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
-	boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+		boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
+		boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
+	}
 
 	/* Allow configuration for L2C_PAGE_TABLE with boot param value */
 	boot_params->autoconfig = 1;
@@ -553,15 +620,19 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr;
 	boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb);
 
-	boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
-	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
-	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+	boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev);
+	boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev);
+	boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev);
+	boot_params->vpu_scheduling_mode = vdev->fw->sched_mode;
+	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+		boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS;
 	boot_params->dvfs_mode = vdev->fw->dvfs_mode;
 	if (!IVPU_WA(disable_d0i3_msg))
 		boot_params->d0i3_delayed_entry = 1;
 	boot_params->d0i3_residency_time_us = 0;
 	boot_params->d0i3_entry_vpu_ts = 0;
+	boot_params->system_time_us = ktime_to_us(ktime_get_real());
 
 	wmb(); /* Flush WC buffers after writing bootparams */
 
 	ivpu_fw_boot_params_print(vdev, boot_params);
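ivpu_fw_parse() now rejects firmware whose read-only section is not fully contained in the loaded image; is_within_range() is a plain containment test on [addr, addr + size). A worked example with concrete numbers, assuming an image loaded at 0x80000000 with size 0x100000:

#include <linux/types.h>

/* Containment test equivalent to is_within_range() above */
static bool example_within(u64 addr, u64 size, u64 start, u64 len)
{
	return addr >= start && addr + size <= start + len;
}

/* A read-only section at 0x80010000 of size 0x2000 passes:
 *   0x80010000 + 0x2000 = 0x80012000 <= 0x80100000.
 * One at 0x800ff000 of size 0x2000 fails:
 *   0x800ff000 + 0x2000 = 0x80101000 >  0x80100000. */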
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 66b60fa161b5..1d0b2bd9d65c 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -1,11 +1,16 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
 */
 
 #ifndef __IVPU_FW_H__
 #define __IVPU_FW_H__
 
+#include "vpu_jsm_api.h"
+
+#define FW_VERSION_HEADER_SIZE SZ_4K
+#define FW_VERSION_STR_SIZE SZ_256
+
 struct ivpu_device;
 struct ivpu_bo;
 struct vpu_boot_params;
@@ -13,6 +18,7 @@ struct vpu_boot_params;
 struct ivpu_fw_info {
 	const struct firmware *file;
 	const char *name;
+	char version[FW_VERSION_STR_SIZE];
 	struct ivpu_bo *mem;
 	struct ivpu_bo *mem_shave_nn;
 	struct ivpu_bo *mem_log_crit;
@@ -28,6 +34,11 @@ struct ivpu_fw_info {
 	u32 trace_destination_mask;
 	u64 trace_hw_component_mask;
 	u32 dvfs_mode;
+	u32 primary_preempt_buf_size;
+	u32 secondary_preempt_buf_size;
+	u64 read_only_addr;
+	u32 read_only_size;
+	u32 sched_mode;
 };
 
 int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c
index f6770f5e82a2..337c906b0210 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.c
+++ b/drivers/accel/ivpu/ivpu_fw_log.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
 */
 
 #include <linux/ctype.h>
@@ -15,19 +15,19 @@
 #include "ivpu_fw_log.h"
 #include "ivpu_gem.h"
 
-#define IVPU_FW_LOG_LINE_LENGTH 256
+#define IVPU_FW_LOG_LINE_LENGTH	256
 
-unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR;
-module_param(ivpu_log_level, uint, 0444);
-MODULE_PARM_DESC(ivpu_log_level,
-		 "VPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
+unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR;
+module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444);
+MODULE_PARM_DESC(fw_log_level,
+		 "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
 		 " info=" __stringify(IVPU_FW_LOG_INFO)
 		 " warn=" __stringify(IVPU_FW_LOG_WARN)
 		 " error=" __stringify(IVPU_FW_LOG_ERROR)
 		 " fatal=" __stringify(IVPU_FW_LOG_FATAL));
 
-static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
-		      struct vpu_tracing_buffer_header **log_header)
+static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
+			  struct vpu_tracing_buffer_header **out_log)
 {
 	struct vpu_tracing_buffer_header *log;
 
@@ -48,7 +48,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
 		return -EINVAL;
 	}
 
-	*log_header = log;
+	*out_log = log;
 	*offset += log->size;
 
 	ivpu_dbg(vdev, FW_BOOT,
@@ -59,7 +59,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
 	return 0;
 }
 
-static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
+static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p)
 {
 	char line[IVPU_FW_LOG_LINE_LENGTH];
 	u32 index = 0;
@@ -87,56 +87,89 @@ static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
 	}
 	line[index] = 0;
 	if (index != 0)
-		drm_printf(p, "%s\n", line);
+		drm_printf(p, "%s", line);
 }
 
-static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log,
-				const char *prefix, bool only_new_msgs, struct drm_printer *p)
+static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix,
+				bool only_new_msgs, struct drm_printer *p)
 {
-	char *log_buffer = (void *)log + log->header_size;
-	u32 log_size = log->size - log->header_size;
-	u32 log_start = log->read_index;
-	u32 log_end = log->write_index;
-
-	if (!(log->write_index || log->wrap_count) ||
-	    (log->write_index == log->read_index && only_new_msgs)) {
-		drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
-		return;
+	char *log_data = (void *)log + log->header_size;
+	u32 data_size = log->size - log->header_size;
+	u32 log_start = only_new_msgs ? READ_ONCE(log->read_index) : 0;
+	u32 log_end = READ_ONCE(log->write_index);
+
+	if (log->wrap_count == log->read_wrap_count) {
+		if (log_end <= log_start) {
+			drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
+			return;
+		}
+	} else if (log->wrap_count == log->read_wrap_count + 1) {
+		if (log_end > log_start)
+			log_start = log_end;
+	} else {
+		log_start = log_end;
 	}
 
 	drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name);
-	if (log->write_index > log->read_index) {
-		buffer_print(log_buffer + log_start, log_end - log_start, p);
+	if (log_end > log_start) {
+		fw_log_print_lines(log_data + log_start, log_end - log_start, p);
 	} else {
-		buffer_print(log_buffer + log_end, log_size - log_end, p);
-		buffer_print(log_buffer, log_end, p);
+		fw_log_print_lines(log_data + log_start, data_size - log_start, p);
+		fw_log_print_lines(log_data, log_end, p);
 	}
-	drm_printf(p, "\x1b[0m");
+	drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */
 	drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name);
 }
 
-void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+static void
+fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name,
+		       struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p)
 {
-	struct vpu_tracing_buffer_header *log_header;
+	struct vpu_tracing_buffer_header *log;
 	u32 next = 0;
 
-	while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
-		fw_log_print_buffer(vdev, log_header, "VPU critical", only_new_msgs, p);
+	while (fw_log_from_bo(vdev, bo, &next, &log) == 0)
+		fw_log_print_buffer(log, name, only_new_msgs, p);
+}
+
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+{
+	fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p);
+	fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p);
+}
+
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev)
+{
+	struct vpu_tracing_buffer_header *log;
+	u32 next;
+
+	next = 0;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+		log->read_index = READ_ONCE(log->write_index);
+		log->read_wrap_count = READ_ONCE(log->wrap_count);
+	}
 
 	next = 0;
-	while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
-		fw_log_print_buffer(vdev, log_header, "VPU verbose", only_new_msgs, p);
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+		log->read_index = READ_ONCE(log->write_index);
+		log->read_wrap_count = READ_ONCE(log->wrap_count);
+	}
 }
 
-void ivpu_fw_log_clear(struct ivpu_device *vdev)
+void ivpu_fw_log_reset(struct ivpu_device *vdev)
 {
-	struct vpu_tracing_buffer_header *log_header;
-	u32 next = 0;
+	struct vpu_tracing_buffer_header *log;
+	u32 next;
 
-	while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
-		log_header->read_index = log_header->write_index;
+	next = 0;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+		log->read_index = 0;
+		log->read_wrap_count = 0;
+	}
 
 	next = 0;
-	while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
-		log_header->read_index = log_header->write_index;
+	while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+		log->read_index = 0;
+		log->read_wrap_count = 0;
+	}
 }
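The rewritten fw_log_print_buffer() pairs the writer-maintained wrap_count with a reader-side read_wrap_count so stale data is not replayed: equal counts mean a simple [read_index, write_index) span, a writer one lap ahead means the unread data wraps around the end of the buffer, and anything older than one lap has been overwritten. A simplified reader for that scheme; the structure is illustrative, not the actual vpu_tracing_buffer_header layout:

#include <linux/compiler.h>
#include <linux/types.h>

struct example_ring {
	u32 write_index;	/* writer position, wraps to 0 */
	u32 wrap_count;		/* bumped by the writer on each wrap */
	u32 read_index;		/* last position the reader consumed */
	u32 read_wrap_count;	/* wrap_count when the reader caught up */
};

/* Returns the valid span as [*start, *end); when *start >= *end the
 * data wraps: read [*start, size) followed by [0, *end). */
static void example_ring_span(struct example_ring *r, u32 *start, u32 *end)
{
	*end = READ_ONCE(r->write_index);
	*start = r->read_index;

	if (r->wrap_count == r->read_wrap_count)
		return;			/* no wrap since the last read */

	if (r->wrap_count == r->read_wrap_count + 1 && *end <= *start)
		return;			/* wrapped once, unread tail intact */

	*start = *end;			/* reader was lapped: at most one
					 * full buffer of history remains */
}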
+1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_FW_LOG_H__ @@ -8,8 +8,6 @@ #include <linux/types.h> -#include <drm/drm_print.h> - #include "ivpu_drv.h" #define IVPU_FW_LOG_DEFAULT 0 @@ -19,20 +17,15 @@ #define IVPU_FW_LOG_ERROR 4 #define IVPU_FW_LOG_FATAL 5 -extern unsigned int ivpu_log_level; - #define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M #define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M #define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K -void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); -void ivpu_fw_log_clear(struct ivpu_device *vdev); +extern unsigned int ivpu_fw_log_level; -static inline void ivpu_fw_log_dump(struct ivpu_device *vdev) -{ - struct drm_printer p = drm_info_printer(vdev->drm.dev); +void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); +void ivpu_fw_log_mark_read(struct ivpu_device *vdev); +void ivpu_fw_log_reset(struct ivpu_device *vdev); - ivpu_fw_log_print(vdev, false, &p); -} #endif /* __IVPU_FW_LOG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index e9ddbe9f50eb..16178054e629 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -172,8 +172,7 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz return &bo->base.base; } -static struct ivpu_bo * -ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) +static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags) { struct drm_gem_shmem_object *shmem; struct ivpu_bo *bo; @@ -201,7 +200,7 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) return bo; } -static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) +static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; @@ -224,7 +223,7 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range); } -static void ivpu_bo_free(struct drm_gem_object *obj) +static void ivpu_gem_bo_free(struct drm_gem_object *obj) { struct ivpu_device *vdev = to_ivpu_device(obj->dev); struct ivpu_bo *bo = to_ivpu_bo(obj); @@ -245,8 +244,8 @@ static void ivpu_bo_free(struct drm_gem_object *obj) } static const struct drm_gem_object_funcs ivpu_gem_funcs = { - .free = ivpu_bo_free, - .open = ivpu_bo_open, + .free = ivpu_gem_bo_free, + .open = ivpu_gem_bo_open, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, @@ -272,9 +271,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi if (size == 0) return -EINVAL; - bo = ivpu_bo_create(vdev, size, args->flags); + bo = ivpu_bo_alloc(vdev, size, args->flags); if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)", + ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)", bo, file_priv->ctx.id, args->size, args->flags); return PTR_ERR(bo); } @@ -289,33 +288,28 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi } struct ivpu_bo * -ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags) +ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + struct ivpu_addr_range *range, u64 size, u32 flags) { - 
const struct ivpu_addr_range *range; - struct ivpu_addr_range fixed_range; struct iosys_map map; struct ivpu_bo *bo; int ret; - drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr)); - drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); + if (drm_WARN_ON(&vdev->drm, !range)) + return NULL; - if (vpu_addr) { - fixed_range.start = vpu_addr; - fixed_range.end = vpu_addr + size; - range = &fixed_range; - } else { - range = &vdev->hw->ranges.global; - } + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->start)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); - bo = ivpu_bo_create(vdev, size, flags); + bo = ivpu_bo_alloc(vdev, size, flags); if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", - bo, vpu_addr, size, flags); + ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", + bo, range->start, size, flags); return NULL; } - ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range); + ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range); if (ret) goto err_put; @@ -323,11 +317,14 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla if (ret) goto err_put; - dma_resv_lock(bo->base.base.resv, NULL); - ret = drm_gem_shmem_vmap(&bo->base, &map); - dma_resv_unlock(bo->base.base.resv); - if (ret) - goto err_put; + if (flags & DRM_IVPU_BO_MAPPABLE) { + dma_resv_lock(bo->base.base.resv, NULL); + ret = drm_gem_shmem_vmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); + + if (ret) + goto err_put; + } return bo; @@ -336,13 +333,20 @@ err_put: return NULL; } -void ivpu_bo_free_internal(struct ivpu_bo *bo) +struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags) +{ + return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags); +} + +void ivpu_bo_free(struct ivpu_bo *bo) { struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); - dma_resv_lock(bo->base.base.resv, NULL); - drm_gem_shmem_vunmap(&bo->base, &map); - dma_resv_unlock(bo->base.base.resv); + if (bo->flags & DRM_IVPU_BO_MAPPABLE) { + dma_resv_lock(bo->base.base.resv, NULL); + drm_gem_shmem_vunmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); + } drm_gem_object_put(&bo->base.base); } @@ -380,6 +384,9 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */ + timeout += 1; + obj = drm_gem_object_lookup(file, args->handle); if (!obj) return -EINVAL; @@ -402,7 +409,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) mutex_lock(&bo->lock); drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", - bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size, + bo, bo->ctx ? 
bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size, bo->flags, kref_read(&bo->base.base.refcount)); if (bo->base.pages) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index a8559211c70d..d975000abd78 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -28,8 +28,10 @@ int ivpu_bo_pin(struct ivpu_bo *bo); void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); -struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); -void ivpu_bo_free_internal(struct ivpu_bo *bo); +struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + struct ivpu_addr_range *range, u64 size, u32 flags); +struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags); +void ivpu_bo_free(struct ivpu_bo *bo); int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -58,14 +60,17 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) return bo->flags & DRM_IVPU_BO_CACHE_MASK; } -static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) +static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) { - return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; + return to_ivpu_device(bo->base.base.dev); } -static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) +static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) { - return to_ivpu_device(bo->base.base.dev); + if (ivpu_is_force_snoop_enabled(ivpu_bo_to_vdev(bo))) + return true; + + return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; } static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c new file mode 100644 index 000000000000..4e1054f3466e --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - 2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" + +#include <linux/dmi.h> + +static char *platform_to_str(u32 platform) +{ + switch (platform) { + case IVPU_PLATFORM_SILICON: + return "SILICON"; + case IVPU_PLATFORM_SIMICS: + return "SIMICS"; + case IVPU_PLATFORM_FPGA: + return "FPGA"; + default: + return "Invalid platform"; + } +} + +static const struct dmi_system_id dmi_platform_simulation[] = { + { + .ident = "Intel Simics", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"), + DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_BOARD_SERIAL, "123456789"), + }, + }, + { + .ident = "Intel Simics", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "Simics"), + }, + }, + { } +}; + +static void platform_init(struct ivpu_device *vdev) +{ + if (dmi_check_system(dmi_platform_simulation)) + vdev->platform = IVPU_PLATFORM_SIMICS; + else + vdev->platform = IVPU_PLATFORM_SILICON; + + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", + platform_to_str(vdev->platform), vdev->platform); +} + +static void wa_init(struct ivpu_device *vdev) +{ + vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.clear_runtime_mem = false; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev); + + if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && + ivpu_revision(vdev) < 
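platform_init() above keys simulator detection off DMI board strings provided by firmware. A self-contained sketch of the same dmi_check_system() pattern (the board string and function names here are illustrative, not from the driver):

#include <linux/dmi.h>
#include <linux/types.h>

static const struct dmi_system_id example_sim_boards[] = {
	{
		.ident = "Example simulator board",
		.matches = {
			DMI_MATCH(DMI_BOARD_NAME, "Simics"),
		},
	},
	{ }
};

static bool example_running_on_simulator(void)
{
	/* dmi_check_system() returns the number of matched entries. */
	return dmi_check_system(example_sim_boards) > 0;
}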
IVPU_HW_IP_REV_LNL_B0) + vdev->wa.disable_clock_relinquish = true; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->wa.wp0_during_power_up = true; + + IVPU_PRINT_WA(punit_disabled); + IVPU_PRINT_WA(clear_runtime_mem); + IVPU_PRINT_WA(interrupt_clear_with_0); + IVPU_PRINT_WA(disable_clock_relinquish); + IVPU_PRINT_WA(wp0_during_power_up); +} + +static void timeouts_init(struct ivpu_device *vdev) +{ + if (ivpu_test_mode & IVPU_TEST_MODE_DISABLE_TIMEOUTS) { + vdev->timeout.boot = -1; + vdev->timeout.jsm = -1; + vdev->timeout.tdr = -1; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = -1; + } else if (ivpu_is_fpga(vdev)) { + vdev->timeout.boot = 100000; + vdev->timeout.jsm = 50000; + vdev->timeout.tdr = 2000000; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 500; + vdev->timeout.state_dump_msg = 10; + } else if (ivpu_is_simics(vdev)) { + vdev->timeout.boot = 50; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 10000; + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 100; + vdev->timeout.state_dump_msg = 10; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->timeout.autosuspend = 10; + else + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 5; + vdev->timeout.state_dump_msg = 10; + } +} + +static void memory_ranges_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M); + ivpu_hw_range_init(&vdev->hw->ranges.user, 0x88000000, 511 * SZ_1M); + ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x180000000, SZ_2G); + ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_128G); + } else { + ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M); + ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000, SZ_2G); + ivpu_hw_range_init(&vdev->hw->ranges.user, 0x100000000, SZ_256G); + vdev->hw->ranges.dma = vdev->hw->ranges.user; + } +} + +static int wp_enable(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_wp_drive(vdev, true); +} + +static int wp_disable(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_wp_drive(vdev, false); +} + +int ivpu_hw_power_up(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(wp0_during_power_up)) { + /* WP requests may fail when powering down, so issue WP 0 here */ + ret = wp_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable workpoint: %d\n", ret); + } + + ret = ivpu_hw_btrs_d0i3_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); + + ret = wp_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable workpoint: %d\n", ret); + return ret; + } + + if (ivpu_hw_btrs_gen(vdev) >= IVPU_HW_BTRS_LNL) { + if (IVPU_WA(disable_clock_relinquish)) + ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev); + ivpu_hw_btrs_profiling_freq_reg_set_lnl(vdev); + ivpu_hw_btrs_ats_print_lnl(vdev); + } + + ret = ivpu_hw_ip_host_ss_configure(vdev); + if (ret) { + ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); + return ret; + } + + ivpu_hw_ip_idle_gen_disable(vdev); + + ret = ivpu_hw_btrs_wait_for_clock_res_own_ack(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for clock resource own ACK\n"); + return ret; + } + + ret = ivpu_hw_ip_pwr_domain_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); + return ret; + } + + ret = ivpu_hw_ip_host_ss_axi_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable AXI: %d\n", 
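wa_init() collects all hardware workarounds as booleans in one struct, and IVPU_WA()/IVPU_PRINT_WA() keep the call sites and boot log readable. A rough sketch of that shape (the struct and macro bodies here are assumptions; only the idea is taken from the diff):

#include <linux/printk.h>
#include <linux/types.h>

struct example_wa {
	bool punit_disabled;
	bool wp0_during_power_up;
};

static struct example_wa example_wa_flags;

#define EXAMPLE_WA(name)       (example_wa_flags.name)
#define EXAMPLE_PRINT_WA(name) \
	do { \
		if (EXAMPLE_WA(name)) \
			pr_info("Applying workaround: %s\n", #name); \
	} while (0)

static void example_wa_init(bool is_fpga, bool is_a_step)
{
	example_wa_flags.punit_disabled = is_fpga;
	example_wa_flags.wp0_during_power_up = is_a_step;

	EXAMPLE_PRINT_WA(punit_disabled);
	EXAMPLE_PRINT_WA(wp0_during_power_up);
}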
ret); + return ret; + } + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_LNL) + ivpu_hw_btrs_set_port_arbitration_weights_lnl(vdev); + + ret = ivpu_hw_ip_top_noc_enable(vdev); + if (ret) + ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); + + return ret; +} + +static void save_d0i3_entry_timestamp(struct ivpu_device *vdev) +{ + vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); + vdev->hw->d0i3_entry_vpu_ts = ivpu_hw_ip_read_perf_timer_counter(vdev); +} + +int ivpu_hw_reset(struct ivpu_device *vdev) +{ + int ret = 0; + + if (ivpu_hw_btrs_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU IP\n"); + ret = -EIO; + } + + if (wp_disable(vdev)) { + ivpu_err(vdev, "Failed to disable workpoint\n"); + ret = -EIO; + } + + return ret; +} + +int ivpu_hw_power_down(struct ivpu_device *vdev) +{ + int ret = 0; + + save_d0i3_entry_timestamp(vdev); + + if (!ivpu_hw_is_idle(vdev)) + ivpu_warn(vdev, "NPU not idle during power down\n"); + + if (ivpu_hw_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU\n"); + ret = -EIO; + } + + if (ivpu_hw_btrs_d0i3_enable(vdev)) { + ivpu_err(vdev, "Failed to enter D0I3\n"); + ret = -EIO; + } + + return ret; +} + +int ivpu_hw_init(struct ivpu_device *vdev) +{ + ivpu_hw_btrs_info_init(vdev); + ivpu_hw_btrs_freq_ratios_init(vdev); + memory_ranges_init(vdev); + platform_init(vdev); + wa_init(vdev); + timeouts_init(vdev); + atomic_set(&vdev->hw->firewall_irq_counter, 0); + + return 0; +} + +int ivpu_hw_boot_fw(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_ip_snoop_disable(vdev); + ivpu_hw_ip_tbu_mmu_enable(vdev); + ret = ivpu_hw_ip_soc_cpu_boot(vdev); + if (ret) + ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret); + + return ret; +} + +void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; + return; + } + + if (enable) + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH; + else + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; +} + +void ivpu_irq_handlers_init(struct ivpu_device *vdev) +{ + INIT_KFIFO(vdev->hw->irq.fifo); + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_37xx; + else + vdev->hw->irq.ip_irq_handler = ivpu_hw_ip_irq_handler_40xx; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_mtl; + else + vdev->hw->irq.btrs_irq_handler = ivpu_hw_btrs_irq_handler_lnl; +} + +void ivpu_hw_irq_enable(struct ivpu_device *vdev) +{ + kfifo_reset(&vdev->hw->irq.fifo); + ivpu_hw_ip_irq_enable(vdev); + ivpu_hw_btrs_irq_enable(vdev); +} + +void ivpu_hw_irq_disable(struct ivpu_device *vdev) +{ + ivpu_hw_btrs_irq_disable(vdev); + ivpu_hw_ip_irq_disable(vdev); +} + +irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr) +{ + struct ivpu_device *vdev = ptr; + bool ip_handled, btrs_handled; + + ivpu_hw_btrs_global_int_disable(vdev); + + btrs_handled = ivpu_hw_btrs_irq_handler(vdev, irq); + if (!ivpu_hw_is_idle((vdev)) || !btrs_handled) + ip_handled = ivpu_hw_ip_irq_handler(vdev, irq); + else + ip_handled = false; + + /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ + ivpu_hw_btrs_global_int_enable(vdev); + + if (!kfifo_is_empty(&vdev->hw->irq.fifo)) + return IRQ_WAKE_THREAD; + if (ip_handled || btrs_handled) + return IRQ_HANDLED; + return IRQ_NONE; +} diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index b2909168a0a6..fc4dbfc980c8 100644 --- 
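ivpu_hw_irq_handler() above only returns IRQ_WAKE_THREAD when the per-device kfifo holds queued work sources (the IVPU_HW_IRQ_SRC_* ids). A stand-alone sketch of this hard-irq/threaded-irq split, assuming nothing beyond the standard kfifo and interrupt APIs (all example_* names invented):

#include <linux/interrupt.h>
#include <linux/kfifo.h>

#define EXAMPLE_IRQ_SRC_IPC 1

static DEFINE_KFIFO(example_irq_fifo, u8, 1024);

static irqreturn_t example_irq(int irq, void *data)
{
	/* Hard handler: ack at the source, then queue deferred work by id
	 * (a real handler queues conditionally per interrupt source). */
	kfifo_put(&example_irq_fifo, EXAMPLE_IRQ_SRC_IPC);

	return kfifo_is_empty(&example_irq_fifo) ? IRQ_HANDLED : IRQ_WAKE_THREAD;
}

static irqreturn_t example_irq_thread(int irq, void *data)
{
	u8 src;

	while (kfifo_get(&example_irq_fifo, &src))
		;	/* dispatch on src, e.g. IPC receive processing */

	return IRQ_HANDLED;
}

/* Wiring: request_threaded_irq(irq, example_irq, example_irq_thread,
 *                              IRQF_SHARED, "example", dev); */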
a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,38 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_HW_H__ #define __IVPU_HW_H__ +#include <linux/kfifo.h> + #include "ivpu_drv.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" -struct ivpu_hw_ops { - int (*info_init)(struct ivpu_device *vdev); - int (*power_up)(struct ivpu_device *vdev); - int (*boot_fw)(struct ivpu_device *vdev); - int (*power_down)(struct ivpu_device *vdev); - int (*reset)(struct ivpu_device *vdev); - bool (*is_idle)(struct ivpu_device *vdev); - int (*wait_for_idle)(struct ivpu_device *vdev); - void (*wdt_disable)(struct ivpu_device *vdev); - void (*diagnose_failure)(struct ivpu_device *vdev); - u32 (*profiling_freq_get)(struct ivpu_device *vdev); - void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable); - u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); - u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); - void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id); - u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev); - u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev); - void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr); - void (*irq_clear)(struct ivpu_device *vdev); - void (*irq_enable)(struct ivpu_device *vdev); - void (*irq_disable)(struct ivpu_device *vdev); - irqreturn_t (*irq_handler)(int irq, void *ptr); -}; +#define IVPU_HW_IRQ_FIFO_LENGTH 1024 + +#define IVPU_HW_IRQ_SRC_IPC 1 +#define IVPU_HW_IRQ_SRC_MMU_EVTQ 2 +#define IVPU_HW_IRQ_SRC_DCT 3 struct ivpu_addr_range { resource_size_t start; @@ -40,7 +24,11 @@ struct ivpu_addr_range { }; struct ivpu_hw_info { - const struct ivpu_hw_ops *ops; + struct { + bool (*btrs_irq_handler)(struct ivpu_device *vdev, int irq); + bool (*ip_irq_handler)(struct ivpu_device *vdev, int irq); + DECLARE_KFIFO(fifo, u8, IVPU_HW_IRQ_FIFO_LENGTH); + } irq; struct { struct ivpu_addr_range global; struct ivpu_addr_range user; @@ -63,137 +51,110 @@ struct ivpu_hw_info { int dma_bits; ktime_t d0i3_entry_host_ts; u64 d0i3_entry_vpu_ts; + atomic_t firewall_irq_counter; }; -extern const struct ivpu_hw_ops ivpu_hw_37xx_ops; -extern const struct ivpu_hw_ops ivpu_hw_40xx_ops; +int ivpu_hw_init(struct ivpu_device *vdev); +int ivpu_hw_power_up(struct ivpu_device *vdev); +int ivpu_hw_power_down(struct ivpu_device *vdev); +int ivpu_hw_reset(struct ivpu_device *vdev); +int ivpu_hw_boot_fw(struct ivpu_device *vdev); +void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable); +void ivpu_irq_handlers_init(struct ivpu_device *vdev); +void ivpu_hw_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_irq_disable(struct ivpu_device *vdev); +irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr); -static inline int ivpu_hw_info_init(struct ivpu_device *vdev) +static inline u32 ivpu_hw_btrs_irq_handler(struct ivpu_device *vdev, int irq) { - return vdev->hw->ops->info_init(vdev); -}; - -static inline int ivpu_hw_power_up(struct ivpu_device *vdev) -{ - ivpu_dbg(vdev, PM, "HW power up\n"); - - return vdev->hw->ops->power_up(vdev); -}; + return vdev->hw->irq.btrs_irq_handler(vdev, irq); +} -static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ip_irq_handler(struct ivpu_device *vdev, int irq) { - return vdev->hw->ops->boot_fw(vdev); -}; + return 
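The hunk above deletes the ivpu_hw_ops indirection entirely: most entry points become static inlines that call the buttress/IP helpers directly, and only the two IRQ handlers remain function pointers, chosen once per hardware generation in ivpu_irq_handlers_init(). In miniature, the before/after looks roughly like this (names invented):

/* Before: every call bounced through a per-generation vtable. */
struct example_ops {
	bool (*is_idle)(void *dev);
};

/* After: a direct, inlinable call; only truly per-generation hooks
 * (the IRQ handlers) keep a function pointer. */
static bool example_btrs_is_idle(void *dev)
{
	return true;	/* stub standing in for a register read */
}

static inline bool example_hw_is_idle(void *dev)
{
	return example_btrs_is_idle(dev);
}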
vdev->hw->irq.ip_irq_handler(vdev, irq); +} -static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) +static inline void ivpu_hw_range_init(struct ivpu_addr_range *range, u64 start, u64 size) { - return vdev->hw->ops->is_idle(vdev); -}; + range->start = start; + range->end = start + size; +} -static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev) +static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) { - return vdev->hw->ops->wait_for_idle(vdev); -}; + return range->end - range->start; +} -static inline int ivpu_hw_power_down(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) { - ivpu_dbg(vdev, PM, "HW power down\n"); - - return vdev->hw->ops->power_down(vdev); -}; + return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); +} -static inline int ivpu_hw_reset(struct ivpu_device *vdev) +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { - ivpu_dbg(vdev, PM, "HW reset\n"); - - return vdev->hw->ops->reset(vdev); -}; + ivpu_hw_ip_irq_clear(vdev); +} -static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) +static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) { - vdev->hw->ops->wdt_disable(vdev); -}; + return ivpu_hw_btrs_pll_freq_get(vdev); +} static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) { - return vdev->hw->ops->profiling_freq_get(vdev); -}; - -static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) -{ - return vdev->hw->ops->profiling_freq_drive(vdev, enable); -}; - -/* Register indirect accesses */ -static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_pll_freq_get(vdev); -}; - -static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_telemetry_offset_get(vdev); -}; - -static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_telemetry_size_get(vdev); -}; - -static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return vdev->hw->ops->reg_telemetry_enable_get(vdev); -}; + return vdev->hw->pll.profiling_freq; +} -static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id) +static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) { - vdev->hw->ops->reg_db_set(vdev, db_id); -}; + ivpu_hw_ip_diagnose_failure(vdev); + ivpu_hw_btrs_diagnose_failure(vdev); +} -static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_telemetry_offset_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_ipc_rx_addr_get(vdev); -}; + return ivpu_hw_btrs_telemetry_offset_get(vdev); +} -static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev) +static inline u32 ivpu_hw_telemetry_size_get(struct ivpu_device *vdev) { - return vdev->hw->ops->reg_ipc_rx_count_get(vdev); -}; + return ivpu_hw_btrs_telemetry_size_get(vdev); +} -static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +static inline u32 ivpu_hw_telemetry_enable_get(struct ivpu_device *vdev) { - vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr); -}; + return ivpu_hw_btrs_telemetry_enable_get(vdev); +} -static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) { - vdev->hw->ops->irq_clear(vdev); -}; + return ivpu_hw_btrs_is_idle(vdev); +} -static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev) +static inline int ivpu_hw_wait_for_idle(struct 
ivpu_device *vdev) { - vdev->hw->ops->irq_enable(vdev); -}; + return ivpu_hw_btrs_wait_for_idle(vdev); +} -static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev) +static inline void ivpu_hw_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) { - vdev->hw->ops->irq_disable(vdev); -}; + ivpu_hw_ip_ipc_tx_set(vdev, vpu_addr); +} -static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size) +static inline void ivpu_hw_db_set(struct ivpu_device *vdev, u32 db_id) { - range->start = start; - range->end = start + size; + ivpu_hw_ip_db_set(vdev, db_id); } -static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) +static inline u32 ivpu_hw_ipc_rx_addr_get(struct ivpu_device *vdev) { - return range->end - range->start; + return ivpu_hw_ip_ipc_rx_addr_get(vdev); } -static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) +static inline u32 ivpu_hw_ipc_rx_count_get(struct ivpu_device *vdev) { - vdev->hw->ops->diagnose_failure(vdev); + return ivpu_hw_ip_ipc_rx_count_get(vdev); } #endif /* __IVPU_HW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c deleted file mode 100644 index 89af1006df55..000000000000 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ /dev/null @@ -1,1066 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020-2023 Intel Corporation - */ - -#include "ivpu_drv.h" -#include "ivpu_fw.h" -#include "ivpu_hw_37xx_reg.h" -#include "ivpu_hw_reg_io.h" -#include "ivpu_hw.h" -#include "ivpu_ipc.h" -#include "ivpu_mmu.h" -#include "ivpu_pm.h" - -#define TILE_FUSE_ENABLE_BOTH 0x0 -#define TILE_SKU_BOTH_MTL 0x3630 - -/* Work point configuration values */ -#define CONFIG_1_TILE 0x01 -#define CONFIG_2_TILE 0x02 -#define PLL_RATIO_5_3 0x01 -#define PLL_RATIO_4_3 0x02 -#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio)) -#define WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_5_3) -#define WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_4_3) -#define WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_5_3) -#define WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_4_3) -#define WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) - -#define PLL_REF_CLK_FREQ (50 * 1000000) -#define PLL_SIMULATION_FREQ (10 * 1000000) -#define PLL_PROF_CLK_FREQ (38400 * 1000) -#define PLL_DEFAULT_EPP_VALUE 0x80 - -#define TIM_SAFE_ENABLE 0xf1d0dead -#define TIM_WATCHDOG_RESET_VALUE 0xffffffff - -#define TIMEOUT_US (150 * USEC_PER_MSEC) -#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) -#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) -#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) - -#define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) - -#define ICB_1_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ - (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) - -#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) - -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ - 
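The REGB_POLL_FLD()/REGV_POLL_FLD() calls throughout the deleted code below poll a single register field until it matches an expected value or a timeout elapses. The stock iopoll helper expresses the same thing; a generic sketch (only readl_poll_timeout() is a real API here, the wrapper is invented):

#include <linux/iopoll.h>

static int example_poll_field(void __iomem *reg, u32 mask, u32 exp_val,
			      u64 timeout_us)
{
	u32 val;

	/* Re-reads the register, sleeping ~50 us between reads, until the
	 * masked field equals exp_val or timeout_us expires (-ETIMEDOUT). */
	return readl_poll_timeout(reg, val, (val & mask) == exp_val,
				  50, timeout_us);
}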
(REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) - -#define BUTTRESS_ALL_IRQ_MASK (BUTTRESS_IRQ_MASK | \ - (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE))) - -#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) -#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) - -#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ - (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) - -static void ivpu_hw_wa_init(struct ivpu_device *vdev) -{ - vdev->wa.punit_disabled = false; - vdev->wa.clear_runtime_mem = false; - vdev->wa.d3hot_after_power_off = true; - - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK); - if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) { - /* Writing 1s does not clear the interrupt status register */ - vdev->wa.interrupt_clear_with_0 = true; - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); - } - - IVPU_PRINT_WA(punit_disabled); - IVPU_PRINT_WA(clear_runtime_mem); - IVPU_PRINT_WA(d3hot_after_power_off); - IVPU_PRINT_WA(interrupt_clear_with_0); -} - -static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) -{ - vdev->timeout.boot = 1000; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 2000; - vdev->timeout.reschedule_suspend = 10; - vdev->timeout.autosuspend = 10; - vdev->timeout.d0i3_entry_msg = 5; -} - -static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); -} - -/* Send KMD initiated workpoint change */ -static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, - u16 target_ratio, u16 config) -{ - int ret; - u32 val; - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) { - ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0, val); - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1, val); - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2); - val = REG_SET_FLD_NUM(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2, val); - - val = REGB_RD32(VPU_37XX_BUTTRESS_WP_REQ_CMD); - val = REG_SET_FLD(VPU_37XX_BUTTRESS_WP_REQ_CMD, SEND, val); - REGB_WR32(VPU_37XX_BUTTRESS_WP_REQ_CMD, val); - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) - ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); - - return ret; -} - -static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable) -{ - u32 exp_val = enable ? 
0x1 : 0x0; - - if (IVPU_WA(punit_disabled)) - return 0; - - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); -} - -static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) -{ - if (IVPU_WA(punit_disabled)) - return 0; - - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); -} - -static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u8 fuse_min_ratio, fuse_max_ratio, fuse_pn_ratio; - u32 fmin_fuse, fmax_fuse; - - fmin_fuse = REGB_RD32(VPU_37XX_BUTTRESS_FMIN_FUSE); - fuse_min_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); - fuse_pn_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); - - fmax_fuse = REGB_RD32(VPU_37XX_BUTTRESS_FMAX_FUSE); - fuse_max_ratio = REG_GET_FLD(VPU_37XX_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); - - hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); - hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); - hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); -} - -static int ivpu_hw_37xx_wait_for_vpuip_bar(struct ivpu_device *vdev) -{ - return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100); -} - -static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) -{ - struct ivpu_hw_info *hw = vdev->hw; - u16 target_ratio; - u16 config; - int ret; - - if (IVPU_WA(punit_disabled)) { - ivpu_dbg(vdev, PM, "Skipping PLL request\n"); - return 0; - } - - if (enable) { - target_ratio = hw->pll.pn_ratio; - config = hw->config; - } else { - target_ratio = 0; - config = 0; - } - - ivpu_dbg(vdev, PM, "PLL workpoint request: config 0x%04x pll ratio 0x%x\n", - config, target_ratio); - - ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config); - if (ret) { - ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); - return ret; - } - - ret = ivpu_pll_wait_for_lock(vdev, enable); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL lock\n"); - return ret; - } - - if (enable) { - ret = ivpu_pll_wait_for_status_ready(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); - return ret; - } - - ret = ivpu_hw_37xx_wait_for_vpuip_bar(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for VPUIP bar\n"); - return ret; - } - } - - return 0; -} - -static int ivpu_pll_enable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, true); -} - -static int ivpu_pll_disable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, false); -} - -static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev) -{ - u32 val = 0; - - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val); - - REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val); -} - -static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET); - - if (enable) { - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); - } - - 
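ivpu_pll_cmd_send() above implements a mailbox-style handshake: poll SEND clear (no request in flight), fill the payload registers, set SEND, then poll it clear again to confirm the request was consumed. A condensed sketch using the register offsets visible in the deleted header (0x08/0x0c/0x10 payloads, 0x14 command); everything else is assumed:

#include <linux/bits.h>
#include <linux/iopoll.h>

#define EXAMPLE_WP_TIMEOUT_US (1500 * USEC_PER_MSEC)

static int example_wp_cmd_send(void __iomem *base, u32 payload0, u32 payload1,
			       u32 payload2)
{
	u32 val;
	int ret;

	/* Wait for any previous request to be consumed (SEND == 0). */
	ret = readl_poll_timeout(base + 0x14, val, !(val & BIT(0)),
				 50, EXAMPLE_WP_TIMEOUT_US);
	if (ret)
		return ret;

	writel(payload0, base + 0x08);	/* min/max ratio */
	writel(payload1, base + 0x0c);	/* target ratio, EPP */
	writel(payload2, base + 0x10);	/* config */
	writel(BIT(0), base + 0x14);	/* CMD.SEND */

	/* Confirm the punit picked the request up. */
	return readl_poll_timeout(base + 0x14, val, !(val & BIT(0)),
				  50, EXAMPLE_WP_TIMEOUT_US);
}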
REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val); -} - -static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET); - - if (enable) { - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); - val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); - val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); - } - - REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val); -} - -static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev) -{ - ivpu_boot_host_ss_rst_clr_assert(vdev); - - return ivpu_boot_noc_qreqn_check(vdev, 0x0); -} - -static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev) -{ - REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, 0x0); -} - -static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val); - - ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 
0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, true); -} - -static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); - if (enable) { - val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } else { - val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } - REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val); - - ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, true); -} - -static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); -} - -static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val); -} - -static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) -{ - return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, - exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); -} - -static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val); -} - -static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE); - - if (enable) - val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); - else - val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); - - REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val); -} - -static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) -{ - int ret; - - ivpu_boot_pwr_island_trickle_drive(vdev, true); - ivpu_boot_pwr_island_drive(vdev, true); - - ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); - if (ret) { - ivpu_err(vdev, "Timed out waiting for power island status\n"); - return ret; - } - - ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qrenqn check %d\n", ret); - return ret; - } - - ivpu_boot_host_ss_clk_drive(vdev, true); - ivpu_boot_pwr_island_isolation_drive(vdev, false); - 
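The qreqn/qacceptn/qdeny triples checked above are an AXI "Q-channel" power-state handshake: the host requests a state, the fabric must accept it, and must not deny it. A generic sketch of one such transition (register roles taken from the deleted code, all names invented):

#include <linux/errno.h>
#include <linux/io.h>

static int example_qchannel_drive(void __iomem *qreqn, void __iomem *qacceptn,
				  void __iomem *qdeny, u32 bit, bool enable)
{
	u32 val = readl(qreqn);
	u32 exp = enable ? bit : 0;

	writel(enable ? (val | bit) : (val & ~bit), qreqn);

	if ((readl(qacceptn) & bit) != exp)
		return -EIO;	/* accept did not follow the request */
	if (readl(qdeny) & bit)
		return -EIO;	/* fabric refused the transition */
	return 0;
}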
ivpu_boot_host_ss_rst_drive(vdev, true); - ivpu_boot_dpu_active_drive(vdev, true); - - return ret; -} - -static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); - - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); - val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); - - REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); -} - -static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV); - - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); - - REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val); -} - -static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) -{ - u32 val; - - val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); - val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); - - val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); - REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - - val = vdev->fw->entry_point >> 9; - REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); - - val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val); - REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); - - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? 
"cold boot" : "resume"); -} - -static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL); - if (enable) - val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, I3, val); - else - val = REG_CLR_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, I3, val); - REGB_WR32(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, val); - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); - - return ret; -} - -static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - - hw->tile_fuse = TILE_FUSE_ENABLE_BOTH; - hw->sku = TILE_SKU_BOTH_MTL; - hw->config = WP_CONFIG_2_TILE_4_3_RATIO; - - ivpu_pll_init_frequency_ratios(vdev); - - ivpu_hw_init_range(&hw->ranges.global, 0x80000000, SZ_512M); - ivpu_hw_init_range(&hw->ranges.user, 0xc0000000, 255 * SZ_1M); - ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G); - ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G); - - vdev->platform = IVPU_PLATFORM_SILICON; - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - - return 0; -} - -static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev) -{ - int ret; - u32 val; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); - return ret; - } - - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET); - val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val); - REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val); - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); - - return ret; -} - -static int ivpu_hw_37xx_reset(struct ivpu_device *vdev) -{ - int ret = 0; - - if (ivpu_hw_37xx_ip_reset(vdev)) { - ivpu_err(vdev, "Failed to reset NPU\n"); - ret = -EIO; - } - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - return ret; -} - -static int ivpu_hw_37xx_d0i3_enable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_d0i3_drive(vdev, true); - if (ret) - ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); - - udelay(5); /* VPU requires 5 us to complete the transition */ - - return ret; -} - -static int ivpu_hw_37xx_d0i3_disable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_d0i3_drive(vdev, false); - if (ret) - ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); - - return ret; -} - -static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) -{ - int ret; - - /* PLL requests may fail when powering down, so issue WP 0 here */ - ret = ivpu_pll_disable(vdev); - if (ret) - ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret); - - ret = ivpu_hw_37xx_d0i3_disable(vdev); - if (ret) - ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); - - ret = ivpu_pll_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_configure(vdev); - if (ret) { - ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); - return ret; - } - - /* - * The control circuitry for vpu_idle indication logic powers up active. 
- * To ensure unnecessary low power mode signal from LRT during bring up, - * KMD disables the circuitry prior to bringing up the Main Power island. - */ - ivpu_boot_vpu_idle_gen_disable(vdev); - - ret = ivpu_boot_pwr_domain_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_axi_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_top_noc_enable(vdev); - if (ret) - ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); - - return ret; -} - -static int ivpu_hw_37xx_boot_fw(struct ivpu_device *vdev) -{ - ivpu_boot_no_snoop_enable(vdev); - ivpu_boot_tbu_mmu_enable(vdev); - ivpu_boot_soc_cpu_boot(vdev); - - return 0; -} - -static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev) -{ - u32 val; - - if (IVPU_WA(punit_disabled)) - return true; - - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_STATUS); - return REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, READY, val) && - REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val); -} - -static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); -} - -static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev) -{ - vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); - vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT); -} - -static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev) -{ - int ret = 0; - - ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev); - - if (!ivpu_hw_37xx_is_idle(vdev)) - ivpu_warn(vdev, "VPU not idle during power down\n"); - - if (ivpu_hw_37xx_reset(vdev)) { - ivpu_err(vdev, "Failed to reset VPU\n"); - ret = -EIO; - } - - if (ivpu_hw_37xx_d0i3_enable(vdev)) { - ivpu_err(vdev, "Failed to enter D0I3\n"); - ret = -EIO; - } - - return ret; -} - -static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev) -{ - u32 val; - - /* Enable writing and set non-zero WDT value */ - REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); - - /* Enable writing and disable watchdog timer */ - REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0); - - /* Now clear the timeout interrupt */ - val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG); - val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); - REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); -} - -static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev) -{ - return PLL_PROF_CLK_FREQ; -} - -static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable) -{ - /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. 
*/ -} - -static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config) -{ - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; - u32 cpu_clock; - - if ((config & 0xff) == PLL_RATIO_4_3) - cpu_clock = pll_clock * 2 / 4; - else - cpu_clock = pll_clock * 2 / 5; - - return cpu_clock; -} - -/* Register indirect accesses */ -static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL); - pll_curr_ratio &= VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config); -} - -static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET); -} - -static u32 ivpu_hw_37xx_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE); -} - -static u32 ivpu_hw_37xx_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE); -} - -static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id) -{ - u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0; - u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET); - - REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); -} - -static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev) -{ - return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM); -} - -static u32 ivpu_hw_37xx_reg_ipc_rx_count_get(struct ivpu_device *vdev) -{ - u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); - - return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); -} - -static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) -{ - REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); -} - -static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev) -{ - REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); -} - -static void ivpu_hw_37xx_irq_enable(struct ivpu_device *vdev) -{ - REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); - REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); - REGB_WR32(VPU_37XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); -} - -static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev) -{ - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - REGB_WR32(VPU_37XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); - REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull); - REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0); -} - -static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev) -{ - ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); -} - -static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev) -{ - ivpu_hw_wdt_disable(vdev); - ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); -} - -static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev) -{ - ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); -} - -/* Handler for IRQs from VPU core (irqV) */ -static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread) -{ - u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - - if (!status) - return false; - - REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) - ivpu_mmu_irq_evtq_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, 
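ivpu_hw_37xx_reg_db_set() below computes the doorbell register address from a fixed stride (DOORBELL_1 - DOORBELL_0) and writes the SET bit, which is what the REGV_WR32I() helper hides. An equivalent sketch (invented names):

#include <linux/bits.h>
#include <linux/io.h>

static void example_ring_doorbell(void __iomem *db0_reg, u32 reg_stride,
				  u32 db_id)
{
	/* Doorbell N lives at doorbell 0 plus N register strides. */
	writel(BIT(0), db0_reg + (size_t)db_id * reg_stride);
}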
status)) - ivpu_ipc_irq_handler(vdev, wake_thread); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) - ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) - ivpu_mmu_irq_gerr_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) - ivpu_hw_37xx_irq_wdt_mss_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) - ivpu_hw_37xx_irq_wdt_nce_handler(vdev); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) - ivpu_hw_37xx_irq_noc_firewall_handler(vdev); - - return true; -} - -/* Handler for IRQs from Buttress core (irqB) */ -static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) -{ - u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - bool schedule_recovery = false; - - if (!status) - return false; - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", - REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL)); - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { - ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_37XX_BUTTRESS_ATS_ERR_LOG_0)); - REGB_WR32(VPU_37XX_BUTTRESS_ATS_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) { - u32 ufi_log = REGB_RD32(VPU_37XX_BUTTRESS_UFI_ERR_LOG); - - ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", - ufi_log, REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); - REGB_WR32(VPU_37XX_BUTTRESS_UFI_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - /* This must be done after interrupts are cleared at the source. */ - if (IVPU_WA(interrupt_clear_with_0)) - /* - * Writing 1 triggers an interrupt, so we can't perform read update write. - * Clear local interrupt status by writing 0 to all bits. 
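The interrupt_clear_with_0 branch below exists because, on affected steppings, writing 1s to the buttress status register raises a new interrupt instead of acking it, so the handler must clear by writing 0, and only after the individual sources have been cleared. A sketch of the conditional ack (invented names; the quirk itself is from the deleted code):

#include <linux/io.h>
#include <linux/types.h>

static void example_ack_buttress_irqs(void __iomem *stat_reg, u32 status,
				      bool clear_with_0)
{
	if (clear_with_0)
		writel(0x0, stat_reg);		/* W1C is broken: 1s re-trigger */
	else
		writel(status, stat_reg);	/* normal write-1-to-clear */
}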
- */ - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); - else - REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status); - - if (schedule_recovery) - ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); - - return true; -} - -static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr) -{ - struct ivpu_device *vdev = ptr; - bool irqv_handled, irqb_handled, wake_thread = false; - - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - - irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread); - irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq); - - /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ - REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - - if (wake_thread) - return IRQ_WAKE_THREAD; - if (irqv_handled || irqb_handled) - return IRQ_HANDLED; - return IRQ_NONE; -} - -static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev) -{ - u32 irqv = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - u32 irqb = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (ivpu_hw_37xx_reg_ipc_rx_count_get(vdev)) - ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) - ivpu_err(vdev, "WDT MSS timeout detected\n"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) - ivpu_err(vdev, "WDT NCE timeout detected\n"); - - if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) - ivpu_err(vdev, "NOC Firewall irq detected\n"); - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) - ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_37XX_BUTTRESS_ATS_ERR_LOG_0)); - - if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) { - u32 ufi_log = REGB_RD32(VPU_37XX_BUTTRESS_UFI_ERR_LOG); - - ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", - ufi_log, REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), - REG_GET_FLD(VPU_37XX_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); - } -} - -const struct ivpu_hw_ops ivpu_hw_37xx_ops = { - .info_init = ivpu_hw_37xx_info_init, - .power_up = ivpu_hw_37xx_power_up, - .is_idle = ivpu_hw_37xx_is_idle, - .wait_for_idle = ivpu_hw_37xx_wait_for_idle, - .power_down = ivpu_hw_37xx_power_down, - .reset = ivpu_hw_37xx_reset, - .boot_fw = ivpu_hw_37xx_boot_fw, - .wdt_disable = ivpu_hw_37xx_wdt_disable, - .diagnose_failure = ivpu_hw_37xx_diagnose_failure, - .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get, - .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive, - .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get, - .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get, - .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get, - .reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get, - .reg_db_set = ivpu_hw_37xx_reg_db_set, - .reg_ipc_rx_addr_get = ivpu_hw_37xx_reg_ipc_rx_addr_get, - .reg_ipc_rx_count_get = ivpu_hw_37xx_reg_ipc_rx_count_get, - .reg_ipc_tx_set = ivpu_hw_37xx_reg_ipc_tx_set, - .irq_clear = ivpu_hw_37xx_irq_clear, - .irq_enable = ivpu_hw_37xx_irq_enable, - .irq_disable = ivpu_hw_37xx_irq_disable, - .irq_handler = ivpu_hw_37xx_irq_handler, -}; diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h index f6fec1919202..cf5e2f01049c 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h @@ -8,78 +8,6 @@ #include <linux/bits.h> 
-#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE 0x00000000u - -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT 0x00000004u -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) - -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) - -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) - -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) - -#define VPU_37XX_BUTTRESS_WP_REQ_CMD 0x00000014u -#define VPU_37XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) - -#define VPU_37XX_BUTTRESS_WP_DOWNLOAD 0x00000018u -#define VPU_37XX_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) - -#define VPU_37XX_BUTTRESS_CURRENT_PLL 0x0000001cu -#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) - -#define VPU_37XX_BUTTRESS_PLL_ENABLE 0x00000020u - -#define VPU_37XX_BUTTRESS_FMIN_FUSE 0x00000024u -#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) -#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) - -#define VPU_37XX_BUTTRESS_FMAX_FUSE 0x00000028u -#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) - -#define VPU_37XX_BUTTRESS_TILE_FUSE 0x0000002cu -#define VPU_37XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) - -#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK 0x00000030u -#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK 0x00000034u - -#define VPU_37XX_BUTTRESS_PLL_STATUS 0x00000040u -#define VPU_37XX_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1) - -#define VPU_37XX_BUTTRESS_VPU_STATUS 0x00000044u -#define VPU_37XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) - -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) - -#define VPU_37XX_BUTTRESS_VPU_IP_RESET 0x00000050u -#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) - -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u - -#define VPU_37XX_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u -#define VPU_37XX_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u -#define VPU_37XX_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u - -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG 0x000000b0u -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) - -#define VPU_37XX_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u - #define VPU_37XX_HOST_SS_CPR_CLK_SET 0x00000084u #define VPU_37XX_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1) #define VPU_37XX_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c deleted file mode 100644 index a1523d0b1ef3..000000000000 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ /dev/null @@ -1,1244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020-2023 Intel 
Corporation - */ - -#include "ivpu_drv.h" -#include "ivpu_fw.h" -#include "ivpu_hw.h" -#include "ivpu_hw_40xx_reg.h" -#include "ivpu_hw_reg_io.h" -#include "ivpu_ipc.h" -#include "ivpu_mmu.h" -#include "ivpu_pm.h" - -#include <linux/dmi.h> - -#define TILE_MAX_NUM 6 -#define TILE_MAX_MASK 0x3f - -#define LNL_HW_ID 0x4040 - -#define SKU_TILE_SHIFT 0u -#define SKU_TILE_MASK 0x0000ffffu -#define SKU_HW_ID_SHIFT 16u -#define SKU_HW_ID_MASK 0xffff0000u - -#define PLL_CONFIG_DEFAULT 0x0 -#define PLL_CDYN_DEFAULT 0x80 -#define PLL_EPP_DEFAULT 0x80 -#define PLL_REF_CLK_FREQ (50 * 1000000) -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) - -#define PLL_PROFILING_FREQ_DEFAULT 38400000 -#define PLL_PROFILING_FREQ_HIGH 400000000 - -#define TIM_SAFE_ENABLE 0xf1d0dead -#define TIM_WATCHDOG_RESET_VALUE 0xffffffff - -#define TIMEOUT_US (150 * USEC_PER_MSEC) -#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) -#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) -#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) - -#define WEIGHTS_DEFAULT 0xf711f711u -#define WEIGHTS_ATS_DEFAULT 0x0000f711u - -#define ICB_0_IRQ_MASK ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) - -#define ICB_1_IRQ_MASK ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ - (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) - -#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) - -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR))) - -#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) -#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) - -#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ - (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) - -static char *ivpu_platform_to_str(u32 platform) -{ - switch (platform) { - case IVPU_PLATFORM_SILICON: - return "IVPU_PLATFORM_SILICON"; - case IVPU_PLATFORM_SIMICS: - return "IVPU_PLATFORM_SIMICS"; - case IVPU_PLATFORM_FPGA: - return "IVPU_PLATFORM_FPGA"; - default: - return "Invalid platform"; - } -} - -static const struct dmi_system_id ivpu_dmi_platform_simulation[] = { - { - .ident = "Intel Simics", - .matches = { - DMI_MATCH(DMI_BOARD_NAME, "lnlrvp"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), - DMI_MATCH(DMI_BOARD_SERIAL, "123456789"), - }, - }, - { - .ident = "Intel Simics", - .matches = { - DMI_MATCH(DMI_BOARD_NAME, "Simics"), - }, - }, - { } -}; - -static void ivpu_hw_read_platform(struct ivpu_device *vdev) -{ - if 
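The deleted 40xx header computes PLL frequency as ratio times the 50 MHz reference clock (PLL_RATIO_TO_FREQ). As a worked example, a target ratio of 32 (0x20) yields 32 * 50 MHz = 1.6 GHz. The same arithmetic as a tiny helper (names invented):

#include <linux/types.h>

#define EXAMPLE_PLL_REF_CLK_FREQ (50 * 1000000)	/* 50 MHz reference */

static u32 example_pll_ratio_to_freq(u32 ratio)
{
	return ratio * EXAMPLE_PLL_REF_CLK_FREQ;	/* Hz */
}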
(dmi_check_system(ivpu_dmi_platform_simulation)) - vdev->platform = IVPU_PLATFORM_SIMICS; - else - vdev->platform = IVPU_PLATFORM_SILICON; - - ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", - ivpu_platform_to_str(vdev->platform), vdev->platform); -} - -static void ivpu_hw_wa_init(struct ivpu_device *vdev) -{ - vdev->wa.punit_disabled = ivpu_is_fpga(vdev); - vdev->wa.clear_runtime_mem = false; - - if (ivpu_hw_gen(vdev) == IVPU_HW_40XX) - vdev->wa.disable_clock_relinquish = true; - - IVPU_PRINT_WA(punit_disabled); - IVPU_PRINT_WA(clear_runtime_mem); - IVPU_PRINT_WA(disable_clock_relinquish); -} - -static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) -{ - if (ivpu_is_fpga(vdev)) { - vdev->timeout.boot = 100000; - vdev->timeout.jsm = 50000; - vdev->timeout.tdr = 2000000; - vdev->timeout.reschedule_suspend = 1000; - vdev->timeout.autosuspend = -1; - vdev->timeout.d0i3_entry_msg = 500; - } else if (ivpu_is_simics(vdev)) { - vdev->timeout.boot = 50; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 10000; - vdev->timeout.reschedule_suspend = 10; - vdev->timeout.autosuspend = -1; - vdev->timeout.d0i3_entry_msg = 100; - } else { - vdev->timeout.boot = 1000; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 2000; - vdev->timeout.reschedule_suspend = 10; - vdev->timeout.autosuspend = 10; - vdev->timeout.d0i3_entry_msg = 5; - } -} - -static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); -} - -static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, - u16 target_ratio, u16 epp, u16 config, u16 cdyn) -{ - int ret; - u32 val; - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) { - ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0, val); - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, EPP, epp, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1, val); - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); - val = REG_SET_FLD_NUM(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, CDYN, cdyn, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2, val); - - val = REGB_RD32(VPU_40XX_BUTTRESS_WP_REQ_CMD); - val = REG_SET_FLD(VPU_40XX_BUTTRESS_WP_REQ_CMD, SEND, val); - REGB_WR32(VPU_40XX_BUTTRESS_WP_REQ_CMD, val); - - ret = ivpu_pll_wait_for_cmd_send(vdev); - if (ret) - ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); - - return ret; -} - -static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); -} - -static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev) -{ - if (ivpu_is_simics(vdev)) - return 0; - - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); -} - -static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u8 fuse_min_ratio, fuse_pn_ratio, fuse_max_ratio; - u32 fmin_fuse, fmax_fuse; - - fmin_fuse = REGB_RD32(VPU_40XX_BUTTRESS_FMIN_FUSE); 
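/*
 * [Editor's note, not part of the original patch] A worked example of the
 * fuse decode below, using hypothetical fuse values: FMIN_FUSE carries the
 * floor ratio in bits 7:0 and the nominal PN ratio in bits 15:8, and
 * FMAX_FUSE carries the ceiling in bits 7:0 (per the
 * VPU_40XX_BUTTRESS_FMIN_FUSE/FMAX_FUSE masks removed later in this patch).
 * With fmin_fuse = 0x2010 and fmax_fuse = 0x60, the decode yields
 * fuse_min_ratio = 0x10, fuse_pn_ratio = 0x20 and fuse_max_ratio = 0x60;
 * a module parameter ivpu_pll_min_ratio = 0 is then clamped up to the fused
 * floor 0x10, and the PN ratio is clamped into [min_ratio, max_ratio].
 */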
- fuse_min_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); - fuse_pn_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); - - fmax_fuse = REGB_RD32(VPU_40XX_BUTTRESS_FMAX_FUSE); - fuse_max_ratio = REG_GET_FLD(VPU_40XX_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); - - hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); - hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); - hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); -} - -static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) -{ - u16 config = enable ? PLL_CONFIG_DEFAULT : 0; - u16 cdyn = enable ? PLL_CDYN_DEFAULT : 0; - u16 epp = enable ? PLL_EPP_DEFAULT : 0; - struct ivpu_hw_info *hw = vdev->hw; - u16 target_ratio = hw->pll.pn_ratio; - int ret; - - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, epp: 0x%x, config: 0x%x, cdyn: 0x%x\n", - PLL_RATIO_TO_FREQ(target_ratio), epp, config, cdyn); - - ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, - target_ratio, epp, config, cdyn); - if (ret) { - ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); - return ret; - } - - if (enable) { - ret = ivpu_pll_wait_for_status_ready(vdev); - if (ret) { - ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); - return ret; - } - } - - return 0; -} - -static int ivpu_pll_enable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, true); -} - -static int ivpu_pll_disable(struct ivpu_device *vdev) -{ - return ivpu_pll_drive(vdev, false); -} - -static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN); - - if (enable) { - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); - } - - REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val); -} - -static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN); - - if (enable) { - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); - val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); - } else { - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); - val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); - } - - REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val); -} - -static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) - return -EIO; - - 
return 0; -} - -static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) - return -EIO; - - return 0; -} - -static void ivpu_boot_idle_gen_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val); -} - -static int ivpu_boot_host_ss_check(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_boot_noc_qreqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qreqn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qacceptn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); - REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val); - - ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - return ret; - } - - if (enable) { - REGB_WR32(VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT); - REGB_WR32(VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT); - } - - return ret; -} - -static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, true); -} - -static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); - if (enable) { - val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } else { - val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); - } - REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val); - - ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 
0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, true); -} - -static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); - - if (enable) - ndelay(500); -} - -static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val); - - if (!enable) - ndelay(500); -} - -static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) -{ - if (ivpu_is_fpga(vdev)) - return 0; - - return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU, - exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); -} - -static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0); - - if (enable) - val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); - else - val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); - - REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val); -} - -static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); - - val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); - val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); - - REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); -} - -static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) -{ - u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV); - - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); - val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); - - REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val); -} - -static int ivpu_boot_cpu_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN); - - if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_cpu_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) -{ - u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY); - - if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val)) - return -EIO; - - return 0; -} - -static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_wait_for_clock_own_resource_ack(vdev); - if (ret) { - 
ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n"); - return ret; - } - - ivpu_boot_pwr_island_trickle_drive(vdev, true); - ivpu_boot_pwr_island_drive(vdev, true); - - ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); - if (ret) { - ivpu_err(vdev, "Timed out waiting for power island status\n"); - return ret; - } - - ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); - if (ret) { - ivpu_err(vdev, "Failed qrenqn check %d\n", ret); - return ret; - } - - ivpu_boot_host_ss_clk_drive(vdev, true); - ivpu_boot_host_ss_rst_drive(vdev, true); - ivpu_boot_pwr_island_isolation_drive(vdev, false); - - return ret; -} - -static int ivpu_boot_soc_cpu_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN); - if (enable) - val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); - else - val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); - REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val); - - ret = ivpu_boot_cpu_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); - if (ret) { - ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); - return ret; - } - - ret = ivpu_boot_cpu_noc_qdeny_check(vdev, 0x0); - if (ret) - ivpu_err(vdev, "Failed qdeny check: %d\n", ret); - - return ret; -} - -static int ivpu_boot_soc_cpu_enable(struct ivpu_device *vdev) -{ - return ivpu_boot_soc_cpu_drive(vdev, true); -} - -static int ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) -{ - int ret; - u32 val; - u64 val64; - - ret = ivpu_boot_soc_cpu_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret); - return ret; - } - - val64 = vdev->fw->entry_point; - val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; - REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); - - val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); - val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); - REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); - - ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", - ivpu_fw_is_cold_boot(vdev) ? 
"cold boot" : "resume"); - - return 0; -} - -static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); - return ret; - } - - val = REGB_RD32(VPU_40XX_BUTTRESS_D0I3_CONTROL); - if (enable) - val = REG_SET_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, I3, val); - else - val = REG_CLR_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, I3, val); - REGB_WR32(VPU_40XX_BUTTRESS_D0I3_CONTROL, val); - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); - return ret; - } - - return 0; -} - -static bool ivpu_tile_disable_check(u32 config) -{ - /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */ - if (config == 0) - return true; - - if (config > BIT(TILE_MAX_NUM - 1)) - return false; - - if ((config & (config - 1)) == 0) - return true; - - return false; -} - -static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) -{ - struct ivpu_hw_info *hw = vdev->hw; - u32 tile_disable; - u32 fuse; - - fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE); - if (!REG_TEST_FLD(VPU_40XX_BUTTRESS_TILE_FUSE, VALID, fuse)) { - ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse); - return -EIO; - } - - tile_disable = REG_GET_FLD(VPU_40XX_BUTTRESS_TILE_FUSE, CONFIG, fuse); - if (!ivpu_tile_disable_check(tile_disable)) { - ivpu_err(vdev, "Fuse: Invalid tile disable config (0x%x)\n", tile_disable); - return -EIO; - } - - if (tile_disable) - ivpu_dbg(vdev, MISC, "Fuse: %d tiles enabled. Tile number %d disabled\n", - TILE_MAX_NUM - 1, ffs(tile_disable) - 1); - else - ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM); - - hw->tile_fuse = tile_disable; - hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; - - ivpu_pll_init_frequency_ratios(vdev); - - ivpu_hw_init_range(&vdev->hw->ranges.global, 0x80000000, SZ_512M); - ivpu_hw_init_range(&vdev->hw->ranges.user, 0x80000000, SZ_256M); - ivpu_hw_init_range(&vdev->hw->ranges.shave, 0x80000000 + SZ_256M, SZ_2G - SZ_256M); - ivpu_hw_init_range(&vdev->hw->ranges.dma, 0x200000000, SZ_8G); - - ivpu_hw_read_platform(vdev); - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - - return 0; -} - -static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev) -{ - int ret; - u32 val; - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Wait for *_TRIGGER timed out\n"); - return ret; - } - - val = REGB_RD32(VPU_40XX_BUTTRESS_IP_RESET); - val = REG_SET_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, val); - REGB_WR32(VPU_40XX_BUTTRESS_IP_RESET, val); - - ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); - - return ret; -} - -static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) -{ - int ret = 0; - - if (ivpu_hw_40xx_ip_reset(vdev)) { - ivpu_err(vdev, "Failed to reset VPU IP\n"); - ret = -EIO; - } - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - return ret; -} - -static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev) -{ - int ret; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = ivpu_boot_d0i3_drive(vdev, true); - if (ret) - ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); - - udelay(5); /* VPU requires 5 us to complete the transition */ - - return ret; -} - -static int 
ivpu_hw_40xx_d0i3_disable(struct ivpu_device *vdev) -{ - int ret; - - if (IVPU_WA(punit_disabled)) - return 0; - - ret = ivpu_boot_d0i3_drive(vdev, false); - if (ret) - ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); - - return ret; -} - -static void ivpu_hw_40xx_profiling_freq_reg_set(struct ivpu_device *vdev) -{ - u32 val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS); - - if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) - val = REG_CLR_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, PERF_CLK, val); - else - val = REG_SET_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, PERF_CLK, val); - - REGB_WR32(VPU_40XX_BUTTRESS_VPU_STATUS, val); -} - -static void ivpu_hw_40xx_ats_print(struct ivpu_device *vdev) -{ - ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n", - REGB_RD32(VPU_40XX_BUTTRESS_HM_ATS) ? "Enable" : "Disable"); -} - -static void ivpu_hw_40xx_clock_relinquish_disable(struct ivpu_device *vdev) -{ - u32 val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS); - - val = REG_SET_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, DISABLE_CLK_RELINQUISH, val); - REGB_WR32(VPU_40XX_BUTTRESS_VPU_STATUS, val); -} - -static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_hw_40xx_d0i3_disable(vdev); - if (ret) - ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); - - ret = ivpu_pll_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); - return ret; - } - - if (IVPU_WA(disable_clock_relinquish)) - ivpu_hw_40xx_clock_relinquish_disable(vdev); - ivpu_hw_40xx_profiling_freq_reg_set(vdev); - ivpu_hw_40xx_ats_print(vdev); - - ret = ivpu_boot_host_ss_check(vdev); - if (ret) { - ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); - return ret; - } - - ivpu_boot_idle_gen_drive(vdev, false); - - ret = ivpu_boot_pwr_domain_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_axi_enable(vdev); - if (ret) { - ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); - return ret; - } - - ret = ivpu_boot_host_ss_top_noc_enable(vdev); - if (ret) - ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); - - return ret; -} - -static int ivpu_hw_40xx_boot_fw(struct ivpu_device *vdev) -{ - int ret; - - ivpu_boot_no_snoop_enable(vdev); - ivpu_boot_tbu_mmu_enable(vdev); - - ret = ivpu_boot_soc_cpu_boot(vdev); - if (ret) - ivpu_err(vdev, "Failed to boot SOC CPU: %d\n", ret); - - return ret; -} - -static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev) -{ - u32 val; - - if (IVPU_WA(punit_disabled)) - return true; - - val = REGB_RD32(VPU_40XX_BUTTRESS_VPU_STATUS); - return REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, val) && - REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val); -} - -static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev) -{ - return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); -} - -static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev) -{ - vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); - vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT); -} - -static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev) -{ - int ret = 0; - - ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev); - - if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev)) - ivpu_warn(vdev, "Failed to reset the VPU\n"); - - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); - ret = -EIO; - } - - if (ivpu_hw_40xx_d0i3_enable(vdev)) { - ivpu_err(vdev, "Failed to enter D0I3\n"); - ret = -EIO; - } - - return ret; -} - 
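[Editor's note, not part of the original patch: both the 40xx code removed above and the consolidated buttress code added below lean on the REG_SET_FLD()/REG_SET_FLD_NUM()/REG_GET_FLD()/REG_TEST_FLD() helpers from ivpu_hw_reg_io.h, which pair each register's <REG>_<FLD>_MASK define (presumably via token pasting) with a read-modify-write of the register value, while the REGB_*/REGV_* wrappers perform the actual MMIO. A minimal sketch of the underlying pattern, assuming only FIELD_PREP()/FIELD_GET() from <linux/bitfield.h> and BIT_MASK()/GENMASK() from <linux/bits.h>; the example register and field names are hypothetical, and the driver's real macros may differ in detail:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Hypothetical field layout, mirroring the *_MASK defines used in this patch */
#define EXAMPLE_STATUS_READY_MASK	BIT_MASK(0)
#define EXAMPLE_STATUS_RATIO_MASK	GENMASK(15, 8)

/* Read-modify-write update of one multi-bit field, as REG_SET_FLD_NUM() does */
static inline u32 example_set_ratio(u32 val, u32 ratio)
{
	val &= ~EXAMPLE_STATUS_RATIO_MASK;	/* clear the old field contents */
	return val | FIELD_PREP(EXAMPLE_STATUS_RATIO_MASK, ratio);
}

/* Extraction of a field from a raw register value, as REG_GET_FLD() does */
static inline u32 example_get_ratio(u32 val)
{
	return FIELD_GET(EXAMPLE_STATUS_RATIO_MASK, val);
}

/* Test of a single-bit field, as REG_TEST_FLD() does */
static inline bool example_is_ready(u32 val)
{
	return FIELD_GET(EXAMPLE_STATUS_READY_MASK, val);
}

A caller would read the register, update the field and write it back, e.g. val = example_set_ratio(val, 0x20), matching the REGB_RD32()/REG_SET_FLD_NUM()/REGB_WR32() triplets seen throughout the PLL and D0i3 code in this patch.]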
-static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev) -{ - u32 val; - - REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); - - REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0); - - val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG); - val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); - REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val); -} - -static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev) -{ - return vdev->hw->pll.profiling_freq; -} - -static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable) -{ - if (enable) - vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH; - else - vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; -} - -/* Register indirect accesses */ -static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_40XX_BUTTRESS_PLL_FREQ); - pll_curr_ratio &= VPU_40XX_BUTTRESS_PLL_FREQ_RATIO_MASK; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET); -} - -static u32 ivpu_hw_40xx_reg_telemetry_size_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_SIZE); -} - -static u32 ivpu_hw_40xx_reg_telemetry_enable_get(struct ivpu_device *vdev) -{ - return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_ENABLE); -} - -static void ivpu_hw_40xx_reg_db_set(struct ivpu_device *vdev, u32 db_id) -{ - u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0; - u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET); - - REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); -} - -static u32 ivpu_hw_40xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev) -{ - return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM); -} - -static u32 ivpu_hw_40xx_reg_ipc_rx_count_get(struct ivpu_device *vdev) -{ - u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); - - return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); -} - -static void ivpu_hw_40xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) -{ - REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); -} - -static void ivpu_hw_40xx_irq_clear(struct ivpu_device *vdev) -{ - REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); -} - -static void ivpu_hw_40xx_irq_enable(struct ivpu_device *vdev) -{ - REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); - REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); - REGB_WR32(VPU_40XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); -} - -static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev) -{ - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - REGB_WR32(VPU_40XX_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); - REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull); - REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul); -} - -static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev) -{ - /* TODO: For LNN hang consider engine reset instead of full recovery */ - ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); -} - -static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev) -{ - ivpu_hw_wdt_disable(vdev); - ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); -} - -static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev) 
-{ - ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); -} - -/* Handler for IRQs from VPU core (irqV) */ -static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread) -{ - u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - - if (!status) - return false; - - REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) - ivpu_mmu_irq_evtq_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) - ivpu_ipc_irq_handler(vdev, wake_thread); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) - ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) - ivpu_mmu_irq_gerr_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) - ivpu_hw_40xx_irq_wdt_mss_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) - ivpu_hw_40xx_irq_wdt_nce_handler(vdev); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) - ivpu_hw_40xx_irq_noc_firewall_handler(vdev); - - return true; -} - -/* Handler for IRQs from Buttress core (irqB) */ -static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) -{ - bool schedule_recovery = false; - u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (!status) - return false; - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE"); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { - ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG1), - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG2)); - REGB_WR32(VPU_40XX_BUTTRESS_ATS_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR, status)) { - ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_40XX_BUTTRESS_CFI0_ERR_LOG)); - REGB_WR32(VPU_40XX_BUTTRESS_CFI0_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR, status)) { - ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_40XX_BUTTRESS_CFI1_ERR_LOG)); - REGB_WR32(VPU_40XX_BUTTRESS_CFI1_ERR_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR, status)) { - ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH)); - REGB_WR32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR, status)) { - ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH)); - REGB_WR32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_CLEAR, 0x1); - schedule_recovery = true; - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR, status)) { - ivpu_err(vdev, "Survivability error detected\n"); - schedule_recovery = true; - } - - /* This must be done after interrupts are cleared at the source. 
*/ - REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); - - if (schedule_recovery) - ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); - - return true; -} - -static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr) -{ - bool irqv_handled, irqb_handled, wake_thread = false; - struct ivpu_device *vdev = ptr; - - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - - irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread); - irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq); - - /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - - if (wake_thread) - return IRQ_WAKE_THREAD; - if (irqv_handled || irqb_handled) - return IRQ_HANDLED; - return IRQ_NONE; -} - -static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev) -{ - u32 irqv = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - u32 irqb = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - - if (ivpu_hw_40xx_reg_ipc_rx_count_get(vdev)) - ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) - ivpu_err(vdev, "WDT MSS timeout detected\n"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) - ivpu_err(vdev, "WDT NCE timeout detected\n"); - - if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) - ivpu_err(vdev, "NOC Firewall irq detected\n"); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) { - ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG1), - REGB_RD32(VPU_40XX_BUTTRESS_ATS_ERR_LOG2)); - } - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR, irqb)) - ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_40XX_BUTTRESS_CFI0_ERR_LOG)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR, irqb)) - ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_40XX_BUTTRESS_CFI1_ERR_LOG)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR, irqb)) - ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR1_ERR, irqb)) - ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n", - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW), - REGB_RD32(VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH)); - - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, SURV_ERR, irqb)) - ivpu_err(vdev, "Survivability error detected\n"); -} - -const struct ivpu_hw_ops ivpu_hw_40xx_ops = { - .info_init = ivpu_hw_40xx_info_init, - .power_up = ivpu_hw_40xx_power_up, - .is_idle = ivpu_hw_40xx_is_idle, - .wait_for_idle = ivpu_hw_40xx_wait_for_idle, - .power_down = ivpu_hw_40xx_power_down, - .reset = ivpu_hw_40xx_reset, - .boot_fw = ivpu_hw_40xx_boot_fw, - .wdt_disable = ivpu_hw_40xx_wdt_disable, - .diagnose_failure = ivpu_hw_40xx_diagnose_failure, - .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get, - .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive, - .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get, - .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get, - .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get, - .reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get, - .reg_db_set = ivpu_hw_40xx_reg_db_set, - .reg_ipc_rx_addr_get = ivpu_hw_40xx_reg_ipc_rx_addr_get, - .reg_ipc_rx_count_get = 
ivpu_hw_40xx_reg_ipc_rx_count_get, - .reg_ipc_tx_set = ivpu_hw_40xx_reg_ipc_tx_set, - .irq_clear = ivpu_hw_40xx_irq_clear, - .irq_enable = ivpu_hw_40xx_irq_enable, - .irq_disable = ivpu_hw_40xx_irq_disable, - .irq_handler = ivpu_hw_40xx_irq_handler, -}; diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index ff4a5d4f5821..fc0ee8d637f9 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -8,91 +8,6 @@ #include <linux/bits.h> -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT 0x00000000u -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5) -#define VPU_40XX_BUTTRESS_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6) - -#define VPU_40XX_BUTTRESS_LOCAL_INT_MASK 0x00000004u -#define VPU_40XX_BUTTRESS_GLOBAL_INT_MASK 0x00000008u - -#define VPU_40XX_BUTTRESS_HM_ATS 0x0000000cu - -#define VPU_40XX_BUTTRESS_ATS_ERR_LOG1 0x00000010u -#define VPU_40XX_BUTTRESS_ATS_ERR_LOG2 0x00000014u -#define VPU_40XX_BUTTRESS_ATS_ERR_CLEAR 0x00000018u - -#define VPU_40XX_BUTTRESS_CFI0_ERR_LOG 0x0000001cu -#define VPU_40XX_BUTTRESS_CFI0_ERR_CLEAR 0x00000020u - -#define VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u - -#define VPU_40XX_BUTTRESS_CFI1_ERR_LOG 0x00000040u -#define VPU_40XX_BUTTRESS_CFI1_ERR_CLEAR 0x00000044u - -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_LOW 0x00000048u -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_HIGH 0x0000004cu -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI0_CLEAR 0x00000050u - -#define VPU_40XX_BUTTRESS_PORT_ARBITRATION_WEIGHTS 0x00000054u - -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_LOW 0x00000058u -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_HIGH 0x0000005cu -#define VPU_40XX_BUTTRESS_IMR_ERR_CFI1_CLEAR 0x00000060u - -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000130u -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) - -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1 0x00000134u -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) - -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000138u -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) -#define VPU_40XX_BUTTRESS_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16) - -#define VPU_40XX_BUTTRESS_WP_REQ_CMD 0x0000013cu -#define VPU_40XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) - -#define VPU_40XX_BUTTRESS_PLL_FREQ 0x00000148u -#define VPU_40XX_BUTTRESS_PLL_FREQ_RATIO_MASK GENMASK(15, 0) - -#define VPU_40XX_BUTTRESS_TILE_FUSE 0x00000150u -#define VPU_40XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_TILE_FUSE_CONFIG_MASK GENMASK(6, 1) - -#define VPU_40XX_BUTTRESS_VPU_STATUS 0x00000154u -#define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) -#define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2) -#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6) -#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) -#define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) -#define 
VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) - -#define VPU_40XX_BUTTRESS_IP_RESET 0x00000160u -#define VPU_40XX_BUTTRESS_IP_RESET_TRIGGER_MASK BIT_MASK(0) - -#define VPU_40XX_BUTTRESS_D0I3_CONTROL 0x00000164u -#define VPU_40XX_BUTTRESS_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) -#define VPU_40XX_BUTTRESS_D0I3_CONTROL_I3_MASK BIT_MASK(2) - -#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000168u -#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x0000016cu -#define VPU_40XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000170u - -#define VPU_40XX_BUTTRESS_FMIN_FUSE 0x00000174u -#define VPU_40XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) -#define VPU_40XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) - -#define VPU_40XX_BUTTRESS_FMAX_FUSE 0x00000178u -#define VPU_40XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) - #define VPU_40XX_HOST_SS_CPR_CLK_EN 0x00000080u #define VPU_40XX_HOST_SS_CPR_CLK_EN_TOP_NOC_MASK BIT_MASK(1) #define VPU_40XX_HOST_SS_CPR_CLK_EN_DSS_MAS_MASK BIT_MASK(10) @@ -198,6 +113,14 @@ #define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu #define VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0_CSS_CPU_MASK BIT_MASK(3) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 8) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16) + +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) + #define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u #define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1) @@ -205,6 +128,9 @@ #define VPU_40XX_HOST_SS_AON_DPU_ACTIVE 0x00030204u #define VPU_40XX_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) +#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE 0x00030210u +#define VPU_50XX_HOST_SS_AON_FABRIC_REQ_OVERRIDE_REQ_OVERRIDE_MASK BIT_MASK(0) + #define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO 0x00040040u #define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_DONE_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c new file mode 100644 index 000000000000..3212c99f3682 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -0,0 +1,890 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_btrs_lnl_reg.h" +#include "ivpu_hw_btrs_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_pm.h" + +#define BTRS_MTL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR))) + +#define BTRS_LNL_IRQ_MASK ((REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR)) | \ + (REG_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR))) + +#define BTRS_MTL_ALL_IRQ_MASK (BTRS_MTL_IRQ_MASK | (REG_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, \ + FREQ_CHANGE))) + +#define BTRS_IRQ_DISABLE_MASK ((u32)-1) + +#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) + +#define 
BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) +#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) +#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) +#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) +#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) + +#define PLL_CDYN_DEFAULT 0x80 +#define PLL_EPP_DEFAULT 0x80 +#define PLL_CONFIG_DEFAULT 0x0 +#define PLL_SIMULATION_FREQ 10000000 +#define PLL_REF_CLK_FREQ 50000000 +#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) +#define TIMEOUT_US (150 * USEC_PER_MSEC) + +/* Work point configuration values */ +#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio)) +#define MTL_CONFIG_1_TILE 0x01 +#define MTL_CONFIG_2_TILE 0x02 +#define MTL_PLL_RATIO_5_3 0x01 +#define MTL_PLL_RATIO_4_3 0x02 +#define BTRS_MTL_TILE_FUSE_ENABLE_BOTH 0x0 +#define BTRS_MTL_TILE_SKU_BOTH 0x3630 + +#define BTRS_LNL_TILE_MAX_NUM 6 +#define BTRS_LNL_TILE_MAX_MASK 0x3f + +#define WEIGHTS_DEFAULT 0xf711f711u +#define WEIGHTS_ATS_DEFAULT 0x0000f711u + +#define DCT_REQ 0x2 +#define DCT_ENABLE 0x1 +#define DCT_DISABLE 0x0 + +int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) +{ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); + if (REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) == BTRS_MTL_ALL_IRQ_MASK) { + /* Writing 1s does not clear the interrupt status register */ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0); + return true; + } + + return false; +} + +static void freq_ratios_init_mtl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMIN_FUSE); + hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, MIN_RATIO, fmin_fuse); + hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(VPU_HW_BTRS_MTL_FMAX_FUSE); + hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_MTL_FMAX_FUSE, MAX_RATIO, fmax_fuse); +} + +static void freq_ratios_init_lnl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMIN_FUSE); + hw->pll.min_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, MIN_RATIO, fmin_fuse); + hw->pll.pn_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(VPU_HW_BTRS_LNL_FMAX_FUSE); + hw->pll.max_ratio = REG_GET_FLD(VPU_HW_BTRS_LNL_FMAX_FUSE, MAX_RATIO, fmax_fuse); +} + +void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + freq_ratios_init_mtl(vdev); + else + freq_ratios_init_lnl(vdev); + + hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, hw->pll.min_ratio, hw->pll.max_ratio); + hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, hw->pll.max_ratio); + hw->pll.pn_ratio = clamp_t(u8, hw->pll.pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); +} + +static bool tile_disable_check(u32 config) +{ + /* Allowed values: 0 or one bit from range 0-5 (6 tiles) */ + if (config == 0) + return true; + + if (config > BIT(BTRS_LNL_TILE_MAX_NUM - 1)) + return false; + + if ((config & (config - 1)) == 0) + return true; + + return false; +} + +static int read_tile_config_fuse(struct ivpu_device *vdev, u32 *tile_fuse_config) +{ + u32 fuse; + u32 config; + + fuse = 
REGB_RD32(VPU_HW_BTRS_LNL_TILE_FUSE); + if (!REG_TEST_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, VALID, fuse)) { + ivpu_err(vdev, "Fuse: invalid (0x%x)\n", fuse); + return -EIO; + } + + config = REG_GET_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, CONFIG, fuse); + if (!tile_disable_check(config)) + ivpu_warn(vdev, "More than 1 tile disabled, tile fuse config mask: 0x%x\n", config); + + ivpu_dbg(vdev, MISC, "Tile disable config mask: 0x%x\n", config); + + *tile_fuse_config = config; + return 0; +} + +static int info_init_mtl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + + hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; + hw->sku = BTRS_MTL_TILE_SKU_BOTH; + hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + + return 0; +} + +static int info_init_lnl(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 tile_fuse_config; + int ret; + + ret = read_tile_config_fuse(vdev, &tile_fuse_config); + if (ret) + return ret; + + hw->tile_fuse = tile_fuse_config; + hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; + + return 0; +} + +int ivpu_hw_btrs_info_init(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return info_init_mtl(vdev); + else + return info_init_lnl(vdev); +} + +static int wp_request_sync(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); +} + +static int wait_for_status_ready(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, exp_val, PLL_TIMEOUT_US); +} + +struct wp_request { + u16 min; + u16 max; + u16 target; + u16 cfg; + u16 epp; + u16 cdyn; +}; + +static void wp_request_mtl(struct ivpu_device *vdev, struct wp_request *wp) +{ + u32 val; + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, EPP, PLL_EPP_DEFAULT, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(VPU_HW_BTRS_MTL_WP_REQ_CMD); + val = REG_SET_FLD(VPU_HW_BTRS_MTL_WP_REQ_CMD, SEND, val); + REGB_WR32(VPU_HW_BTRS_MTL_WP_REQ_CMD, val); +} + +static void wp_request_lnl(struct ivpu_device *vdev, struct wp_request *wp) +{ + u32 val; + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MIN_RATIO, wp->min, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, MAX_RATIO, wp->max, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, TARGET_RATIO, wp->target, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, EPP, wp->epp, 
val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CONFIG, wp->cfg, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, CDYN, wp->cdyn, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(VPU_HW_BTRS_LNL_WP_REQ_CMD); + val = REG_SET_FLD(VPU_HW_BTRS_LNL_WP_REQ_CMD, SEND, val); + REGB_WR32(VPU_HW_BTRS_LNL_WP_REQ_CMD, val); +} + +static void wp_request(struct ivpu_device *vdev, struct wp_request *wp) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + wp_request_mtl(vdev, wp); + else + wp_request_lnl(vdev, wp); +} + +static int wp_request_send(struct ivpu_device *vdev, struct wp_request *wp) +{ + int ret; + + ret = wp_request_sync(vdev); + if (ret) { + ivpu_err(vdev, "Failed to sync before workpoint request: %d\n", ret); + return ret; + } + + wp_request(vdev, wp); + + ret = wp_request_sync(vdev); + if (ret) + ivpu_err(vdev, "Failed to sync after workpoint request: %d\n", ret); + + return ret; +} + +static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp, bool enable) +{ + struct ivpu_hw_info *hw = vdev->hw; + + wp->min = hw->pll.min_ratio; + wp->max = hw->pll.max_ratio; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + wp->target = enable ? hw->pll.pn_ratio : 0; + wp->cfg = enable ? hw->config : 0; + wp->cdyn = 0; + wp->epp = 0; + } else { + wp->target = hw->pll.pn_ratio; + wp->cfg = enable ? PLL_CONFIG_DEFAULT : 0; + wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0; + wp->epp = enable ? PLL_EPP_DEFAULT : 0; + } +} + +static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (ivpu_hw_btrs_gen(vdev) != IVPU_HW_BTRS_MTL) + return 0; + + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); +} + +int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) +{ + struct wp_request wp; + int ret; + + if (IVPU_WA(punit_disabled)) { + ivpu_dbg(vdev, PM, "Skipping workpoint request\n"); + return 0; + } + + prepare_wp_request(vdev, &wp, enable); + + ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); + + ret = wp_request_send(vdev, &wp); + if (ret) { + ivpu_err(vdev, "Failed to send workpoint request: %d\n", ret); + return ret; + } + + ret = wait_for_pll_lock(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL lock\n"); + return ret; + } + + ret = wait_for_status_ready(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for NPU ready status\n"); + return ret; + } + + return 0; +} + +static int d0i3_drive_mtl(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, I3, val); + REGB_WR32(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + + return ret; +} + +static int d0i3_drive_lnl(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 
val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_LNL_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, I3, val); + REGB_WR32(VPU_HW_BTRS_LNL_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + return ret; + } + + return 0; +} + +static int d0i3_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return d0i3_drive_mtl(vdev, enable); + else + return d0i3_drive_lnl(vdev, enable); +} + +int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = d0i3_drive(vdev, true); + if (ret) + ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); + + udelay(5); /* VPU requires 5 us to complete the transition */ + + return ret; +} + +int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev) +{ + int ret; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = d0i3_drive(vdev, false); + if (ret) + ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); + + return ret; +} + +int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return 0; + + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); +} + +void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev) +{ + REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS, WEIGHTS_DEFAULT); + REGB_WR32(VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS, WEIGHTS_ATS_DEFAULT); +} + +static int ip_reset_mtl(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_IP_RESET); + val = REG_SET_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_HW_BTRS_MTL_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +static int ip_reset_lnl(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + ivpu_hw_btrs_clock_relinquish_disable_lnl(vdev); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Wait for *_TRIGGER timed out\n"); + return ret; + } + + val = REGB_RD32(VPU_HW_BTRS_LNL_IP_RESET); + val = REG_SET_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_HW_BTRS_LNL_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_HW_BTRS_LNL_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return ip_reset_mtl(vdev); + else + return ip_reset_lnl(vdev); +} + +void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) + val = REG_CLR_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val); + else + val = 
REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, PERF_CLK, val); + + REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val); +} + +void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, MISC, "Buttress ATS: %s\n", + REGB_RD32(VPU_HW_BTRS_LNL_HM_ATS) ? "Enable" : "Disable"); +} + +void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + val = REG_SET_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, DISABLE_CLK_RELINQUISH, val); + REGB_WR32(VPU_HW_BTRS_LNL_VPU_STATUS, val); +} + +bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev) +{ + u32 val; + + if (IVPU_WA(punit_disabled)) + return true; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + val = REGB_RD32(VPU_HW_BTRS_MTL_VPU_STATUS); + + return REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, READY, val) && + REG_TEST_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, val); + } else { + val = REGB_RD32(VPU_HW_BTRS_LNL_VPU_STATUS); + + return REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, READY, val) && + REG_TEST_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, val); + } +} + +int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_POLL_FLD(VPU_HW_BTRS_MTL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); + else + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); +} + +/* Handler for IRQs from Buttress core (irqB) */ +bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK; + bool schedule_recovery = false; + + if (!status) + return false; + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", + REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); + REGB_WR32(VPU_HW_BTRS_MTL_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, status)) { + u32 ufi_log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, ufi_log)); + REGB_WR32(VPU_HW_BTRS_MTL_UFI_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + /* This must be done after interrupts are cleared at the source. */ + if (IVPU_WA(interrupt_clear_with_0)) + /* + * Writing 1 triggers an interrupt, so we can't perform read update write. + * Clear local interrupt status by writing 0 to all bits. 
+ */ + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, 0x0); + else + REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, status); + + if (schedule_recovery) + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); + + return true; +} + +/* Handler for IRQs from Buttress core (irqB) */ +bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK; + bool schedule_recovery = false; + + if (!status) + return false; + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, status)) { + ivpu_dbg(vdev, IRQ, "Survivability IRQ\n"); + if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_DCT)) + ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1), + REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2)); + REGB_WR32(VPU_HW_BTRS_LNL_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, status)) { + ivpu_err(vdev, "CFI0_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG)); + REGB_WR32(VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, status)) { + ivpu_err(vdev, "CFI1_ERR 0x%08x", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG)); + REGB_WR32(VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, status)) { + ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH)); + REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, status)) { + ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x", + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW), + REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH)); + REGB_WR32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR, 0x1); + schedule_recovery = true; + } + + /* This must be done after interrupts are cleared at the source. */ + REGB_WR32(VPU_HW_BTRS_LNL_INTERRUPT_STAT, status); + + if (schedule_recovery) + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); + + return true; +} + +int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable) +{ + u32 val = REGB_RD32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW); + u32 cmd = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, CMD, val); + u32 param1 = REG_GET_FLD(VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW, PARAM1, val); + + if (cmd != DCT_REQ) { + ivpu_err_ratelimited(vdev, "Unsupported PCODE command: 0x%x\n", cmd); + return -EBADR; + } + + switch (param1) { + case DCT_ENABLE: + *enable = true; + return 0; + case DCT_DISABLE: + *enable = false; + return 0; + default: + ivpu_err_ratelimited(vdev, "Invalid PARAM1 value: %u\n", param1); + return -EINVAL; + } +} + +void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 active_percent) +{ + u32 val = 0; + u32 cmd = enable ? 
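
The PCODE mailbox used by the DCT request/status helpers packs a command byte and up to three parameter bytes into byte lanes of a single 32-bit register, which is what the CMD/PARAM1/PARAM2 field accessors express. A small self-contained sketch of that packing; the DCT_REQ opcode value below is hypothetical, the real constant is defined outside the quoted hunks.

#include <stdint.h>
#include <stdio.h>

/* Byte-lane accessors matching the GENMASK(7, 0) / (15, 8) / (23, 16)
 * field layout of the mailbox registers. */
#define MBOX_CMD(v)    (((v) >> 0) & 0xff)
#define MBOX_PARAM1(v) (((v) >> 8) & 0xff)
#define MBOX_PARAM2(v) (((v) >> 16) & 0xff)

static uint32_t mbox_pack(uint8_t cmd, uint8_t p1, uint8_t p2)
{
	return (uint32_t)cmd | ((uint32_t)p1 << 8) | ((uint32_t)p2 << 16);
}

int main(void)
{
	/* hypothetical DCT_REQ = 0x12, enable = 1, active_percent = 15 */
	uint32_t v = mbox_pack(0x12, 1, 15);

	printf("raw=0x%08x cmd=0x%02x param1=%u param2=%u\n",
	       (unsigned)v, (unsigned)MBOX_CMD(v),
	       (unsigned)MBOX_PARAM1(v), (unsigned)MBOX_PARAM2(v));
	return 0;
}
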
DCT_ENABLE : DCT_DISABLE; + + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, CMD, DCT_REQ, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM1, cmd, val); + val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); + + REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); +} + +static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) +{ + u32 pll_clock = PLL_REF_CLK_FREQ * ratio; + u32 cpu_clock; + + if ((config & 0xff) == MTL_PLL_RATIO_4_3) + cpu_clock = pll_clock * 2 / 4; + else + cpu_clock = pll_clock * 2 / 5; + + return cpu_clock; +} + +u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +{ + struct ivpu_hw_info *hw = vdev->hw; + + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_freq_mtl(ratio, hw->config); + else + return PLL_RATIO_TO_FREQ(ratio); +} + +static u32 pll_freq_get_mtl(struct ivpu_device *vdev) +{ + u32 pll_curr_ratio; + + pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); + pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; + + if (!ivpu_is_silicon(vdev)) + return PLL_SIMULATION_FREQ; + + return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); +} + +static u32 pll_freq_get_lnl(struct ivpu_device *vdev) +{ + u32 pll_curr_ratio; + + pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); + pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; + + return PLL_RATIO_TO_FREQ(pll_curr_ratio); +} + +u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_freq_get_mtl(vdev); + else + return pll_freq_get_lnl(vdev); +} + +u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET); +} + +u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE); +} + +u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return REGB_RD32(VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE); + else + return REGB_RD32(VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE); +} + +void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1); + else + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1); +} + +void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0); + else + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0); +} + +void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, (u32)(~BTRS_MTL_IRQ_MASK)); + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x0); + } else { + REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, (u32)(~BTRS_LNL_IRQ_MASK)); + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x0); + } +} + +void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) { + REGB_WR32(VPU_HW_BTRS_MTL_GLOBAL_INT_MASK, 0x1); + REGB_WR32(VPU_HW_BTRS_MTL_LOCAL_INT_MASK, BTRS_IRQ_DISABLE_MASK); + } else { + REGB_WR32(VPU_HW_BTRS_LNL_GLOBAL_INT_MASK, 0x1); + REGB_WR32(VPU_HW_BTRS_LNL_LOCAL_INT_MASK, 
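
On MTL the CPU clock is derived from the PLL output rather than read directly: the PLL runs at PLL_REF_CLK_FREQ * ratio and the CPU gets 2/4 or 2/5 of that depending on the workpoint config, as pll_ratio_to_freq_mtl() above shows. A worked example, assuming a 50 MHz reference clock; the actual PLL_REF_CLK_FREQ definition lives outside the quoted hunks.

#include <stdint.h>
#include <stdio.h>

#define PLL_REF_CLK_FREQ 50000000u   /* assumed 50 MHz reference */

static uint32_t mtl_cpu_clock(uint32_t ratio, int ratio_4_3)
{
	uint32_t pll_clock = PLL_REF_CLK_FREQ * ratio;

	/* CPU clock is a fixed fraction of the PLL clock: 2/4 or 2/5. */
	return ratio_4_3 ? pll_clock * 2 / 4 : pll_clock * 2 / 5;
}

int main(void)
{
	/* ratio 38 -> 1.9 GHz PLL -> 950 MHz (2/4) or 760 MHz (2/5) */
	printf("4/3 config: %u Hz\n", (unsigned)mtl_cpu_clock(38, 1));
	printf("5/3 config: %u Hz\n", (unsigned)mtl_cpu_clock(38, 0));
	return 0;
}
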
BTRS_IRQ_DISABLE_MASK);
+	}
+}
+
+static void diagnose_failure_mtl(struct ivpu_device *vdev)
+{
+	u32 reg = REGB_RD32(VPU_HW_BTRS_MTL_INTERRUPT_STAT) & BTRS_MTL_IRQ_MASK;
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, reg))
+		ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, UFI_ERR, reg)) {
+		u32 log = REGB_RD32(VPU_HW_BTRS_MTL_UFI_ERR_LOG);
+
+		ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx",
+			 log, REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, OPCODE, log),
+			 REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, AXI_ID, log),
+			 REG_GET_FLD(VPU_HW_BTRS_MTL_UFI_ERR_LOG, CQ_ID, log));
+	}
+}
+
+static void diagnose_failure_lnl(struct ivpu_device *vdev)
+{
+	u32 reg = REGB_RD32(VPU_HW_BTRS_LNL_INTERRUPT_STAT) & BTRS_LNL_IRQ_MASK;
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, reg)) {
+		ivpu_err(vdev, "ATS_ERR_LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
+			 REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG1),
+			 REGB_RD32(VPU_HW_BTRS_LNL_ATS_ERR_LOG2));
+	}
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI0_ERR, reg))
+		ivpu_err(vdev, "CFI0_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI0_ERR_LOG));
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, CFI1_ERR, reg))
+		ivpu_err(vdev, "CFI1_ERR_LOG 0x%08x\n", REGB_RD32(VPU_HW_BTRS_LNL_CFI1_ERR_LOG));
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR0_ERR, reg))
+		ivpu_err(vdev, "IMR_ERR_CFI0 LOW: 0x%08x HIGH: 0x%08x\n",
+			 REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW),
+			 REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH));
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, IMR1_ERR, reg))
+		ivpu_err(vdev, "IMR_ERR_CFI1 LOW: 0x%08x HIGH: 0x%08x\n",
+			 REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW),
+			 REGB_RD32(VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH));
+
+	if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, SURV_ERR, reg))
+		ivpu_err(vdev, "Survivability IRQ\n");
+}
+
+void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev)
+{
+	if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
+		return diagnose_failure_mtl(vdev);
+	else
+		return diagnose_failure_lnl(vdev);
+}
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h
new file mode 100644
index 000000000000..04f14f50fed6
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_HW_BTRS_H__
+#define __IVPU_HW_BTRS_H__
+
+#include "ivpu_drv.h"
+#include "ivpu_hw_btrs_lnl_reg.h"
+#include "ivpu_hw_btrs_mtl_reg.h"
+#include "ivpu_hw_reg_io.h"
+
+#define PLL_PROFILING_FREQ_DEFAULT 38400000
+#define PLL_PROFILING_FREQ_HIGH 400000000
+#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
+
+#define DCT_DEFAULT_ACTIVE_PERCENT 15u
+#define DCT_PERIOD_US 35300u
+
+int ivpu_hw_btrs_info_init(struct ivpu_device *vdev);
+void ivpu_hw_btrs_freq_ratios_init(struct ivpu_device *vdev);
+int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev);
+int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable);
+int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev);
+int ivpu_hw_btrs_d0i3_enable(struct ivpu_device *vdev);
+int ivpu_hw_btrs_d0i3_disable(struct ivpu_device *vdev);
+void ivpu_hw_btrs_set_port_arbitration_weights_lnl(struct ivpu_device *vdev);
+bool ivpu_hw_btrs_is_idle(struct ivpu_device *vdev);
+int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev);
+int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
+void
ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); +void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); +void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); +bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); +int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); +void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); +u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); +u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); +void ivpu_hw_btrs_global_int_enable(struct ivpu_device *vdev); +void ivpu_hw_btrs_global_int_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_btrs_irq_disable(struct ivpu_device *vdev); +void ivpu_hw_btrs_diagnose_failure(struct ivpu_device *vdev); + +#endif /* __IVPU_HW_BTRS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h new file mode 100644 index 000000000000..fc51f3098f97 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs_lnl_reg.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_LNL_REG_H__ +#define __IVPU_HW_BTRS_LNL_REG_H__ + +#include <linux/bits.h> + +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT 0x00000000u +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI0_ERR_MASK BIT_MASK(2) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_CFI1_ERR_MASK BIT_MASK(3) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR0_ERR_MASK BIT_MASK(4) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_IMR1_ERR_MASK BIT_MASK(5) +#define VPU_HW_BTRS_LNL_INTERRUPT_STAT_SURV_ERR_MASK BIT_MASK(6) + +#define VPU_HW_BTRS_LNL_LOCAL_INT_MASK 0x00000004u +#define VPU_HW_BTRS_LNL_GLOBAL_INT_MASK 0x00000008u + +#define VPU_HW_BTRS_LNL_HM_ATS 0x0000000cu + +#define VPU_HW_BTRS_LNL_ATS_ERR_LOG1 0x00000010u +#define VPU_HW_BTRS_LNL_ATS_ERR_LOG2 0x00000014u +#define VPU_HW_BTRS_LNL_ATS_ERR_CLEAR 0x00000018u + +#define VPU_HW_BTRS_LNL_CFI0_ERR_LOG 0x0000001cu +#define VPU_HW_BTRS_LNL_CFI0_ERR_CLEAR 0x00000020u + +#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS_ATS 0x00000024u + +#define VPU_HW_BTRS_LNL_CFI1_ERR_LOG 0x00000040u +#define VPU_HW_BTRS_LNL_CFI1_ERR_CLEAR 0x00000044u + +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_LOW 0x00000048u +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_HIGH 0x0000004cu +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI0_CLEAR 0x00000050u + +#define VPU_HW_BTRS_LNL_PORT_ARBITRATION_WEIGHTS 0x00000054u + +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_LOW 0x00000058u +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_HIGH 0x0000005cu +#define VPU_HW_BTRS_LNL_IMR_ERR_CFI1_CLEAR 0x00000060u + +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS 0x00000070u +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_CMD_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM1_MASK GENMASK(15, 8) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM2_MASK GENMASK(23, 16) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS_PARAM3_MASK GENMASK(31, 24) + +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW 0x00000074u +#define 
VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_CMD_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM1_MASK GENMASK(15, 8) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM2_MASK GENMASK(23, 16) +#define VPU_HW_BTRS_LNL_PCODE_MAILBOX_SHADOW_PARAM3_MASK GENMASK(31, 24) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0 0x00000130u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1 0x00000134u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2 0x00000138u +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_LNL_WP_REQ_PAYLOAD2_CDYN_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_LNL_WP_REQ_CMD 0x0000013cu +#define VPU_HW_BTRS_LNL_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_LNL_PLL_FREQ 0x00000148u +#define VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_LNL_TILE_FUSE 0x00000150u +#define VPU_HW_BTRS_LNL_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_TILE_FUSE_CONFIG_MASK GENMASK(6, 1) + +#define VPU_HW_BTRS_LNL_VPU_STATUS 0x00000154u +#define VPU_HW_BTRS_LNL_VPU_STATUS_READY_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_VPU_STATUS_IDLE_MASK BIT_MASK(1) +#define VPU_HW_BTRS_LNL_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2) +#define VPU_HW_BTRS_LNL_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6) +#define VPU_HW_BTRS_LNL_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) +#define VPU_HW_BTRS_LNL_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) +#define VPU_HW_BTRS_LNL_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) + +#define VPU_HW_BTRS_LNL_IP_RESET 0x00000160u +#define VPU_HW_BTRS_LNL_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_LNL_D0I3_CONTROL 0x00000164u +#define VPU_HW_BTRS_LNL_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_HW_BTRS_LNL_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_OFFSET 0x00000168u +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_SIZE 0x0000016cu +#define VPU_HW_BTRS_LNL_VPU_TELEMETRY_ENABLE 0x00000170u + +#define VPU_HW_BTRS_LNL_FMIN_FUSE 0x00000174u +#define VPU_HW_BTRS_LNL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_LNL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define VPU_HW_BTRS_LNL_FMAX_FUSE 0x00000178u +#define VPU_HW_BTRS_LNL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#endif /* __IVPU_HW_BTRS_LNL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h new file mode 100644 index 000000000000..e93d539e066f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_btrs_mtl_reg.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_BTRS_MTL_REG_H__ +#define __IVPU_HW_BTRS_MTL_REG_H__ + +#include <linux/bits.h> + +#define VPU_HW_BTRS_MTL_INTERRUPT_TYPE 0x00000000u + +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT 0x00000004u +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define VPU_HW_BTRS_MTL_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0 0x00000008u +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1 0x0000000cu 
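
Every register in these headers is described the same way: an offset constant plus GENMASK()/BIT_MASK() field masks that the REG_GET_FLD()/REG_SET_FLD_NUM() helpers consume. A user-space approximation of how a contiguous-bit mask is built and used; the kernel versions live in <linux/bits.h> and ivpu_hw_reg_io.h, so this is only a sketch of the idea.

#include <stdint.h>
#include <stdio.h>

/* 32-bit GENMASK(h, l): bits h..l inclusive set. */
#define GENMASK32(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

static uint32_t field_get(uint32_t mask, uint32_t val)
{
	return (val & mask) / (mask & -mask);   /* divide by mask's lowest bit */
}

static uint32_t field_set(uint32_t mask, uint32_t val, uint32_t field)
{
	return (val & ~mask) | (field * (mask & -mask));
}

int main(void)
{
	uint32_t min_ratio_mask = GENMASK32(15, 0);   /* like MIN_RATIO above */
	uint32_t max_ratio_mask = GENMASK32(31, 16);  /* like MAX_RATIO above */
	uint32_t payload = 0;

	payload = field_set(min_ratio_mask, payload, 20);
	payload = field_set(max_ratio_mask, payload, 40);
	printf("payload=0x%08x min=%u max=%u\n", (unsigned)payload,
	       (unsigned)field_get(min_ratio_mask, payload),
	       (unsigned)field_get(max_ratio_mask, payload));
	return 0;
}
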
+#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2 0x00000010u +#define VPU_HW_BTRS_MTL_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_WP_REQ_CMD 0x00000014u +#define VPU_HW_BTRS_MTL_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_MTL_WP_DOWNLOAD 0x00000018u +#define VPU_HW_BTRS_MTL_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_CURRENT_PLL 0x0000001cu +#define VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) + +#define VPU_HW_BTRS_MTL_PLL_ENABLE 0x00000020u + +#define VPU_HW_BTRS_MTL_FMIN_FUSE 0x00000024u +#define VPU_HW_BTRS_MTL_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_HW_BTRS_MTL_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define VPU_HW_BTRS_MTL_FMAX_FUSE 0x00000028u +#define VPU_HW_BTRS_MTL_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#define VPU_HW_BTRS_MTL_TILE_FUSE 0x0000002cu +#define VPU_HW_BTRS_MTL_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_TILE_FUSE_SKU_MASK GENMASK(3, 2) + +#define VPU_HW_BTRS_MTL_LOCAL_INT_MASK 0x00000030u +#define VPU_HW_BTRS_MTL_GLOBAL_INT_MASK 0x00000034u + +#define VPU_HW_BTRS_MTL_PLL_STATUS 0x00000040u +#define VPU_HW_BTRS_MTL_PLL_STATUS_LOCK_MASK BIT_MASK(1) + +#define VPU_HW_BTRS_MTL_VPU_STATUS 0x00000044u +#define VPU_HW_BTRS_MTL_VPU_STATUS_READY_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_VPU_STATUS_IDLE_MASK BIT_MASK(1) + +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL 0x00000060u +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_HW_BTRS_MTL_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define VPU_HW_BTRS_MTL_VPU_IP_RESET 0x00000050u +#define VPU_HW_BTRS_MTL_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_OFFSET 0x00000080u +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_SIZE 0x00000084u +#define VPU_HW_BTRS_MTL_VPU_TELEMETRY_ENABLE 0x00000088u + +#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_0 0x000000a0u +#define VPU_HW_BTRS_MTL_ATS_ERR_LOG_1 0x000000a4u +#define VPU_HW_BTRS_MTL_ATS_ERR_CLEAR 0x000000a8u + +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG 0x000000b0u +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) +#define VPU_HW_BTRS_MTL_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) + +#define VPU_HW_BTRS_MTL_UFI_ERR_CLEAR 0x000000b4u + +#endif /* __IVPU_HW_BTRS_MTL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c new file mode 100644 index 000000000000..029dd065614b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -0,0 +1,1188 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_hw.h" +#include "ivpu_hw_37xx_reg.h" +#include "ivpu_hw_40xx_reg.h" +#include "ivpu_hw_btrs.h" +#include "ivpu_hw_ip.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_mmu.h" +#include "ivpu_pm.h" + +#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) + +#define TIM_SAFE_ENABLE 0xf1d0dead +#define TIM_WATCHDOG_RESET_VALUE 0xffffffff + +#define ICB_0_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, 
CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK_37XX ((((u64)ICB_1_IRQ_MASK_37XX) << 32) | ICB_0_IRQ_MASK_37XX) + +#define ICB_0_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(VPU_40XX_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK_40XX ((((u64)ICB_1_IRQ_MASK_40XX) << 32) | ICB_0_IRQ_MASK_40XX) + +#define ITF_FIREWALL_VIOLATION_MASK_37XX ((REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +#define ITF_FIREWALL_VIOLATION_MASK_40XX ((REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +static int wait_for_ip_bar(struct ivpu_device *vdev) +{ + return REGV_POLL_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, AON, 0, 100); +} + +static void host_ss_rst_clr(struct ivpu_device *vdev) +{ + u32 val = 0; + + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_CLR, MSS_MAS, val); + + REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_CLR, val); +} + +static int host_ss_noc_qreqn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qreqn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qreqn_check_37xx(vdev, exp_val); + else + return host_ss_noc_qreqn_check_40xx(vdev, exp_val); +} + +static int host_ss_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, 
exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qacceptn_check_37xx(vdev, exp_val); + else + return host_ss_noc_qacceptn_check_40xx(vdev, exp_val); +} + +static int host_ss_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_37XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int host_ss_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return host_ss_noc_qdeny_check_37xx(vdev, exp_val); + else + return host_ss_noc_qdeny_check_40xx(vdev, exp_val); +} + +static int top_noc_qrenqn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qrenqn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qrenqn_check_37xx(vdev, exp_val); + else + return top_noc_qrenqn_check_40xx(vdev, exp_val); +} + +int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev) +{ + int ret; + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + ret = wait_for_ip_bar(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for NPU IP bar\n"); + return ret; + } + host_ss_rst_clr(vdev); + } + + ret = host_ss_noc_qreqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qreqn check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qacceptn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check %d\n", ret); + + return ret; +} + +static void idle_gen_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, EN, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_VPU_IDLE_GEN, val); +} + +static void idle_gen_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_IDLE_GEN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_IDLE_GEN, EN, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_IDLE_GEN, val); +} + +void ivpu_hw_ip_idle_gen_enable(struct 
ivpu_device *vdev)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		idle_gen_drive_37xx(vdev, true);
+	else
+		idle_gen_drive_40xx(vdev, true);
+}
+
+void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		idle_gen_drive_37xx(vdev, false);
+	else
+		idle_gen_drive_40xx(vdev, false);
+}
+
+static void
+pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status)
+{
+	u32 val;
+
+	val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY);
+	val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val);
+	val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val);
+	val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val);
+	REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val);
+
+	val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY);
+	val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, STATUS_DLY, status, val);
+	REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY, val);
+}
+
+static void pwr_island_trickle_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val);
+
+	REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
+}
+
+static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
+
+	REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
+}
+
+static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val);
+
+	REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
+}
+
+static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0);
+
+	if (enable)
+		val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
+	else
+		val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
+
+	REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
+}
+
+static void pwr_island_enable(struct ivpu_device *vdev)
+{
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+		pwr_island_trickle_drive_37xx(vdev, true);
+		ndelay(500);
+		pwr_island_drive_37xx(vdev, true);
+	} else {
+		pwr_island_trickle_drive_40xx(vdev, true);
+		ndelay(500);
+		pwr_island_drive_40xx(vdev, true);
+	}
+}
+
+static int wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
+{
+	if (IVPU_WA(punit_disabled))
+		return 0;
+
+	if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
+		return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, exp_val,
+				     PWR_ISLAND_STATUS_TIMEOUT_US);
+	else
+		return REGV_POLL_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_STATUS0, CSS_CPU, exp_val,
+				     PWR_ISLAND_STATUS_TIMEOUT_US);
+}
+
+static void pwr_island_isolation_drive_37xx(struct ivpu_device *vdev, bool enable)
+{
+	u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0);
+
+	if (enable)
+		val =
REG_SET_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_PWR_ISO_EN0, val); +} + +static void pwr_island_isolation_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, CSS_CPU, val); + + REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISO_EN0, val); +} + +static void pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + pwr_island_isolation_drive_37xx(vdev, enable); + else + pwr_island_isolation_drive_40xx(vdev, enable); +} + +static void pwr_island_isolation_disable(struct ivpu_device *vdev) +{ + pwr_island_isolation_drive(vdev, false); +} + +static void host_ss_clk_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_CLK_SET); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } + + REGV_WR32(VPU_37XX_HOST_SS_CPR_CLK_SET, val); +} + +static void host_ss_clk_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_CLK_EN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, TOP_NOC, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, DSS_MAS, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_CLK_EN, CSS_MAS, val); + } + + REGV_WR32(VPU_40XX_HOST_SS_CPR_CLK_EN, val); +} + +static void host_ss_clk_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_clk_drive_37xx(vdev, enable); + else + host_ss_clk_drive_40xx(vdev, enable); +} + +static void host_ss_clk_enable(struct ivpu_device *vdev) +{ + host_ss_clk_drive(vdev, true); +} + +static void host_ss_rst_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_CPR_RST_SET); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_SET_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_CLR_FLD(VPU_37XX_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } + + REGV_WR32(VPU_37XX_HOST_SS_CPR_RST_SET, val); +} + +static void host_ss_rst_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_CPR_RST_EN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); + val = REG_SET_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); + } else { + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, TOP_NOC, val); + val = REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, DSS_MAS, val); + val = 
REG_CLR_FLD(VPU_40XX_HOST_SS_CPR_RST_EN, CSS_MAS, val); + } + + REGV_WR32(VPU_40XX_HOST_SS_CPR_RST_EN, val); +} + +static void host_ss_rst_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_rst_drive_37xx(vdev, enable); + else + host_ss_rst_drive_40xx(vdev, enable); +} + +static void host_ss_rst_enable(struct ivpu_device *vdev) +{ + host_ss_rst_drive(vdev, true); +} + +static void host_ss_noc_qreqn_top_socmmio_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(VPU_37XX_HOST_SS_NOC_QREQN, val); +} + +static void host_ss_noc_qreqn_top_socmmio_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_SS_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(VPU_40XX_HOST_SS_NOC_QREQN, val); +} + +static void host_ss_noc_qreqn_top_socmmio_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + host_ss_noc_qreqn_top_socmmio_drive_37xx(vdev, enable); + else + host_ss_noc_qreqn_top_socmmio_drive_40xx(vdev, enable); +} + +static int host_ss_axi_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + + host_ss_noc_qreqn_top_socmmio_drive(vdev, enable); + + ret = host_ss_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed HOST SS NOC QACCEPTN check: %d\n", ret); + return ret; + } + + ret = host_ss_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed HOST SS NOC QDENY check: %d\n", ret); + + return ret; +} + +static void top_noc_qreqn_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QREQN); + + if (enable) { + val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_40XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + + REGV_WR32(VPU_40XX_TOP_NOC_QREQN, val); +} + +static void top_noc_qreqn_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); + + if (enable) { + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + + REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val); +} + +static void top_noc_qreqn_drive(struct ivpu_device *vdev, bool enable) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + top_noc_qreqn_drive_37xx(vdev, enable); + else + top_noc_qreqn_drive_40xx(vdev, enable); +} + +int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev) +{ + return host_ss_axi_drive(vdev, true); +} + +static int top_noc_qacceptn_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QACCEPTN); + + if 
(!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qacceptn_check_37xx(vdev, exp_val); + else + return top_noc_qacceptn_check_40xx(vdev, exp_val); +} + +static int top_noc_qdeny_check_37xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_40XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return top_noc_qdeny_check_37xx(vdev, exp_val); + else + return top_noc_qdeny_check_40xx(vdev, exp_val); +} + +static int top_noc_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + + top_noc_qreqn_drive(vdev, enable); + + ret = top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed TOP NOC QACCEPTN check: %d\n", ret); + return ret; + } + + ret = top_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed TOP NOC QDENY check: %d\n", ret); + + return ret; +} + +int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev) +{ + return top_noc_drive(vdev, true); +} + +static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE); + + if (enable) + val = REG_SET_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + else + val = REG_CLR_FLD(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + + REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val); +} + +static void pwr_island_delay_set(struct ivpu_device *vdev) +{ + bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH; + u32 post, post1, post2, status; + + if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX) + return; + + switch (ivpu_device_id(vdev)) { + case PCI_DEVICE_ID_PTL_P: + post = high ? 18 : 0; + post1 = 0; + post2 = 0; + status = high ? 
46 : 3; + break; + + default: + dump_stack(); + ivpu_err(vdev, "Unknown device ID\n"); + return; + } + + pwr_island_delay_set_50xx(vdev, post, post1, post2, status); +} + +int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev) +{ + int ret; + + pwr_island_delay_set(vdev); + pwr_island_enable(vdev); + + ret = wait_for_pwr_island_status(vdev, 0x1); + if (ret) { + ivpu_err(vdev, "Timed out waiting for power island status\n"); + return ret; + } + + ret = top_noc_qreqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed TOP NOC QREQN check %d\n", ret); + return ret; + } + + host_ss_clk_enable(vdev); + pwr_island_isolation_disable(vdev); + host_ss_rst_enable(vdev); + + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + dpu_active_drive_37xx(vdev, true); + + return ret; +} + +u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT); + else + return REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT); +} + +static void ivpu_hw_ip_snoop_disable_37xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + + if (ivpu_is_force_snoop_enabled(vdev)) + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + else + val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + + REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +static void ivpu_hw_ip_snoop_disable_40xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); + + if (ivpu_is_force_snoop_enabled(vdev)) + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); + else + val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); + + REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ivpu_hw_ip_snoop_disable_37xx(vdev); + else + return ivpu_hw_ip_snoop_disable_40xx(vdev); +} + +static void ivpu_hw_ip_tbu_mmu_enable_37xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_37XX_HOST_IF_TBU_MMUSSIDV); + + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_37XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + + REGV_WR32(VPU_37XX_HOST_IF_TBU_MMUSSIDV, val); +} + +static void ivpu_hw_ip_tbu_mmu_enable_40xx(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(VPU_40XX_HOST_IF_TBU_MMUSSIDV); + + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + + REGV_WR32(VPU_40XX_HOST_IF_TBU_MMUSSIDV, val); +} + +void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev) +{ + 
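
ivpu_hw_ip_pwr_domain_enable() above encodes a strict bring-up order for the CPU power island: delays are programmed first, trickle charge limits inrush current before the main island switch closes, status is polled before anything is clocked, and isolation is dropped only between clock enable and reset release. A stub sketch of just that ordering; the function bodies stand in for the register writes and are not part of the patch.

#include <stdio.h>

static void trickle_enable(void) { puts("trickle charge on"); }
static void settle_500ns(void)   { puts("settle ~500 ns"); }
static void island_enable(void)  { puts("main island on"); }
static int  status_poll(void)    { puts("poll island status"); return 0; }
static void clk_enable(void)     { puts("host clocks on"); }
static void isolation_drop(void) { puts("isolation off"); }
static void rst_deassert(void)   { puts("resets deasserted"); }

int main(void)
{
	/* Trickle first to limit inrush; isolation stays up until the
	 * island reports stable power and clocks are running. */
	trickle_enable();
	settle_500ns();
	island_enable();
	if (status_poll())
		return 1;
	clk_enable();
	isolation_drop();
	rst_deassert();
	return 0;
}
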
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ivpu_hw_ip_tbu_mmu_enable_37xx(vdev); + else + return ivpu_hw_ip_tbu_mmu_enable_40xx(vdev); +} + +static int soc_cpu_boot_37xx(struct ivpu_device *vdev) +{ + u32 val; + + val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); + + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = vdev->fw->entry_point >> 9; + REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); + + val = REG_SET_FLD(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume"); + + return 0; +} + +static int cpu_noc_qacceptn_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QACCEPTN, TOP_MMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int cpu_noc_qdeny_check_40xx(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(VPU_40XX_CPU_SS_CPR_NOC_QDENY, TOP_MMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static void cpu_noc_top_mmio_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(VPU_40XX_CPU_SS_CPR_NOC_QREQN); + + if (enable) + val = REG_SET_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); + else + val = REG_CLR_FLD(VPU_40XX_CPU_SS_CPR_NOC_QREQN, TOP_MMIO, val); + REGV_WR32(VPU_40XX_CPU_SS_CPR_NOC_QREQN, val); +} + +static int soc_cpu_drive_40xx(struct ivpu_device *vdev, bool enable) +{ + int ret; + + cpu_noc_top_mmio_drive_40xx(vdev, enable); + + ret = cpu_noc_qacceptn_check_40xx(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = cpu_noc_qdeny_check_40xx(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int soc_cpu_enable(struct ivpu_device *vdev) +{ + return soc_cpu_drive_40xx(vdev, true); +} + +static int soc_cpu_boot_40xx(struct ivpu_device *vdev) +{ + int ret; + u32 val; + u64 val64; + + ret = soc_cpu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable SOC CPU: %d\n", ret); + return ret; + } + + val64 = vdev->fw->entry_point; + val64 <<= ffs(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO_IMAGE_LOCATION_MASK) - 1; + REGV_WR64(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val64); + + val = REGV_RD32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO); + val = REG_SET_FLD(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, DONE, val); + REGV_WR32(VPU_40XX_HOST_SS_VERIFICATION_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + ivpu_fw_is_cold_boot(vdev) ? 
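
Both boot paths pack the firmware entry point into a register field that cannot hold a full byte address: soc_cpu_boot_37xx() stores it in 512-byte units (the >> 9), while soc_cpu_boot_40xx() derives the shift count from the field mask itself with ffs(mask) - 1 instead of hard-coding it. A minimal illustration of the ffs-based placement, using a made-up 8-bit field rather than the real IMAGE_LOCATION mask.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>   /* ffs() */

#define FIELD_MASK 0x0000ff00u   /* illustrative field at bits 15:8 */

static uint32_t place_field(uint32_t field)
{
	/* ffs() returns the 1-based index of the lowest set bit, so the
	 * shift count always tracks the mask definition. */
	return field << (ffs(FIELD_MASK) - 1);
}

int main(void)
{
	printf("0x%02x placed -> 0x%08x\n", 0x2au, (unsigned)place_field(0x2au));
	return 0;
}
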
"cold boot" : "resume"); + + return 0; +} + +int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return soc_cpu_boot_37xx(vdev); + else + return soc_cpu_boot_40xx(vdev); +} + +static void wdt_disable_37xx(struct ivpu_device *vdev) +{ + u32 val; + + /* Enable writing and set non-zero WDT value */ + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + /* Enable writing and disable watchdog timer */ + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0); + + /* Now clear the timeout interrupt */ + val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); +} + +static void wdt_disable_40xx(struct ivpu_device *vdev) +{ + u32 val; + + REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_40XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + REGV_WR32(VPU_40XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_40XX_CPU_SS_TIM_WDOG_EN, 0); + + val = REGV_RD32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val); +} + +void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return wdt_disable_37xx(vdev); + else + return wdt_disable_40xx(vdev); +} + +static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev) +{ + u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev) +{ + u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return ipc_rx_count_get_37xx(vdev); + else + return ipc_rx_count_get_40xx(vdev); +} + +void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_37XX); + REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_37XX); + } else { + REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK_40XX); + REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK_40XX); + } +} + +void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + REGV_WR64(VPU_37XX_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGV_WR32(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, 0x0); + } else { + REGV_WR64(VPU_40XX_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGV_WR32(VPU_40XX_HOST_SS_FW_SOC_IRQ_EN, 0x0ul); + } +} + +static void diagnose_failure_37xx(struct ivpu_device *vdev) +{ + u32 reg = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX; + + if (ipc_rx_count_get_37xx(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); +} + +static void 
diagnose_failure_40xx(struct ivpu_device *vdev) +{ + u32 reg = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX; + + if (ipc_rx_count_get_40xx(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, reg)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, reg)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, reg)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); +} + +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + diagnose_failure_37xx(vdev); + else + diagnose_failure_40xx(vdev); +} + +void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + REGV_WR64(VPU_37XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_37XX); + else + REGV_WR64(VPU_40XX_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK_40XX); +} + +static void irq_wdt_nce_handler(struct ivpu_device *vdev) +{ + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); +} + +static void irq_wdt_mss_handler(struct ivpu_device *vdev) +{ + ivpu_hw_ip_wdt_disable(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); +} + +static void irq_noc_firewall_handler(struct ivpu_device *vdev) +{ + atomic_inc(&vdev->hw->firewall_irq_counter); + + ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n", + atomic_read(&vdev->hw->firewall_irq_counter)); +} + +/* Handler for IRQs from NPU core */ +bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_37XX; + + if (!status) + return false; + + REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) + irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + irq_noc_firewall_handler(vdev); + + return true; +} + +/* Handler for IRQs from NPU core */ +bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK_40XX; + + if (!status) + return false; + + REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, 
CPU_INT_REDIRECT_1_INT, status)) + irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + irq_noc_firewall_handler(vdev); + + return true; +} + +static void db_set_37xx(struct ivpu_device *vdev, u32 db_id) +{ + u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET); + + REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); +} + +static void db_set_40xx(struct ivpu_device *vdev, u32 db_id) +{ + u32 reg_stride = VPU_40XX_CPU_SS_DOORBELL_1 - VPU_40XX_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(VPU_40XX_CPU_SS_DOORBELL_0, SET); + + REGV_WR32I(VPU_40XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); +} + +void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + db_set_37xx(vdev, db_id); + else + db_set_40xx(vdev, db_id); +} + +u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + return REGV_RD32(VPU_37XX_HOST_SS_TIM_IPC_FIFO_ATM); + else + return REGV_RD32(VPU_40XX_HOST_SS_TIM_IPC_FIFO_ATM); +} + +void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) + REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); + else + REGV_WR32(VPU_40XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); +} diff --git a/drivers/accel/ivpu/ivpu_hw_ip.h b/drivers/accel/ivpu/ivpu_hw_ip.h new file mode 100644 index 000000000000..5b1b391aa577 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_ip.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_HW_IP_H__ +#define __IVPU_HW_IP_H__ + +#include "ivpu_drv.h" + +int ivpu_hw_ip_host_ss_configure(struct ivpu_device *vdev); +void ivpu_hw_ip_idle_gen_enable(struct ivpu_device *vdev); +void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev); +int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev); +int ivpu_hw_ip_host_ss_axi_enable(struct ivpu_device *vdev); +int ivpu_hw_ip_top_noc_enable(struct ivpu_device *vdev); +u64 ivpu_hw_ip_read_perf_timer_counter(struct ivpu_device *vdev); +void ivpu_hw_ip_snoop_disable(struct ivpu_device *vdev); +void ivpu_hw_ip_tbu_mmu_enable(struct ivpu_device *vdev); +int ivpu_hw_ip_soc_cpu_boot(struct ivpu_device *vdev); +void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev); +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev); +u32 ivpu_hw_ip_ipc_rx_count_get(struct ivpu_device *vdev); +void ivpu_hw_ip_irq_clear(struct ivpu_device *vdev); +bool ivpu_hw_ip_irq_handler_37xx(struct ivpu_device *vdev, int irq); +bool ivpu_hw_ip_irq_handler_40xx(struct ivpu_device *vdev, int irq); +void ivpu_hw_ip_db_set(struct ivpu_device *vdev, u32 db_id); +u32 ivpu_hw_ip_ipc_rx_addr_get(struct ivpu_device *vdev); +void ivpu_hw_ip_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr); +void ivpu_hw_ip_irq_enable(struct ivpu_device *vdev); +void ivpu_hw_ip_irq_disable(struct ivpu_device *vdev); +void ivpu_hw_ip_diagnose_failure(struct ivpu_device *vdev); +void ivpu_hw_ip_fabric_req_override_enable_50xx(struct ivpu_device *vdev); +void ivpu_hw_ip_fabric_req_override_disable_50xx(struct ivpu_device *vdev); + +#endif /* __IVPU_HW_IP_H__ */ diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index fa66c39b57ec..01ebf88fe6ef 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 
2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/genalloc.h> @@ -15,6 +15,7 @@ #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" #define IPC_MAX_RX_MSG 128 @@ -58,8 +59,8 @@ static void ivpu_ipc_mem_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - ivpu_bo_free_internal(ipc->mem_rx); - ivpu_bo_free_internal(ipc->mem_tx); + ivpu_bo_free(ipc->mem_rx); + ivpu_bo_free(ipc->mem_tx); } static int @@ -129,7 +130,7 @@ static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr) static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr) { - ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr); + ivpu_hw_ipc_tx_set(vdev, vpu_addr); } static void @@ -210,8 +211,7 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr); } -static int -ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) +int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) { struct ivpu_ipc_info *ipc = vdev->ipc; int ret; @@ -228,6 +228,7 @@ ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct v goto unlock; ivpu_ipc_tx(vdev, cons->tx_vpu_addr); + trace_jsm("[tx]", req); unlock: mutex_unlock(&ipc->lock); @@ -279,12 +280,13 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg)); if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { - ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); ret = -EBADMSG; } if (jsm_msg) memcpy(jsm_msg, rx_msg->jsm_msg, size); + trace_jsm("[rx]", rx_msg->jsm_msg); } ivpu_ipc_rx_msg_del(vdev, rx_msg); @@ -292,15 +294,16 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, return ret; } -static int +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms) + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms) { struct ivpu_ipc_consumer cons; int ret; + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); ret = ivpu_ipc_send(vdev, &cons, req); @@ -326,19 +329,21 @@ consumer_del: return ret; } -int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms) +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms) { struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; struct vpu_jsm_msg hb_resp; int ret, hb_ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms); if (ret != -ETIMEDOUT) - return ret; + goto rpm_put; hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, @@ -346,21 +351,33 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r if (hb_ret 
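With this change ivpu_ipc_send_receive() owns the runtime-PM reference itself: it wakes the device before sending and drops the reference on every exit path, so callers can no longer issue IPC to a suspended NPU. The wrap pattern, reduced to plain C with stand-in functions:

/* Stand-ins for ivpu_rpm_get()/ivpu_rpm_put() and the raw send path. */
int rpm_get(void);               /* wakes the device, takes a reference */
void rpm_put(void);              /* allows runtime suspend again */
int do_send_receive(int msg);

static int send_receive(int msg)
{
        int ret = rpm_get();

        if (ret < 0)
                return ret;      /* device could not be woken */

        ret = do_send_receive(msg);

        rpm_put();               /* dropped on every path, even on error */
        return ret;
}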
== -ETIMEDOUT) ivpu_pm_trigger_recovery(vdev, "IPC timeout"); +rpm_put: + ivpu_rpm_put(vdev); return ret; } -int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms) +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms) { + struct ivpu_ipc_consumer cons; int ret; ret = ivpu_rpm_get(vdev); if (ret < 0) return ret; - ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); + + ret = ivpu_ipc_send(vdev, &cons, req); + if (ret) { + ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); + goto consumer_del; + } + + msleep(timeout_ms); +consumer_del: + ivpu_ipc_consumer_del(vdev, &cons); ivpu_rpm_put(vdev); return ret; } @@ -378,7 +395,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons return false; } -void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) +void ivpu_ipc_irq_handler(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_consumer *cons; @@ -392,8 +409,8 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) * Driver needs to purge all messages from IPC FIFO to clear IPC interrupt. * Without purge IPC FIFO to 0 next IPC interrupts won't be generated. */ - while (ivpu_hw_reg_ipc_rx_count_get(vdev)) { - vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); + while (ivpu_hw_ipc_rx_count_get(vdev)) { + vpu_addr = ivpu_hw_ipc_rx_addr_get(vdev); if (vpu_addr == REG_IO_ERROR) { ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n"); return; @@ -442,11 +459,12 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) } } - if (wake_thread) - *wake_thread = !list_empty(&ipc->cb_msg_list); + if (!list_empty(&ipc->cb_msg_list)) + if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_IPC)) + ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); } -irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) +void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg, *r; @@ -462,8 +480,6 @@ irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); ivpu_ipc_rx_msg_del(vdev, rx_msg); } - - return IRQ_HANDLED; } int ivpu_ipc_init(struct ivpu_device *vdev) @@ -471,13 +487,13 @@ int ivpu_ipc_init(struct ivpu_device *vdev) struct ivpu_ipc_info *ipc = vdev->ipc; int ret; - ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + ipc->mem_tx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!ipc->mem_tx) { ivpu_err(vdev, "Failed to allocate mem_tx\n"); return -ENOMEM; } - ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + ipc->mem_rx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!ipc->mem_rx) { ivpu_err(vdev, "Failed to allocate mem_rx\n"); ret = -ENOMEM; @@ -501,14 +517,18 @@ int ivpu_ipc_init(struct ivpu_device *vdev) spin_lock_init(&ipc->cons_lock); INIT_LIST_HEAD(&ipc->cons_list); INIT_LIST_HEAD(&ipc->cb_msg_list); - drmm_mutex_init(&vdev->drm, &ipc->lock); + ret = drmm_mutex_init(&vdev->drm, &ipc->lock); + if (ret) { + ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret); + goto err_free_rx; + } ivpu_ipc_reset(vdev); return 0; err_free_rx: - 
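The timeout path above distinguishes a slow response from dead firmware: only when a follow-up QUERY_ENGINE_HB also times out is recovery triggered; the original caller still sees its own -ETIMEDOUT either way. A sketch of that decision in plain C (function names are stand-ins for the driver helpers):

#include <errno.h>

int send_receive(int msg);       /* returns 0 or a negative errno */
int query_heartbeat(void);
void trigger_recovery(const char *reason);

static int send_receive_with_hb_check(int msg)
{
        int ret = send_receive(msg);

        if (ret != -ETIMEDOUT)
                return ret;      /* success, or a non-timeout error */

        /* Request timed out: is the firmware still alive at all? */
        if (query_heartbeat() == -ETIMEDOUT)
                trigger_recovery("IPC timeout");

        return ret;
}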
ivpu_bo_free_internal(ipc->mem_rx); + ivpu_bo_free(ipc->mem_rx); err_free_tx: - ivpu_bo_free_internal(ipc->mem_tx); + ivpu_bo_free(ipc->mem_tx); return ret; } @@ -516,7 +536,6 @@ void ivpu_ipc_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - drm_WARN_ON(&vdev->drm, ipc->on); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list)); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index 40ca3cc4e61f..b4dfb504679b 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_IPC_H__ @@ -89,22 +89,25 @@ void ivpu_ipc_enable(struct ivpu_device *vdev); void ivpu_ipc_disable(struct ivpu_device *vdev); void ivpu_ipc_reset(struct ivpu_device *vdev); -void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread); -irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev); void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel, ivpu_ipc_rx_callback_t callback); void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons); +int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct vpu_jsm_msg *req); int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms); - -int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms); #endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index e70cfb859339..7149312f16e1 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <drm/drm_file.h> @@ -12,50 +12,106 @@ #include <uapi/drm/ivpu_accel.h> #include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" #define CMD_BUF_IDX 0 -#define JOB_ID_JOB_MASK GENMASK(7, 0) -#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535 static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) { - ivpu_hw_reg_db_set(vdev, cmdq->db_id); + ivpu_hw_db_set(vdev, cmdq->db_id); } -static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_preemption_buffers_create(struct ivpu_device 
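The ivpu_ipc_init() fix above routes the new drmm_mutex_init() failure through the same error labels used for allocation failures, unwinding only what was already set up. The goto-unwind idiom as a self-contained plain-C model:

#include <stdlib.h>

int mutex_init_checked(void);    /* stand-in for drmm_mutex_init() */

struct ipc { void *mem_tx, *mem_rx; };

static int ipc_init(struct ipc *ipc)
{
        int ret = -1;

        ipc->mem_tx = malloc(16384);
        if (!ipc->mem_tx)
                return -1;

        ipc->mem_rx = malloc(16384);
        if (!ipc->mem_rx)
                goto err_free_tx;

        ret = mutex_init_checked();
        if (ret)
                goto err_free_rx;        /* new: init failure unwinds too */

        return 0;

err_free_rx:
        free(ipc->mem_rx);
err_free_tx:
        free(ipc->mem_tx);
        return ret;
}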
*vdev, + struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE); + u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE); + + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW || + ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE) + return 0; + + cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user, + primary_size, DRM_IVPU_BO_WC); + if (!cmdq->primary_preempt_buf) { + ivpu_err(vdev, "Failed to create primary preemption buffer\n"); + return -ENOMEM; + } + + cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma, + secondary_size, DRM_IVPU_BO_WC); + if (!cmdq->secondary_preempt_buf) { + ivpu_err(vdev, "Failed to create secondary preemption buffer\n"); + goto err_free_primary; + } + + return 0; + +err_free_primary: + ivpu_bo_free(cmdq->primary_preempt_buf); + cmdq->primary_preempt_buf = NULL; + return -ENOMEM; +} + +static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, + struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + return; + + if (cmdq->primary_preempt_buf) + ivpu_bo_free(cmdq->primary_preempt_buf); + if (cmdq->secondary_preempt_buf) + ivpu_bo_free(cmdq->secondary_preempt_buf); +} + +static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { struct ivpu_device *vdev = file_priv->vdev; - struct vpu_job_queue_header *jobq_header; struct ivpu_cmdq *cmdq; + int ret; cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); if (!cmdq) return NULL; - cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); - if (!cmdq->mem) - goto cmdq_free; + ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, + GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); + goto err_free_cmdq; + } - cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); - cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) / - sizeof(struct vpu_job_queue_entry)); + ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, + &file_priv->cmdq_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret); + goto err_erase_db_xa; + } - cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); - jobq_header = &cmdq->jobq->header; - jobq_header->engine_idx = engine; - jobq_header->head = 0; - jobq_header->tail = 0; - wmb(); /* Flush WC buffer for jobq->header */ + cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); + if (!cmdq->mem) + goto err_erase_cmdq_xa; + + ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq); + if (ret) + ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n"); return cmdq; -cmdq_free: +err_erase_cmdq_xa: + xa_erase(&file_priv->cmdq_xa, cmdq->id); +err_erase_db_xa: + xa_erase(&vdev->db_xa, cmdq->db_id); +err_free_cmdq: kfree(cmdq); return NULL; } @@ -65,91 +121,173 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c if (!cmdq) return; - ivpu_bo_free_internal(cmdq->mem); + ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq); + ivpu_bo_free(cmdq->mem); + xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); kfree(cmdq); } -static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_hws_cmdq_init(struct 
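ivpu_cmdq_alloc() now draws both the doorbell ID and the command queue ID from xa_alloc_cyclic(), which resumes the search after the last ID handed out, so a just-freed doorbell is not immediately reused while firmware may still reference it. A plain-C model of cyclic allocation over a fixed range (the limits are illustrative, not the driver's db_limit):

#include <stdbool.h>
#include <stdint.h>

#define ID_MIN 1
#define ID_MAX 255

static bool id_in_use[ID_MAX + 1];
static uint32_t next_id = ID_MIN;

/* Returns an ID in [ID_MIN, ID_MAX], starting the search after the last
 * one handed out, or -1 when the whole range is occupied. */
static int alloc_cyclic(void)
{
        int range = ID_MAX - ID_MIN + 1;

        for (int probed = 0; probed < range; probed++) {
                uint32_t id = ID_MIN + (next_id - ID_MIN + probed) % range;

                if (!id_in_use[id]) {
                        id_in_use[id] = true;
                        next_id = id + 1;
                        return (int)id;
                }
        }
        return -1;
}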
ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, + u8 priority) +{ + struct ivpu_device *vdev = file_priv->vdev; + int ret; + + ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id, + task_pid_nr(current), engine, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + if (ret) + return ret; + + ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id, + priority); + if (ret) + return ret; + + return 0; +} + +static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { struct ivpu_device *vdev = file_priv->vdev; - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + int ret; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + else + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n", + cmdq->db_id, cmdq->id, file_priv->ctx.id); + + return ret; +} + +static int +ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct vpu_job_queue_header *jobq_header; int ret; lockdep_assert_held(&file_priv->lock); - if (!cmdq) { - cmdq = ivpu_cmdq_alloc(file_priv, engine); - if (!cmdq) - return NULL; - file_priv->cmdq[engine] = cmdq; + if (cmdq->db_registered) + return 0; + + cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) / + sizeof(struct vpu_job_queue_entry)); + + cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); + jobq_header = &cmdq->jobq->header; + jobq_header->engine_idx = VPU_ENGINE_COMPUTE; + jobq_header->head = 0; + jobq_header->tail = 0; + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { + ivpu_dbg(vdev, JOB, "Turbo mode enabled"); + jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; } - if (cmdq->db_registered) - return cmdq; + wmb(); /* Flush WC buffer for jobq->header */ - ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, - cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority); + if (ret) + return ret; + } + + ret = ivpu_register_db(file_priv, cmdq); if (ret) - return NULL; + return ret; cmdq->db_registered = true; - return cmdq; + return 0; } -static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) +static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + struct ivpu_device *vdev = file_priv->vdev; + int ret; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - file_priv->cmdq[engine] = NULL; - if (cmdq->db_registered) - ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); + if (!cmdq->db_registered) + return 0; - ivpu_cmdq_free(file_priv, cmdq); + cmdq->db_registered = false; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); + if (!ret) + ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id); } + + ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + + return 0; } -void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct 
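The entry_count computation in ivpu_cmdq_init() derives the queue capacity from the buffer size: one header followed by as many fixed-size entries as fit. The arithmetic with illustrative struct layouts (the real vpu_job_queue_entry is larger; only the formula matters):

#include <stdio.h>
#include <stdint.h>

struct jobq_header { uint32_t engine_idx, head, tail, flags; };
struct jobq_entry  { uint64_t batch_buf_addr; uint32_t job_id, flags; };

int main(void)
{
        size_t bo_size = 4096;   /* SZ_4K, as allocated in ivpu_cmdq_alloc() */
        size_t entries = (bo_size - sizeof(struct jobq_header)) /
                         sizeof(struct jobq_entry);

        printf("entries: %zu\n", entries);
        return 0;
}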
ivpu_file_priv *file_priv, u8 priority) { - int i; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; + int ret; lockdep_assert_held(&file_priv->lock); - for (i = 0; i < IVPU_NUM_ENGINES; i++) - ivpu_cmdq_release_locked(file_priv, i); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + if (cmdq->priority == priority) + break; + + if (!cmdq) { + cmdq = ivpu_cmdq_alloc(file_priv); + if (!cmdq) + return NULL; + cmdq->priority = priority; + } + + ret = ivpu_cmdq_init(file_priv, cmdq, priority); + if (ret) + return NULL; + + return cmdq; } -/* - * Mark the doorbell as unregistered and reset job queue pointers. - * This function needs to be called when the VPU hardware is restarted - * and FW loses job queue state. The next time job queue is used it - * will be registered again. - */ -static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) { - struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - cmdq->db_registered = false; - cmdq->jobq->header.head = 0; - cmdq->jobq->header.tail = 0; - wmb(); /* Flush WC buffer for jobq header */ + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { + xa_erase(&file_priv->cmdq_xa, cmdq_id); + ivpu_cmdq_fini(file_priv, cmdq); + ivpu_cmdq_free(file_priv, cmdq); } } -static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) +/* + * Mark the doorbell as unregistered + * This function needs to be called when the VPU hardware is restarted + * and FW loses job queue state. The next time job queue is used it + * will be registered again. + */ +static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv) { - int i; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; mutex_lock(&file_priv->lock); - for (i = 0; i < IVPU_NUM_ENGINES; i++) - ivpu_cmdq_reset_locked(file_priv, i); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + cmdq->db_registered = false; mutex_unlock(&file_priv->lock); } @@ -162,10 +300,30 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) mutex_lock(&vdev->context_list_lock); xa_for_each(&vdev->context_xa, ctx_id, file_priv) - ivpu_cmdq_reset_all(file_priv); + ivpu_cmdq_reset(file_priv); mutex_unlock(&vdev->context_list_lock); +} + +static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv) +{ + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_fini(file_priv, cmdq); +} + +void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) +{ + struct ivpu_device *vdev = file_priv->vdev; + + lockdep_assert_held(&file_priv->lock); + + ivpu_cmdq_fini_all(file_priv); + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) + ivpu_jsm_context_release(vdev, file_priv->ctx.id); } static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) @@ -178,17 +336,31 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) /* Check if there is space left in job queue */ if (next_entry == header->head) { - ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", - job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n", + job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail); return -EBUSY; } - entry = &cmdq->jobq->job[tail]; + entry = &cmdq->jobq->slot[tail].job; entry->batch_buf_addr = job->cmd_buf_vpu_addr; entry->job_id = 
job->job_id; entry->flags = 0; if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION)) entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK; + + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + if (cmdq->primary_preempt_buf) { + entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr; + entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf); + } + + if (cmdq->secondary_preempt_buf) { + entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr; + entry->secondary_preempt_buf_size = + ivpu_bo_size(cmdq->secondary_preempt_buf); + } + } + wmb(); /* Ensure that tail is updated after filling entry */ header->tail = next_entry; wmb(); /* Flush WC buffer for jobq header */ @@ -277,6 +449,7 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) job->file_priv = ivpu_file_priv_get(file_priv); + trace_job("create", job); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); return job; @@ -285,11 +458,28 @@ err_free_job: return NULL; } +static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id) +{ + struct ivpu_job *job; + + xa_lock(&vdev->submitted_jobs_xa); + job = __xa_erase(&vdev->submitted_jobs_xa, job_id); + + if (xa_empty(&vdev->submitted_jobs_xa) && job) { + vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts), + vdev->busy_time); + } + + xa_unlock(&vdev->submitted_jobs_xa); + + return job; +} + static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) { struct ivpu_job *job; - job = xa_erase(&vdev->submitted_jobs_xa, job_id); + job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); if (!job) return -ENOENT; @@ -299,6 +489,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job->bos[CMD_BUF_IDX]->job_status = job_status; dma_fence_signal(job->done_fence); + trace_job("done", job); ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); @@ -318,12 +509,12 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); } -static int ivpu_job_submit(struct ivpu_job *job) +static int ivpu_job_submit(struct ivpu_job *job, u8 priority) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; - struct xa_limit job_id_range; struct ivpu_cmdq *cmdq; + bool is_first_job; int ret; ret = ivpu_rpm_get(vdev); @@ -332,20 +523,19 @@ static int ivpu_job_submit(struct ivpu_job *job) mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); + cmdq = ivpu_cmdq_acquire(file_priv, priority); if (!cmdq) { - ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n", - file_priv->ctx.id, job->engine_idx); + ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", + file_priv->ctx.id, job->engine_idx, priority); ret = -EINVAL; goto err_unlock_file_priv; } - job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); - job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; - xa_lock(&vdev->submitted_jobs_xa); - ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); - if (ret) { + is_first_job = xa_empty(&vdev->submitted_jobs_xa); + ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); + if (ret < 
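ivpu_cmdq_push_job() treats the queue as full when advancing the tail would make it equal to the head, deliberately keeping one slot unused so that "full" and "empty" remain distinguishable. A plain-C single-producer ring with the same convention:

#include <stdbool.h>
#include <stdint.h>

#define RING_ENTRIES 8           /* known to both producer and consumer */

struct ring {
        uint32_t head;           /* consumer-owned */
        uint32_t tail;           /* producer-owned */
        int slot[RING_ENTRIES];
};

static bool ring_push(struct ring *r, int job)
{
        uint32_t next = (r->tail + 1) % RING_ENTRIES;

        if (next == r->head)
                return false;    /* full: one slot is always kept free */

        r->slot[r->tail] = job;
        /* In the driver a wmb() sits here so the entry reaches memory
         * before the updated tail does; plain C elides the barrier. */
        r->tail = next;
        return true;
}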
0) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; @@ -363,10 +553,13 @@ static int ivpu_job_submit(struct ivpu_job *job) wmb(); /* Flush WC buffer for jobq header */ } else { ivpu_cmdq_ring_db(vdev, cmdq); + if (is_first_job) + vdev->busy_start_ts = ktime_get(); } - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, + trace_job("submit", job); + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, job->engine_idx, priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); xa_unlock(&vdev->submitted_jobs_xa); @@ -454,6 +647,14 @@ unlock_reservations: return ret; } +static inline u8 ivpu_job_to_hws_priority(struct ivpu_file_priv *file_priv, u8 priority) +{ + if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT) + return DRM_IVPU_JOB_PRIORITY_NORMAL; + + return priority - 1; +} + int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -462,8 +663,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct ivpu_job *job; u32 *buf_handles; int idx, ret; + u8 priority; - if (params->engine > DRM_IVPU_ENGINE_COPY) + if (params->engine != DRM_IVPU_ENGINE_COMPUTE) return -EINVAL; if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) @@ -515,8 +717,10 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_destroy_job; } + priority = ivpu_job_to_hws_priority(file_priv, params->priority); + down_read(&vdev->pm->reset_lock); - ret = ivpu_job_submit(job); + ret = ivpu_job_submit(job, priority); up_read(&vdev->pm->reset_lock); if (ret) goto err_signal_fence; diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index ca4984071cc7..8b19e3f8b4cf 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_JOB_H__ @@ -24,10 +24,14 @@ struct ivpu_file_priv; */ struct ivpu_cmdq { struct vpu_job_queue *jobq; + struct ivpu_bo *primary_preempt_buf; + struct ivpu_bo *secondary_preempt_buf; struct ivpu_bo *mem; u32 entry_count; + u32 id; u32 db_id; bool db_registered; + u8 priority; }; /** @@ -55,6 +59,8 @@ struct ivpu_job { int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv); + void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 8cea0dd731b9..30a40be76930 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include "ivpu_drv.h" @@ -48,9 +48,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); - IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); 
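The busy_start_ts/busy_time pair added above accounts NPU utilization per busy interval: a timestamp is taken when the submitted-jobs table goes from empty to non-empty, and the delta is accumulated when it drains back to empty. The accounting in isolation, as a plain-C model (the clock source is a stand-in for ktime_get()):

#include <stdint.h>

static uint64_t now_ns(void);    /* stand-in for ktime_get() */

static uint64_t busy_time_ns, busy_start_ns;
static unsigned int jobs_in_flight;

static void job_submitted(void)
{
        if (jobs_in_flight++ == 0)
                busy_start_ns = now_ns();        /* first job opens interval */
}

static void job_completed(void)
{
        if (--jobs_in_flight == 0)
                busy_time_ns += now_ns() - busy_start_ns; /* last job closes it */
}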
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); @@ -103,14 +104,10 @@ int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) { - ivpu_err_ratelimited(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); - return ret; - } - - ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret); - return 0; + return ret; } int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) @@ -123,14 +120,10 @@ int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret) { - ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); - return ret; - } - - ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %u: %d\n", db_id, ret); - return 0; + return ret; } int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) @@ -139,7 +132,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.query_engine_hb.engine_idx = engine; @@ -162,7 +155,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_reset.engine_idx = engine; @@ -181,7 +174,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_preempt.engine_idx = engine; @@ -204,7 +197,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN); ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, - VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm); if (ret) ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); @@ -255,11 +248,16 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid) { struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE }; struct vpu_jsm_msg resp; + int ret; req.payload.ssid_release.host_ssid = host_ssid; - return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, - VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to release context: %d\n", ret); + + return ret; } int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) @@ -273,11 +271,294 @@ int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) req.payload.pwr_d0i3_enter.send_response = 1; - ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, - &resp, VPU_IPC_CHAN_GEN_CMD, - 
vdev->timeout.d0i3_entry_msg); + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, &resp, + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.d0i3_entry_msg); if (ret) return ret; return ivpu_hw_wait_for_idle(vdev); } + +int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id, + u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_CREATE_CMD_QUEUE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_create_cmdq.host_ssid = ctx_id; + req.payload.hws_create_cmdq.process_id = pid; + req.payload.hws_create_cmdq.engine_idx = engine; + req.payload.hws_create_cmdq.cmdq_group = cmdq_group; + req.payload.hws_create_cmdq.cmdq_id = cmdq_id; + req.payload.hws_create_cmdq.cmdq_base = cmdq_base; + req.payload.hws_create_cmdq.cmdq_size = cmdq_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to create command queue: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DESTROY_CMD_QUEUE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_destroy_cmdq.host_ssid = ctx_id; + req.payload.hws_destroy_cmdq.cmdq_id = cmdq_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to destroy command queue: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id, + u64 cmdq_base, u32 cmdq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_REGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.hws_register_db.db_id = db_id; + req.payload.hws_register_db.host_ssid = ctx_id; + req.payload.hws_register_db.cmdq_id = cmdq_id; + req.payload.hws_register_db.cmdq_base = cmdq_base; + req.payload.hws_register_db.cmdq_size = cmdq_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to register doorbell %u: %d\n", db_id, ret); + + return ret; +} + +int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_ENGINE_RESUME }; + struct vpu_jsm_msg resp; + int ret; + + if (engine != VPU_ENGINE_COMPUTE) + return -EINVAL; + + req.payload.hws_resume_engine.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err_ratelimited(vdev, "Failed to resume engine %d: %d\n", engine, ret); + + return ret; +} + +int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, + u32 priority) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_set_context_sched_properties.host_ssid = ctx_id; + req.payload.hws_set_context_sched_properties.cmdq_id = cmdq_id; + req.payload.hws_set_context_sched_properties.priority_band = priority; + req.payload.hws_set_context_sched_properties.realtime_priority_level = 0; + req.payload.hws_set_context_sched_properties.in_process_priority = 0; + 
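Each of the new ivpu_jsm_* helpers follows one request/response template: stack-allocate a typed request, fill its payload, and block until the matching *_RSP/_DONE message arrives. The generic shape in plain C, with illustrative message types rather than the real vpu_jsm_msg layout:

#include <string.h>

enum msg_type { MSG_SET_PROPS, MSG_SET_PROPS_RSP };

struct msg {
        enum msg_type type;
        union { struct { unsigned ctx, cmdq, prio; } set_props; } payload;
};

int ipc_send_receive(const struct msg *req, enum msg_type want, struct msg *resp);

static int jsm_set_props(unsigned ctx, unsigned cmdq, unsigned prio)
{
        struct msg req = { .type = MSG_SET_PROPS };
        struct msg resp;

        req.payload.set_props.ctx = ctx;
        req.payload.set_props.cmdq = cmdq;
        req.payload.set_props.prio = prio;

        /* Block until the firmware answers with the expected response type. */
        return ipc_send_receive(&req, MSG_SET_PROPS_RSP, &resp);
}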
req.payload.hws_set_context_sched_properties.context_quantum = 20000; + req.payload.hws_set_context_sched_properties.grace_period_same_priority = 10000; + req.payload.hws_set_context_sched_properties.grace_period_lower_priority = 0; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set context sched properties: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, + u64 vpu_log_buffer_va) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.hws_set_scheduling_log.engine_idx = engine_idx; + req.payload.hws_set_scheduling_log.host_ssid = host_ssid; + req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va; + req.payload.hws_set_scheduling_log.notify_index = 0; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set scheduling log: %d\n", ret); + + return ret; +} + +int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP }; + struct vpu_jsm_msg resp; + int ret; + + /* Idle */ + req.payload.hws_priority_band_setup.grace_period[0] = 0; + req.payload.hws_priority_band_setup.process_grace_period[0] = 50000; + req.payload.hws_priority_band_setup.process_quantum[0] = 160000; + /* Normal */ + req.payload.hws_priority_band_setup.grace_period[1] = 50000; + req.payload.hws_priority_band_setup.process_grace_period[1] = 50000; + req.payload.hws_priority_band_setup.process_quantum[1] = 300000; + /* Focus */ + req.payload.hws_priority_band_setup.grace_period[2] = 50000; + req.payload.hws_priority_band_setup.process_grace_period[2] = 50000; + req.payload.hws_priority_band_setup.process_quantum[2] = 200000; + /* Realtime */ + req.payload.hws_priority_band_setup.grace_period[3] = 0; + req.payload.hws_priority_band_setup.process_grace_period[3] = 50000; + req.payload.hws_priority_band_setup.process_quantum[3] = 200000; + + req.payload.hws_priority_band_setup.normal_band_percentage = 10; + + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP, + &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to set priority bands: %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_START }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.sampling_rate = sampling_rate; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_START_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to start metric streamer: ret %d\n", ret); + return ret; + } + + return ret; +} + +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_STOP }; + struct 
vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_stop.metric_group_mask = metric_group_mask; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to stop metric streamer: ret %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_UPDATE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_update.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_update.buffer_addr = buffer_addr; + req.payload.metric_streamer_update.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to update metric streamer: ret %d\n", ret); + return ret; + } + + if (buffer_size && resp.payload.metric_streamer_done.bytes_written > buffer_size) { + ivpu_warn_ratelimited(vdev, "MS buffer overflow: bytes_written %#llx > buffer_size %#llx\n", + resp.payload.metric_streamer_done.bytes_written, buffer_size); + return -EOVERFLOW; + } + + *bytes_written = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_INFO }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to get metric streamer info: ret %d\n", ret); + return ret; + } + + if (!resp.payload.metric_streamer_done.sample_size) { + ivpu_warn_ratelimited(vdev, "Invalid sample size\n"); + return -EBADMSG; + } + + if (sample_size) + *sample_size = resp.payload.metric_streamer_done.sample_size; + if (info_size) + *info_size = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_ENABLE }; + struct vpu_jsm_msg resp; + + req.payload.pwr_dct_control.dct_active_us = active_us; + req.payload.pwr_dct_control.dct_inactive_us = inactive_us; + + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_dct_disable(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_DISABLE }; + struct vpu_jsm_msg resp; + + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_state_dump(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP }; + + return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD, + vdev->timeout.state_dump_msg); +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index ae75e5dbcc41..9e84d3526a14 100644 --- 
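ivpu_jsm_metric_streamer_update() treats the firmware-reported byte count as untrusted and rejects it when it exceeds the buffer that was handed over, rather than propagating a bogus length to user space. The check in isolation:

#include <errno.h>
#include <stdint.h>

static int check_ms_bytes_written(uint64_t bytes_written, uint64_t buffer_size)
{
        /* buffer_size == 0 means "no buffer supplied", nothing to bound. */
        if (buffer_size && bytes_written > buffer_size)
                return -EOVERFLOW;   /* firmware claims more than we gave it */

        return 0;
}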
a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_JSM_MSG_H__ @@ -23,4 +23,26 @@ int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 tra u64 trace_hw_component_mask); int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid); int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev); +int ivpu_jsm_hws_create_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_group, u32 cmdq_id, + u32 pid, u32 engine, u64 cmdq_base, u32 cmdq_size); +int ivpu_jsm_hws_destroy_cmdq(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id); +int ivpu_jsm_hws_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, u32 db_id, + u64 cmdq_base, u32 cmdq_size); +int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine); +int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id, + u32 priority); +int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, + u64 vpu_log_buffer_va); +int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev); +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size); +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask); +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written); +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size); +int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us); +int ivpu_jsm_dct_disable(struct ivpu_device *vdev); +int ivpu_jsm_state_dump(struct ivpu_device *vdev); + #endif diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 91bd640655ab..26ef52fbb93e 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/circ_buf.h> @@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd) case IVPU_MMU_EVT_F_VMS_FETCH: return "Fetch of VMS caused external abort"; default: - return "Unknown CMDQ command"; + return "Unknown event"; } } @@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err) { switch (err) { case IVPU_MMU_CERROR_NONE: - return "No CMDQ Error"; + return "No error"; case IVPU_MMU_CERROR_ILL: return "Illegal command"; case IVPU_MMU_CERROR_ABT: - return "External abort on CMDQ read"; + return "External abort on command queue read"; case IVPU_MMU_CERROR_ATC_INV_SYNC: return "Sync failed to complete ATS invalidation"; default: - return "Unknown CMDQ Error"; + return "Unknown error"; } } @@ -519,7 +519,8 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) if (ret) return ret; - clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod); ret = ivpu_mmu_cmdq_wait_for_cons(vdev); @@ -567,7 +568,8 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) int ret; memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE); - clflush_cache_range(mmu->cmdq.base, 
IVPU_MMU_CMDQ_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); mmu->cmdq.prod = 0; mmu->cmdq.cons = 0; @@ -661,7 +663,8 @@ static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) WRITE_ONCE(entry[1], str[1]); WRITE_ONCE(entry[0], str[0]); - clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]); } @@ -693,7 +696,7 @@ unlock: return ret; } -static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) +static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid) { struct ivpu_mmu_info *mmu = vdev->mmu; struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; @@ -705,40 +708,40 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) return -EINVAL; entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); - - if (cd_dma != 0) { - cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | - FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | - IVPU_MMU_CD_0_TCR_EPD1 | - IVPU_MMU_CD_0_AA64 | - IVPU_MMU_CD_0_R | - IVPU_MMU_CD_0_ASET | - IVPU_MMU_CD_0_V; - cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; - cd[2] = 0; - cd[3] = 0x0000000000007444; - - /* For global context generate memory fault on VPU */ - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - cd[0] |= IVPU_MMU_CD_0_A; - } else { - memset(cd, 0, sizeof(cd)); - } + drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid); + + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | + IVPU_MMU_CD_0_TCR_EPD1 | + IVPU_MMU_CD_0_AA64 | + IVPU_MMU_CD_0_R | + IVPU_MMU_CD_0_ASET; + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; + cd[2] = 0; + cd[3] = 0x0000000000007444; + + /* For global context generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + cd[0] |= IVPU_MMU_CD_0_A; + + if (valid) + cd[0] |= IVPU_MMU_CD_0_V; WRITE_ONCE(entry[1], cd[1]); WRITE_ONCE(entry[2], cd[2]); WRITE_ONCE(entry[3], cd[3]); WRITE_ONCE(entry[0], cd[0]); - clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); + if (!ivpu_is_force_snoop_enabled(vdev)) + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); - ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", - cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); + ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + valid ? 
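The descriptor words in ivpu_mmu_cdtab_entry_set() are published in a deliberate order: cd[1]..cd[3] are stored before cd[0], which carries the valid bit, so the SMMU can never observe a valid entry with stale body words. The driver relies on WRITE_ONCE plus an explicit cache flush; a C11 model can express the same publish discipline with a release store on the final word:

#include <stdatomic.h>
#include <stdint.h>

static void cdtab_publish(_Atomic uint64_t entry[4], const uint64_t cd[4])
{
        atomic_store_explicit(&entry[1], cd[1], memory_order_relaxed);
        atomic_store_explicit(&entry[2], cd[2], memory_order_relaxed);
        atomic_store_explicit(&entry[3], cd[3], memory_order_relaxed);

        /* Word 0 holds the VALID bit: make it visible last. */
        atomic_store_explicit(&entry[0], cd[0], memory_order_release);
}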
"valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); mutex_lock(&mmu->lock); if (!mmu->on) @@ -746,38 +749,18 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); if (ret) - goto unlock; + goto err_invalidate; ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + goto err_invalidate; unlock: mutex_unlock(&mmu->lock); - return ret; -} - -static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); - if (ret) - ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); - - return ret; -} - -static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) -{ - int ret; - - if (ssid == 0) { - ivpu_err(vdev, "Invalid SSID: %u\n", ssid); - return -EINVAL; - } - - ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); - if (ret) - ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); + return 0; +err_invalidate: + WRITE_ONCE(entry[0], 0); + mutex_unlock(&mmu->lock); return ret; } @@ -804,12 +787,6 @@ int ivpu_mmu_init(struct ivpu_device *vdev) return ret; } - ret = ivpu_mmu_cd_add_gbl(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); - return ret; - } - ret = ivpu_mmu_enable(vdev); if (ret) { ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); @@ -874,8 +851,9 @@ static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event) u64 in_addr = ((u64)event[5]) << 32 | event[4]; u32 sid = event[1]; - ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", - op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr); + ivpu_err_ratelimited(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", + op, ivpu_mmu_event_to_str(op), ssid, sid, + event[2], event[3], in_addr, fetch_addr); } static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) @@ -911,6 +889,9 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_mmu_user_context_mark_invalid(vdev, ssid); REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); } + + if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ)) + ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); } void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) @@ -958,12 +939,12 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val); } -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) { - return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); + return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true); } -void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid) { - ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ + ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false); } diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index 6fa35c240710..7afea9cd8731 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -40,8 +40,8 @@ struct ivpu_mmu_info { int ivpu_mmu_init(struct ivpu_device *vdev); void ivpu_mmu_disable(struct ivpu_device *vdev); int ivpu_mmu_enable(struct ivpu_device *vdev); -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); -void ivpu_mmu_clear_pgtable(struct 
ivpu_device *vdev, int ssid); +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid); int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index fe6161299236..0af614dfb6f9 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -6,6 +6,7 @@ #include <linux/bitfield.h> #include <linux/highmem.h> #include <linux/set_memory.h> +#include <linux/vmalloc.h> #include <drm/drm_cache.h> @@ -23,6 +24,7 @@ #define IVPU_MMU_ENTRY_FLAG_CONT BIT(52) #define IVPU_MMU_ENTRY_FLAG_NG BIT(11) #define IVPU_MMU_ENTRY_FLAG_AF BIT(10) +#define IVPU_MMU_ENTRY_FLAG_RO BIT(7) #define IVPU_MMU_ENTRY_FLAG_USER BIT(6) #define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2) #define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1) @@ -88,19 +90,6 @@ static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_ } } -static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) -{ - dma_addr_t pgd_dma; - - pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); - if (!pgtable->pgd_dma_ptr) - return -ENOMEM; - - pgtable->pgd_dma = pgd_dma; - - return 0; -} - static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) { int pgd_idx, pud_idx, pmd_idx; @@ -138,6 +127,27 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt } ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma); + pgtable->pgd_dma_ptr = NULL; + pgtable->pgd_dma = 0; +} + +static u64* +ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + u64 *pgd_dma_ptr = pgtable->pgd_dma_ptr; + dma_addr_t pgd_dma; + + if (pgd_dma_ptr) + return pgd_dma_ptr; + + pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); + if (!pgd_dma_ptr) + return NULL; + + pgtable->pgd_dma_ptr = pgd_dma_ptr; + pgtable->pgd_dma = pgd_dma; + + return pgd_dma_ptr; } static u64* @@ -235,6 +245,12 @@ ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID); + + /* Allocate PGD - first level page table if needed */ + if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable)) + return -ENOMEM; + /* Allocate PUD - second level page table if needed */ if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx)) return -ENOMEM; @@ -318,6 +334,91 @@ ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ct return 0; } +static void ivpu_mmu_context_set_page_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] |= IVPU_MMU_ENTRY_FLAG_RO; +} + +static void ivpu_mmu_context_split_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); + int pmd_idx = 
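ivpu_mmu_ensure_pgd() above, like its existing PUD/PMD/PTE siblings, follows one lazy-allocation shape: return the table if it already exists, otherwise allocate it, record it, and return it, leaving NULL to signal -ENOMEM to the caller. The shape as a plain-C sketch:

#include <stdint.h>
#include <stdlib.h>

struct pgtable { uint64_t *pgd; };

static uint64_t *ensure_pgd(struct pgtable *pt)
{
        if (pt->pgd)
                return pt->pgd;  /* already allocated: cheap fast path */

        /* One zeroed 4K table (512 eight-byte entries). */
        pt->pgd = calloc(512, sizeof(*pt->pgd));
        return pt->pgd;          /* NULL here becomes -ENOMEM upstream */
}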
FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] &= ~IVPU_MMU_ENTRY_FLAG_CONT; +} + +static void ivpu_mmu_context_split_64k_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr) +{ + u64 start = ALIGN_DOWN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE); + u64 end = ALIGN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE); + u64 offset = 0; + + ivpu_dbg(vdev, MMU_MAP, "Split 64K page ctx: %u vpu_addr: 0x%llx\n", ctx->id, vpu_addr); + + while (start + offset < end) { + ivpu_mmu_context_split_page(vdev, ctx, start + offset); + offset += IVPU_MMU_PAGE_SIZE; + } +} + +int +ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, + size_t size) +{ + u64 end = vpu_addr + size; + size_t size_left = size; + int ret; + + if (size == 0) + return 0; + + if (drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr | size, IVPU_MMU_PAGE_SIZE))) + return -EINVAL; + + mutex_lock(&ctx->lock); + + ivpu_dbg(vdev, MMU_MAP, "Set read-only pages ctx: %u vpu_addr: 0x%llx size: %lu\n", + ctx->id, vpu_addr, size); + + if (!ivpu_disable_mmu_cont_pages) { + /* Split 64K contiguous page at the beginning if needed */ + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE)) + ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr); + + /* Split 64K contiguous page at the end if needed */ + if (!IS_ALIGNED(vpu_addr + size, IVPU_MMU_CONT_PAGES_SIZE)) + ivpu_mmu_context_split_64k_page(vdev, ctx, vpu_addr + size); + } + + while (size_left) { + if (vpu_addr < end) + ivpu_mmu_context_set_page_ro(vdev, ctx, vpu_addr); + + vpu_addr += IVPU_MMU_PAGE_SIZE; + size_left -= IVPU_MMU_PAGE_SIZE; + } + + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); + + mutex_unlock(&ctx->lock); + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + + return 0; +} + static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) { while (size) { @@ -331,6 +432,7 @@ int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) { + size_t start_vpu_addr = vpu_addr; struct scatterlist *sg; int ret; u64 prot; @@ -361,20 +463,36 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); - mutex_unlock(&ctx->lock); - return ret; + goto err_unmap_pages; } vpu_addr += size; } + if (!ctx->is_cd_valid) { + ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n", + ctx->id, ret); + goto err_unmap_pages; + } + ctx->is_cd_valid = true; + } + /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); - mutex_unlock(&ctx->lock); - ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + goto err_unmap_pages; + } + + mutex_unlock(&ctx->lock); + return 0; + +err_unmap_pages: + ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr); + mutex_unlock(&ctx->lock); return ret; } @@ -443,65 +561,79 @@ ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *n mutex_unlock(&ctx->lock); } -static int 
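Marking a sub-range read-only first demotes any 64K contiguous mapping that a range edge cuts through back to plain 4K PTEs, by clearing the CONT hint on every 4K page of that 64K block, and only then flips the RO bit page by page. The edge handling, sketched with illustrative constants and stand-in per-PTE helpers:

#include <stdint.h>

#define PAGE_SZ 0x1000ULL        /* 4K */
#define CONT_SZ 0x10000ULL       /* 64K contiguous-mapping granule */

static void split_page(uint64_t va);   /* clears the CONT flag on one PTE */
static void set_page_ro(uint64_t va);  /* sets the RO flag on one PTE */

static void split_64k(uint64_t va)
{
        uint64_t start = va & ~(CONT_SZ - 1);

        for (uint64_t p = start; p < start + CONT_SZ; p += PAGE_SZ)
                split_page(p);
}

static void set_range_ro(uint64_t va, uint64_t size)
{
        if (va & (CONT_SZ - 1))
                split_64k(va);           /* range starts mid-64K block */
        if ((va + size) & (CONT_SZ - 1))
                split_64k(va + size);    /* range ends mid-64K block */

        for (uint64_t p = va; p < va + size; p += PAGE_SZ)
                set_page_ro(p);
}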
-ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) { u64 start, end; - int ret; mutex_init(&ctx->lock); - ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); - if (ret) { - ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret); - return ret; - } - if (!context_id) { start = vdev->hw->ranges.global.start; end = vdev->hw->ranges.shave.end; } else { - start = vdev->hw->ranges.user.start; - end = vdev->hw->ranges.dma.end; + start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start); + end = max_t(u64, vdev->hw->ranges.user.end, vdev->hw->ranges.dma.end); } drm_mm_init(&ctx->mm, start, end - start); ctx->id = context_id; - - return 0; } -static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { - if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr)) - return; + if (ctx->is_cd_valid) { + ivpu_mmu_cd_clear(vdev, ctx->id); + ctx->is_cd_valid = false; + } mutex_destroy(&ctx->lock); ivpu_mmu_pgtables_free(vdev, &ctx->pgtable); drm_mm_takedown(&ctx->mm); - - ctx->pgtable.pgd_dma_ptr = NULL; - ctx->pgtable.pgd_dma = 0; } -int ivpu_mmu_global_context_init(struct ivpu_device *vdev) +void ivpu_mmu_global_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); + ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); } void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_context_fini(vdev, &vdev->gctx); + ivpu_mmu_context_fini(vdev, &vdev->gctx); } int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); + int ret; + + ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); + + mutex_lock(&vdev->rctx.lock); + + if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) { + ivpu_err(vdev, "Failed to allocate root page table for reserved context\n"); + ret = -ENOMEM; + goto err_ctx_fini; + } + + ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for reserved context\n"); + goto err_ctx_fini; + } + + mutex_unlock(&vdev->rctx.lock); + return ret; + +err_ctx_fini: + mutex_unlock(&vdev->rctx.lock); + ivpu_mmu_context_fini(vdev, &vdev->rctx); + return ret; } void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_user_context_fini(vdev, &vdev->rctx); + ivpu_mmu_cd_clear(vdev, vdev->rctx.id); + ivpu_mmu_context_fini(vdev, &vdev->rctx); } void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) @@ -516,36 +648,3 @@ void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) xa_unlock(&vdev->context_xa); } - -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) -{ - int ret; - - drm_WARN_ON(&vdev->drm, !ctx_id); - - ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); - if (ret) { - ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret); - return ret; - } - - ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); - if (ret) { - ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret); - goto err_context_fini; - } - - return 0; - -err_context_fini: - ivpu_mmu_context_fini(vdev, ctx); - 
return ret; -} - -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) -{ - drm_WARN_ON(&vdev->drm, !ctx->id); - - ivpu_mmu_clear_pgtable(vdev, ctx->id); - ivpu_mmu_context_fini(vdev, ctx); -} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index 535db3a1fc74..8042fc067062 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -23,19 +23,20 @@ struct ivpu_mmu_pgtable { }; struct ivpu_mmu_context { - struct mutex lock; /* Protects: mm, pgtable */ + struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */ struct drm_mm mm; struct ivpu_mmu_pgtable pgtable; + bool is_cd_valid; u32 id; }; -int ivpu_mmu_global_context_init(struct ivpu_device *vdev); +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id); +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_mmu_global_context_init(struct ivpu_device *vdev); void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev); void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev); -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, @@ -46,5 +47,7 @@ int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context * u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt); +int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, size_t size); #endif /* __IVPU_MMU_CONTEXT_H__ */ diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c new file mode 100644 index 000000000000..ffe7b10f8a76 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <drm/drm_file.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ms.h" +#include "ivpu_pm.h" + +#define MS_INFO_BUFFER_SIZE SZ_64K +#define MS_NUM_BUFFERS 2 +#define MS_READ_PERIOD_MULTIPLIER 2 +#define MS_MIN_SAMPLE_PERIOD_NS 1000000 + +static struct ivpu_ms_instance * +get_instance_by_mask(struct ivpu_file_priv *file_priv, u64 metric_mask) +{ + struct ivpu_ms_instance *ms; + + lockdep_assert_held(&file_priv->ms_lock); + + list_for_each_entry(ms, &file_priv->ms_instance_list, ms_instance_node) + if (ms->mask == metric_mask) + return ms; + + return NULL; +} + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_start *args = data; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 single_buff_size; + u32 sample_size; + int ret; + + if (!args->metric_group_mask || !args->read_period_samples || + args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + if (get_instance_by_mask(file_priv, args->metric_group_mask)) { + ivpu_err(vdev, "Instance already exists (mask 
%#llx)\n", args->metric_group_mask); + ret = -EALREADY; + goto unlock; + } + + ms = kzalloc(sizeof(*ms), GFP_KERNEL); + if (!ms) { + ret = -ENOMEM; + goto unlock; + } + + ms->mask = args->metric_group_mask; + + ret = ivpu_jsm_metric_streamer_info(vdev, ms->mask, 0, 0, &sample_size, NULL); + if (ret) + goto err_free_ms; + + single_buff_size = sample_size * + ((u64)args->read_period_samples * MS_READ_PERIOD_MULTIPLIER); + ms->bo = ivpu_bo_create_global(vdev, PAGE_ALIGN(single_buff_size * MS_NUM_BUFFERS), + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + if (!ms->bo) { + ivpu_err(vdev, "Failed to allocate MS buffer (size %llu)\n", single_buff_size); + ret = -ENOMEM; + goto err_free_ms; + } + + ms->buff_size = ivpu_bo_size(ms->bo) / MS_NUM_BUFFERS; + ms->active_buff_vpu_addr = ms->bo->vpu_addr; + ms->inactive_buff_vpu_addr = ms->bo->vpu_addr + ms->buff_size; + ms->active_buff_ptr = ivpu_bo_vaddr(ms->bo); + ms->inactive_buff_ptr = ivpu_bo_vaddr(ms->bo) + ms->buff_size; + + ret = ivpu_jsm_metric_streamer_start(vdev, ms->mask, args->sampling_period_ns, + ms->active_buff_vpu_addr, ms->buff_size); + if (ret) + goto err_free_bo; + + args->sample_size = sample_size; + args->max_data_size = ivpu_bo_size(ms->bo); + list_add_tail(&ms->ms_instance_node, &file_priv->ms_instance_list); + goto unlock; + +err_free_bo: + ivpu_bo_free(ms->bo); +err_free_ms: + kfree(ms); +unlock: + mutex_unlock(&file_priv->ms_lock); + return ret; +} + +static int +copy_leftover_bytes(struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 copy_bytes; + + if (ms->leftover_bytes) { + copy_bytes = min(user_size - *user_bytes_copied, ms->leftover_bytes); + if (copy_to_user(user_ptr + *user_bytes_copied, ms->leftover_addr, copy_bytes)) + return -EFAULT; + + ms->leftover_bytes -= copy_bytes; + ms->leftover_addr += copy_bytes; + *user_bytes_copied += copy_bytes; + } + + return 0; +} + +static int +copy_samples_to_user(struct ivpu_device *vdev, struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 bytes_written; + int ret; + + *user_bytes_copied = 0; + + ret = copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); + if (ret) + return ret; + + if (*user_bytes_copied == user_size) + return 0; + + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, ms->inactive_buff_vpu_addr, + ms->buff_size, &bytes_written); + if (ret) + return ret; + + swap(ms->active_buff_vpu_addr, ms->inactive_buff_vpu_addr); + swap(ms->active_buff_ptr, ms->inactive_buff_ptr); + + ms->leftover_bytes = bytes_written; + ms->leftover_addr = ms->inactive_buff_ptr; + + return copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); +} + +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 bytes_written; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (!ms) { + ivpu_err(vdev, "Instance doesn't exist for mask: %#llx\n", args->metric_group_mask); + ret = -EINVAL; + goto unlock; + } + + if (!args->buffer_size) { + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, 0, 0, &bytes_written); + if (ret) + goto unlock; + args->data_size = bytes_written + ms->leftover_bytes; + goto unlock; + } + + if (!args->buffer_ptr) { 
+ ret = -EINVAL; + goto unlock; + } + + ret = copy_samples_to_user(vdev, ms, u64_to_user_ptr(args->buffer_ptr), + args->buffer_size, &args->data_size); +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +static void free_instance(struct ivpu_file_priv *file_priv, struct ivpu_ms_instance *ms) +{ + lockdep_assert_held(&file_priv->ms_lock); + + list_del(&ms->ms_instance_node); + ivpu_jsm_metric_streamer_stop(file_priv->vdev, ms->mask); + ivpu_bo_free(ms->bo); + kfree(ms); +} + +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_ms_instance *ms; + + if (!args->metric_group_mask) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (ms) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); + + return ms ? 0 : -EINVAL; +} + +static inline struct ivpu_bo *get_ms_info_bo(struct ivpu_file_priv *file_priv) +{ + lockdep_assert_held(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) + return file_priv->ms_info_bo; + + file_priv->ms_info_bo = ivpu_bo_create_global(file_priv->vdev, MS_INFO_BUFFER_SIZE, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + return file_priv->ms_info_bo; +} + +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_bo *bo; + u64 info_size; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + if (!args->buffer_size) + return ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, + 0, 0, NULL, &args->data_size); + if (!args->buffer_ptr) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + bo = get_ms_info_bo(file_priv); + if (!bo) { + ret = -ENOMEM; + goto unlock; + } + + ret = ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, bo->vpu_addr, + ivpu_bo_size(bo), NULL, &info_size); + if (ret) + goto unlock; + + if (args->buffer_size < info_size) { + ret = -ENOSPC; + goto unlock; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer_ptr), ivpu_bo_vaddr(bo), info_size)) + ret = -EFAULT; + + args->data_size = info_size; +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) +{ + struct ivpu_ms_instance *ms, *tmp; + + mutex_lock(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) { + ivpu_bo_free(file_priv->ms_info_bo); + file_priv->ms_info_bo = NULL; + } + + list_for_each_entry_safe(ms, tmp, &file_priv->ms_instance_list, ms_instance_node) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); +} + +void ivpu_ms_cleanup_all(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + ivpu_ms_cleanup(file_priv); + + mutex_unlock(&vdev->context_list_lock); +} diff --git a/drivers/accel/ivpu/ivpu_ms.h b/drivers/accel/ivpu/ivpu_ms.h new file mode 100644 index 000000000000..fbd5ebebc3d9 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ +#ifndef __IVPU_MS_H__ +#define __IVPU_MS_H__ + +#include <linux/list.h> + +struct drm_device; +struct drm_file; +struct ivpu_bo; 
+struct ivpu_device; +struct ivpu_file_priv; + +struct ivpu_ms_instance { + struct ivpu_bo *bo; + struct list_head ms_instance_node; + u64 mask; + u64 buff_size; + u64 active_buff_vpu_addr; + u64 inactive_buff_vpu_addr; + void *active_buff_ptr; + void *inactive_buff_ptr; + u64 leftover_bytes; + void *leftover_addr; +}; + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv); +void ivpu_ms_cleanup_all(struct ivpu_device *vdev); + +#endif /* __IVPU_MS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 5f73854234ba..5060c5dd40d1 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/highmem.h> @@ -9,20 +9,25 @@ #include <linux/pm_runtime.h> #include <linux/reboot.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" #include "ivpu_drv.h" -#include "ivpu_hw.h" #include "ivpu_fw.h" #include "ivpu_fw_log.h" +#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" static bool ivpu_disable_recovery; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); -MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected"); +MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected"); +#endif static unsigned long ivpu_tdr_timeout_ms; module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); @@ -36,6 +41,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_cmdq_reset_all_contexts(vdev); ivpu_ipc_reset(vdev); + ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; } @@ -58,14 +64,11 @@ static int ivpu_suspend(struct ivpu_device *vdev) { int ret; - /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ - pci_save_state(to_pci_dev(vdev->drm.dev)); + ivpu_prepare_for_reset(vdev); ret = ivpu_shutdown(vdev); if (ret) - ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); - - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret); return ret; } @@ -74,10 +77,10 @@ static int ivpu_resume(struct ivpu_device *vdev) { int ret; +retry: pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); pci_restore_state(to_pci_dev(vdev->drm.dev)); -retry: ret = ivpu_hw_power_up(vdev); if (ret) { ivpu_err(vdev, "Failed to power up HW: %d\n", ret); @@ -100,6 +103,7 @@ err_mmu_disable: ivpu_mmu_disable(vdev); err_power_down: ivpu_hw_power_down(vdev); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); if (!ivpu_fw_is_cold_boot(vdev)) { ivpu_pm_prepare_cold_boot(vdev); @@ -111,39 +115,57 @@ err_power_down: return ret; } -static void ivpu_pm_recovery_work(struct work_struct *work) +static void ivpu_pm_reset_begin(struct ivpu_device *vdev) { - struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device 
*vdev = pm->vdev; - char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; - int ret; - - ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); - - ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (ret) - ivpu_err(vdev, "Failed to resume VPU: %d\n", ret); - - ivpu_fw_log_dump(vdev); + pm_runtime_disable(vdev->drm.dev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); down_write(&vdev->pm->reset_lock); +} + +static void ivpu_pm_reset_complete(struct ivpu_device *vdev) +{ + int ret; - ivpu_suspend(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); ret = ivpu_resume(vdev); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } else { + pm_runtime_set_active(vdev->drm.dev); + } up_write(&vdev->pm->reset_lock); atomic_set(&vdev->pm->reset_pending, 0); - kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + pm_runtime_enable(vdev->drm.dev); +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + + ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + ivpu_suspend(vdev); + } + + ivpu_pm_reset_complete(vdev); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); } void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) @@ -195,6 +217,7 @@ int ivpu_pm_suspend_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); unsigned long timeout; + trace_pm("suspend"); ivpu_dbg(vdev, PM, "Suspend..\n"); timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); @@ -212,6 +235,7 @@ int ivpu_pm_suspend_cb(struct device *dev) ivpu_pm_prepare_warm_boot(vdev); ivpu_dbg(vdev, PM, "Suspend done.\n"); + trace_pm("suspend done"); return 0; } @@ -222,6 +246,7 @@ int ivpu_pm_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("resume"); ivpu_dbg(vdev, PM, "Resume..\n"); ret = ivpu_resume(vdev); @@ -229,6 +254,7 @@ int ivpu_pm_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to resume: %d\n", ret); ivpu_dbg(vdev, PM, "Resume done.\n"); + trace_pm("resume done"); return ret; } @@ -237,42 +263,40 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); - bool hw_is_idle = true; - int ret; + int ret, ret_d0i3; + bool is_idle; drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); + trace_pm("runtime suspend"); ivpu_dbg(vdev, PM, "Runtime suspend..\n"); - if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { - ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", - vdev->pm->suspend_reschedule_counter); - pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); - vdev->pm->suspend_reschedule_counter--; - return -EAGAIN; - } + ivpu_mmu_disable(vdev); + + is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent; + if (!is_idle) + ivpu_err(vdev, "NPU is not idle before 
autosuspend\n"); - if (!vdev->pm->suspend_reschedule_counter) - hw_is_idle = false; - else if (ivpu_jsm_pwr_d0i3_enter(vdev)) - hw_is_idle = false; + ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev); + if (ret_d0i3) + ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3); ret = ivpu_suspend(vdev); if (ret) - ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); + ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret); - if (!hw_is_idle) { - ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n"); - ivpu_fw_log_dump(vdev); + if (!is_idle || ret_d0i3) { + ivpu_err(vdev, "Forcing cold boot due to previous errors\n"); + atomic_inc(&vdev->pm->reset_counter); + ivpu_dev_coredump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); } - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); + trace_pm("runtime suspend done"); return 0; } @@ -283,6 +307,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("runtime resume"); ivpu_dbg(vdev, PM, "Runtime resume..\n"); ret = ivpu_resume(vdev); @@ -290,6 +315,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); ivpu_dbg(vdev, PM, "Runtime resume done.\n"); + trace_pm("runtime resume done"); return ret; } @@ -299,18 +325,10 @@ int ivpu_rpm_get(struct ivpu_device *vdev) int ret; ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (!drm_WARN_ON(&vdev->drm, ret < 0)) - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - - return ret; -} - -int ivpu_rpm_get_if_active(struct ivpu_device *vdev) -{ - int ret; - - ret = pm_runtime_get_if_active(vdev->drm.dev, false); - drm_WARN_ON(&vdev->drm, ret < 0); + if (ret < 0) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } return ret; } @@ -326,33 +344,26 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) struct ivpu_device *vdev = pci_get_drvdata(pdev); ivpu_dbg(vdev, PM, "Pre-reset..\n"); - atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->reset_pending, 1); - pm_runtime_get_sync(vdev->drm.dev); - down_write(&vdev->pm->reset_lock); - ivpu_prepare_for_reset(vdev); - ivpu_hw_reset(vdev); - ivpu_pm_prepare_cold_boot(vdev); - ivpu_jobs_abort_all(vdev); + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); + } + ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } void ivpu_pm_reset_done_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - int ret; ivpu_dbg(vdev, PM, "Post-reset..\n"); - ret = ivpu_resume(vdev); - if (ret) - ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - up_write(&vdev->pm->reset_lock); - atomic_set(&vdev->pm->reset_pending, 0); - ivpu_dbg(vdev, PM, "Post-reset done.\n"); - pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + ivpu_pm_reset_complete(vdev); + + ivpu_dbg(vdev, PM, "Post-reset done.\n"); } void ivpu_pm_init(struct ivpu_device *vdev) @@ -362,7 +373,6 @@ void ivpu_pm_init(struct ivpu_device *vdev) int delay; pm->vdev = vdev; - pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; init_rwsem(&pm->reset_lock); atomic_set(&pm->reset_pending, 0); @@ -378,6 +388,7 @@ void ivpu_pm_init(struct ivpu_device *vdev) pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, delay); + pm_runtime_set_active(dev); ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", 
delay); } @@ -392,7 +403,6 @@ void ivpu_pm_enable(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; - pm_runtime_set_active(dev); pm_runtime_allow(dev); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); @@ -403,3 +413,68 @@ void ivpu_pm_disable(struct ivpu_device *vdev) pm_runtime_get_noresume(vdev->drm.dev); pm_runtime_forbid(vdev->drm.dev); } + +int ivpu_pm_dct_init(struct ivpu_device *vdev) +{ + if (vdev->pm->dct_active_percent) + return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent); + + return 0; +} + +int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) +{ + u32 active_us, inactive_us; + int ret; + + if (active_percent == 0 || active_percent > 100) + return -EINVAL; + + active_us = (DCT_PERIOD_US * active_percent) / 100; + inactive_us = DCT_PERIOD_US - active_us; + + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); + return ret; + } + + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + return 0; +} + +int ivpu_pm_dct_disable(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_jsm_dct_disable(vdev); + if (ret) { + ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); + return ret; + } + + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT disabled\n"); + return 0; +} + +void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev) +{ + bool enable; + int ret; + + if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) + return; + + if (vdev->pm->dct_active_percent) + ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); + else + ret = ivpu_pm_dct_disable(vdev); + + if (!ret) + ivpu_hw_btrs_dct_set_status(vdev, enable, vdev->pm->dct_active_percent); +} diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index ec60fbeefefc..b70efe6c36e4 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_PM_H__ @@ -19,7 +19,7 @@ struct ivpu_pm_info { atomic_t reset_counter; atomic_t reset_pending; bool is_warmboot; - u32 suspend_reschedule_counter; + u8 dct_active_percent; }; void ivpu_pm_init(struct ivpu_device *vdev); @@ -36,11 +36,15 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev); void ivpu_pm_reset_done_cb(struct pci_dev *pdev); int __must_check ivpu_rpm_get(struct ivpu_device *vdev); -int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason); void ivpu_start_job_timeout_detection(struct ivpu_device *vdev); void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); +int ivpu_pm_dct_init(struct ivpu_device *vdev); +int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent); +int ivpu_pm_dct_disable(struct ivpu_device *vdev); +void ivpu_pm_dct_irq_thread_handler(struct ivpu_device *vdev); + #endif /* __IVPU_PM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c new file mode 100644 index 000000000000..616477fc17fa --- /dev/null +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Intel Corporation + */ + +#include <linux/device.h> +#include <linux/err.h> + +#include 
"ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_hw.h" +#include "ivpu_sysfs.h" + +/* + * npu_busy_time_us is the time that the device spent executing jobs. + * The time is counted when and only when there are jobs submitted to firmware. + * + * This time can be used to measure the utilization of NPU, either by calculating + * npu_busy_time_us difference between two timepoints (i.e. measuring the time + * that the NPU was active during some workload) or monitoring utilization percentage + * by reading npu_busy_time_us periodically. + * + * When reading the value periodically, it shouldn't be read too often as it may have + * an impact on job submission performance. Recommended period is 1 second. + */ +static ssize_t +npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + ktime_t total, now = 0; + + xa_lock(&vdev->submitted_jobs_xa); + total = vdev->busy_time; + if (!xa_empty(&vdev->submitted_jobs_xa)) + now = ktime_sub(ktime_get(), vdev->busy_start_ts); + xa_unlock(&vdev->submitted_jobs_xa); + + return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now))); +} + +static DEVICE_ATTR_RO(npu_busy_time_us); + +/** + * DOC: sched_mode + * + * The sched_mode is used to report current NPU scheduling mode. + * + * It returns following strings: + * - "HW" - Hardware Scheduler mode + * - "OS" - Operating System Scheduler mode + * + */ +static ssize_t +sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + + return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS"); +} + +static DEVICE_ATTR_RO(sched_mode); + +static struct attribute *ivpu_dev_attrs[] = { + &dev_attr_npu_busy_time_us.attr, + &dev_attr_sched_mode.attr, + NULL, +}; + +static struct attribute_group ivpu_dev_attr_group = { + .attrs = ivpu_dev_attrs, +}; + +void ivpu_sysfs_init(struct ivpu_device *vdev) +{ + int ret; + + ret = devm_device_add_group(vdev->drm.dev, &ivpu_dev_attr_group); + if (ret) + ivpu_warn(vdev, "Failed to add group to device, ret %d", ret); +} diff --git a/drivers/accel/ivpu/ivpu_sysfs.h b/drivers/accel/ivpu/ivpu_sysfs.h new file mode 100644 index 000000000000..9836f09b35a3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Intel Corporation + */ + +#ifndef __IVPU_SYSFS_H__ +#define __IVPU_SYSFS_H__ + +#include "ivpu_drv.h" + +void ivpu_sysfs_init(struct ivpu_device *vdev); + +#endif /* __IVPU_SYSFS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h new file mode 100644 index 000000000000..eb792038e701 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __IVPU_TRACE_H__ + +#include <linux/tracepoint.h> +#include "ivpu_drv.h" +#include "ivpu_job.h" +#include "vpu_jsm_api.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ipc.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vpu +#define TRACE_INCLUDE_FILE ivpu_trace + +TRACE_EVENT(pm, + TP_PROTO(const char *event), + TP_ARGS(event), + TP_STRUCT__entry(__field(const char *, event)), + TP_fast_assign(__entry->event = event;), + TP_printk("%s", __entry->event) +); + +TRACE_EVENT(job, + 
TP_PROTO(const char *event, struct ivpu_job *job), + TP_ARGS(event, job), + TP_STRUCT__entry(__field(const char *, event) + __field(u32, ctx_id) + __field(u32, engine_id) + __field(u32, job_id) + ), + TP_fast_assign(__entry->event = event; + __entry->ctx_id = job->file_priv->ctx.id; + __entry->engine_id = job->engine_idx; + __entry->job_id = job->job_id;), + TP_printk("%s context:%d engine:%d job:%d", + __entry->event, + __entry->ctx_id, + __entry->engine_id, + __entry->job_id) +); + +TRACE_EVENT(jsm, + TP_PROTO(const char *event, struct vpu_jsm_msg *msg), + TP_ARGS(event, msg), + TP_STRUCT__entry(__field(const char *, event) + __field(const char *, type) + __field(enum vpu_ipc_msg_status, status) + __field(u32, request_id) + __field(u32, result) + ), + TP_fast_assign(__entry->event = event; + __entry->type = ivpu_jsm_msg_type_to_str(msg->type); + __entry->status = msg->status; + __entry->request_id = msg->request_id; + __entry->result = msg->result;), + TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x", + __entry->event, + __entry->type, + __entry->status, + __entry->request_id, + __entry->result) +); + +#endif /* __IVPU_TRACE_H__ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c new file mode 100644 index 000000000000..f8fb99de0de3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "ivpu_trace.h" +#endif diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 04c954258563..908e68ea1c39 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -1,14 +1,13 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (c) 2020-2024, Intel Corporation. */ #ifndef VPU_BOOT_API_H #define VPU_BOOT_API_H /* - * =========== FW API version information beginning ================ - * The bellow values will be used to construct the version info this way: + * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes @@ -27,21 +26,20 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 20 +#define VPU_BOOT_API_VER_MINOR 26 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 4 +#define VPU_BOOT_API_VER_PATCH 3 /* * Index in the API version table * Must be unique for each API */ #define VPU_BOOT_API_VER_INDEX 0 -/* ------------ FW API version information end ---------------------*/ -#pragma pack(push, 1) +#pragma pack(push, 4) /* * Firmware image header format @@ -66,12 +64,25 @@ struct vpu_firmware_header { /* Size of memory require for firmware execution */ u32 runtime_size; u32 shave_nn_fw_size; - /* Size of primary preemption buffer. */ + /* + * Size of primary preemption buffer, assuming a 2-job submission queue. + * NOTE: host driver is expected to adapt size accordingly to actual + * submission queue size and device capabilities. 
+ */ u32 preemption_buffer_1_size; - /* Size of secondary preemption buffer. */ + /* + * Size of secondary preemption buffer, assuming a 2-job submission queue. + * NOTE: host driver is expected to adapt size accordingly to actual + * submission queue size and device capabilities. + */ u32 preemption_buffer_2_size; /* Space reserved for future preemption-related fields. */ u32 preemption_reserved[6]; + /* FW image read only section start address, 4KB aligned */ + u64 ro_section_start_address; + /* FW image read only section size, 4KB aligned */ + u32 ro_section_size; + u32 reserved; }; /* @@ -151,8 +162,6 @@ enum vpu_trace_destination { /* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ #define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT #define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 -#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST -#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST struct vpu_boot_l2_cache_config { u8 use; @@ -181,10 +190,21 @@ struct vpu_warm_boot_section { #define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000 /** - * Macros to enable various operation modes within the VPU. + * Macros to enable various power profiles within the NPU. * To be defined as part of 32 bit mask. */ -#define VPU_OP_MODE_SURVIVABILITY 0x1 +#define POWER_PROFILE_SURVIVABILITY 0x1 + +/** + * Enum for dvfs_mode boot param. + */ +enum vpu_governor { + VPU_GOV_DEFAULT = 0, /* Default Governor for the system */ + VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */ + VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */ + VPU_GOV_POWER_SAVE = 3, /* Power save governor */ + VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */ +}; struct vpu_boot_params { u32 magic; @@ -288,7 +308,14 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */ + /** + * DVFS Mode: + * 0 - Default, DVFS mode selected by the firmware + * 1 - Max Performance + * 2 - On Demand + * 3 - Power Save + * 4 - On Demand Priority Aware + */ u32 dvfs_mode; /** * Depending on DVFS Mode: @@ -317,7 +344,22 @@ struct vpu_boot_params { u64 d0i3_residency_time_us; /* Value of VPU perf counter at the time of entering D0i3 state . */ u64 d0i3_entry_vpu_ts; - u32 pad4[20]; + /* + * The system time of the host operating system in microseconds. + * E.g the number of microseconds since 1st of January 1970, or whatever + * date the host operating system uses to maintain system time. + * This value will be used to track system time on the VPU. + * The KMD is required to update this value on every VPU reset. + */ + u64 system_time_us; + u32 pad4[2]; + /* + * The delta between device monotonic time and the current value of the + * HW timestamp register, in ticks. Written by the firmware during boot. + * Can be used by the KMD to calculate device time. 
+ */ + u64 device_time_delta_ticks; + u32 pad7[14]; /* Warm boot information: 0x400 - 0x43F */ u32 warm_boot_sections_count; u32 warm_boot_start_address_reference; @@ -344,16 +386,17 @@ struct vpu_boot_params { u32 vpu_focus_present_timer_ms; /* VPU ECC Signaling */ u32 vpu_uses_ecc_mca_signal; - /* Values defined by VPU_OP_MODE* macros */ - u32 vpu_operation_mode; - /* Unused/reserved: 0x480 - 0xFFF */ - u32 pad6[736]; + /* Values defined by POWER_PROFILE* macros */ + u32 power_profile; + /* Microsecond value for DCT active cycle */ + u32 dct_active_us; + /* Microsecond value for DCT inactive cycle */ + u32 dct_inactive_us; + /* Unused/reserved: 0x488 - 0xFFF */ + u32 pad6[734]; }; -/* - * Magic numbers set between host and vpu to detect corruptio of tracing init - */ - +/* Magic numbers set between host and vpu to detect corruption of tracing init */ #define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) /* Tracing buffer message format definitions */ @@ -373,7 +416,9 @@ struct vpu_tracing_buffer_header { u32 host_canary_start; /* offset from start of buffer for trace entries */ u32 read_index; - u32 pad_to_cache_line_size_0[14]; + /* keeps track of wrapping on the reader side */ + u32 read_wrap_count; + u32 pad_to_cache_line_size_0[13]; /* End of first cache line */ /** diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7da7622742be..7215c144158c 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (c) 2020-2024, Intel Corporation. */ /** @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 15 +#define VPU_JSM_API_VER_MINOR 25 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -36,15 +36,18 @@ /* * Number of Priority Bands for Hardware Scheduling - * Bands: RealTime, Focus, Normal, Idle + * Bands: Idle(0), Normal(1), Focus(2), RealTime(3) */ #define VPU_HWS_NUM_PRIORITY_BANDS 4 /* Max number of impacted contexts that can be dealt with the engine reset command */ #define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3 -/** Pack the API structures for now, once alignment issues are fixed this can be removed */ -#pragma pack(push, 1) +/* + * Pack the API structures to enforce binary compatibility + * Align to 8 bytes for optimal performance + */ +#pragma pack(push, 8) /* * Engine indexes. @@ -71,6 +74,7 @@ #define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU /* Job status returned when the job was preempted mid-inference */ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU +#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU /* * Host <-> VPU IPC channels. @@ -83,18 +87,58 @@ /* * Job flags bit masks. */ -#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 -#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000 +enum { + /* + * Null submission mask. + * When set, batch buffer's commands are not processed but returned as + * successful immediately, except fences and timestamps. + * When cleared, batch buffer's commands are processed normally. + * Used for testing and profiling purposes. + */ + VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U), + /* + * Inline command mask. + * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below). + * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below). 
+ */ + VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U), + /* + * VPU private data mask. + * Reserved for the VPU to store private data about the job (or inline command) + * while being processed. + */ + VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U +}; /* - * Sizes of the reserved areas in jobs, in bytes. + * Job queue flags bit masks. */ -#define VPU_JOB_RESERVED_BYTES 8 +enum { + /* + * No job done notification mask. + * When set, indicates that no job done notification should be sent for any + * job from this queue. When cleared, indicates that job done notification + * should be sent for every job completed from this queue. + */ + VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U), + /* + * Native fence usage mask. + * When set, indicates that job queue uses native fences (as inline commands + * in job queue). Such queues may also use legacy fences (as commands in batch buffers). + * When cleared, indicates the job queue only uses legacy fences. + * NOTE: For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + */ + VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), -/* - * Sizes of the reserved areas in job queues, in bytes. - */ -#define VPU_JOB_QUEUE_RESERVED_BYTES 52 + /* + * Enable turbo mode for testing NPU performance; not recommended for regular usage. + */ + VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U) +}; /* * Max length (including trailing NULL char) of trace entity name (e.g., the @@ -125,23 +169,125 @@ #define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U /* + * vpu_jsm_engine_reset_context flag definitions + */ +#define VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK BIT(0) +#define VPU_ENGINE_RESET_CONTEXT_HANG_PRIMARY_CAUSE 0 +#define VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE 1 + +/* + * Invalid command queue handle identifier. Applies to cmdq_id and cmdq_group + * in this API. + */ +#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL + +/* + * Inline commands types. + */ +/* + * NOP. + * VPU does nothing other than consuming the inline command object. + */ +#define VPU_INLINE_CMD_TYPE_NOP 0x0 +/* + * Fence wait. + * VPU waits for the fence current value to reach monitored value. + * Fence wait operations are executed upon job dispatching. While waiting for + * the fence to be satisfied, VPU blocks fetching of the next objects in the queue. + * Jobs present in the queue prior to the fence wait object may be processed + * concurrently. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1 +/* + * Fence signal. + * VPU sets the fence current value to the provided value. If new current value + * is equal to or higher than monitored value, VPU sends fence signalled notification + * to the host. Fence signal operations are executed upon completion of all the jobs + * present in the queue prior to them, and in-order relative to each other in the queue. + * But jobs in-between them may be processed concurrently and may complete out-of-order. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2 + +/* + * Job scheduling priority bands for both hardware scheduling and OS scheduling. 
+ */ +enum vpu_job_scheduling_priority_band { + VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0, + VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1, + VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2, + VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3, + VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4, +}; + +/* * Job format. + * Jobs defines the actual workloads to be executed by a given engine. */ struct vpu_job_queue_entry { - u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ - u32 job_id; /**< Job ID */ - u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ - u64 root_page_table_addr; /**< Address of root page table to use for this job */ - u64 root_page_table_update_counter; /**< Page tables update events counter */ - u64 primary_preempt_buf_addr; + /**< Address of VPU commands batch buffer */ + u64 batch_buf_addr; + /**< Job ID */ + u32 job_id; + /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u32 flags; + /** + * Doorbell ring timestamp taken by KMD from SoC's global system clock, in + * microseconds. NPU can convert this value to its own fixed clock's timebase, + * to match other profiling timestamps. + */ + u64 doorbell_timestamp; + /**< Extra id for job tracking, used only in the firmware perf traces */ + u64 host_tracking_id; /**< Address of the primary preemption buffer to use for this job */ - u32 primary_preempt_buf_size; + u64 primary_preempt_buf_addr; /**< Size of the primary preemption buffer to use for this job */ - u32 secondary_preempt_buf_size; + u32 primary_preempt_buf_size; /**< Size of secondary preemption buffer to use for this job */ - u64 secondary_preempt_buf_addr; + u32 secondary_preempt_buf_size; /**< Address of secondary preemption buffer to use for this job */ - u8 reserved_0[VPU_JOB_RESERVED_BYTES]; + u64 secondary_preempt_buf_addr; + u64 reserved_0; +}; + +/* + * Inline command format. + * Inline commands are the commands executed at scheduler level (typically, + * synchronization directives). Inline command and job objects must be of + * the same size and have flags field at same offset. + */ +struct vpu_inline_cmd { + u64 reserved_0; + /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */ + u32 type; + /* Flags bit field, see VPU_JOB_FLAGS_* above. */ + u32 flags; + /* Inline command payload. Depends on inline command type. */ + union { + /* Fence (wait and signal) commands' payload. */ + struct { + /* Fence object handle. */ + u64 fence_handle; + /* User VA of the current fence value. */ + u64 current_value_va; + /* User VA of the monitored fence value (read-only). */ + u64 monitored_value_va; + /* Value to wait for or write in fence location. */ + u64 value; + /* User VA of the log buffer in which to add log entry on completion. */ + u64 log_buffer_va; + } fence; + /* Other commands do not have a payload. */ + /* Payload definition for future inline commands can be inserted here. */ + u64 reserved_1[6]; + } payload; +}; + +/* + * Job queue slots can be populated either with job objects or inline command objects. + */ +union vpu_jobq_slot { + struct vpu_job_queue_entry job; + struct vpu_inline_cmd inline_cmd; }; /* @@ -151,7 +297,21 @@ struct vpu_job_queue_header { u32 engine_idx; u32 head; u32 tail; - u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; + u32 flags; + /* Set to 1 to indicate priority_band field is valid */ + u32 priority_band_valid; + /* + * Priority for the work of this job queue, valid only if the HWS is NOT used + * and the `priority_band_valid` is set to 1. 
It is applied only during + * the VPU_JSM_MSG_REGISTER_DB message processing. + * The device firmware might use the `priority_band` to optimize the power + * management logic, but it will not affect the order of jobs. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ + u32 priority_band; + /* Inside realtime band assigns a further priority, limited to 0..31 range */ + u32 realtime_priority_level; + u32 reserved_0[9]; }; /* @@ -159,7 +319,7 @@ struct vpu_job_queue_header { */ struct vpu_job_queue { struct vpu_job_queue_header header; - struct vpu_job_queue_entry job[]; + union vpu_jobq_slot slot[]; }; /** @@ -181,9 +341,7 @@ enum vpu_trace_entity_type { struct vpu_hws_log_buffer_header { /* Written by VPU after adding a log entry. Initialised by host to 0. */ u32 first_free_entry_index; - /* Incremented by VPU every time the VPU overwrites the 0th entry; - * initialised by host to 0. - */ + /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */ u32 wraparound_count; /* * This is the number of buffers that can be stored in the log buffer provided by the host. @@ -214,14 +372,80 @@ struct vpu_hws_log_buffer_entry { u64 operation_data[2]; }; +/* Native fence log buffer types. */ +enum vpu_hws_native_fence_log_type { + VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1, + VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2 +}; + +/* HWS native fence log buffer header. */ +struct vpu_hws_native_fence_log_header { + union { + struct { + /* Index of the first free entry in buffer. */ + u32 first_free_entry_idx; + /* Incremented each time NPU wraps around the buffer to write next entry. */ + u32 wraparound_count; + }; + /* Field allowing atomic update of both fields above. */ + u64 atomic_wraparound_and_entry_idx; + }; + /* Log buffer type, see enum vpu_hws_native_fence_log_type. */ + u64 type; + /* Allocated number of entries in the log buffer. */ + u64 entry_nb; + u64 reserved[2]; +}; + +/* Native fence log operation types. */ +enum vpu_hws_native_fence_log_op { + VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0, + VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1 +}; + +/* HWS native fence log entry. */ +struct vpu_hws_native_fence_log_entry { + /* Newly signaled/unblocked fence value. */ + u64 fence_value; + /* Native fence object handle to which this operation belongs. */ + u64 fence_handle; + /* Operation type, see enum vpu_hws_native_fence_log_op. */ + u64 op_type; + u64 reserved_0; + /* + * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence + * wait was started (in NPU SysTime). + */ + u64 fence_wait_start_ts; + u64 reserved_1; + /* Timestamp at which fence operation was completed (in NPU SysTime). */ + u64 fence_end_ts; +}; + +/* Native fence log buffer. */ +struct vpu_hws_native_fence_log_buffer { + struct vpu_hws_native_fence_log_header header; + struct vpu_hws_native_fence_log_entry entry[]; +}; + /* * Host <-> VPU IPC messages types. */ enum vpu_ipc_msg_type { VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, + /* IPC Host -> Device, Async commands */ VPU_JSM_MSG_ASYNC_CMD = 0x1100, VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + /** + * Preempt engine. The NPU stops (preempts) all the jobs currently + * executing on the target engine making the engine become idle and ready to + * execute new jobs. 
+ * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of + * the target engine, but it stops processing them (until the queue doorbell + * is rung again); the host is responsible to reset the job queue, either + * after preemption or when resubmitting jobs to the queue. + */ VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, VPU_JSM_MSG_REGISTER_DB = 0x1102, VPU_JSM_MSG_UNREGISTER_DB = 0x1103, @@ -307,9 +531,10 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC commands before this one. * */ VPU_JSM_MSG_STATE_DUMP = 0x11FF, + /* IPC Host -> Device, General commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, - VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD, /** * Control dyndbg behavior by executing a dyndbg command; equivalent to * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. @@ -319,8 +544,12 @@ enum vpu_ipc_msg_type { * Perform the save procedure for the D0i3 entry */ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, + /* IPC Device -> Host, Job completion */ VPU_JSM_MSG_JOB_DONE = 0x2100, + /* IPC Device -> Host, Fence signalled */ + VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101, + /* IPC Device -> Host, Async command completion */ VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, @@ -406,6 +635,7 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC responses before this one. * */ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, + /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, @@ -584,11 +814,6 @@ struct vpu_jsm_metric_streamer_update { u64 next_buffer_size; }; -struct vpu_ipc_msg_payload_blob_deinit { - /* 64-bit unique ID for the blob to be de-initialized. */ - u64 blob_id; -}; - struct vpu_ipc_msg_payload_job_done { /* Engine to which the job was submitted. */ u32 engine_idx; @@ -606,6 +831,21 @@ struct vpu_ipc_msg_payload_job_done { u64 cmdq_id; }; +/* + * Notification message upon native fence signalling. + * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED + */ +struct vpu_ipc_msg_payload_native_fence_signalled { + /* Engine ID. */ + u32 engine_idx; + /* Host SSID. */ + u32 host_ssid; + /* CMDQ ID */ + u64 cmdq_id; + /* Fence object handle. */ + u64 fence_handle; +}; + struct vpu_jsm_engine_reset_context { /* Host SSID */ u32 host_ssid; @@ -613,7 +853,7 @@ struct vpu_jsm_engine_reset_context { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* Flags: 0: cause of hang; 1: collateral damage of reset */ + /* See VPU_ENGINE_RESET_CONTEXT_* defines */ u64 flags; }; @@ -684,11 +924,6 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { u8 power_limit[16]; }; -struct vpu_ipc_msg_payload_blob_deinit_done { - /* 64-bit unique ID for the blob de-initialized. */ - u64 blob_id; -}; - /* HWS priority band setup request / response */ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* @@ -730,11 +965,7 @@ struct vpu_ipc_msg_payload_hws_create_cmdq { u32 host_ssid; /* Engine for which queue is being created */ u32 engine_idx; - /* - * Cmdq group may be set to 0 or equal to - * cmdq_id while each priority band contains - * only single engine instances. 
- */ + /* Cmdq group: only used for HWS logging of state changes */ u64 cmdq_group; /* Command queue id */ u64 cmdq_id; @@ -782,7 +1013,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* Priority band to assign to work of this context */ + /* + * Priority band to assign to work of this context. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ u32 priority_band; /* Inside realtime band assigns a further priority */ u32 realtime_priority_level; @@ -856,6 +1090,12 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log { * is generated when an event log is written to this index. */ u64 notify_index; + /* + * Field is now deprecated, will be removed when KMD is updated to support removal + */ + u32 enable_extra_events; + /* Zero Padding */ + u32 reserved_0; }; /* @@ -1223,10 +1463,10 @@ union vpu_ipc_msg_payload { struct vpu_jsm_metric_streamer_start metric_streamer_start; struct vpu_jsm_metric_streamer_stop metric_streamer_stop; struct vpu_jsm_metric_streamer_update metric_streamer_update; - struct vpu_ipc_msg_payload_blob_deinit blob_deinit; struct vpu_ipc_msg_payload_ssid_release ssid_release; struct vpu_jsm_hws_register_db hws_register_db; struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled; struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; struct vpu_ipc_msg_payload_register_db_done register_db_done; @@ -1234,7 +1474,6 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; struct vpu_jsm_metric_streamer_done metric_streamer_done; - struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; struct vpu_ipc_msg_payload_trace_config trace_config; struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
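
A few usage sketches follow; none of this is part of the patch. First, the read-only mapping path has a subtlety worth spelling out: before individual 4K PTEs can be marked RO, any 64K contiguous mapping overlapping either end of the range must be split, i.e. the CONT hint cleared on every 4K PTE it covers. A worked example of the bounds computed by ivpu_mmu_context_split_64k_page(), with assumed 4K/64K page sizes and locally defined ALIGN helpers standing in for the kernel macros:

#include <stdio.h>

#define IVPU_MMU_PAGE_SIZE       0x1000ull   /* 4K, assumed */
#define IVPU_MMU_CONT_PAGES_SIZE 0x10000ull  /* 64K, assumed */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long long vpu_addr = 0x80043000ull; /* 4K- but not 64K-aligned */
	unsigned long long start = ALIGN_DOWN(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE);
	unsigned long long end = ALIGN_UP(vpu_addr, IVPU_MMU_CONT_PAGES_SIZE);

	/* Prints "split [0x80040000, 0x80050000), 16 PTEs": the CONT flag is
	 * cleared on all sixteen 4K PTEs covering the surrounding 64K page.
	 */
	printf("split [%#llx, %#llx), %llu PTEs\n",
	       start, end, (end - start) / IVPU_MMU_PAGE_SIZE);
	return 0;
}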
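
The metric streamer ioctls follow a query-then-read convention: buffer_size == 0 asks for the pending data size, a non-zero buffer drains samples through the driver's double-buffered BO. A minimal userspace sketch of the start/read/stop lifecycle, assuming the uapi counterparts of the handlers above exist in <drm/ivpu_accel.h>; the 0x1 metric group mask and the device path are illustrative only:

/* ms_read.c - start/read/stop lifecycle for the metric streamer ioctls. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/ivpu_accel.h>

int main(void)
{
	struct drm_ivpu_metric_streamer_start start = {
		.metric_group_mask = 0x1,               /* illustrative group */
		.sampling_period_ns = 10 * 1000 * 1000, /* >= MS_MIN_SAMPLE_PERIOD_NS */
		.read_period_samples = 100,
	};
	struct drm_ivpu_metric_streamer_get_data get = { .metric_group_mask = 0x1 };
	struct drm_ivpu_metric_streamer_stop stop = { .metric_group_mask = 0x1 };
	void *buf;
	int fd = open("/dev/accel/accel0", O_RDWR);

	if (fd < 0)
		return EXIT_FAILURE;
	if (ioctl(fd, DRM_IOCTL_IVPU_METRIC_STREAMER_START, &start))
		goto out;

	sleep(1); /* let some samples accumulate */

	/* buffer_size == 0 is a size query: data_size reports pending bytes */
	if (ioctl(fd, DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA, &get) == 0)
		printf("pending: %llu bytes\n", (unsigned long long)get.data_size);

	buf = malloc(start.max_data_size);
	if (buf) {
		get.buffer_ptr = (uintptr_t)buf;
		get.buffer_size = start.max_data_size;
		if (ioctl(fd, DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA, &get) == 0)
			printf("read %llu bytes (%llu B/sample)\n",
			       (unsigned long long)get.data_size,
			       (unsigned long long)start.sample_size);
		free(buf);
	}

	ioctl(fd, DRM_IOCTL_IVPU_METRIC_STREAMER_STOP, &stop);
out:
	close(fd);
	return 0;
}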
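
Per the comment block in ivpu_sysfs.c, npu_busy_time_us is meant to be differenced between two timepoints. A small sampling sketch; the sysfs path is an assumption for illustration (the attribute group is attached via devm_device_add_group() to the accel device's parent):

/* npu_util.c - coarse NPU utilization over a 1 s window. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static long long read_busy_us(const char *path)
{
	long long v = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%lld", &v) != 1)
			v = -1;
		fclose(f);
	}
	return v;
}

int main(void)
{
	/* Assumed location; adjust to the actual device's sysfs directory. */
	const char *path = "/sys/class/accel/accel0/device/npu_busy_time_us";
	long long t0 = read_busy_us(path);

	sleep(1); /* 1 s window, matching the recommended read period */

	long long t1 = read_busy_us(path);

	if (t0 < 0 || t1 < 0)
		return EXIT_FAILURE;
	/* busy microseconds over a 1,000,000 us window -> percentage */
	printf("NPU utilization: %.1f%%\n", (double)(t1 - t0) / 10000.0);
	return EXIT_SUCCESS;
}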
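
Finally, the USE_NATIVE_FENCE queue flag imposes a layout contract: every job slot must be immediately followed by an inline command slot, usually a fence signal, or a NOP when no per-job signalling is needed. A host-side sketch of that pairing, using abbreviated local copies of the vpu_jsm_api.h structures above; the flags bit on the inline command follows the VPU_JOB_FLAGS_INLINE_CMD_MASK description, and queue wrap plus the doorbell write are omitted:

#include <stdint.h>
#include <string.h>

#define VPU_JOB_FLAGS_INLINE_CMD_MASK    (1u << 1)
#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2u

struct vpu_job_queue_entry {	/* abbreviated */
	uint64_t batch_buf_addr;
	uint32_t job_id;
	uint32_t flags;
};

struct vpu_inline_cmd {		/* abbreviated */
	uint64_t reserved_0;
	uint32_t type;
	uint32_t flags;
	struct {
		uint64_t fence_handle;
		uint64_t current_value_va;
		uint64_t monitored_value_va;
		uint64_t value;
	} payload_fence;
};

union vpu_jobq_slot {
	struct vpu_job_queue_entry job;
	struct vpu_inline_cmd inline_cmd;
};

/* Enqueue one job followed by the fence-signal inline command that the
 * native-fence contract expects immediately after it.
 */
static uint32_t enqueue_job_with_fence(union vpu_jobq_slot *slots, uint32_t tail,
				       uint64_t batch_buf_addr, uint32_t job_id,
				       uint64_t fence_handle, uint64_t value)
{
	memset(&slots[tail], 0, sizeof(slots[tail]));
	slots[tail].job.batch_buf_addr = batch_buf_addr;
	slots[tail].job.job_id = job_id;
	tail++;

	memset(&slots[tail], 0, sizeof(slots[tail]));
	slots[tail].inline_cmd.type = VPU_INLINE_CMD_TYPE_FENCE_SIGNAL;
	slots[tail].inline_cmd.flags = VPU_JOB_FLAGS_INLINE_CMD_MASK;
	slots[tail].inline_cmd.payload_fence.fence_handle = fence_handle;
	slots[tail].inline_cmd.payload_fence.value = value;
	tail++;

	return tail; /* caller stores this in the queue header, then rings the doorbell */
}

int main(void)
{
	union vpu_jobq_slot slots[4];
	uint32_t tail = enqueue_job_with_fence(slots, 0, 0x1000, 1, 0xabc, 1);

	return tail == 2 ? 0 : 1;
}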