Diffstat (limited to 'drivers/accel/amdxdna/aie2_pci.h')
-rw-r--r--   drivers/accel/amdxdna/aie2_pci.h   346
1 file changed, 346 insertions, 0 deletions
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
new file mode 100644
index 000000000000..a5f9c42155d1
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_PCI_H_
+#define _AIE2_PCI_H_
+
+#include <drm/amdxdna_accel.h>
+#include <linux/semaphore.h>
+
+#include "amdxdna_mailbox.h"
+
+#define AIE2_INTERVAL 20000 /* us */
+#define AIE2_TIMEOUT 1000000 /* us */
+
+/* Firmware determines device memory base address and size */
+#define AIE2_DEVM_BASE 0x4000000
+#define AIE2_DEVM_SIZE SZ_64M
+
+#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))
+
+#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
+#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)
+
+#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
+#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
+#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
+
+#define SMU_REG(ndev, idx) \
+({ \
+        typeof(ndev) _ndev = ndev; \
+        ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
+})
+#define SRAM_GET_ADDR(ndev, idx) \
+({ \
+        typeof(ndev) _ndev = ndev; \
+        ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
+})
+
+#define CHAN_SLOT_SZ SZ_8K
+#define MBOX_SIZE(ndev) \
+({ \
+        typeof(ndev) _ndev = (ndev); \
+        ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
+        pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
+})
+
+enum aie2_smu_reg_idx {
+        SMU_CMD_REG = 0,
+        SMU_ARG_REG,
+        SMU_INTR_REG,
+        SMU_RESP_REG,
+        SMU_OUT_REG,
+        SMU_MAX_REGS /* Keep this at the end */
+};
+
+enum aie2_sram_reg_idx {
+        MBOX_CHANN_OFF = 0,
+        FW_ALIVE_OFF,
+        SRAM_MAX_INDEX /* Keep this at the end */
+};
+
+enum psp_reg_idx {
+        PSP_CMD_REG = 0,
+        PSP_ARG0_REG,
+        PSP_ARG1_REG,
+        PSP_ARG2_REG,
+        PSP_NUM_IN_REGS, /* number of input registers */
+        PSP_INTR_REG = PSP_NUM_IN_REGS,
+        PSP_STATUS_REG,
+        PSP_RESP_REG,
+        PSP_MAX_REGS /* Keep this at the end */
+};
+
+struct amdxdna_client;
+struct amdxdna_fw_ver;
+struct amdxdna_hwctx;
+struct amdxdna_sched_job;
+
+struct psp_config {
+        const void *fw_buf;
+        u32 fw_size;
+        void __iomem *psp_regs[PSP_MAX_REGS];
+};
+
+struct aie_version {
+        u16 major;
+        u16 minor;
+};
+
+struct aie_tile_metadata {
+        u16 row_count;
+        u16 row_start;
+        u16 dma_channel_count;
+        u16 lock_count;
+        u16 event_reg_count;
+};
+
+struct aie_metadata {
+        u32 size;
+        u16 cols;
+        u16 rows;
+        struct aie_version version;
+        struct aie_tile_metadata core;
+        struct aie_tile_metadata mem;
+        struct aie_tile_metadata shim;
+};
+
+enum rt_config_category {
+        AIE2_RT_CFG_INIT,
+        AIE2_RT_CFG_CLK_GATING,
+        AIE2_RT_CFG_FORCE_PREEMPT,
+        AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+};
+
+struct rt_config {
+        u32 type;
+        u32 value;
+        u32 category;
+        unsigned long feature_mask;
+};
+
+struct dpm_clk_freq {
+        u32 npuclk;
+        u32 hclk;
+};
+
+/*
+ * Define the maximum number of pending commands in a hardware context.
+ * Must be power of 2!
+ */
+#define HWCTX_MAX_CMDS 4
+#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
+struct amdxdna_hwctx_priv {
+        struct amdxdna_gem_obj *heap;
+        void *mbox_chann;
+
+        struct drm_gpu_scheduler sched;
+        struct drm_sched_entity entity;
+
+        struct mutex io_lock; /* protect seq and cmd order */
+        struct wait_queue_head job_free_wq;
+        u32 num_pending;
+        u64 seq;
+        struct semaphore job_sem;
+        bool job_done;
+
+        /* Completed job counter */
+        u64 completed;
+
+        struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
+        struct drm_syncobj *syncobj;
+};
+
+enum aie2_dev_status {
+        AIE2_DEV_UNINIT,
+        AIE2_DEV_INIT,
+        AIE2_DEV_START,
+};
+
+struct aie2_exec_msg_ops {
+        int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+                           size_t *size, u32 *msg_op);
+        int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+                            size_t *size, u32 *msg_op);
+        void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
+        int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+        int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+        int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+        int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+        u32 (*get_chain_msg_op)(u32 cmd_op);
+};
+
+struct amdxdna_dev_hdl {
+        struct amdxdna_dev *xdna;
+        const struct amdxdna_dev_priv *priv;
+        void __iomem *sram_base;
+        void __iomem *smu_base;
+        void __iomem *mbox_base;
+        struct psp_device *psp_hdl;
+
+        struct xdna_mailbox_chann_res mgmt_x2i;
+        struct xdna_mailbox_chann_res mgmt_i2x;
+        u32 mgmt_chan_idx;
+        u32 mgmt_prot_major;
+        u32 mgmt_prot_minor;
+
+        u32 total_col;
+        struct aie_version version;
+        struct aie_metadata metadata;
+        unsigned long feature_mask;
+        struct aie2_exec_msg_ops *exec_msg_ops;
+
+        /* power management and clock*/
+        enum amdxdna_power_mode_type pw_mode;
+        u32 dpm_level;
+        u32 dft_dpm_level;
+        u32 max_dpm_level;
+        u32 clk_gating;
+        u32 npuclk_freq;
+        u32 hclk_freq;
+        u32 max_tops;
+        u32 curr_tops;
+        u32 force_preempt_enabled;
+        u32 frame_boundary_preempt;
+
+        /* Mailbox and the management channel */
+        struct mailbox *mbox;
+        struct mailbox_channel *mgmt_chann;
+        struct async_events *async_events;
+
+        enum aie2_dev_status dev_status;
+        u32 hwctx_num;
+
+        struct amdxdna_async_error last_async_err;
+};
+
+#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
+        [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}
+
+struct aie2_bar_off_pair {
+        int bar_idx;
+        u32 offset;
+};
+
+struct aie2_hw_ops {
+        int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
+enum aie2_fw_feature {
+        AIE2_NPU_COMMAND,
+        AIE2_PREEMPT,
+        AIE2_FEATURE_MAX
+};
+
+struct aie2_fw_feature_tbl {
+        enum aie2_fw_feature feature;
+        u32 max_minor;
+        u32 min_minor;
+};
+
+#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
+
+struct amdxdna_dev_priv {
+        const char *fw_path;
+        u64 protocol_major;
+        u64 protocol_minor;
+        const struct rt_config *rt_config;
+        const struct dpm_clk_freq *dpm_clk_tbl;
+        const struct aie2_fw_feature_tbl *fw_feature_tbl;
+
+#define COL_ALIGN_NONE 0
+#define COL_ALIGN_NATURE 1
+        u32 col_align;
+        u32 mbox_dev_addr;
+        /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
+        u32 mbox_size;
+        u32 hwctx_limit;
+        u32 sram_dev_addr;
+        struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
+        struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
+        struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
+        struct aie2_hw_ops hw_ops;
+};
+
+extern const struct amdxdna_dev_ops aie2_ops;
+
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+                     enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
+
+/* aie2_smu.c */
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
+
+/* aie2_psp.c */
+struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
+int aie2_psp_start(struct psp_device *psp);
+void aie2_psp_stop(struct psp_device *psp);
+
+/* aie2_error.c */
+int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
+void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
+int aie2_error_async_msg_thread(void *data);
+int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
+                               struct amdxdna_drm_get_array *args);
+
+/* aie2_message.c */
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
+int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
+int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
+int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
+int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
+int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
+int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
+                                struct amdxdna_fw_ver *fw_ver);
+int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
+int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+                         char __user *buf, u32 size,
+                         struct amdxdna_drm_query_telemetry_header *header);
+int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
+                                 void *handle, int (*cb)(void*, void __iomem *, size_t));
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+                   int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+                 int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+                                struct amdxdna_sched_job *job,
+                                int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+                               struct amdxdna_sched_job *job,
+                               int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+                 int (*notify_cb)(void *, void __iomem *, size_t));
+int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+                         int (*notify_cb)(void *, void __iomem *, size_t));
+
+/* aie2_hwctx.c */
+int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
+void aie2_hwctx_suspend(struct amdxdna_client *client);
+int aie2_hwctx_resume(struct amdxdna_client *client);
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+
+#endif /* _AIE2_PCI_H_ */
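Not part of the patch above: a minimal, self-contained sketch of how the HWCTX_MAX_CMDS / get_job_idx() pair defined in this header maps an ever-increasing submission sequence number onto a fixed ring of command-buffer slots. Only the two macros come from aie2_pci.h; the surrounding userspace test harness is hypothetical and exists purely for illustration.

/*
 * Illustration only: because HWCTX_MAX_CMDS is a power of two, the AND
 * mask in get_job_idx() behaves like seq % HWCTX_MAX_CMDS, so slots
 * repeat 0,1,2,3,0,1,... as the sequence number grows.
 */
#include <stdio.h>
#include <stdint.h>

#define HWCTX_MAX_CMDS 4                                 /* from aie2_pci.h */
#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))  /* from aie2_pci.h */

int main(void)
{
        for (uint64_t seq = 0; seq < 8; seq++)
                printf("seq %llu -> cmd_buf slot %llu\n",
                       (unsigned long long)seq,
                       (unsigned long long)get_job_idx(seq));
        return 0;
}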
