diff options
Diffstat (limited to 'drivers/accel/habanalabs/common/habanalabs.h')
| -rw-r--r-- | drivers/accel/habanalabs/common/habanalabs.h | 384 |
1 files changed, 282 insertions, 102 deletions
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 2f027d5a8206..d94c2ba22a6a 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2022 HabanaLabs, Ltd. + * Copyright 2016-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -8,7 +8,7 @@ #ifndef HABANALABSP_H_ #define HABANALABSP_H_ -#include "../include/common/cpucp_if.h" +#include <linux/habanalabs/cpucp_if.h> #include "../include/common/qman_if.h" #include "../include/hw_ip/mmu/mmu_general.h" #include <uapi/drm/habanalabs_accel.h> @@ -29,6 +29,9 @@ #include <linux/coresight.h> #include <linux/dma-buf.h> +#include <drm/drm_device.h> +#include <drm/drm_file.h> + #include "security.h" #define HL_NAME "habanalabs" @@ -68,7 +71,7 @@ struct hl_fpriv; #define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ -#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ +#define HL_HEARTBEAT_PER_USEC 10000000 /* 10 s */ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ @@ -82,14 +85,14 @@ struct hl_fpriv; #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ -#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */ - #define HL_INVALID_QUEUE UINT_MAX #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF #define HL_COMMON_DEC_INTERRUPT_ID 0xFFE -#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_DBGFS_CFG_ACCESS_HIST_LEN 20 +#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 2 /* 2s */ /* Default value for device reset trigger , an invalid value */ #define HL_RESET_TRIGGER_DEFAULT 0xFF @@ -103,6 +106,8 @@ struct hl_fpriv; /* MMU */ #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ +#define TIMESTAMP_FREE_NODES_NUM 512 + /** * enum hl_mmu_page_table_location - mmu page table location * @MMU_DR_PGT: page-table is located on device DRAM. @@ -154,6 +159,11 @@ enum hl_mmu_page_table_location { #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) +#define hl_dma_map_sgtable(hdev, sgt, dir) \ + hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__) +#define hl_dma_unmap_sgtable(hdev, sgt, dir) \ + hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__) + /* * Reset Flags * @@ -435,18 +445,22 @@ enum hl_collective_mode { * a CB handle can be provided for jobs on this queue. * Otherwise, a CB address must be provided. * @collective_mode: collective mode of current queue + * @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM. * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. * @binned: True if the queue is binned out and should not be used * @supports_sync_stream: True if queue supports sync stream + * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram */ struct hw_queue_properties { enum hl_queue_type type; enum queue_cb_alloc_flags cb_alloc_flags; enum hl_collective_mode collective_mode; + u64 q_dram_bd_address; u8 driver_only; u8 binned; u8 supports_sync_stream; + u8 dram_bd; }; /** @@ -545,8 +559,7 @@ struct hl_hints_range { * allocated with huge pages. * @hints_dram_reserved_va_range: dram hint addresses reserved range. * @hints_host_reserved_va_range: host hint addresses reserved range. - * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved - * range. + * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -583,9 +596,7 @@ struct hl_hints_range { * we display to the user * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. - * @mmu_hop_table_size: MMU hop table size. - * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. - * @dram_page_size: page size for MMU DRAM allocation. + * @dram_page_size: The DRAM physical page size. * @cfg_size: configuration space size on SRAM. * @sram_size: total size of SRAM. * @max_asid: maximum number of open contexts (ASIDs). @@ -638,9 +649,12 @@ struct hl_hints_range { * @num_engine_cores: number of engine cpu cores. * @max_num_of_engines: maximum number of all engines in the ASIC. * @num_of_special_blocks: special_blocks array size. - * @glbl_err_cause_num: global err cause number. + * @glbl_err_max_cause_num: global err max cause number. * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is * not supported. + * @reserved_fw_mem_size: size of dram memory reserved for FW. + * @fw_event_queue_size: queue size for events from CPU-CP. + * A value of 0 means using the default HL_EQ_SIZE_IN_BYTES value. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -686,9 +700,11 @@ struct hl_hints_range { * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. * @set_max_power_on_device_init: true if need to set max power in F/W on device init. * @supports_user_set_page_size: true if user can set the allocation page size. - * @dma_mask: the dma mask to be set for this device + * @dma_mask: the dma mask to be set for this device. * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. * @supports_engine_modes: true if changing engines/engine_cores modes is supported. + * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw. + * @supports_nvme: indicates whether the asic supports NVMe P2P DMA. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -734,8 +750,6 @@ struct asic_fixed_properties { u32 clk_pll_index; u32 mmu_pgt_size; u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; u32 dram_page_size; u32 cfg_size; u32 sram_size; @@ -770,8 +784,10 @@ struct asic_fixed_properties { u32 num_engine_cores; u32 max_num_of_engines; u32 num_of_special_blocks; - u32 glbl_err_cause_num; + u32 glbl_err_max_cause_num; u32 hbw_flush_reg; + u32 reserved_fw_mem_size; + u32 fw_event_queue_size; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -808,6 +824,8 @@ struct asic_fixed_properties { u8 dma_mask; u8 supports_advanced_cpucp_rc; u8 supports_engine_modes; + u8 support_dynamic_resereved_fw_size; + u8 supports_nvme; }; /** @@ -891,6 +909,18 @@ struct hl_mem_mgr { }; /** + * struct hl_mem_mgr_fini_stats - describes statistics returned during memory manager teardown. + * @n_busy_cb: the amount of CB handles that could not be removed + * @n_busy_ts: the amount of TS handles that could not be removed + * @n_busy_other: the amount of any other type of handles that could not be removed + */ +struct hl_mem_mgr_fini_stats { + u32 n_busy_cb; + u32 n_busy_ts; + u32 n_busy_other; +}; + +/** * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior * @topic: string identifier used for logging * @mem_id: memory type identifier, embedded in the handle and used to identify @@ -1041,6 +1071,8 @@ struct hl_encaps_signals_mgr { * @collective_mode: collective mode of current queue * @kernel_address: holds the queue's kernel virtual address. * @bus_address: holds the queue's DMA address. + * @pq_dram_address: hold the dram address when the PQ is allocated, used when dram_bd is true in + * queue properites. * @pi: holds the queue's pi value. * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci). * @hw_queue_id: the id of the H/W queue. @@ -1050,6 +1082,7 @@ struct hl_encaps_signals_mgr { * @valid: is the queue valid (we have array of 32 queues, not all of them * exist). * @supports_sync_stream: True if queue supports sync stream + * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram */ struct hl_hw_queue { struct hl_cs_job **shadow_queue; @@ -1058,6 +1091,7 @@ struct hl_hw_queue { enum hl_collective_mode collective_mode; void *kernel_address; dma_addr_t bus_address; + u64 pq_dram_address; u32 pi; atomic_t ci; u32 hw_queue_id; @@ -1066,6 +1100,7 @@ struct hl_hw_queue { u16 int_queue_len; u8 valid; u8 supports_sync_stream; + u8 dram_bd; }; /** @@ -1098,19 +1133,41 @@ enum hl_user_interrupt_type { }; /** + * struct hl_ts_free_jobs - holds user interrupt ts free nodes related data + * @free_nodes_pool: pool of nodes to be used for free timestamp jobs + * @free_nodes_length: number of nodes in free_nodes_pool + * @next_avail_free_node_idx: index of the next free node in the pool + * + * the free nodes pool must be protected by the user interrupt lock + * to avoid race between different interrupts which are using the same + * ts buffer with different offsets. + */ +struct hl_ts_free_jobs { + struct timestamp_reg_free_node *free_nodes_pool; + u32 free_nodes_length; + u32 next_avail_free_node_idx; +}; + +/** * struct hl_user_interrupt - holds user interrupt information * @hdev: pointer to the device structure + * @ts_free_jobs_data: timestamp free jobs related data * @type: user interrupt type * @wait_list_head: head to the list of user threads pending on this interrupt + * @ts_list_head: head to the list of timestamp records * @wait_list_lock: protects wait_list_head + * @ts_list_lock: protects ts_list_head * @timestamp: last timestamp taken upon interrupt * @interrupt_id: msix interrupt id */ struct hl_user_interrupt { struct hl_device *hdev; + struct hl_ts_free_jobs ts_free_jobs_data; enum hl_user_interrupt_type type; struct list_head wait_list_head; + struct list_head ts_list_head; spinlock_t wait_list_lock; + spinlock_t ts_list_lock; ktime_t timestamp; u32 interrupt_id; }; @@ -1120,11 +1177,15 @@ struct hl_user_interrupt { * @free_objects_node: node in the list free_obj_jobs * @cq_cb: pointer to cq command buffer to be freed * @buf: pointer to timestamp buffer to be freed + * @in_use: indicates whether the node still in use in workqueue thread. + * @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler */ struct timestamp_reg_free_node { struct list_head free_objects_node; struct hl_cb *cq_cb; struct hl_mmap_mem_buf *buf; + atomic_t in_use; + u8 dynamic_alloc; }; /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job @@ -1133,17 +1194,21 @@ struct timestamp_reg_free_node { * @free_obj: workqueue object to free timestamp registration node objects * @hdev: pointer to the device structure * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node) + * @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the + * interrupt handler. */ struct timestamp_reg_work_obj { struct work_struct free_obj; struct hl_device *hdev; struct list_head *free_obj_head; + struct list_head *dynamic_alloc_free_obj_head; }; /* struct timestamp_reg_info - holds the timestamp registration related data. * @buf: pointer to the timestamp buffer which include both user/kernel buffers. * relevant only when doing timestamps records registration. * @cq_cb: pointer to CQ counter CB. + * @interrupt: interrupt that the node hanged on it's wait list. * @timestamp_kernel_addr: timestamp handle address, where to set timestamp * relevant only when doing timestamps records * registration. @@ -1153,17 +1218,18 @@ struct timestamp_reg_work_obj { * allocating records dynamically. */ struct timestamp_reg_info { - struct hl_mmap_mem_buf *buf; - struct hl_cb *cq_cb; - u64 *timestamp_kernel_addr; - u8 in_use; + struct hl_mmap_mem_buf *buf; + struct hl_cb *cq_cb; + struct hl_user_interrupt *interrupt; + u64 *timestamp_kernel_addr; + bool in_use; }; /** * struct hl_user_pending_interrupt - holds a context to a user thread * pending on an interrupt * @ts_reg_info: holds the timestamps registration nodes info - * @wait_list_node: node in the list of user threads pending on an interrupt + * @list_node: node in the list of user threads pending on an interrupt or timestamp * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt @@ -1171,7 +1237,7 @@ struct timestamp_reg_info { */ struct hl_user_pending_interrupt { struct timestamp_reg_info ts_reg_info; - struct list_head wait_list_node; + struct list_head list_node; struct hl_fence fence; u64 cq_target_value; u64 *cq_kernel_addr; @@ -1182,6 +1248,7 @@ struct hl_user_pending_interrupt { * @hdev: pointer to the device structure * @kernel_address: holds the queue's kernel virtual address * @bus_address: holds the queue's DMA address + * @size: the event queue size * @ci: ci inside the queue * @prev_eqe_index: the index of the previous event queue entry. The index of * the current entry's index must be +1 of the previous one. @@ -1193,6 +1260,7 @@ struct hl_eq { struct hl_device *hdev; void *kernel_address; dma_addr_t bus_address; + u32 size; u32 ci; u32 prev_eqe_index; bool check_eqe_index; @@ -1220,14 +1288,19 @@ struct hl_dec { * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). * @ASIC_GAUDI2: Gaudi2 device. * @ASIC_GAUDI2B: Gaudi2B device. + * @ASIC_GAUDI2C: Gaudi2C device. + * @ASIC_GAUDI2D: Gaudi2D device. */ enum hl_asic_type { ASIC_INVALID, + ASIC_GOYA, ASIC_GAUDI, ASIC_GAUDI_SEC, ASIC_GAUDI2, ASIC_GAUDI2B, + ASIC_GAUDI2C, + ASIC_GAUDI2D, }; struct hl_cs_parser; @@ -1370,6 +1443,8 @@ struct dynamic_fw_load_mgr { * @boot_err0_reg: boot_err0 register address * @boot_err1_reg: boot_err1 register address * @wait_for_preboot_timeout: timeout to poll for preboot ready + * @wait_for_preboot_extended_timeout: timeout to pull for preboot ready in case where we know + * preboot needs longer time. */ struct pre_fw_load_props { u32 cpu_boot_status_reg; @@ -1378,6 +1453,7 @@ struct pre_fw_load_props { u32 boot_err0_reg; u32 boot_err1_reg; u32 wait_for_preboot_timeout; + u32 wait_for_preboot_extended_timeout; }; /** @@ -1477,11 +1553,9 @@ struct engines_data { * @asic_dma_pool_free: free small DMA allocation from pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. - * @asic_dma_unmap_single: unmap a single DMA buffer - * @asic_dma_map_single: map a single buffer to a DMA - * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table. + * @dma_unmap_sgtable: DMA unmap scatter-gather table. + * @dma_map_sgtable: DMA map scatter-gather table. * @cs_parser: parse Command Submission. - * @asic_dma_map_sgtable: DMA map scatter-gather table. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. @@ -1602,18 +1676,11 @@ struct hl_asic_funcs { size_t size, dma_addr_t *dma_handle); void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, size_t size, void *vaddr); - void (*asic_dma_unmap_single)(struct hl_device *hdev, - dma_addr_t dma_addr, int len, + void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); - dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev, - void *addr, int len, - enum dma_data_direction dir); - void (*hl_dma_unmap_sgtable)(struct hl_device *hdev, - struct sg_table *sgt, + int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); - int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, - enum dma_data_direction dir); void (*add_end_of_cb_packets)(struct hl_device *hdev, void *kernel_address, u32 len, u32 original_len, @@ -1771,16 +1838,19 @@ struct hl_cs_counters_atomic { * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported * where virtual memory is supported. * @memhash_hnode: pointer to the memhash node. this object holds the export count. - * @device_address: physical address of the device's memory. Relevant only - * if phys_pg_pack is NULL (dma-buf was exported from address). - * The total size can be taken from the dmabuf object. + * @offset: the offset into the buffer from which the memory is exported. + * Relevant only if virtual memory is supported and phys_pg_pack is being used. + * device_phys_addr: physical address of the device's memory. Relevant only + * if phys_pg_pack is NULL (dma-buf was exported from address). + * The total size can be taken from the dmabuf object. */ struct hl_dmabuf_priv { struct dma_buf *dmabuf; struct hl_ctx *ctx; struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_hash_node *memhash_hnode; - uint64_t device_address; + u64 offset; + u64 device_phys_addr; }; #define HL_CS_OUTCOME_HISTORY_LEN 256 @@ -1835,6 +1905,7 @@ struct hl_cs_outcome_store { * @va_range: holds available virtual addresses for host and dram mappings. * @mem_hash_lock: protects the mem_hash. * @hw_block_list_lock: protects the HW block memory list. + * @ts_reg_lock: timestamp registration ioctls lock. * @debugfs_list: node in debugfs list of contexts. * @hw_block_mem_list: list of HW block virtual mapped addresses. * @cs_counters: context command submission counters. @@ -1871,6 +1942,7 @@ struct hl_ctx { struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; struct mutex hw_block_list_lock; + struct mutex ts_reg_lock; struct list_head debugfs_list; struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; @@ -1917,17 +1989,17 @@ struct hl_ctx_mgr { * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. */ struct hl_userptr { - enum vm_type vm_type; /* must be first */ - struct list_head job_node; - struct page **pages; - unsigned int npages; - struct sg_table *sgt; - enum dma_data_direction dir; - struct list_head debugfs_list; - pid_t pid; - u64 addr; - u64 size; - u8 dma_mapped; + enum vm_type vm_type; /* must be first */ + struct list_head job_node; + struct page **pages; + unsigned int npages; + struct sg_table *sgt; + enum dma_data_direction dir; + struct list_head debugfs_list; + pid_t pid; + u64 addr; + u64 size; + u8 dma_mapped; }; /** @@ -2148,7 +2220,6 @@ struct hl_vm_hw_block_list_node { * @pages: the physical page array. * @npages: num physical pages in the pack. * @total_size: total size of all the pages in this list. - * @exported_size: buffer exported size. * @node: used to attach to deletion list that is used when all the allocations are cleared * at the teardown of the context. * @mapping_cnt: number of shared mappings. @@ -2165,7 +2236,6 @@ struct hl_vm_phys_pg_pack { u64 *pages; u64 npages; u64 total_size; - u64 exported_size; struct list_head node; atomic_t mapping_cnt; u32 asid; @@ -2208,6 +2278,9 @@ struct hl_vm { u8 init_done; }; +#ifdef CONFIG_HL_HLDIO +#include "hldio.h" +#endif /* * DEBUG, PROFILING STRUCTURE @@ -2250,7 +2323,7 @@ struct hl_notifier_event { /** * struct hl_fpriv - process information stored in FD private data. * @hdev: habanalabs device structure. - * @filp: pointer to the given file structure. + * @file_priv: pointer to the DRM file private data structure. * @taskpid: current process ID. * @ctx: current executing context. TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. @@ -2265,7 +2338,7 @@ struct hl_notifier_event { */ struct hl_fpriv { struct hl_device *hdev; - struct file *filp; + struct drm_file *file_priv; struct pid *taskpid; struct hl_ctx *ctx; struct hl_ctx_mgr ctx_mgr; @@ -2278,7 +2351,6 @@ struct hl_fpriv { struct mutex ctx_lock; }; - /* * DebugFS */ @@ -2306,6 +2378,7 @@ struct hl_debugfs_entry { struct hl_dbg_device_entry *dev_entry; }; + /** * struct hl_dbg_device_entry - ASIC specific debugfs manager. * @root: root dentry. @@ -2337,6 +2410,7 @@ struct hl_debugfs_entry { * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read. + * @dio_stats: Direct I/O statistics */ struct hl_dbg_device_entry { struct dentry *root; @@ -2368,6 +2442,35 @@ struct hl_dbg_device_entry { u8 i2c_addr; u8 i2c_reg; u8 i2c_len; +#ifdef CONFIG_HL_HLDIO + struct hl_dio_stats dio_stats; +#endif +}; + +/** + * struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of + * hl_debugfs_cfg_access. + * @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons. + * @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64. + * @addr: the requested address to access. + * @valid: if set, this entry has valid data for dumping at interrupt time. + */ +struct hl_debugfs_cfg_access_entry { + ktime_t seconds_since_epoch; + enum debugfs_access_type debugfs_type; + u64 addr; + bool valid; +}; + +/** + * struct hl_debugfs_cfg_access - saves debugfs config region access requests history. + * @cfg_access_list: list of objects describing config region access requests. + * @head: next valid index to add new entry to in cfg_access_list. + */ +struct hl_debugfs_cfg_access { + struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN]; + u32 head; + spinlock_t lock; /* protects head and entries */ }; /** @@ -2506,7 +2609,7 @@ struct hl_state_dump_specs { * DEVICES */ -#define HL_STR_MAX 32 +#define HL_STR_MAX 64 #define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1) @@ -2663,11 +2766,16 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); * updated directly by the device. If false, the host memory being polled will * be updated by host CPU. Required so host knows whether or not the memory * might need to be byte-swapped before returning value to caller. + * + * On the first 4 polling iterations the macro goes to sleep for short period of + * time that gradually increases and reaches sleep_us on the fifth iteration. */ #define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ mem_written_by_device) \ ({ \ + u64 __sleep_step_us; \ ktime_t __timeout; \ + u8 __step = 8; \ \ __timeout = ktime_add_us(ktime_get(), timeout_us); \ might_sleep_if(sleep_us); \ @@ -2685,8 +2793,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); (val) = le32_to_cpu(*(__le32 *) &(val)); \ break; \ } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + __sleep_step_us = sleep_us >> __step; \ + if (__sleep_step_us) \ + usleep_range((__sleep_step_us >> 2) + 1, __sleep_step_us); \ + __step >>= 1; \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) @@ -2706,6 +2816,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); usr_intr.type = intr_type; \ INIT_LIST_HEAD(&usr_intr.wait_list_head); \ spin_lock_init(&usr_intr.wait_list_lock); \ + INIT_LIST_HEAD(&usr_intr.ts_list_head); \ + spin_lock_init(&usr_intr.ts_list_lock); \ }) struct hwmon_chip_info; @@ -3055,6 +3167,20 @@ struct fw_err_info { }; /** + * struct engine_err_info - engine error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then an engine event was discovered for the + * first time after the driver has finished booting-up. + * @event_info_available: indicates that an engine event info is now available. + */ +struct engine_err_info { + struct hl_info_engine_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + + +/** * struct hl_error_info - holds information collected during an error. * @cs_timeout: CS timeout error information. * @razwi_info: RAZWI information. @@ -3062,6 +3188,7 @@ struct fw_err_info { * @page_fault_info: page fault information. * @hw_err: (fatal) hardware error information. * @fw_err: firmware error information. + * @engine_err: engine error information. */ struct hl_error_info { struct cs_timeout_info cs_timeout; @@ -3070,6 +3197,7 @@ struct hl_error_info { struct page_fault_info page_fault_info; struct hw_err_info hw_err; struct fw_err_info fw_err; + struct engine_err_info engine_err; }; /** @@ -3111,14 +3239,28 @@ struct hl_reset_info { }; /** + * struct eq_heartbeat_debug_info - stores debug info to be used upon heartbeat failure. + * @last_pq_heartbeat_ts: timestamp of the last test packet that was sent to FW. + * This packet is the trigger in FW to send the EQ heartbeat event. + * @last_eq_heartbeat_ts: timestamp of the last EQ heartbeat event that was received from FW. + * @heartbeat_event_counter: number of heartbeat events received. + * @cpu_queue_id: used to read the queue pi/ci + */ +struct eq_heartbeat_debug_info { + time64_t last_pq_heartbeat_ts; + time64_t last_eq_heartbeat_ts; + u32 heartbeat_event_counter; + u32 cpu_queue_id; +}; + +/** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pcie_bar_phys: array of available PCIe bars physical addresses. * (required only for PCI address match mode) * @pcie_bar: array of available PCIe bars virtual addresses. * @rmmio: configuration area address on SRAM. - * @hclass: pointer to the habanalabs class. - * @cdev: related char device. + * @drm: related DRM device. * @cdev_ctrl: char device for control operations only (INFO IOCTL) * @dev: related kernel basic device structure. * @dev_ctrl: related kernel device structure for the control device @@ -3176,6 +3318,7 @@ struct hl_reset_info { * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. + * @debugfs_cfg_accesses: list of last debugfs config region accesses. * @cb_pool: list of pre allocated CBs. * @cb_pool_lock: protects the CB pool. * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. @@ -3199,6 +3342,9 @@ struct hl_reset_info { * @clk_throttling: holds information about current/previous clock throttling events * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. + * @heartbeat_debug_info: counters used to debug heartbeat failures. + * @hldio: describes habanalabs direct storage interaction interface. + * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling. * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_inner_major_ver: the major of current loaded preboot inner version. * @fw_inner_minor_ver: the minor of current loaded preboot inner version. @@ -3245,12 +3391,12 @@ struct hl_reset_info { * @rotator_binning: contains mask of rotators engines that is received from the f/w * which indicates which rotator engines are binned-out(Gaudi3 and above). * @id: device minor. - * @id_control: minor of the control device. - * @cdev_idx: char device index. Used for setting its name. + * @cdev_idx: char device index. * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. * @disabled: is device disabled. + * @cpld_shutdown: is cpld shutdown. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. * @reset_on_lockup: true if a reset should be done in case of stuck CS, false @@ -3289,6 +3435,7 @@ struct hl_reset_info { * device. * @supports_ctx_switch: true if a ctx switch is required upon first submission. * @support_preboot_binning: true if we support read binning info from preboot. + * @eq_heartbeat_received: indication that eq heartbeat event has received from FW. * @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing. * @fw_components: Controls which f/w components to load to the device. There are multiple f/w * stages and sometimes we want to stop at a certain stage. Used only for testing. @@ -3308,8 +3455,7 @@ struct hl_device { u64 pcie_bar_phys[HL_PCI_NUM_BARS]; void __iomem *pcie_bar[HL_PCI_NUM_BARS]; void __iomem *rmmio; - struct class *hclass; - struct cdev cdev; + struct drm_device drm; struct cdev cdev_ctrl; struct device *dev; struct device *dev_ctrl; @@ -3355,6 +3501,7 @@ struct hl_device { struct hwmon_chip_info *hl_chip_info; struct hl_dbg_device_entry hl_debugfs; + struct hl_debugfs_cfg_access debugfs_cfg_accesses; struct list_head cb_pool; spinlock_t cb_pool_lock; @@ -3389,6 +3536,12 @@ struct hl_device { struct hl_reset_info reset_info; + struct eq_heartbeat_debug_info heartbeat_debug_info; +#ifdef CONFIG_HL_HLDIO + struct hl_dio hldio; +#endif + cpumask_t irq_affinity_mask; + u32 *stream_master_qid_arr; u32 fw_inner_major_ver; u32 fw_inner_minor_ver; @@ -3418,11 +3571,11 @@ struct hl_device { u32 device_release_watchdog_timeout_sec; u32 rotator_binning; u16 id; - u16 id_control; u16 cdev_idx; u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; u8 disabled; + u8 cpld_shutdown; u8 late_init_done; u8 hwmon_initialized; u8 reset_on_lockup; @@ -3451,6 +3604,7 @@ struct hl_device { u8 reset_upon_device_release; u8 supports_ctx_switch; u8 support_preboot_binning; + u8 eq_heartbeat_received; /* Parameters for bring-up to be upstreamed */ u64 nic_ports_mask; @@ -3464,6 +3618,9 @@ struct hl_device { u8 heartbeat; }; +/* Retrieve PCI device name in case of a PCI device or dev name in simulator */ +#define HL_DEV_NAME(hdev) \ + ((hdev)->pdev ? dev_name(&(hdev)->pdev->dev) : "NA-DEVICE") /** * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure @@ -3528,17 +3685,6 @@ struct hl_ioctl_desc { hl_ioctl_t *func; }; -static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major, u32 fw_sw_minor) -{ - if (hdev->fw_sw_major_ver < fw_sw_major) - return true; - if (hdev->fw_sw_major_ver > fw_sw_major) - return false; - if (hdev->fw_sw_minor_ver < fw_sw_minor) - return true; - return false; -} - /* * Kernel module functions that can be accessed by entire module */ @@ -3582,6 +3728,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size, return false; } +static inline struct hl_device *to_hl_device(struct drm_device *ddev) +{ + return container_of(ddev, struct hl_device, drm); +} + /** * hl_mem_area_crosses_range() - Checks whether address+size crossing a range. * @address: The start address of the area we want to validate. @@ -3611,8 +3762,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t dma_addr_t *dma_handle, const char *caller); void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, const char *caller); -int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); -void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, +int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller); +void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller); +int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir); +void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar); @@ -3620,7 +3776,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type); int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); -int hl_device_open(struct inode *inode, struct file *filp); + +int hl_mmap(struct file *filp, struct vm_area_struct *vma); + +int hl_device_open(struct drm_device *drm, struct drm_file *file_priv); +void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv); + int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, enum hl_device_status *status); @@ -3649,11 +3810,13 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); +void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); -irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg); +irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg); irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg); +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); @@ -3802,6 +3965,7 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_ struct hl_hr_mmu_funcs *hr_func); int hl_mmu_if_set_funcs(struct hl_device *hdev); void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); +void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, @@ -3809,7 +3973,24 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr); u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr); bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); - +struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr); +void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr); +void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info); +u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx); +u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx); +void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val); +void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val); +void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr); +u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); +void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr); +int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr); +u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop); +u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx); +void hl_mmu_dr_flush(struct hl_ctx *ctx); +int hl_mmu_dr_init(struct hl_device *hdev); +void hl_mmu_dr_fini(struct hl_device *hdev); + +int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value); @@ -3883,6 +4064,8 @@ long hl_fw_get_max_power(struct hl_device *hdev); void hl_fw_set_max_power(struct hl_device *hdev); int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, u32 nonce); +int hl_fw_get_dev_info_signed(struct hl_device *hdev, + struct cpucp_dev_info_signed *dev_info_signed, u32 nonce); int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); @@ -3922,7 +4105,7 @@ char *hl_format_as_binary(char *buf, size_t buf_len, u32 n); const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg); -void hl_mem_mgr_fini(struct hl_mem_mgr *mmg); +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg, struct hl_mem_mgr_fini_stats *stats); void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg); int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, void *args); @@ -3944,16 +4127,19 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_ u64 *event_mask); void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask); void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info); +void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count); void hl_enable_err_info_capture(struct hl_error_info *captured_err_info); +void hl_init_cpu_for_irq(struct hl_device *hdev); +void hl_set_irq_affinity(struct hl_device *hdev, int irq); +void hl_eq_heartbeat_event_handle(struct hl_device *hdev); +void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask); #ifdef CONFIG_DEBUG_FS -void hl_debugfs_init(void); -void hl_debugfs_fini(void); int hl_debugfs_device_init(struct hl_device *hdev); void hl_debugfs_device_fini(struct hl_device *hdev); void hl_debugfs_add_device(struct hl_device *hdev); -void hl_debugfs_remove_device(struct hl_device *hdev); void hl_debugfs_add_file(struct hl_fpriv *hpriv); void hl_debugfs_remove_file(struct hl_fpriv *hpriv); void hl_debugfs_add_cb(struct hl_cb *cb); @@ -3969,17 +4155,10 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, unsigned long length); +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev); #else -static inline void __init hl_debugfs_init(void) -{ -} - -static inline void hl_debugfs_fini(void) -{ -} - static inline int hl_debugfs_device_init(struct hl_device *hdev) { return 0; @@ -3993,10 +4172,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev) { } -static inline void hl_debugfs_remove_device(struct hl_device *hdev) -{ -} - static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv) { } @@ -4056,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev, { } +static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ +} + #endif /* Security */ @@ -4108,11 +4287,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset, const u32 pb_blocks[], u32 blocks_array_size); /* IOCTLs */ -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); #endif /* HABANALABSP_H_ */ |
