diff options
Diffstat (limited to 'include/uapi/drm/habanalabs_accel.h')
| -rw-r--r-- | include/uapi/drm/habanalabs_accel.h | 211 |
1 files changed, 173 insertions, 38 deletions
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h index 331567ec9e79..a512dc4cffd0 100644 --- a/include/uapi/drm/habanalabs_accel.h +++ b/include/uapi/drm/habanalabs_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note * - * Copyright 2016-2022 HabanaLabs, Ltd. + * Copyright 2016-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -8,8 +8,7 @@ #ifndef HABANALABS_H_ #define HABANALABS_H_ -#include <linux/types.h> -#include <linux/ioctl.h> +#include <drm/drm.h> /* * Defines that are asic-specific but constitutes as ABI between kernel driver @@ -607,9 +606,9 @@ enum gaudi2_engine_id { /* * ASIC specific PLL index * - * Used to retrieve in frequency info of different IPs via - * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be - * used as an index in struct hl_pll_frequency_info + * Used to retrieve in frequency info of different IPs via HL_INFO_PLL_FREQUENCY under + * DRM_IOCTL_HL_INFO IOCTL. + * The enums need to be used as an index in struct hl_pll_frequency_info. */ enum hl_goya_pll_index { @@ -708,7 +707,8 @@ enum hl_server_type { HL_SERVER_GAUDI_HLS1H = 2, HL_SERVER_GAUDI_TYPE1 = 3, HL_SERVER_GAUDI_TYPE2 = 4, - HL_SERVER_GAUDI2_HLS2 = 5 + HL_SERVER_GAUDI2_HLS2 = 5, + HL_SERVER_GAUDI2_TYPE1 = 7 }; /* @@ -723,6 +723,10 @@ enum hl_server_type { * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened + * HL_NOTIFIER_EVENT_CRITICAL_HW_ERR - Indicates a HW error that requires SW abort and + * HW reset + * HL_NOTIFIER_EVENT_CRITICAL_FW_ERR - Indicates a FW error that requires SW abort and + * HW reset */ #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) @@ -733,6 +737,8 @@ enum hl_server_type { #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) #define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) #define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) +#define HL_NOTIFIER_EVENT_CRITICL_HW_ERR (1ULL << 9) +#define HL_NOTIFIER_EVENT_CRITICL_FW_ERR (1ULL << 10) /* Opcode for management ioctl * @@ -780,16 +786,29 @@ enum hl_server_type { * The address which accessing it caused the razwi. * Razwi initiator. * Razwi cause, was it a page fault or MMU access error. + * May return 0 even though no new data is available, in that case + * timestamp will be 0. * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd * HL_INFO_GET_EVENTS - Retrieve the last occurred events * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. + * May return 0 even though no new data is available, in that case + * timestamp will be 0. * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. + * May return 0 even though no new data is available, in that case + * timestamp will be 0. * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. * HL_INFO_FW_GENERIC_REQ - Send generic request to FW. + * HL_INFO_HW_ERR_EVENT - Retrieve information on the reported HW error. + * May return 0 even though no new data is available, in that case + * timestamp will be 0. + * HL_INFO_FW_ERR_EVENT - Retrieve information on the reported FW error. + * May return 0 even though no new data is available, in that case + * timestamp will be 0. + * HL_INFO_USER_ENGINE_ERR_EVENT - Retrieve the last engine id that reported an error. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -824,6 +843,10 @@ enum hl_server_type { #define HL_INFO_PAGE_FAULT_EVENT 33 #define HL_INFO_USER_MAPPINGS 34 #define HL_INFO_FW_GENERIC_REQ 35 +#define HL_INFO_HW_ERR_EVENT 36 +#define HL_INFO_FW_ERR_EVENT 37 +#define HL_INFO_USER_ENGINE_ERR_EVENT 38 +#define HL_INFO_DEV_SIGNED 40 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -863,11 +886,11 @@ enum hl_server_type { * @dram_enabled: Whether the DRAM is enabled. * @security_enabled: Whether security is enabled on device. * @mme_master_slave_mode: Indicate whether the MME is working in master/slave - * configuration. Relevant for Greco and later. + * configuration. Relevant for Gaudi2 and later. * @cpucp_version: The CPUCP f/w version. * @card_name: The card name as passed by the f/w. * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. - * Relevant for Greco and later. + * Relevant for Gaudi2 and later. * @dram_page_size: The DRAM physical page size. * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. * Relevant for Gaudi2 and later. @@ -875,6 +898,12 @@ enum hl_server_type { * application to use. Relevant for Gaudi2 and later. * @device_mem_alloc_default_page_size: default page size used in device memory allocation. * @revision_id: PCI revision ID of the ASIC. + * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host. + * @rotator_enabled_mask: Bit-mask that represents which rotators are enabled. + * Relevant for Gaudi3 and later. + * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use + * in order to raise events toward FW. + * @reserved_dram_size: DRAM size reserved for driver and firmware. */ struct hl_info_hw_ip_info { __u64 sram_base_address; @@ -902,15 +931,20 @@ struct hl_info_hw_ip_info { __u64 dram_page_size; __u32 edma_enabled_mask; __u16 number_of_user_interrupts; - __u16 pad2; - __u64 reserved4; + __u8 reserved1; + __u8 reserved2; + __u64 reserved3; __u64 device_mem_alloc_default_page_size; + __u64 reserved4; __u64 reserved5; - __u64 reserved6; - __u32 reserved7; - __u8 reserved8; + __u32 reserved6; + __u8 reserved7; __u8 revision_id; - __u8 pad[2]; + __u16 tpc_interrupt_id; + __u32 rotator_enabled_mask; + __u32 reserved9; + __u64 engine_core_interrupt_reg_addr; + __u64 reserved_dram_size; }; struct hl_info_dram_usage { @@ -958,6 +992,7 @@ struct hl_info_reset_count { struct hl_info_time_sync { __u64 device_time; __u64 host_time; + __u64 tsc_time; }; /** @@ -1162,6 +1197,53 @@ struct hl_info_undefined_opcode_event { }; /** + * struct hl_info_hw_err_event - info about HW error + * @timestamp: timestamp of error occurrence + * @event_id: The async event ID (specific to each device type). + * @pad: size padding for u64 granularity. + */ +struct hl_info_hw_err_event { + __s64 timestamp; + __u16 event_id; + __u16 pad[3]; +}; + +/* FW error definition for event_type in struct hl_info_fw_err_event */ +enum hl_info_fw_err_type { + HL_INFO_FW_HEARTBEAT_ERR, + HL_INFO_FW_REPORTED_ERR, +}; + +/** + * struct hl_info_fw_err_event - info about FW error + * @timestamp: time-stamp of error occurrence + * @err_type: The type of event as defined in hl_info_fw_err_type. + * @event_id: The async event ID (specific to each device type, applicable only when event type is + * HL_INFO_FW_REPORTED_ERR). + * @pad: size padding for u64 granularity. + */ +struct hl_info_fw_err_event { + __s64 timestamp; + __u16 err_type; + __u16 event_id; + __u32 pad; +}; + +/** + * struct hl_info_engine_err_event - engine error info + * @timestamp: time-stamp of error occurrence + * @engine_id: engine id who reported the error. + * @error_count: Amount of errors reported. + * @pad: size padding for u64 granularity. + */ +struct hl_info_engine_err_event { + __s64 timestamp; + __u16 engine_id; + __u16 error_count; + __u32 pad; +}; + +/** * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information. * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size * (e.g. 0x2100000 means that 1MB and 32MB pages are supported). @@ -1175,6 +1257,7 @@ struct hl_info_dev_memalloc_page_sizes { #define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ #define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ #define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ +#define SEC_DEV_INFO_BUF_SZ 5120 /* * struct hl_info_sec_attest - attestation report of the boot @@ -1209,6 +1292,32 @@ struct hl_info_sec_attest { __u8 pad0[2]; }; +/* + * struct hl_info_signed - device information signed by a secured device. + * @nonce: number only used once. random number provided by host. this also passed to the quote + * command as a qualifying data. + * @pub_data_len: length of the public data (bytes) + * @certificate_len: length of the certificate (bytes) + * @info_sig_len: length of the attestation signature (bytes) + * @public_data: public key info signed info data (outPublic + name + qualifiedName) + * @certificate: certificate for the signing key + * @info_sig: signature of the info + nonce data. + * @dev_info_len: length of device info (bytes) + * @dev_info: device info as byte array. + */ +struct hl_info_signed { + __u32 nonce; + __u16 pub_data_len; + __u16 certificate_len; + __u8 info_sig_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __u16 dev_info_len; + __u8 dev_info[SEC_DEV_INFO_BUF_SZ]; + __u8 pad[2]; +}; + /** * struct hl_page_fault_info - page fault information. * @timestamp: timestamp of page fault. @@ -1344,7 +1453,7 @@ union hl_cb_args { * * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: * Indicates if the CB was allocated and mapped by userspace - * (relevant to greco and above). User allocated CB is a command buffer, + * (relevant to Gaudi2 and later). User allocated CB is a command buffer, * allocated by the user, via malloc (or similar). After allocating the * CB, the user invokes - “memory ioctl” to map the user memory into a * device virtual address. The user provides this address via the @@ -1369,7 +1478,7 @@ struct hl_cs_chunk { * a DRAM address of the internal CB. In Gaudi, this might also * represent a mapped host address of the CB. * - * Greco onwards: + * Gaudi2 onwards: * For H/W queue, this represents either a Handle of CB on the * Host, or an SRAM, a DRAM, or a mapped host address of the CB. * @@ -1486,17 +1595,31 @@ struct hl_cs_chunk { */ #define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000 +/* + * The engines CS is merged into the existing CS ioctls. + * Use it to control engines modes. + */ +#define HL_CS_FLAGS_ENGINES_COMMAND 0x10000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 -/* HL_ENGINE_CORE_ values +/* + * enum hl_engine_command - engine command * - * HL_ENGINE_CORE_HALT: engine core halt - * HL_ENGINE_CORE_RUN: engine core run + * @HL_ENGINE_CORE_HALT: engine core halt + * @HL_ENGINE_CORE_RUN: engine core run + * @HL_ENGINE_STALL: user engine/s stall + * @HL_ENGINE_RESUME: user engine/s resume */ -#define HL_ENGINE_CORE_HALT (1 << 0) -#define HL_ENGINE_CORE_RUN (1 << 1) +enum hl_engine_command { + HL_ENGINE_CORE_HALT = 1, + HL_ENGINE_CORE_RUN = 2, + HL_ENGINE_STALL = 3, + HL_ENGINE_RESUME = 4, + HL_ENGINE_COMMAND_MAX +}; struct hl_cs_in { @@ -1520,6 +1643,18 @@ struct hl_cs_in { /* the core command to be sent towards engine cores */ __u32 core_command; }; + + /* Valid only when HL_CS_FLAGS_ENGINES_COMMAND is set */ + struct { + /* this holds address of array of uint32 for engines */ + __u64 engines; + + /* number of engines in engines array */ + __u32 num_engines; + + /* the engine command to be sent towards engines */ + __u32 engine_command; + }; }; union { @@ -2056,6 +2191,13 @@ struct hl_debug_args { __u32 ctx_id; }; +#define HL_IOCTL_INFO 0x00 +#define HL_IOCTL_CB 0x01 +#define HL_IOCTL_CS 0x02 +#define HL_IOCTL_WAIT_CS 0x03 +#define HL_IOCTL_MEMORY 0x04 +#define HL_IOCTL_DEBUG 0x05 + /* * Various information operations such as: * - H/W IP information @@ -2070,8 +2212,7 @@ struct hl_debug_args { * definitions of structures in kernel and userspace, e.g. in case of old * userspace and new kernel driver */ -#define HL_IOCTL_INFO \ - _IOWR('H', 0x01, struct hl_info_args) +#define DRM_IOCTL_HL_INFO DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_INFO, struct hl_info_args) /* * Command Buffer @@ -2092,8 +2233,7 @@ struct hl_debug_args { * and won't be returned to user. * */ -#define HL_IOCTL_CB \ - _IOWR('H', 0x02, union hl_cb_args) +#define DRM_IOCTL_HL_CB DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CB, union hl_cb_args) /* * Command Submission @@ -2115,7 +2255,7 @@ struct hl_debug_args { * internal. The driver will get completion notifications from the device only * on JOBS which are enqueued in the external queues. * - * Greco onwards: + * Gaudi2 onwards: * There is a single type of queue for all types of engines, either DMA engines * for transfers from/to the host or inside the device, or compute engines. * The driver will get completion notifications from the device for all queues. @@ -2145,8 +2285,7 @@ struct hl_debug_args { * and only if CS N and CS N-1 are exactly the same (same CBs for the same * queues). */ -#define HL_IOCTL_CS \ - _IOWR('H', 0x03, union hl_cs_args) +#define DRM_IOCTL_HL_CS DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CS, union hl_cs_args) /* * Wait for Command Submission @@ -2178,9 +2317,7 @@ struct hl_debug_args { * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the * device was reset (EIO) */ - -#define HL_IOCTL_WAIT_CS \ - _IOWR('H', 0x04, union hl_wait_cs_args) +#define DRM_IOCTL_HL_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_WAIT_CS, union hl_wait_cs_args) /* * Memory @@ -2197,8 +2334,7 @@ struct hl_debug_args { * There is an option for the user to specify the requested virtual address. * */ -#define HL_IOCTL_MEMORY \ - _IOWR('H', 0x05, union hl_mem_args) +#define DRM_IOCTL_HL_MEMORY DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_MEMORY, union hl_mem_args) /* * Debug @@ -2224,10 +2360,9 @@ struct hl_debug_args { * The driver can decide to "kick out" the user if he abuses this interface. * */ -#define HL_IOCTL_DEBUG \ - _IOWR('H', 0x06, struct hl_debug_args) +#define DRM_IOCTL_HL_DEBUG DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_DEBUG, struct hl_debug_args) -#define HL_COMMAND_START 0x01 -#define HL_COMMAND_END 0x07 +#define HL_COMMAND_START (DRM_COMMAND_BASE + HL_IOCTL_INFO) +#define HL_COMMAND_END (DRM_COMMAND_BASE + HL_IOCTL_DEBUG + 1) #endif /* HABANALABS_H_ */ |
