summaryrefslogtreecommitdiff
path: root/include/uapi/drm/habanalabs_accel.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/uapi/drm/habanalabs_accel.h')
-rw-r--r--include/uapi/drm/habanalabs_accel.h211
1 files changed, 173 insertions, 38 deletions
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index 331567ec9e79..a512dc4cffd0 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
- * Copyright 2016-2022 HabanaLabs, Ltd.
+ * Copyright 2016-2023 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -8,8 +8,7 @@
#ifndef HABANALABS_H_
#define HABANALABS_H_
-#include <linux/types.h>
-#include <linux/ioctl.h>
+#include <drm/drm.h>
/*
* Defines that are asic-specific but constitutes as ABI between kernel driver
@@ -607,9 +606,9 @@ enum gaudi2_engine_id {
/*
* ASIC specific PLL index
*
- * Used to retrieve in frequency info of different IPs via
- * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be
- * used as an index in struct hl_pll_frequency_info
+ * Used to retrieve in frequency info of different IPs via HL_INFO_PLL_FREQUENCY under
+ * DRM_IOCTL_HL_INFO IOCTL.
+ * The enums need to be used as an index in struct hl_pll_frequency_info.
*/
enum hl_goya_pll_index {
@@ -708,7 +707,8 @@ enum hl_server_type {
HL_SERVER_GAUDI_HLS1H = 2,
HL_SERVER_GAUDI_TYPE1 = 3,
HL_SERVER_GAUDI_TYPE2 = 4,
- HL_SERVER_GAUDI2_HLS2 = 5
+ HL_SERVER_GAUDI2_HLS2 = 5,
+ HL_SERVER_GAUDI2_TYPE1 = 7
};
/*
@@ -723,6 +723,10 @@ enum hl_server_type {
* HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error
* HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened
* HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened
+ * HL_NOTIFIER_EVENT_CRITICAL_HW_ERR - Indicates a HW error that requires SW abort and
+ * HW reset
+ * HL_NOTIFIER_EVENT_CRITICAL_FW_ERR - Indicates a FW error that requires SW abort and
+ * HW reset
*/
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
@@ -733,6 +737,8 @@ enum hl_server_type {
#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6)
#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7)
#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8)
+#define HL_NOTIFIER_EVENT_CRITICL_HW_ERR (1ULL << 9)
+#define HL_NOTIFIER_EVENT_CRITICL_FW_ERR (1ULL << 10)
/* Opcode for management ioctl
*
@@ -780,16 +786,29 @@ enum hl_server_type {
* The address which accessing it caused the razwi.
* Razwi initiator.
* Razwi cause, was it a page fault or MMU access error.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation
* HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot.
* HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications.
* HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
* HL_INFO_GET_EVENTS - Retrieve the last occurred events
* HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic.
* HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event.
* HL_INFO_FW_GENERIC_REQ - Send generic request to FW.
+ * HL_INFO_HW_ERR_EVENT - Retrieve information on the reported HW error.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
+ * HL_INFO_FW_ERR_EVENT - Retrieve information on the reported FW error.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
+ * HL_INFO_USER_ENGINE_ERR_EVENT - Retrieve the last engine id that reported an error.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -824,6 +843,10 @@ enum hl_server_type {
#define HL_INFO_PAGE_FAULT_EVENT 33
#define HL_INFO_USER_MAPPINGS 34
#define HL_INFO_FW_GENERIC_REQ 35
+#define HL_INFO_HW_ERR_EVENT 36
+#define HL_INFO_FW_ERR_EVENT 37
+#define HL_INFO_USER_ENGINE_ERR_EVENT 38
+#define HL_INFO_DEV_SIGNED 40
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@@ -863,11 +886,11 @@ enum hl_server_type {
* @dram_enabled: Whether the DRAM is enabled.
* @security_enabled: Whether security is enabled on device.
* @mme_master_slave_mode: Indicate whether the MME is working in master/slave
- * configuration. Relevant for Greco and later.
+ * configuration. Relevant for Gaudi2 and later.
* @cpucp_version: The CPUCP f/w version.
* @card_name: The card name as passed by the f/w.
* @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled.
- * Relevant for Greco and later.
+ * Relevant for Gaudi2 and later.
* @dram_page_size: The DRAM physical page size.
* @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled.
* Relevant for Gaudi2 and later.
@@ -875,6 +898,12 @@ enum hl_server_type {
* application to use. Relevant for Gaudi2 and later.
* @device_mem_alloc_default_page_size: default page size used in device memory allocation.
* @revision_id: PCI revision ID of the ASIC.
+ * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
+ * @rotator_enabled_mask: Bit-mask that represents which rotators are enabled.
+ * Relevant for Gaudi3 and later.
+ * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
+ * in order to raise events toward FW.
+ * @reserved_dram_size: DRAM size reserved for driver and firmware.
*/
struct hl_info_hw_ip_info {
__u64 sram_base_address;
@@ -902,15 +931,20 @@ struct hl_info_hw_ip_info {
__u64 dram_page_size;
__u32 edma_enabled_mask;
__u16 number_of_user_interrupts;
- __u16 pad2;
- __u64 reserved4;
+ __u8 reserved1;
+ __u8 reserved2;
+ __u64 reserved3;
__u64 device_mem_alloc_default_page_size;
+ __u64 reserved4;
__u64 reserved5;
- __u64 reserved6;
- __u32 reserved7;
- __u8 reserved8;
+ __u32 reserved6;
+ __u8 reserved7;
__u8 revision_id;
- __u8 pad[2];
+ __u16 tpc_interrupt_id;
+ __u32 rotator_enabled_mask;
+ __u32 reserved9;
+ __u64 engine_core_interrupt_reg_addr;
+ __u64 reserved_dram_size;
};
struct hl_info_dram_usage {
@@ -958,6 +992,7 @@ struct hl_info_reset_count {
struct hl_info_time_sync {
__u64 device_time;
__u64 host_time;
+ __u64 tsc_time;
};
/**
@@ -1162,6 +1197,53 @@ struct hl_info_undefined_opcode_event {
};
/**
+ * struct hl_info_hw_err_event - info about HW error
+ * @timestamp: timestamp of error occurrence
+ * @event_id: The async event ID (specific to each device type).
+ * @pad: size padding for u64 granularity.
+ */
+struct hl_info_hw_err_event {
+ __s64 timestamp;
+ __u16 event_id;
+ __u16 pad[3];
+};
+
+/* FW error definition for event_type in struct hl_info_fw_err_event */
+enum hl_info_fw_err_type {
+ HL_INFO_FW_HEARTBEAT_ERR,
+ HL_INFO_FW_REPORTED_ERR,
+};
+
+/**
+ * struct hl_info_fw_err_event - info about FW error
+ * @timestamp: time-stamp of error occurrence
+ * @err_type: The type of event as defined in hl_info_fw_err_type.
+ * @event_id: The async event ID (specific to each device type, applicable only when event type is
+ * HL_INFO_FW_REPORTED_ERR).
+ * @pad: size padding for u64 granularity.
+ */
+struct hl_info_fw_err_event {
+ __s64 timestamp;
+ __u16 err_type;
+ __u16 event_id;
+ __u32 pad;
+};
+
+/**
+ * struct hl_info_engine_err_event - engine error info
+ * @timestamp: time-stamp of error occurrence
+ * @engine_id: engine id who reported the error.
+ * @error_count: Amount of errors reported.
+ * @pad: size padding for u64 granularity.
+ */
+struct hl_info_engine_err_event {
+ __s64 timestamp;
+ __u16 engine_id;
+ __u16 error_count;
+ __u32 pad;
+};
+
+/**
* struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information.
* @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size
* (e.g. 0x2100000 means that 1MB and 32MB pages are supported).
@@ -1175,6 +1257,7 @@ struct hl_info_dev_memalloc_page_sizes {
#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */
+#define SEC_DEV_INFO_BUF_SZ 5120
/*
* struct hl_info_sec_attest - attestation report of the boot
@@ -1209,6 +1292,32 @@ struct hl_info_sec_attest {
__u8 pad0[2];
};
+/*
+ * struct hl_info_signed - device information signed by a secured device.
+ * @nonce: number only used once. random number provided by host. this also passed to the quote
+ * command as a qualifying data.
+ * @pub_data_len: length of the public data (bytes)
+ * @certificate_len: length of the certificate (bytes)
+ * @info_sig_len: length of the attestation signature (bytes)
+ * @public_data: public key info signed info data (outPublic + name + qualifiedName)
+ * @certificate: certificate for the signing key
+ * @info_sig: signature of the info + nonce data.
+ * @dev_info_len: length of device info (bytes)
+ * @dev_info: device info as byte array.
+ */
+struct hl_info_signed {
+ __u32 nonce;
+ __u16 pub_data_len;
+ __u16 certificate_len;
+ __u8 info_sig_len;
+ __u8 public_data[SEC_PUB_DATA_BUF_SZ];
+ __u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+ __u8 info_sig[SEC_SIGNATURE_BUF_SZ];
+ __u16 dev_info_len;
+ __u8 dev_info[SEC_DEV_INFO_BUF_SZ];
+ __u8 pad[2];
+};
+
/**
* struct hl_page_fault_info - page fault information.
* @timestamp: timestamp of page fault.
@@ -1344,7 +1453,7 @@ union hl_cb_args {
*
* HL_CS_CHUNK_FLAGS_USER_ALLOC_CB:
* Indicates if the CB was allocated and mapped by userspace
- * (relevant to greco and above). User allocated CB is a command buffer,
+ * (relevant to Gaudi2 and later). User allocated CB is a command buffer,
* allocated by the user, via malloc (or similar). After allocating the
* CB, the user invokes - “memory ioctl” to map the user memory into a
* device virtual address. The user provides this address via the
@@ -1369,7 +1478,7 @@ struct hl_cs_chunk {
* a DRAM address of the internal CB. In Gaudi, this might also
* represent a mapped host address of the CB.
*
- * Greco onwards:
+ * Gaudi2 onwards:
* For H/W queue, this represents either a Handle of CB on the
* Host, or an SRAM, a DRAM, or a mapped host address of the CB.
*
@@ -1486,17 +1595,31 @@ struct hl_cs_chunk {
*/
#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000
+/*
+ * The engines CS is merged into the existing CS ioctls.
+ * Use it to control engines modes.
+ */
+#define HL_CS_FLAGS_ENGINES_COMMAND 0x10000
+
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
-/* HL_ENGINE_CORE_ values
+/*
+ * enum hl_engine_command - engine command
*
- * HL_ENGINE_CORE_HALT: engine core halt
- * HL_ENGINE_CORE_RUN: engine core run
+ * @HL_ENGINE_CORE_HALT: engine core halt
+ * @HL_ENGINE_CORE_RUN: engine core run
+ * @HL_ENGINE_STALL: user engine/s stall
+ * @HL_ENGINE_RESUME: user engine/s resume
*/
-#define HL_ENGINE_CORE_HALT (1 << 0)
-#define HL_ENGINE_CORE_RUN (1 << 1)
+enum hl_engine_command {
+ HL_ENGINE_CORE_HALT = 1,
+ HL_ENGINE_CORE_RUN = 2,
+ HL_ENGINE_STALL = 3,
+ HL_ENGINE_RESUME = 4,
+ HL_ENGINE_COMMAND_MAX
+};
struct hl_cs_in {
@@ -1520,6 +1643,18 @@ struct hl_cs_in {
/* the core command to be sent towards engine cores */
__u32 core_command;
};
+
+ /* Valid only when HL_CS_FLAGS_ENGINES_COMMAND is set */
+ struct {
+ /* this holds address of array of uint32 for engines */
+ __u64 engines;
+
+ /* number of engines in engines array */
+ __u32 num_engines;
+
+ /* the engine command to be sent towards engines */
+ __u32 engine_command;
+ };
};
union {
@@ -2056,6 +2191,13 @@ struct hl_debug_args {
__u32 ctx_id;
};
+#define HL_IOCTL_INFO 0x00
+#define HL_IOCTL_CB 0x01
+#define HL_IOCTL_CS 0x02
+#define HL_IOCTL_WAIT_CS 0x03
+#define HL_IOCTL_MEMORY 0x04
+#define HL_IOCTL_DEBUG 0x05
+
/*
* Various information operations such as:
* - H/W IP information
@@ -2070,8 +2212,7 @@ struct hl_debug_args {
* definitions of structures in kernel and userspace, e.g. in case of old
* userspace and new kernel driver
*/
-#define HL_IOCTL_INFO \
- _IOWR('H', 0x01, struct hl_info_args)
+#define DRM_IOCTL_HL_INFO DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_INFO, struct hl_info_args)
/*
* Command Buffer
@@ -2092,8 +2233,7 @@ struct hl_debug_args {
* and won't be returned to user.
*
*/
-#define HL_IOCTL_CB \
- _IOWR('H', 0x02, union hl_cb_args)
+#define DRM_IOCTL_HL_CB DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CB, union hl_cb_args)
/*
* Command Submission
@@ -2115,7 +2255,7 @@ struct hl_debug_args {
* internal. The driver will get completion notifications from the device only
* on JOBS which are enqueued in the external queues.
*
- * Greco onwards:
+ * Gaudi2 onwards:
* There is a single type of queue for all types of engines, either DMA engines
* for transfers from/to the host or inside the device, or compute engines.
* The driver will get completion notifications from the device for all queues.
@@ -2145,8 +2285,7 @@ struct hl_debug_args {
* and only if CS N and CS N-1 are exactly the same (same CBs for the same
* queues).
*/
-#define HL_IOCTL_CS \
- _IOWR('H', 0x03, union hl_cs_args)
+#define DRM_IOCTL_HL_CS DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CS, union hl_cs_args)
/*
* Wait for Command Submission
@@ -2178,9 +2317,7 @@ struct hl_debug_args {
* HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the
* device was reset (EIO)
*/
-
-#define HL_IOCTL_WAIT_CS \
- _IOWR('H', 0x04, union hl_wait_cs_args)
+#define DRM_IOCTL_HL_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_WAIT_CS, union hl_wait_cs_args)
/*
* Memory
@@ -2197,8 +2334,7 @@ struct hl_debug_args {
* There is an option for the user to specify the requested virtual address.
*
*/
-#define HL_IOCTL_MEMORY \
- _IOWR('H', 0x05, union hl_mem_args)
+#define DRM_IOCTL_HL_MEMORY DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_MEMORY, union hl_mem_args)
/*
* Debug
@@ -2224,10 +2360,9 @@ struct hl_debug_args {
* The driver can decide to "kick out" the user if he abuses this interface.
*
*/
-#define HL_IOCTL_DEBUG \
- _IOWR('H', 0x06, struct hl_debug_args)
+#define DRM_IOCTL_HL_DEBUG DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_DEBUG, struct hl_debug_args)
-#define HL_COMMAND_START 0x01
-#define HL_COMMAND_END 0x07
+#define HL_COMMAND_START (DRM_COMMAND_BASE + HL_IOCTL_INFO)
+#define HL_COMMAND_END (DRM_COMMAND_BASE + HL_IOCTL_DEBUG + 1)
#endif /* HABANALABS_H_ */